LLVM  7.0.0svn
IndVarSimplify.cpp
Go to the documentation of this file.
1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This transformation analyzes and transforms the induction variables (and
11 // computations derived from them) into simpler forms suitable for subsequent
12 // analysis and transformation.
13 //
14 // If the trip count of a loop is computable, this pass also makes the following
15 // changes:
16 // 1. The exit condition for the loop is canonicalized to compare the
17 // induction value against the exit value. This turns loops like:
18 // 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
19 // 2. Any use outside of the loop of an expression derived from the indvar
20 // is changed to compute the derived value outside of the loop, eliminating
21 // the dependence on the exit value of the induction variable. If the only
22 // purpose of the loop is to compute the exit value of some derived
23 // expression, this transformation will make the loop dead.
24 //
25 //===----------------------------------------------------------------------===//
26 
28 #include "llvm/ADT/APFloat.h"
29 #include "llvm/ADT/APInt.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/DenseMap.h"
32 #include "llvm/ADT/None.h"
33 #include "llvm/ADT/Optional.h"
34 #include "llvm/ADT/STLExtras.h"
35 #include "llvm/ADT/SmallPtrSet.h"
36 #include "llvm/ADT/SmallVector.h"
37 #include "llvm/ADT/Statistic.h"
39 #include "llvm/Analysis/LoopInfo.h"
40 #include "llvm/Analysis/LoopPass.h"
47 #include "llvm/IR/BasicBlock.h"
48 #include "llvm/IR/Constant.h"
49 #include "llvm/IR/ConstantRange.h"
50 #include "llvm/IR/Constants.h"
51 #include "llvm/IR/DataLayout.h"
52 #include "llvm/IR/DerivedTypes.h"
53 #include "llvm/IR/Dominators.h"
54 #include "llvm/IR/Function.h"
55 #include "llvm/IR/IRBuilder.h"
56 #include "llvm/IR/InstrTypes.h"
57 #include "llvm/IR/Instruction.h"
58 #include "llvm/IR/Instructions.h"
59 #include "llvm/IR/IntrinsicInst.h"
60 #include "llvm/IR/Intrinsics.h"
61 #include "llvm/IR/Module.h"
62 #include "llvm/IR/Operator.h"
63 #include "llvm/IR/PassManager.h"
64 #include "llvm/IR/PatternMatch.h"
65 #include "llvm/IR/Type.h"
66 #include "llvm/IR/Use.h"
67 #include "llvm/IR/User.h"
68 #include "llvm/IR/Value.h"
69 #include "llvm/IR/ValueHandle.h"
70 #include "llvm/Pass.h"
71 #include "llvm/Support/Casting.h"
73 #include "llvm/Support/Compiler.h"
74 #include "llvm/Support/Debug.h"
78 #include "llvm/Transforms/Scalar.h"
83 #include <cassert>
84 #include <cstdint>
85 #include <utility>
86 
87 using namespace llvm;
88 
89 #define DEBUG_TYPE "indvars"
90 
91 STATISTIC(NumWidened , "Number of indvars widened");
92 STATISTIC(NumReplaced , "Number of exit values replaced");
93 STATISTIC(NumLFTR , "Number of loop exit tests replaced");
94 STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
95 STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
96 
97 // Trip count verification can be enabled by default under NDEBUG if we
98 // implement a strong expression equivalence checker in SCEV. Until then, we
99 // use the verify-indvars flag, which may assert in some cases.
101  "verify-indvars", cl::Hidden,
102  cl::desc("Verify the ScalarEvolution result after running indvars"));
103 
105 
107  "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
108  cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
109  cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
110  clEnumValN(OnlyCheapRepl, "cheap",
111  "only replace exit value when the cost is cheap"),
112  clEnumValN(AlwaysRepl, "always",
113  "always replace exit value whenever possible")));
114 
116  "indvars-post-increment-ranges", cl::Hidden,
117  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
118  cl::init(true));
119 
120 static cl::opt<bool>
121 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
122  cl::desc("Disable Linear Function Test Replace optimization"));
123 
124 namespace {
125 
126 struct RewritePhi;
127 
128 class IndVarSimplify {
129  LoopInfo *LI;
130  ScalarEvolution *SE;
131  DominatorTree *DT;
132  const DataLayout &DL;
133  TargetLibraryInfo *TLI;
134  const TargetTransformInfo *TTI;
135 
137  bool Changed = false;
138 
139  bool isValidRewrite(Value *FromVal, Value *ToVal);
140 
141  void handleFloatingPointIV(Loop *L, PHINode *PH);
142  void rewriteNonIntegerIVs(Loop *L);
143 
144  void simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
145 
146  bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
147  void rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
148  void rewriteFirstIterationLoopExitValues(Loop *L);
149 
150  Value *linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
151  PHINode *IndVar, SCEVExpander &Rewriter);
152 
153  void sinkUnusedInvariants(Loop *L);
154 
155  Value *expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, Loop *L,
156  Instruction *InsertPt, Type *Ty);
157 
158 public:
159  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
160  const DataLayout &DL, TargetLibraryInfo *TLI,
161  TargetTransformInfo *TTI)
162  : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {}
163 
164  bool run(Loop *L);
165 };
166 
167 } // end anonymous namespace
168 
169 /// Return true if the SCEV expansion generated by the rewriter can replace the
170 /// original value. SCEV guarantees that it produces the same value, but the way
171 /// it is produced may be illegal IR. Ideally, this function will only be
172 /// called for verification.
173 bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
174  // If an SCEV expression subsumed multiple pointers, its expansion could
175  // reassociate the GEP changing the base pointer. This is illegal because the
176  // final address produced by a GEP chain must be inbounds relative to its
177  // underlying object. Otherwise basic alias analysis, among other things,
178  // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
179  // producing an expression involving multiple pointers. Until then, we must
180  // bail out here.
181  //
182  // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
183  // because it understands lcssa phis while SCEV does not.
184  Value *FromPtr = FromVal;
185  Value *ToPtr = ToVal;
186  if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
187  FromPtr = GEP->getPointerOperand();
188  }
189  if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
190  ToPtr = GEP->getPointerOperand();
191  }
192  if (FromPtr != FromVal || ToPtr != ToVal) {
193  // Quickly check the common case
194  if (FromPtr == ToPtr)
195  return true;
196 
197  // SCEV may have rewritten an expression that produces the GEP's pointer
198  // operand. That's ok as long as the pointer operand has the same base
199  // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
200  // base of a recurrence. This handles the case in which SCEV expansion
201  // converts a pointer type recurrence into a nonrecurrent pointer base
202  // indexed by an integer recurrence.
203 
204  // If the GEP base pointer is a vector of pointers, abort.
205  if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
206  return false;
207 
208  const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
209  const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
210  if (FromBase == ToBase)
211  return true;
212 
213  LLVM_DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " << *FromBase
214  << " != " << *ToBase << "\n");
215 
216  return false;
217  }
218  return true;
219 }
220 
221 /// Determine the insertion point for this user. By default, insert immediately
222 /// before the user. SCEVExpander or LICM will hoist loop invariants out of the
223 /// loop. For PHI nodes, there may be multiple uses, so compute the nearest
224 /// common dominator for the incoming blocks.
226  DominatorTree *DT, LoopInfo *LI) {
227  PHINode *PHI = dyn_cast<PHINode>(User);
228  if (!PHI)
229  return User;
230 
231  Instruction *InsertPt = nullptr;
232  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
233  if (PHI->getIncomingValue(i) != Def)
234  continue;
235 
236  BasicBlock *InsertBB = PHI->getIncomingBlock(i);
237  if (!InsertPt) {
238  InsertPt = InsertBB->getTerminator();
239  continue;
240  }
241  InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
242  InsertPt = InsertBB->getTerminator();
243  }
244  assert(InsertPt && "Missing phi operand");
245 
246  auto *DefI = dyn_cast<Instruction>(Def);
247  if (!DefI)
248  return InsertPt;
249 
250  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");
251 
252  auto *L = LI->getLoopFor(DefI->getParent());
253  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));
254 
255  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
256  if (LI->getLoopFor(DTN->getBlock()) == L)
257  return DTN->getBlock()->getTerminator();
258 
259  llvm_unreachable("DefI dominates InsertPt!");
260 }
261 
262 //===----------------------------------------------------------------------===//
263 // rewriteNonIntegerIVs and helpers. Prefer integer IVs.
264 //===----------------------------------------------------------------------===//
265 
266 /// Convert APF to an integer, if possible.
267 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
268  bool isExact = false;
269  // See if we can convert this to an int64_t
270  uint64_t UIntVal;
271  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
272  APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
273  !isExact)
274  return false;
275  IntVal = UIntVal;
276  return true;
277 }
278 
279 /// If the loop has floating induction variable then insert corresponding
280 /// integer induction variable if possible.
281 /// For example,
282 /// for(double i = 0; i < 10000; ++i)
283 /// bar(i)
284 /// is converted into
285 /// for(int i = 0; i < 10000; ++i)
286 /// bar((double)i);
287 void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
288  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
289  unsigned BackEdge = IncomingEdge^1;
290 
291  // Check incoming value.
292  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
293 
294  int64_t InitValue;
295  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
296  return;
297 
298  // Check IV increment. Reject this PN if increment operation is not
299  // an add or increment value can not be represented by an integer.
300  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
301  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return;
302 
303  // If this is not an add of the PHI with a constantfp, or if the constant fp
304  // is not an integer, bail out.
305  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
306  int64_t IncValue;
307  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
308  !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
309  return;
310 
311  // Check Incr uses. One user is PN and the other user is an exit condition
312  // used by the conditional terminator.
313  Value::user_iterator IncrUse = Incr->user_begin();
314  Instruction *U1 = cast<Instruction>(*IncrUse++);
315  if (IncrUse == Incr->user_end()) return;
316  Instruction *U2 = cast<Instruction>(*IncrUse++);
317  if (IncrUse != Incr->user_end()) return;
318 
319  // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
320  // only used by a branch, we can't transform it.
322  if (!Compare)
323  Compare = dyn_cast<FCmpInst>(U2);
324  if (!Compare || !Compare->hasOneUse() ||
325  !isa<BranchInst>(Compare->user_back()))
326  return;
327 
328  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
329 
330  // We need to verify that the branch actually controls the iteration count
331  // of the loop. If not, the new IV can overflow and no one will notice.
332  // The branch block must be in the loop and one of the successors must be out
333  // of the loop.
334  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
335  if (!L->contains(TheBr->getParent()) ||
336  (L->contains(TheBr->getSuccessor(0)) &&
337  L->contains(TheBr->getSuccessor(1))))
338  return;
339 
340  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
341  // transform it.
342  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
343  int64_t ExitValue;
344  if (ExitValueVal == nullptr ||
345  !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
346  return;
347 
348  // Find new predicate for integer comparison.
350  switch (Compare->getPredicate()) {
351  default: return; // Unknown comparison.
352  case CmpInst::FCMP_OEQ:
353  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
354  case CmpInst::FCMP_ONE:
355  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
356  case CmpInst::FCMP_OGT:
357  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
358  case CmpInst::FCMP_OGE:
359  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
360  case CmpInst::FCMP_OLT:
361  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
362  case CmpInst::FCMP_OLE:
363  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
364  }
365 
366  // We convert the floating point induction variable to a signed i32 value if
367  // we can. This is only safe if the comparison will not overflow in a way
368  // that won't be trapped by the integer equivalent operations. Check for this
369  // now.
370  // TODO: We could use i64 if it is native and the range requires it.
371 
372  // The start/stride/exit values must all fit in signed i32.
373  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
374  return;
375 
376  // If not actually striding (add x, 0.0), avoid touching the code.
377  if (IncValue == 0)
378  return;
379 
380  // Positive and negative strides have different safety conditions.
381  if (IncValue > 0) {
382  // If we have a positive stride, we require the init to be less than the
383  // exit value.
384  if (InitValue >= ExitValue)
385  return;
386 
387  uint32_t Range = uint32_t(ExitValue-InitValue);
388  // Check for infinite loop, either:
389  // while (i <= Exit) or until (i > Exit)
390  if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
391  if (++Range == 0) return; // Range overflows.
392  }
393 
394  unsigned Leftover = Range % uint32_t(IncValue);
395 
396  // If this is an equality comparison, we require that the strided value
397  // exactly land on the exit value, otherwise the IV condition will wrap
398  // around and do things the fp IV wouldn't.
399  if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
400  Leftover != 0)
401  return;
402 
403  // If the stride would wrap around the i32 before exiting, we can't
404  // transform the IV.
405  if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
406  return;
407  } else {
408  // If we have a negative stride, we require the init to be greater than the
409  // exit value.
410  if (InitValue <= ExitValue)
411  return;
412 
413  uint32_t Range = uint32_t(InitValue-ExitValue);
414  // Check for infinite loop, either:
415  // while (i >= Exit) or until (i < Exit)
416  if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
417  if (++Range == 0) return; // Range overflows.
418  }
419 
420  unsigned Leftover = Range % uint32_t(-IncValue);
421 
422  // If this is an equality comparison, we require that the strided value
423  // exactly land on the exit value, otherwise the IV condition will wrap
424  // around and do things the fp IV wouldn't.
425  if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
426  Leftover != 0)
427  return;
428 
429  // If the stride would wrap around the i32 before exiting, we can't
430  // transform the IV.
431  if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
432  return;
433  }
434 
436 
437  // Insert new integer induction variable.
438  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
439  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
440  PN->getIncomingBlock(IncomingEdge));
441 
442  Value *NewAdd =
443  BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
444  Incr->getName()+".int", Incr);
445  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
446 
447  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
448  ConstantInt::get(Int32Ty, ExitValue),
449  Compare->getName());
450 
451  // In the following deletions, PN may become dead and may be deleted.
452  // Use a WeakTrackingVH to observe whether this happens.
453  WeakTrackingVH WeakPH = PN;
454 
455  // Delete the old floating point exit comparison. The branch starts using the
456  // new comparison.
457  NewCompare->takeName(Compare);
458  Compare->replaceAllUsesWith(NewCompare);
460 
461  // Delete the old floating point increment.
462  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
464 
465  // If the FP induction variable still has uses, this is because something else
466  // in the loop uses its value. In order to canonicalize the induction
467  // variable, we chose to eliminate the IV and rewrite it in terms of an
468  // int->fp cast.
469  //
470  // We give preference to sitofp over uitofp because it is faster on most
471  // platforms.
472  if (WeakPH) {
473  Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
474  &*PN->getParent()->getFirstInsertionPt());
475  PN->replaceAllUsesWith(Conv);
477  }
478  Changed = true;
479 }
480 
481 void IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
482  // First step. Check to see if there are any floating-point recurrences.
483  // If there are, change them into integer recurrences, permitting analysis by
484  // the SCEV routines.
485  BasicBlock *Header = L->getHeader();
486 
488  for (PHINode &PN : Header->phis())
489  PHIs.push_back(&PN);
490 
491  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
492  if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
493  handleFloatingPointIV(L, PN);
494 
495  // If the loop previously had floating-point IV, ScalarEvolution
496  // may not have been able to compute a trip count. Now that we've done some
497  // re-writing, the trip count may be computable.
498  if (Changed)
499  SE->forgetLoop(L);
500 }
501 
502 namespace {
503 
504 // Collect information about PHI nodes which can be transformed in
505 // rewriteLoopExitValues.
506 struct RewritePhi {
507  PHINode *PN;
508 
509  // Ith incoming value.
510  unsigned Ith;
511 
512  // Exit value after expansion.
513  Value *Val;
514 
515  // High Cost when expansion.
516  bool HighCost;
517 
518  RewritePhi(PHINode *P, unsigned I, Value *V, bool H)
519  : PN(P), Ith(I), Val(V), HighCost(H) {}
520 };
521 
522 } // end anonymous namespace
523 
524 Value *IndVarSimplify::expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S,
525  Loop *L, Instruction *InsertPt,
526  Type *ResultTy) {
527  // Before expanding S into an expensive LLVM expression, see if we can use an
528  // already existing value as the expansion for S.
529  if (Value *ExistingValue = Rewriter.getExactExistingExpansion(S, InsertPt, L))
530  if (ExistingValue->getType() == ResultTy)
531  return ExistingValue;
532 
533  // We didn't find anything, fall back to using SCEVExpander.
534  return Rewriter.expandCodeFor(S, ResultTy, InsertPt);
535 }
536 
537 //===----------------------------------------------------------------------===//
538 // rewriteLoopExitValues - Optimize IV users outside the loop.
539 // As a side effect, reduces the amount of IV processing within the loop.
540 //===----------------------------------------------------------------------===//
541 
542 /// Check to see if this loop has a computable loop-invariant execution count.
543 /// If so, this means that we can compute the final value of any expressions
544 /// that are recurrent in the loop, and substitute the exit values from the loop
545 /// into any instructions outside of the loop that use the final values of the
546 /// current expressions.
547 ///
548 /// This is mostly redundant with the regular IndVarSimplify activities that
549 /// happen later, except that it's more powerful in some cases, because it's
550 /// able to brute-force evaluate arbitrary instructions as long as they have
551 /// constant operands at the beginning of the loop.
552 void IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
553  // Check a pre-condition.
554  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
555  "Indvars did not preserve LCSSA!");
556 
557  SmallVector<BasicBlock*, 8> ExitBlocks;
558  L->getUniqueExitBlocks(ExitBlocks);
559 
560  SmallVector<RewritePhi, 8> RewritePhiSet;
561  // Find all values that are computed inside the loop, but used outside of it.
562  // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan
563  // the exit blocks of the loop to find them.
564  for (BasicBlock *ExitBB : ExitBlocks) {
565  // If there are no PHI nodes in this exit block, then no values defined
566  // inside the loop are used on this path, skip it.
567  PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
568  if (!PN) continue;
569 
570  unsigned NumPreds = PN->getNumIncomingValues();
571 
572  // Iterate over all of the PHI nodes.
573  BasicBlock::iterator BBI = ExitBB->begin();
574  while ((PN = dyn_cast<PHINode>(BBI++))) {
575  if (PN->use_empty())
576  continue; // dead use, don't replace it
577 
578  if (!SE->isSCEVable(PN->getType()))
579  continue;
580 
581  // It's necessary to tell ScalarEvolution about this explicitly so that
582  // it can walk the def-use list and forget all SCEVs, as it may not be
583  // watching the PHI itself. Once the new exit value is in place, there
584  // may not be a def-use connection between the loop and every instruction
585  // which got a SCEVAddRecExpr for that loop.
586  SE->forgetValue(PN);
587 
588  // Iterate over all of the values in all the PHI nodes.
589  for (unsigned i = 0; i != NumPreds; ++i) {
590  // If the value being merged in is not integer or is not defined
591  // in the loop, skip it.
592  Value *InVal = PN->getIncomingValue(i);
593  if (!isa<Instruction>(InVal))
594  continue;
595 
596  // If this pred is for a subloop, not L itself, skip it.
597  if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
598  continue; // The Block is in a subloop, skip it.
599 
600  // Check that InVal is defined in the loop.
601  Instruction *Inst = cast<Instruction>(InVal);
602  if (!L->contains(Inst))
603  continue;
604 
605  // Okay, this instruction has a user outside of the current loop
606  // and varies predictably *inside* the loop. Evaluate the value it
607  // contains when the loop exits, if possible.
608  const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
609  if (!SE->isLoopInvariant(ExitValue, L) ||
610  !isSafeToExpand(ExitValue, *SE))
611  continue;
612 
613  // Computing the value outside of the loop brings no benefit if :
614  // - it is definitely used inside the loop in a way which can not be
615  // optimized away.
616  // - no use outside of the loop can take advantage of hoisting the
617  // computation out of the loop
618  if (ExitValue->getSCEVType()>=scMulExpr) {
619  unsigned NumHardInternalUses = 0;
620  unsigned NumSoftExternalUses = 0;
621  unsigned NumUses = 0;
622  for (auto IB = Inst->user_begin(), IE = Inst->user_end();
623  IB != IE && NumUses <= 6; ++IB) {
624  Instruction *UseInstr = cast<Instruction>(*IB);
625  unsigned Opc = UseInstr->getOpcode();
626  NumUses++;
627  if (L->contains(UseInstr)) {
628  if (Opc == Instruction::Call || Opc == Instruction::Ret)
629  NumHardInternalUses++;
630  } else {
631  if (Opc == Instruction::PHI) {
632  // Do not count the Phi as a use. LCSSA may have inserted
633  // plenty of trivial ones.
634  NumUses--;
635  for (auto PB = UseInstr->user_begin(),
636  PE = UseInstr->user_end();
637  PB != PE && NumUses <= 6; ++PB, ++NumUses) {
638  unsigned PhiOpc = cast<Instruction>(*PB)->getOpcode();
639  if (PhiOpc != Instruction::Call && PhiOpc != Instruction::Ret)
640  NumSoftExternalUses++;
641  }
642  continue;
643  }
644  if (Opc != Instruction::Call && Opc != Instruction::Ret)
645  NumSoftExternalUses++;
646  }
647  }
648  if (NumUses <= 6 && NumHardInternalUses && !NumSoftExternalUses)
649  continue;
650  }
651 
652  bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
653  Value *ExitVal =
654  expandSCEVIfNeeded(Rewriter, ExitValue, L, Inst, PN->getType());
655 
656  LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
657  << '\n'
658  << " LoopVal = " << *Inst << "\n");
659 
660  if (!isValidRewrite(Inst, ExitVal)) {
661  DeadInsts.push_back(ExitVal);
662  continue;
663  }
664 
665  // Collect all the candidate PHINodes to be rewritten.
666  RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
667  }
668  }
669  }
670 
671  bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);
672 
673  // Transformation.
674  for (const RewritePhi &Phi : RewritePhiSet) {
675  PHINode *PN = Phi.PN;
676  Value *ExitVal = Phi.Val;
677 
678  // Only do the rewrite when the ExitValue can be expanded cheaply.
679  // If LoopCanBeDel is true, rewrite exit value aggressively.
680  if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
681  DeadInsts.push_back(ExitVal);
682  continue;
683  }
684 
685  Changed = true;
686  ++NumReplaced;
687  Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
688  PN->setIncomingValue(Phi.Ith, ExitVal);
689 
690  // If this instruction is dead now, delete it. Don't do it now to avoid
691  // invalidating iterators.
692  if (isInstructionTriviallyDead(Inst, TLI))
693  DeadInsts.push_back(Inst);
694 
695  // Replace PN with ExitVal if that is legal and does not break LCSSA.
696  if (PN->getNumIncomingValues() == 1 &&
697  LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
698  PN->replaceAllUsesWith(ExitVal);
699  PN->eraseFromParent();
700  }
701  }
702 
703  // The insertion point instruction may have been deleted; clear it out
704  // so that the rewriter doesn't trip over it later.
705  Rewriter.clearInsertPoint();
706 }
707 
708 //===---------------------------------------------------------------------===//
709 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
710 // they will exit at the first iteration.
711 //===---------------------------------------------------------------------===//
712 
713 /// Check to see if this loop has loop invariant conditions which lead to loop
714 /// exits. If so, we know that if the exit path is taken, it is at the first
715 /// loop iteration. This lets us predict exit values of PHI nodes that live in
716 /// loop header.
717 void IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
718  // Verify the input to the pass is already in LCSSA form.
719  assert(L->isLCSSAForm(*DT));
720 
721  SmallVector<BasicBlock *, 8> ExitBlocks;
722  L->getUniqueExitBlocks(ExitBlocks);
723  auto *LoopHeader = L->getHeader();
724  assert(LoopHeader && "Invalid loop");
725 
726  for (auto *ExitBB : ExitBlocks) {
727  // If there are no more PHI nodes in this exit block, then no more
728  // values defined inside the loop are used on this path.
729  for (PHINode &PN : ExitBB->phis()) {
730  for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
731  IncomingValIdx != E; ++IncomingValIdx) {
732  auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
733 
734  // We currently only support loop exits from loop header. If the
735  // incoming block is not loop header, we need to recursively check
736  // all conditions starting from loop header are loop invariants.
737  // Additional support might be added in the future.
738  if (IncomingBB != LoopHeader)
739  continue;
740 
741  // Get condition that leads to the exit path.
742  auto *TermInst = IncomingBB->getTerminator();
743 
744  Value *Cond = nullptr;
745  if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
746  // Must be a conditional branch, otherwise the block
747  // should not be in the loop.
748  Cond = BI->getCondition();
749  } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
750  Cond = SI->getCondition();
751  else
752  continue;
753 
754  if (!L->isLoopInvariant(Cond))
755  continue;
756 
757  auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
758 
759  // Only deal with PHIs.
760  if (!ExitVal)
761  continue;
762 
763  // If ExitVal is a PHI on the loop header, then we know its
764  // value along this exit because the exit can only be taken
765  // on the first iteration.
766  auto *LoopPreheader = L->getLoopPreheader();
767  assert(LoopPreheader && "Invalid loop");
768  int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
769  if (PreheaderIdx != -1) {
770  assert(ExitVal->getParent() == LoopHeader &&
771  "ExitVal must be in loop header");
772  PN.setIncomingValue(IncomingValIdx,
773  ExitVal->getIncomingValue(PreheaderIdx));
774  }
775  }
776  }
777  }
778 }
779 
780 /// Check whether it is possible to delete the loop after rewriting exit
781 /// value. If it is possible, ignore ReplaceExitValue and do rewriting
782 /// aggressively.
783 bool IndVarSimplify::canLoopBeDeleted(
784  Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
785  BasicBlock *Preheader = L->getLoopPreheader();
786  // If there is no preheader, the loop will not be deleted.
787  if (!Preheader)
788  return false;
789 
790  // In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1.
791  // We obviate multiple ExitingBlocks case for simplicity.
792  // TODO: If we see testcase with multiple ExitingBlocks can be deleted
793  // after exit value rewriting, we can enhance the logic here.
794  SmallVector<BasicBlock *, 4> ExitingBlocks;
795  L->getExitingBlocks(ExitingBlocks);
796  SmallVector<BasicBlock *, 8> ExitBlocks;
797  L->getUniqueExitBlocks(ExitBlocks);
798  if (ExitBlocks.size() > 1 || ExitingBlocks.size() > 1)
799  return false;
800 
801  BasicBlock *ExitBlock = ExitBlocks[0];
802  BasicBlock::iterator BI = ExitBlock->begin();
803  while (PHINode *P = dyn_cast<PHINode>(BI)) {
804  Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);
805 
806  // If the Incoming value of P is found in RewritePhiSet, we know it
807  // could be rewritten to use a loop invariant value in transformation
808  // phase later. Skip it in the loop invariant check below.
809  bool found = false;
810  for (const RewritePhi &Phi : RewritePhiSet) {
811  unsigned i = Phi.Ith;
812  if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
813  found = true;
814  break;
815  }
816  }
817 
818  Instruction *I;
819  if (!found && (I = dyn_cast<Instruction>(Incoming)))
820  if (!L->hasLoopInvariantOperands(I))
821  return false;
822 
823  ++BI;
824  }
825 
826  for (auto *BB : L->blocks())
827  if (llvm::any_of(*BB, [](Instruction &I) {
828  return I.mayHaveSideEffects();
829  }))
830  return false;
831 
832  return true;
833 }
834 
835 //===----------------------------------------------------------------------===//
836 // IV Widening - Extend the width of an IV to cover its widest uses.
837 //===----------------------------------------------------------------------===//
838 
namespace {

// Collect information about induction variables that are used by sign/zero
// extend operations. This information is recorded by CollectExtend and provides
// the input to WidenIV.
struct WideIVInfo {
  // The narrow induction variable phi that is a candidate for widening.
  PHINode *NarrowIV = nullptr;

  // Widest integer type created by a [sz]ext of the narrow IV seen so far.
  Type *WidestNativeType = nullptr;

  // Was a sext user seen before a zext?
  bool IsSigned = false;
};

} // end anonymous namespace
855 
856 /// Update information about the induction variable that is extended by this
857 /// sign or zero extend operation. This is used to determine the final width of
858 /// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  // Only sign- and zero-extensions of the IV are of interest here.
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  // Never propose a widened type the target cannot hold in a native integer;
  // widening to an illegal type would just trade casts for legalization cost.
  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later). This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetics on the wider indvar are more
  // expensive than those on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable. We
  // could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
  // NOTE(review): the middle of this condition (presumably a TTI cost
  // comparison for ADD on the wide vs. narrow type) appears truncated in this
  // rendering; confirm against the original file.
  if (TTI &&
      Cast->getOperand(0)->getType())) {
    return;
  }

  // First qualifying extension seen: record the widened type and signedness.
  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  // Same signedness but a wider type: grow the recorded widest type.
  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}
904 
namespace {

/// Record a link in the Narrow IV def-use chain along with the WideIV that
/// computes the same value as the Narrow IV def. This avoids caching Use*
/// pointers.
struct NarrowIVDefUse {
  Instruction *NarrowDef = nullptr;
  Instruction *NarrowUse = nullptr;
  Instruction *WideDef = nullptr;

  // True if the narrow def is never negative. Tracking this information lets
  // us use a sign extension instead of a zero extension or vice versa, when
  // profitable and legal.
  bool NeverNegative = false;

  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
                 bool NeverNegative)
      : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
        NeverNegative(NeverNegative) {}
};

/// The goal of this transform is to remove sign and zero extends without
/// creating any new induction variables. To do this, it creates a new phi of
/// the wider type and redirects all users, either removing extends or inserting
/// truncs whenever we stop propagating the type.
class WidenIV {
  // Parameters
  PHINode *OrigPhi; // The narrow IV phi being widened.
  Type *WideType;   // The target (wider) integer type.

  // Context
  LoopInfo *LI;
  Loop *L; // The loop whose header contains OrigPhi.
  ScalarEvolution *SE;
  DominatorTree *DT;

  // Does the module have any calls to the llvm.experimental.guard intrinsic
  // at all? If not we can avoid scanning instructions looking for guards.
  bool HasGuards;

  // Result
  PHINode *WidePhi = nullptr;        // The newly created wide IV phi.
  Instruction *WideInc = nullptr;    // The wide IV's loop increment.
  const SCEV *WideIncExpr = nullptr; // SCEV of WideInc, for reuse matching.
  // NOTE(review): declarations for the dead-instruction list (DeadInsts) and
  // the visited-set (Widened) referenced elsewhere in this class appear to
  // have been dropped by this rendering; confirm against the original file.

  // Worklist of narrow def-use edges still awaiting widening.
  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;

  enum ExtendKind { ZeroExtended, SignExtended, Unknown };

  // A map tracking the kind of extension used to widen each narrow IV
  // and narrow IV user.
  // Key: pointer to a narrow IV or IV user.
  // Value: the kind of extension used to widen this Instruction.
  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;

  using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;

  // A map with control-dependent ranges for post increment IV uses. The key is
  // a pair of IV def and a use of this def denoting the context. The value is
  // a ConstantRange representing possible values of the def at the given
  // context.
  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;

  // Look up the control-dependent range recorded for (Def, UseI), if any.
  // NOTE(review): the "then" arm of the conditional (presumably returning
  // None when the pair is absent) appears truncated in this rendering.
  Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
                                              Instruction *UseI) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    return It == PostIncRangeInfos.end()
               : Optional<ConstantRange>(It->second);
  }

  void calculatePostIncRanges(PHINode *OrigPhi);
  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);

  // Record the range for (Def, UseI), or refine an existing one by
  // intersection when several dominating conditions apply.
  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    if (It == PostIncRangeInfos.end())
      PostIncRangeInfos.insert({Key, R});
    else
      It->second = R.intersectWith(It->second);
  }

public:
  // NOTE(review): a constructor parameter line (supplying DTree and the
  // dead-instruction list DI used in the init-list below) appears truncated
  // in this rendering; confirm against the original file.
  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
          bool HasGuards)
      : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
        L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
        HasGuards(HasGuards), DeadInsts(DI) {
    assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
    // Seed the extension-kind map with the narrow IV itself.
    ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
  }

  PHINode *createWideIV(SCEVExpander &Rewriter);

protected:
  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
                          Instruction *Use);

  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
                                     const SCEVAddRecExpr *WideAR);
  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);

  ExtendKind getExtendKind(Instruction *I);

  using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;

  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);

  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);

  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                              unsigned OpCode) const;

  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);

  bool widenLoopCompare(NarrowIVDefUse DU);

  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};

} // end anonymous namespace
1032 
1033 /// Perform a quick domtree based check for loop invariance assuming that V is
1034 /// used within the loop. LoopInfo::isLoopInvariant() seems gratuitous for this
1035 /// purpose.
1036 static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT) {
1037  Instruction *Inst = dyn_cast<Instruction>(V);
1038  if (!Inst)
1039  return true;
1040 
1041  return DT->properlyDominates(Inst->getParent(), L->getHeader());
1042 }
1043 
Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
                                 bool IsSigned, Instruction *Use) {
  // Set the debug location and conservative insertion point.
  IRBuilder<> Builder(Use);
  // Hoist the insertion point into loop preheaders as far as possible.
  // NOTE(review): the loop body (presumably moving the builder's insertion
  // point into the preheader) appears truncated in this rendering; confirm
  // against the original file.
  for (const Loop *L = LI->getLoopFor(Use->getParent());
       L && L->getLoopPreheader() && isLoopInvariant(NarrowOper, L, DT);
       L = L->getParentLoop())

  // Emit the requested extension at the chosen insertion point.
  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
                    Builder.CreateZExt(NarrowOper, WideType);
}
1057 
1058 /// Instantiate a wide operation to replace a narrow operation. This only needs
1059 /// to handle operations that can evaluation to SCEVAddRec. It can safely return
1060 /// 0 for any operation we decide not to clone.
1061 Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
1062  const SCEVAddRecExpr *WideAR) {
1063  unsigned Opcode = DU.NarrowUse->getOpcode();
1064  switch (Opcode) {
1065  default:
1066  return nullptr;
1067  case Instruction::Add:
1068  case Instruction::Mul:
1069  case Instruction::UDiv:
1070  case Instruction::Sub:
1071  return cloneArithmeticIVUser(DU, WideAR);
1072 
1073  case Instruction::And:
1074  case Instruction::Or:
1075  case Instruction::Xor:
1076  case Instruction::Shl:
1077  case Instruction::LShr:
1078  case Instruction::AShr:
1079  return cloneBitwiseIVUser(DU);
1080  }
1081 }
1082 
1083 Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
1084  Instruction *NarrowUse = DU.NarrowUse;
1085  Instruction *NarrowDef = DU.NarrowDef;
1086  Instruction *WideDef = DU.WideDef;
1087 
1088  LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");
1089 
1090  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
1091  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
1092  // invariant and will be folded or hoisted. If it actually comes from a
1093  // widened IV, it should be removed during a future call to widenIVUse.
1094  bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
1095  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1096  ? WideDef
1097  : createExtendInst(NarrowUse->getOperand(0), WideType,
1098  IsSigned, NarrowUse);
1099  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1100  ? WideDef
1101  : createExtendInst(NarrowUse->getOperand(1), WideType,
1102  IsSigned, NarrowUse);
1103 
1104  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1105  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1106  NarrowBO->getName());
1107  IRBuilder<> Builder(NarrowUse);
1108  Builder.Insert(WideBO);
1109  WideBO->copyIRFlags(NarrowBO);
1110  return WideBO;
1111 }
1112 
Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
                                            const SCEVAddRecExpr *WideAR) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  // Index of the operand that is the IV; the other one is the non-IV operand
  // whose extension kind we must determine below.
  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;

  // We're trying to find X such that
  //
  //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
  //
  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
  // and check using SCEV if any of them are correct.

  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
  // correct solution to X.
  auto GuessNonIVOperand = [&](bool SignExt) {
    const SCEV *WideLHS;
    const SCEV *WideRHS;

    auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
      if (SignExt)
        return SE->getSignExtendExpr(S, Ty);
      return SE->getZeroExtendExpr(S, Ty);
    };

    // The IV side is represented by WideDef's SCEV; the non-IV side gets the
    // guessed extension.
    if (IVOpIdx == 0) {
      WideLHS = SE->getSCEV(WideDef);
      const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
      WideRHS = GetExtend(NarrowRHS, WideType);
    } else {
      const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
      WideLHS = GetExtend(NarrowLHS, WideType);
      WideRHS = SE->getSCEV(WideDef);
    }

    // WideUse is "WideDef `op.wide` X" as described in the comment.
    const SCEV *WideUse = nullptr;

    switch (NarrowUse->getOpcode()) {
    default:
      llvm_unreachable("No other possibility!");

    case Instruction::Add:
      WideUse = SE->getAddExpr(WideLHS, WideRHS);
      break;

    case Instruction::Mul:
      WideUse = SE->getMulExpr(WideLHS, WideRHS);
      break;

    case Instruction::UDiv:
      WideUse = SE->getUDivExpr(WideLHS, WideRHS);
      break;

    case Instruction::Sub:
      WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
      break;
    }

    return WideUse == WideAR;
  };

  // Try the extension kind used to widen the IV first; if that guess fails,
  // try the opposite kind before giving up.
  bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
  if (!GuessNonIVOperand(SignExtend)) {
    SignExtend = !SignExtend;
    if (!GuessNonIVOperand(SignExtend))
      return nullptr;
  }

  // Materialize the wide operands: the IV operand becomes WideDef; the other
  // operand is extended with the kind we just validated.
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      SignExtend, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      SignExtend, NarrowUse);

  // Rebuild the operation, preserving the original's name and IR flags.
  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());

  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}
1204 
1205 WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
1206  auto It = ExtendKindMap.find(I);
1207  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
1208  return It->second;
1209 }
1210 
1211 const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
1212  unsigned OpCode) const {
1213  if (OpCode == Instruction::Add)
1214  return SE->getAddExpr(LHS, RHS);
1215  if (OpCode == Instruction::Sub)
1216  return SE->getMinusSCEV(LHS, RHS);
1217  if (OpCode == Instruction::Mul)
1218  return SE->getMulExpr(LHS, RHS);
1219 
1220  llvm_unreachable("Unsupported opcode.");
1221 }
1222 
1223 /// No-wrap operations can transfer sign extension of their result to their
1224 /// operands. Generate the SCEV value for the widened operation without
1225 /// actually modifying the IR yet. If the expression after extending the
1226 /// operands is an AddRec for this loop, return the AddRec and the kind of
1227 /// extension used.
WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
  // Handle the common case of add<nsw/nuw>
  const unsigned OpCode = DU.NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions supported yet.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return {nullptr, Unknown};

  // One operand (NarrowDef) has already been extended to WideDef. Now determine
  // if extending the other will lead to a recurrence.
  const unsigned ExtendOperIdx =
      DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");

  // The non-IV operand may only be extended with the same kind used to widen
  // the IV, and only when the operation carries the matching no-wrap flag;
  // otherwise the widened operation need not compute the extended value.
  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
      cast<OverflowingBinaryOperator>(DU.NarrowUse);
  ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
        SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if(ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
        SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return {nullptr, Unknown};

  // When creating this SCEV expr, don't apply the current operations NSW or NUW
  // flags. This instruction may be guarded by control flow that the no-wrap
  // behavior depends on. Non-control-equivalent instructions can be mapped to
  // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
  // semantics to those operations.
  const SCEV *lhs = SE->getSCEV(DU.WideDef);
  const SCEV *rhs = ExtendOperExpr;

  // Let's swap operands to the initial order for the case of non-commutative
  // operations, like SUB. See PR21014.
  if (ExtendOperIdx == 0)
    std::swap(lhs, rhs);
  const SCEVAddRecExpr *AddRec =
      dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));

  // Only a recurrence of this very loop is useful for widening.
  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};

  return {AddRec, ExtKind};
}
1275 
/// Is this instruction potentially interesting for further simplification after
/// widening its type? In other words, can the extend be safely hoisted out of
/// the loop with SCEV reducing the value to a recurrence on the same loop? If
/// so, return the extended recurrence and the kind of extension used. Otherwise
/// return {nullptr, Unknown}.
WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
  // Values SCEV cannot reason about cannot be proven to form a recurrence.
  if (!SE->isSCEVable(DU.NarrowUse->getType()))
    return {nullptr, Unknown};

  const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
  if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
      SE->getTypeSizeInBits(WideType)) {
    // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
    // index. So don't follow this use.
    return {nullptr, Unknown};
  }

  const SCEV *WideExpr;
  ExtendKind ExtKind;
  if (DU.NeverNegative) {
    // A known-non-negative value extends equally well with sext or zext, so
    // prefer whichever form SCEV folds into a recurrence: try sext first and
    // fall back to zext.
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    if (isa<SCEVAddRecExpr>(WideExpr))
      ExtKind = SignExtended;
    else {
      WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
      ExtKind = ZeroExtended;
    }
  } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
    // Otherwise the extension kind must match how the narrow def was widened.
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    ExtKind = SignExtended;
  } else {
    WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
    ExtKind = ZeroExtended;
  }
  // Only a recurrence of this loop lets the extend be hoisted out of it.
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};
  return {AddRec, ExtKind};
}
1315 
/// This IV user cannot be widened. Replace this use of the original narrow IV
/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
1318 static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
1319  LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
1320  << *DU.NarrowUse << "\n");
1321  IRBuilder<> Builder(
1322  getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI));
1323  Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
1324  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
1325 }
1326 
/// If the narrow use is a compare instruction, then widen the compare
/// (and possibly the other operand). The extend operation is hoisted into the
/// loop preheader as far as possible.
bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
  ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
  if (!Cmp)
    return false;

  // We can legally widen the comparison in the following two cases:
  //
  // - The signedness of the IV extension and comparison match
  //
  // - The narrow IV is always positive (and thus its sign extension is equal
  //   to its zero extension). For instance, let's say we're zero extending
  //   %narrow for the following use
  //
  //     icmp slt i32 %narrow, %val ... (A)
  //
  //   and %narrow is always positive. Then
  //
  //     (A) == icmp slt i32 sext(%narrow), sext(%val)
  //         == icmp slt i32 zext(%narrow), sext(%val)
  bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
  if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
    return false;

  // The other (non-IV) operand of the compare.
  Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
  unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
  assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");

  // Widen the compare instruction.
  // NOTE(review): `Builder` is constructed here but not used by the visible
  // code (createExtendInst below creates its own IRBuilder); confirm whether
  // it is still needed.
  IRBuilder<> Builder(
      getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI));
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);

  // Widen the other operand of the compare, if necessary.
  if (CastWidth < IVWidth) {
    Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
    DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
  }
  return true;
}
1370 
1371 /// Determine whether an individual user of the narrow IV can be widened. If so,
1372 /// return the wide clone of the user.
Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
  assert(ExtendKindMap.count(DU.NarrowDef) &&
         "Should already know the kind of extension used to widen NarrowDef");

  // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
  if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
    if (LI->getLoopFor(UsePhi->getParent()) != L) {
      // For LCSSA phis, sink the truncate outside the loop.
      // After SimplifyCFG most loop exit targets have a single predecessor.
      // Otherwise fall back to a truncate within the loop.
      if (UsePhi->getNumOperands() != 1)
        truncateIVUse(DU, DT, LI);
      else {
        // Widening the PHI requires us to insert a trunc. The logical place
        // for this trunc is in the same BB as the PHI. This is not possible if
        // the BB is terminated by a catchswitch.
        if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
          return nullptr;

        // Replace the single-operand LCSSA phi with a wide phi plus a trunc
        // back to the narrow type; all old users see the truncated value.
        PHINode *WidePhi =
            PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
                            UsePhi);
        WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
        IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
        Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
        UsePhi->replaceAllUsesWith(Trunc);
        DeadInsts.emplace_back(UsePhi);
        LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
                          << *WidePhi << "\n");
      }
      return nullptr;
    }
  }

  // This narrow use can be widened by a sext if it's non-negative or its narrow
  // def was widened by a sext. Same for zext.
  auto canWidenBySExt = [&]() {
    return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
  };
  auto canWidenByZExt = [&]() {
    return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
  };

  // Our raison d'etre! Eliminate sign and zero extension.
  if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
      (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
    Value *NewDef = DU.WideDef;
    if (DU.NarrowUse->getType() != WideType) {
      unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
      unsigned IVWidth = SE->getTypeSizeInBits(WideType);
      if (CastWidth < IVWidth) {
        // The cast isn't as wide as the IV, so insert a Trunc.
        IRBuilder<> Builder(DU.NarrowUse);
        NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
      }
      else {
        // A wider extend was hidden behind a narrower one. This may induce
        // another round of IV widening in which the intermediate IV becomes
        // dead. It should be very rare.
        LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
                          << " not wide enough to subsume " << *DU.NarrowUse
                          << "\n");
        DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
        NewDef = DU.NarrowUse;
      }
    }
    if (NewDef != DU.NarrowUse) {
      LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
                        << " replaced by " << *DU.WideDef << "\n");
      ++NumElimExt;
      DU.NarrowUse->replaceAllUsesWith(NewDef);
      DeadInsts.emplace_back(DU.NarrowUse);
    }
    // Now that the extend is gone, we want to expose its uses for potential
    // further simplification. We don't need to directly inform SimplifyIVUsers
    // of the new users, because their parent IV will be processed later as a
    // new loop phi. If we preserved IVUsers analysis, we would also want to
    // push the uses of WideDef here.

    // No further widening is needed. The deceased [sz]ext had done it for us.
    return nullptr;
  }

  // Does this user itself evaluate to a recurrence after widening?
  WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
  if (!WideAddRec.first)
    WideAddRec = getWideRecurrence(DU);

  assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
  if (!WideAddRec.first) {
    // If use is a loop condition, try to promote the condition instead of
    // truncating the IV first.
    if (widenLoopCompare(DU))
      return nullptr;

    // This user does not evaluate to a recurrence after widening, so don't
    // follow it. Instead insert a Trunc to kill off the original use,
    // eventually isolating the original narrow IV so it can be removed.
    truncateIVUse(DU, DT, LI);
    return nullptr;
  }
  // Assume block terminators cannot evaluate to a recurrence. We can't insert
  // a Trunc after a terminator if there happens to be a critical edge.
  assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
         "SCEV is not expected to evaluate a block terminator");

  // Reuse the IV increment that SCEVExpander created as long as it dominates
  // NarrowUse.
  Instruction *WideUse = nullptr;
  if (WideAddRec.first == WideIncExpr &&
      Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
    WideUse = WideInc;
  else {
    WideUse = cloneIVUser(DU, WideAddRec.first);
    if (!WideUse)
      return nullptr;
  }
  // Evaluation of WideAddRec ensured that the narrow expression could be
  // extended outside the loop without overflow. This suggests that the wide use
  // evaluates to the same expression as the extended narrow use, but doesn't
  // absolutely guarantee it. Hence the following failsafe check. In rare cases
  // where it fails, we simply throw away the newly created wide use.
  if (WideAddRec.first != SE->getSCEV(WideUse)) {
    LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
                      << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
                      << "\n");
    DeadInsts.emplace_back(WideUse);
    return nullptr;
  }

  // Record how this user was widened so transitive users can match it.
  ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
  // Returning WideUse pushes it on the worklist.
  return WideUse;
}
1507 
1508 /// Add eligible users of NarrowDef to NarrowIVUsers.
1509 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1510  const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1511  bool NonNegativeDef =
1512  SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1513  SE->getConstant(NarrowSCEV->getType(), 0));
1514  for (User *U : NarrowDef->users()) {
1515  Instruction *NarrowUser = cast<Instruction>(U);
1516 
1517  // Handle data flow merges and bizarre phi cycles.
1518  if (!Widened.insert(NarrowUser).second)
1519  continue;
1520 
1521  bool NonNegativeUse = false;
1522  if (!NonNegativeDef) {
1523  // We might have a control-dependent range information for this context.
1524  if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
1525  NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
1526  }
1527 
1528  NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
1529  NonNegativeDef || NonNegativeUse);
1530  }
1531 }
1532 
1533 /// Process a single induction variable. First use the SCEVExpander to create a
1534 /// wide induction variable that evaluates to the same recurrence as the
1535 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's
1536 /// def-use chain. After widenIVUse has processed all interesting IV users, the
1537 /// narrow IV will be isolated for removal by DeleteDeadPHIs.
1538 ///
1539 /// It would be simpler to delete uses as they are processed, but we must avoid
1540 /// invalidating SCEV expressions.
PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
  // Is this phi an induction variable?
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
  if (!AddRec)
    return nullptr;

  // Widen the induction variable expression.
  const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
                               ? SE->getSignExtendExpr(AddRec, WideType)
                               : SE->getZeroExtendExpr(AddRec, WideType);

  assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
         "Expect the new IV expression to preserve its type");

  // Can the IV be extended outside the loop without overflow?
  AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return nullptr;

  // An AddRec must have loop-invariant operands. Since this AddRec is
  // materialized by a loop header phi, the expression cannot have any post-loop
  // operands, so they must dominate the loop header.
  assert(
      SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
      SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
      "Loop header phi recurrence inputs do not dominate the loop");

  // Iterate over IV uses (including transitive ones) looking for IV increments
  // of the form 'add nsw %iv, <const>'. For each increment and each use of
  // the increment calculate control-dependent range information basing on
  // dominating conditions inside of the loop (e.g. a range check inside of the
  // loop). Calculated ranges are stored in PostIncRangeInfos map.
  //
  // Control-dependent range information is later used to prove that a narrow
  // definition is not negative (see pushNarrowIVUsers). It's difficult to do
  // this on demand because when pushNarrowIVUsers needs this information some
  // of the dominating conditions might be already widened.
  if (UsePostIncrementRanges)
    calculatePostIncRanges(OrigPhi);

  // The rewriter provides a value for the desired IV expression. This may
  // either find an existing phi or materialize a new one. Either way, we
  // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
  // of the phi-SCC dominates the loop entry.
  Instruction *InsertPt = &L->getHeader()->front();
  WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));

  // Remembering the WideIV increment generated by SCEVExpander allows
  // widenIVUse to reuse it when widening the narrow IV's increment. We don't
  // employ a general reuse mechanism because the call above is the only call to
  // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
  if (BasicBlock *LatchBlock = L->getLoopLatch()) {
    WideInc =
        cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
    WideIncExpr = SE->getSCEV(WideInc);
    // Propagate the debug location associated with the original loop increment
    // to the new (widened) increment.
    auto *OrigInc =
        cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
    WideInc->setDebugLoc(OrigInc->getDebugLoc());
  }

  LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
  ++NumWidened;

  // Traverse the def-use chain using a worklist starting at the original IV.
  assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" );

  Widened.insert(OrigPhi);
  pushNarrowIVUsers(OrigPhi, WidePhi);

  while (!NarrowIVUsers.empty()) {
    NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();

    // Process a def-use edge. This may replace the use, so don't hold a
    // use_iterator across it.
    Instruction *WideUse = widenIVUse(DU, Rewriter);

    // Follow all def-use edges from the previous narrow use.
    if (WideUse)
      pushNarrowIVUsers(DU.NarrowUse, WideUse);

    // widenIVUse may have removed the def-use edge.
    if (DU.NarrowDef->use_empty())
      DeadInsts.emplace_back(DU.NarrowDef);
  }

  // Attach any debug information to the new PHI. Since OrigPhi and WidePHI
  // evaluate the same recurrence, we can just copy the debug info over.
  // NOTE(review): the declaration of DbgValues (the container of dbg.value
  // users of OrigPhi collected below) appears to have been dropped by this
  // rendering; confirm against the original file.
  llvm::findDbgValues(DbgValues, OrigPhi);
  auto *MDPhi = MetadataAsValue::get(WidePhi->getContext(),
                                     ValueAsMetadata::get(WidePhi));
  for (auto &DbgValue : DbgValues)
    DbgValue->setOperand(0, MDPhi);
  return WidePhi;
}
1638 
1639 /// Calculates control-dependent range for the given def at the given context
1640 /// by looking at dominating conditions inside of the loop
1641 void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
1642  Instruction *NarrowUser) {
1643  using namespace llvm::PatternMatch;
1644 
1645  Value *NarrowDefLHS;
1646  const APInt *NarrowDefRHS;
1647  if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
1648  m_APInt(NarrowDefRHS))) ||
1649  !NarrowDefRHS->isNonNegative())
1650  return;
1651 
1652  auto UpdateRangeFromCondition = [&] (Value *Condition,
1653  bool TrueDest) {
1654  CmpInst::Predicate Pred;
1655  Value *CmpRHS;
1656  if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
1657  m_Value(CmpRHS))))
1658  return;
1659 
1661  TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
1662 
1663  auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
1664  auto CmpConstrainedLHSRange =
1665  ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
1666  auto NarrowDefRange =
1667  CmpConstrainedLHSRange.addWithNoSignedWrap(*NarrowDefRHS);
1668 
1669  updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
1670  };
1671 
1672  auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
1673  if (!HasGuards)
1674  return;
1675 
1676  for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
1677  Ctx->getParent()->rend())) {
1678  Value *C = nullptr;
1679  if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
1680  UpdateRangeFromCondition(C, /*TrueDest=*/true);
1681  }
1682  };
1683 
1684  UpdateRangeFromGuards(NarrowUser);
1685 
1686  BasicBlock *NarrowUserBB = NarrowUser->getParent();
1687  // If NarrowUserBB is statically unreachable asking dominator queries may
1688  // yield surprising results. (e.g. the block may not have a dom tree node)
1689  if (!DT->isReachableFromEntry(NarrowUserBB))
1690  return;
1691 
1692  for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
1693  L->contains(DTB->getBlock());
1694  DTB = DTB->getIDom()) {
1695  auto *BB = DTB->getBlock();
1696  auto *TI = BB->getTerminator();
1697  UpdateRangeFromGuards(TI);
1698 
1699  auto *BI = dyn_cast<BranchInst>(TI);
1700  if (!BI || !BI->isConditional())
1701  continue;
1702 
1703  auto *TrueSuccessor = BI->getSuccessor(0);
1704  auto *FalseSuccessor = BI->getSuccessor(1);
1705 
1706  auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
1707  return BBE.isSingleEdge() &&
1708  DT->dominates(BBE, NarrowUser->getParent());
1709  };
1710 
1711  if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
1712  UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
1713 
1714  if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
1715  UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
1716  }
1717 }
1718 
1719 /// Calculates PostIncRangeInfos map for the given IV
1720 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
1723  Worklist.push_back(OrigPhi);
1724  Visited.insert(OrigPhi);
1725 
1726  while (!Worklist.empty()) {
1727  Instruction *NarrowDef = Worklist.pop_back_val();
1728 
1729  for (Use &U : NarrowDef->uses()) {
1730  auto *NarrowUser = cast<Instruction>(U.getUser());
1731 
1732  // Don't go looking outside the current loop.
1733  auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
1734  if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
1735  continue;
1736 
1737  if (!Visited.insert(NarrowUser).second)
1738  continue;
1739 
1740  Worklist.push_back(NarrowUser);
1741 
1742  calculatePostIncRange(NarrowDef, NarrowUser);
1743  }
1744  }
1745 }
1746 
1747 //===----------------------------------------------------------------------===//
1748 // Live IV Reduction - Minimize IVs live across the loop.
1749 //===----------------------------------------------------------------------===//
1750 
1751 //===----------------------------------------------------------------------===//
1752 // Simplification of IV users based on SCEV evaluation.
1753 //===----------------------------------------------------------------------===//
1754 
1755 namespace {
1756 
1757 class IndVarSimplifyVisitor : public IVVisitor {
1758  ScalarEvolution *SE;
1759  const TargetTransformInfo *TTI;
1760  PHINode *IVPhi;
1761 
1762 public:
1763  WideIVInfo WI;
1764 
1765  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
1766  const TargetTransformInfo *TTI,
1767  const DominatorTree *DTree)
1768  : SE(SCEV), TTI(TTI), IVPhi(IV) {
1769  DT = DTree;
1770  WI.NarrowIV = IVPhi;
1771  }
1772 
1773  // Implement the interface used by simplifyUsersOfIV.
1774  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
1775 };
1776 
1777 } // end anonymous namespace
1778 
1779 /// Iteratively perform simplification on a worklist of IV users. Each
1780 /// successive simplification may push more users which may themselves be
1781 /// candidates for simplification.
1782 ///
1783 /// Sign/Zero extend elimination is interleaved with IV simplification.
1784 void IndVarSimplify::simplifyAndExtend(Loop *L,
1785  SCEVExpander &Rewriter,
1786  LoopInfo *LI) {
1788 
1789  auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
1790  Intrinsic::getName(Intrinsic::experimental_guard));
1791  bool HasGuards = GuardDecl && !GuardDecl->use_empty();
1792 
1793  SmallVector<PHINode*, 8> LoopPhis;
1794  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1795  LoopPhis.push_back(cast<PHINode>(I));
1796  }
1797  // Each round of simplification iterates through the SimplifyIVUsers worklist
1798  // for all current phis, then determines whether any IVs can be
1799  // widened. Widening adds new phis to LoopPhis, inducing another round of
1800  // simplification on the wide IVs.
1801  while (!LoopPhis.empty()) {
1802  // Evaluate as many IV expressions as possible before widening any IVs. This
1803  // forces SCEV to set no-wrap flags before evaluating sign/zero
1804  // extension. The first time SCEV attempts to normalize sign/zero extension,
1805  // the result becomes final. So for the most predictable results, we delay
1806  // evaluation of sign/zero extend evaluation until needed, and avoid running
1807  // other SCEV based analysis prior to simplifyAndExtend.
1808  do {
1809  PHINode *CurrIV = LoopPhis.pop_back_val();
1810 
1811  // Information about sign/zero extensions of CurrIV.
1812  IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
1813 
1814  Changed |=
1815  simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor);
1816 
1817  if (Visitor.WI.WidestNativeType) {
1818  WideIVs.push_back(Visitor.WI);
1819  }
1820  } while(!LoopPhis.empty());
1821 
1822  for (; !WideIVs.empty(); WideIVs.pop_back()) {
1823  WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
1824  if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
1825  Changed = true;
1826  LoopPhis.push_back(WidePhi);
1827  }
1828  }
1829  }
1830 }
1831 
1832 //===----------------------------------------------------------------------===//
1833 // linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
1834 //===----------------------------------------------------------------------===//
1835 
1836 /// Return true if this loop's backedge taken count expression can be safely and
1837 /// cheaply expanded into an instruction sequence that can be used by
1838 /// linearFunctionTestReplace.
1839 ///
1840 /// TODO: This fails for pointer-type loop counters with greater than one byte
1841 /// strides, consequently preventing LFTR from running. For the purpose of LFTR
1842 /// we could skip this check in the case that the LFTR loop counter (chosen by
1843 /// FindLoopCounter) is also pointer type. Instead, we could directly convert
1844 /// the loop test to an inequality test by checking the target data's alignment
1845 /// of element types (given that the initial pointer value originates from or is
1846 /// used by ABI constrained operation, as opposed to inttoptr/ptrtoint).
1847 /// However, we don't yet have a strong motivation for converting loop tests
1848 /// into inequality tests.
1850  SCEVExpander &Rewriter) {
1851  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
1852  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
1853  BackedgeTakenCount->isZero())
1854  return false;
1855 
1856  if (!L->getExitingBlock())
1857  return false;
1858 
1859  // Can't rewrite non-branch yet.
1860  if (!isa<BranchInst>(L->getExitingBlock()->getTerminator()))
1861  return false;
1862 
1863  if (Rewriter.isHighCostExpansion(BackedgeTakenCount, L))
1864  return false;
1865 
1866  return true;
1867 }
1868 
1869 /// Return the loop header phi IFF IncV adds a loop invariant value to the phi.
1871  Instruction *IncI = dyn_cast<Instruction>(IncV);
1872  if (!IncI)
1873  return nullptr;
1874 
1875  switch (IncI->getOpcode()) {
1876  case Instruction::Add:
1877  case Instruction::Sub:
1878  break;
1879  case Instruction::GetElementPtr:
1880  // An IV counter must preserve its type.
1881  if (IncI->getNumOperands() == 2)
1882  break;
1884  default:
1885  return nullptr;
1886  }
1887 
1888  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
1889  if (Phi && Phi->getParent() == L->getHeader()) {
1890  if (isLoopInvariant(IncI->getOperand(1), L, DT))
1891  return Phi;
1892  return nullptr;
1893  }
1894  if (IncI->getOpcode() == Instruction::GetElementPtr)
1895  return nullptr;
1896 
1897  // Allow add/sub to be commuted.
1898  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
1899  if (Phi && Phi->getParent() == L->getHeader()) {
1900  if (isLoopInvariant(IncI->getOperand(0), L, DT))
1901  return Phi;
1902  }
1903  return nullptr;
1904 }
1905 
1906 /// Return the compare guarding the loop latch, or NULL for unrecognized tests.
1908  assert(L->getExitingBlock() && "expected loop exit");
1909 
1910  BasicBlock *LatchBlock = L->getLoopLatch();
1911  // Don't bother with LFTR if the loop is not properly simplified.
1912  if (!LatchBlock)
1913  return nullptr;
1914 
1916  assert(BI && "expected exit branch");
1917 
1918  return dyn_cast<ICmpInst>(BI->getCondition());
1919 }
1920 
1921 /// linearFunctionTestReplace policy. Return true unless we can show that the
1922 /// current exit test is already sufficiently canonical.
1923 static bool needsLFTR(Loop *L, DominatorTree *DT) {
1924  // Do LFTR to simplify the exit condition to an ICMP.
1925  ICmpInst *Cond = getLoopTest(L);
1926  if (!Cond)
1927  return true;
1928 
1929  // Do LFTR to simplify the exit ICMP to EQ/NE
1930  ICmpInst::Predicate Pred = Cond->getPredicate();
1931  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
1932  return true;
1933 
1934  // Look for a loop invariant RHS
1935  Value *LHS = Cond->getOperand(0);
1936  Value *RHS = Cond->getOperand(1);
1937  if (!isLoopInvariant(RHS, L, DT)) {
1938  if (!isLoopInvariant(LHS, L, DT))
1939  return true;
1940  std::swap(LHS, RHS);
1941  }
1942  // Look for a simple IV counter LHS
1943  PHINode *Phi = dyn_cast<PHINode>(LHS);
1944  if (!Phi)
1945  Phi = getLoopPhiForCounter(LHS, L, DT);
1946 
1947  if (!Phi)
1948  return true;
1949 
1950  // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
1951  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
1952  if (Idx < 0)
1953  return true;
1954 
1955  // Do LFTR if the exit condition's IV is *not* a simple counter.
1956  Value *IncV = Phi->getIncomingValue(Idx);
1957  return Phi != getLoopPhiForCounter(IncV, L, DT);
1958 }
1959 
1960 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
1961 /// down to checking that all operands are constant and listing instructions
1962 /// that may hide undef.
1964  unsigned Depth) {
1965  if (isa<Constant>(V))
1966  return !isa<UndefValue>(V);
1967 
1968  if (Depth >= 6)
1969  return false;
1970 
1971  // Conservatively handle non-constant non-instructions. For example, Arguments
1972  // may be undef.
1973  Instruction *I = dyn_cast<Instruction>(V);
1974  if (!I)
1975  return false;
1976 
1977  // Load and return values may be undef.
1978  if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
1979  return false;
1980 
1981  // Optimistically handle other instructions.
1982  for (Value *Op : I->operands()) {
1983  if (!Visited.insert(Op).second)
1984  continue;
1985  if (!hasConcreteDefImpl(Op, Visited, Depth+1))
1986  return false;
1987  }
1988  return true;
1989 }
1990 
1991 /// Return true if the given value is concrete. We must prove that undef can
1992 /// never reach it.
1993 ///
1994 /// TODO: If we decide that this is a good approach to checking for undef, we
1995 /// may factor it into a common location.
1996 static bool hasConcreteDef(Value *V) {
1997  SmallPtrSet<Value*, 8> Visited;
1998  Visited.insert(V);
1999  return hasConcreteDefImpl(V, Visited, 0);
2000 }
2001 
2002 /// Return true if this IV has any uses other than the (soon to be rewritten)
2003 /// loop exit test.
2004 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
2005  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
2006  Value *IncV = Phi->getIncomingValue(LatchIdx);
2007 
2008  for (User *U : Phi->users())
2009  if (U != Cond && U != IncV) return false;
2010 
2011  for (User *U : IncV->users())
2012  if (U != Cond && U != Phi) return false;
2013  return true;
2014 }
2015 
/// Find an affine IV in canonical form.
///
/// BECount may be an i8* pointer type. The pointer difference is already
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
///
/// Currently only valid for LFTR. See the comments on hasConcreteDef below.
///
/// FIXME: Accept -1 stride and set IVLimit = IVInit - BECount
///
/// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride.
/// This is difficult in general for SCEV because of potential overflow. But we
/// could at least handle constant BECounts.
static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount,
                                ScalarEvolution *SE, DominatorTree *DT) {
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  // The exit condition currently guarding the latch; used below to decide
  // whether a candidate phi is "almost dead" (only used by the exit test).
  Value *Cond =
      cast<BranchInst>(L->getExitingBlock()->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "needsLFTR should guarantee a loop latch");
  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    PHINode *Phi = cast<PHINode>(I);
    if (!SE->isSCEVable(Phi->getType()))
      continue;

    // Avoid comparing an integer IV against a pointer Limit.
    if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
      continue;

    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
    if (!AR || AR->getLoop() != L || !AR->isAffine())
      continue;

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
      continue;

    // Only unit-stride (step == 1) recurrences qualify as LFTR counters.
    const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
    if (!Step || !Step->isOne())
      continue;

    // The latch value must be the phi's own increment (a simple counter).
    int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
    Value *IncV = Phi->getIncomingValue(LatchIdx);
    if (getLoopPhiForCounter(IncV, L, DT) != Phi)
      continue;

    // Avoid reusing a potentially undef value to compute other values that may
    // have originally had a concrete definition.
    if (!hasConcreteDef(Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop test. In this case we assume that performing LFTR could not
      // increase the number of undef users.
      if (ICmpInst *Cond = getLoopTest(L)) {
        if (Phi != getLoopPhiForCounter(Cond->getOperand(0), L, DT) &&
            Phi != getLoopPhiForCounter(Cond->getOperand(1), L, DT)) {
          continue;
        }
      }
    }
    const SCEV *Init = AR->getStart();

    if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
      // Don't force a live loop counter if another IV can be used.
      if (AlmostDeadIV(Phi, LatchBlock, Cond))
        continue;

      // Prefer to count-from-zero. This is a more "canonical" counter form. It
      // also prefers integer to pointer IVs.
      if (BestInit->isZero() != Init->isZero()) {
        if (BestInit->isZero())
          continue;
      }
      // If two IVs both count from zero or both count from nonzero then the
      // narrower is likely a dead phi that has been widened. Use the wider phi
      // to allow the other to be eliminated.
      else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
        continue;
    }
    BestPhi = Phi;
    BestInit = Init;
  }
  return BestPhi;
}
2109 
2110 /// Help linearFunctionTestReplace by generating a value that holds the RHS of
2111 /// the new loop test.
2112 static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
2113  SCEVExpander &Rewriter, ScalarEvolution *SE) {
2114  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2115  assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter");
2116  const SCEV *IVInit = AR->getStart();
2117 
2118  // IVInit may be a pointer while IVCount is an integer when FindLoopCounter
2119  // finds a valid pointer IV. Sign extend BECount in order to materialize a
2120  // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
2121  // the existing GEPs whenever possible.
2122  if (IndVar->getType()->isPointerTy() && !IVCount->getType()->isPointerTy()) {
2123  // IVOffset will be the new GEP offset that is interpreted by GEP as a
2124  // signed value. IVCount on the other hand represents the loop trip count,
2125  // which is an unsigned value. FindLoopCounter only allows induction
2126  // variables that have a positive unit stride of one. This means we don't
2127  // have to handle the case of negative offsets (yet) and just need to zero
2128  // extend IVCount.
2129  Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
2130  const SCEV *IVOffset = SE->getTruncateOrZeroExtend(IVCount, OfsTy);
2131 
2132  // Expand the code for the iteration count.
2133  assert(SE->isLoopInvariant(IVOffset, L) &&
2134  "Computed iteration count is not loop invariant!");
2135  BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
2136  Value *GEPOffset = Rewriter.expandCodeFor(IVOffset, OfsTy, BI);
2137 
2138  Value *GEPBase = IndVar->getIncomingValueForBlock(L->getLoopPreheader());
2139  assert(AR->getStart() == SE->getSCEV(GEPBase) && "bad loop counter");
2140  // We could handle pointer IVs other than i8*, but we need to compensate for
2141  // gep index scaling. See canExpandBackedgeTakenCount comments.
2143  cast<PointerType>(GEPBase->getType())
2144  ->getElementType())->isOne() &&
2145  "unit stride pointer IV must be i8*");
2146 
2147  IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
2148  return Builder.CreateGEP(nullptr, GEPBase, GEPOffset, "lftr.limit");
2149  } else {
2150  // In any other case, convert both IVInit and IVCount to integers before
2151  // comparing. This may result in SCEV expansion of pointers, but in practice
2152  // SCEV will fold the pointer arithmetic away as such:
2153  // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
2154  //
2155  // Valid Cases: (1) both integers is most common; (2) both may be pointers
2156  // for simple memset-style loops.
2157  //
2158  // IVInit integer and IVCount pointer would only occur if a canonical IV
2159  // were generated on top of case #2, which is not expected.
2160 
2161  const SCEV *IVLimit = nullptr;
2162  // For unit stride, IVCount = Start + BECount with 2's complement overflow.
2163  // For non-zero Start, compute IVCount here.
2164  if (AR->getStart()->isZero())
2165  IVLimit = IVCount;
2166  else {
2167  assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
2168  const SCEV *IVInit = AR->getStart();
2169 
2170  // For integer IVs, truncate the IV before computing IVInit + BECount.
2171  if (SE->getTypeSizeInBits(IVInit->getType())
2172  > SE->getTypeSizeInBits(IVCount->getType()))
2173  IVInit = SE->getTruncateExpr(IVInit, IVCount->getType());
2174 
2175  IVLimit = SE->getAddExpr(IVInit, IVCount);
2176  }
2177  // Expand the code for the iteration count.
2178  BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
2179  IRBuilder<> Builder(BI);
2180  assert(SE->isLoopInvariant(IVLimit, L) &&
2181  "Computed iteration count is not loop invariant!");
2182  // Ensure that we generate the same type as IndVar, or a smaller integer
2183  // type. In the presence of null pointer values, we have an integer type
2184  // SCEV expression (IVInit) for a pointer type IV value (IndVar).
2185  Type *LimitTy = IVCount->getType()->isPointerTy() ?
2186  IndVar->getType() : IVCount->getType();
2187  return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
2188  }
2189 }
2190 
2191 /// This method rewrites the exit condition of the loop to be a canonical !=
2192 /// comparison against the incremented loop induction variable. This pass is
2193 /// able to rewrite the exit tests of any loop where the SCEV analysis can
2194 /// determine a loop-invariant trip count of the loop, which is actually a much
2195 /// broader range than just linear tests.
2196 Value *IndVarSimplify::
2197 linearFunctionTestReplace(Loop *L,
2198  const SCEV *BackedgeTakenCount,
2199  PHINode *IndVar,
2200  SCEVExpander &Rewriter) {
2201  assert(canExpandBackedgeTakenCount(L, SE, Rewriter) && "precondition");
2202 
2203  // Initialize CmpIndVar and IVCount to their preincremented values.
2204  Value *CmpIndVar = IndVar;
2205  const SCEV *IVCount = BackedgeTakenCount;
2206 
2207  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
2208 
2209  // If the exiting block is the same as the backedge block, we prefer to
2210  // compare against the post-incremented value, otherwise we must compare
2211  // against the preincremented value.
2212  if (L->getExitingBlock() == L->getLoopLatch()) {
2213  // Add one to the "backedge-taken" count to get the trip count.
2214  // This addition may overflow, which is valid as long as the comparison is
2215  // truncated to BackedgeTakenCount->getType().
2216  IVCount = SE->getAddExpr(BackedgeTakenCount,
2217  SE->getOne(BackedgeTakenCount->getType()));
2218  // The BackedgeTaken expression contains the number of times that the
2219  // backedge branches to the loop header. This is one less than the
2220  // number of times the loop executes, so use the incremented indvar.
2221  CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
2222  }
2223 
2224  Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE);
2225  assert(ExitCnt->getType()->isPointerTy() ==
2226  IndVar->getType()->isPointerTy() &&
2227  "genLoopLimit missed a cast");
2228 
2229  // Insert a new icmp_ne or icmp_eq instruction before the branch.
2230  BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
2232  if (L->contains(BI->getSuccessor(0)))
2233  P = ICmpInst::ICMP_NE;
2234  else
2235  P = ICmpInst::ICMP_EQ;
2236 
2237  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
2238  << " LHS:" << *CmpIndVar << '\n'
2239  << " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
2240  << "\n"
2241  << " RHS:\t" << *ExitCnt << "\n"
2242  << " IVCount:\t" << *IVCount << "\n");
2243 
2244  IRBuilder<> Builder(BI);
2245 
2246  // The new loop exit condition should reuse the debug location of the
2247  // original loop exit condition.
2248  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
2249  Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
2250 
2251  // LFTR can ignore IV overflow and truncate to the width of
2252  // BECount. This avoids materializing the add(zext(add)) expression.
2253  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
2254  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
2255  if (CmpIndVarSize > ExitCntSize) {
2256  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2257  const SCEV *ARStart = AR->getStart();
2258  const SCEV *ARStep = AR->getStepRecurrence(*SE);
2259  // For constant IVCount, avoid truncation.
2260  if (isa<SCEVConstant>(ARStart) && isa<SCEVConstant>(IVCount)) {
2261  const APInt &Start = cast<SCEVConstant>(ARStart)->getAPInt();
2262  APInt Count = cast<SCEVConstant>(IVCount)->getAPInt();
2263  // Note that the post-inc value of BackedgeTakenCount may have overflowed
2264  // above such that IVCount is now zero.
2265  if (IVCount != BackedgeTakenCount && Count == 0) {
2266  Count = APInt::getMaxValue(Count.getBitWidth()).zext(CmpIndVarSize);
2267  ++Count;
2268  }
2269  else
2270  Count = Count.zext(CmpIndVarSize);
2271  APInt NewLimit;
2272  if (cast<SCEVConstant>(ARStep)->getValue()->isNegative())
2273  NewLimit = Start - Count;
2274  else
2275  NewLimit = Start + Count;
2276  ExitCnt = ConstantInt::get(CmpIndVar->getType(), NewLimit);
2277 
2278  LLVM_DEBUG(dbgs() << " Widen RHS:\t" << *ExitCnt << "\n");
2279  } else {
2280  // We try to extend trip count first. If that doesn't work we truncate IV.
2281  // Zext(trunc(IV)) == IV implies equivalence of the following two:
2282  // Trunc(IV) == ExitCnt and IV == zext(ExitCnt). Similarly for sext. If
2283  // one of the two holds, extend the trip count, otherwise we truncate IV.
2284  bool Extended = false;
2285  const SCEV *IV = SE->getSCEV(CmpIndVar);
2286  const SCEV *ZExtTrunc =
2287  SE->getZeroExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2288  ExitCnt->getType()),
2289  CmpIndVar->getType());
2290 
2291  if (ZExtTrunc == IV) {
2292  Extended = true;
2293  ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
2294  "wide.trip.count");
2295  } else {
2296  const SCEV *SExtTrunc =
2297  SE->getSignExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2298  ExitCnt->getType()),
2299  CmpIndVar->getType());
2300  if (SExtTrunc == IV) {
2301  Extended = true;
2302  ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
2303  "wide.trip.count");
2304  }
2305  }
2306 
2307  if (!Extended)
2308  CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
2309  "lftr.wideiv");
2310  }
2311  }
2312  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
2313  Value *OrigCond = BI->getCondition();
2314  // It's tempting to use replaceAllUsesWith here to fully replace the old
2315  // comparison, but that's not immediately safe, since users of the old
2316  // comparison may not be dominated by the new comparison. Instead, just
2317  // update the branch to use the new comparison; in the common case this
2318  // will make old comparison dead.
2319  BI->setCondition(Cond);
2320  DeadInsts.push_back(OrigCond);
2321 
2322  ++NumLFTR;
2323  Changed = true;
2324  return Cond;
2325 }
2326 
2327 //===----------------------------------------------------------------------===//
2328 // sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
2329 //===----------------------------------------------------------------------===//
2330 
/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
void IndVarSimplify::sinkUnusedInvariants(Loop *L) {
  // Only safe with a unique exit block; otherwise the def may not dominate
  // all exits.
  BasicBlock *ExitBlock = L->getExitBlock();
  if (!ExitBlock) return;

  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) return;

  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
  // Walk the preheader backwards from its terminator, so instructions are
  // sunk in an order that keeps their operands available.
  BasicBlock::iterator I(Preheader->getTerminator());
  while (I != Preheader->begin()) {
    --I;
    // New instructions were inserted at the end of the preheader.
    if (isa<PHINode>(I))
      break;

    // Don't move instructions which might have side effects, since the side
    // effects need to complete before instructions inside the loop.  Also don't
    // move instructions which might read memory, since the loop may modify
    // memory. Note that it's okay if the instruction might have undefined
    // behavior: LoopSimplify guarantees that the preheader dominates the exit
    // block.
    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
      continue;

    // Skip debug info intrinsics.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip eh pad instructions.
    if (I->isEHPad())
      continue;

    // Don't sink alloca: we never want to sink static alloca's out of the
    // entry block, and correctly sinking dynamic alloca's requires
    // checks for stacksave/stackrestore intrinsics.
    // FIXME: Refactor this check somehow?
    if (isa<AllocaInst>(I))
      continue;

    // Determine if there is a use in or before the loop (direct or
    // otherwise).
    bool UsedInLoop = false;
    for (Use &U : I->uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      BasicBlock *UseBB = User->getParent();
      // A phi use happens on the incoming edge, so the effective use block
      // is the predecessor, not the phi's own block.
      if (PHINode *P = dyn_cast<PHINode>(User)) {
        unsigned i =
          PHINode::getIncomingValueNumForOperand(U.getOperandNo());
        UseBB = P->getIncomingBlock(i);
      }
      if (UseBB == Preheader || L->contains(UseBB)) {
        UsedInLoop = true;
        break;
      }
    }

    // If there is, the def must remain in the preheader.
    if (UsedInLoop)
      continue;

    // Otherwise, sink it to the exit block.
    // Advance I past any debug intrinsics BEFORE moving ToMove, so the
    // iterator stays valid; Done marks that we've hit the block's start.
    Instruction *ToMove = &*I;
    bool Done = false;

    if (I != Preheader->begin()) {
      // Skip debug info intrinsics.
      do {
        --I;
      } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());

      if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
        Done = true;
    } else {
      Done = true;
    }

    ToMove->moveBefore(*ExitBlock, InsertPt);
    if (Done) break;
    // Keep sunk instructions in their original relative order by inserting
    // the next one before the one just moved.
    InsertPt = ToMove->getIterator();
  }
}
2415 
2416 //===----------------------------------------------------------------------===//
2417 // IndVarSimplify driver. Manage several subpasses of IV simplification.
2418 //===----------------------------------------------------------------------===//
2419 
2420 bool IndVarSimplify::run(Loop *L) {
2421  // We need (and expect!) the incoming loop to be in LCSSA.
2422  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2423  "LCSSA required to run indvars!");
2424 
2425  // If LoopSimplify form is not available, stay out of trouble. Some notes:
2426  // - LSR currently only supports LoopSimplify-form loops. Indvars'
2427  // canonicalization can be a pessimization without LSR to "clean up"
2428  // afterwards.
2429  // - We depend on having a preheader; in particular,
2430  // Loop::getCanonicalInductionVariable only supports loops with preheaders,
2431  // and we're in trouble if we can't find the induction variable even when
2432  // we've manually inserted one.
2433  // - LFTR relies on having a single backedge.
2434  if (!L->isLoopSimplifyForm())
2435  return false;
2436 
2437  // If there are any floating-point recurrences, attempt to
2438  // transform them to use integer recurrences.
2439  rewriteNonIntegerIVs(L);
2440 
2441  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
2442 
2443  // Create a rewriter object which we'll use to transform the code with.
2444  SCEVExpander Rewriter(*SE, DL, "indvars");
2445 #ifndef NDEBUG
2446  Rewriter.setDebugType(DEBUG_TYPE);
2447 #endif
2448 
2449  // Eliminate redundant IV users.
2450  //
2451  // Simplification works best when run before other consumers of SCEV. We
2452  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
2453  // other expressions involving loop IVs have been evaluated. This helps SCEV
2454  // set no-wrap flags before normalizing sign/zero extension.
2455  Rewriter.disableCanonicalMode();
2456  simplifyAndExtend(L, Rewriter, LI);
2457 
2458  // Check to see if this loop has a computable loop-invariant execution count.
2459  // If so, this means that we can compute the final value of any expressions
2460  // that are recurrent in the loop, and substitute the exit values from the
2461  // loop into any instructions outside of the loop that use the final values of
2462  // the current expressions.
2463  //
2464  if (ReplaceExitValue != NeverRepl &&
2465  !isa<SCEVCouldNotCompute>(BackedgeTakenCount))
2466  rewriteLoopExitValues(L, Rewriter);
2467 
2468  // Eliminate redundant IV cycles.
2469  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
2470 
2471  // If we have a trip count expression, rewrite the loop's exit condition
2472  // using it. We can currently only handle loops with a single exit.
2473  if (!DisableLFTR && canExpandBackedgeTakenCount(L, SE, Rewriter) &&
2474  needsLFTR(L, DT)) {
2475  PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT);
2476  if (IndVar) {
2477  // Check preconditions for proper SCEVExpander operation. SCEV does not
2478  // express SCEVExpander's dependencies, such as LoopSimplify. Instead any
2479  // pass that uses the SCEVExpander must do it. This does not work well for
2480  // loop passes because SCEVExpander makes assumptions about all loops,
2481  // while LoopPassManager only forces the current loop to be simplified.
2482  //
2483  // FIXME: SCEV expansion has no way to bail out, so the caller must
2484  // explicitly check any assumptions made by SCEV. Brittle.
2485  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount);
2486  if (!AR || AR->getLoop()->getLoopPreheader())
2487  (void)linearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
2488  Rewriter);
2489  }
2490  }
2491  // Clear the rewriter cache, because values that are in the rewriter's cache
2492  // can be deleted in the loop below, causing the AssertingVH in the cache to
2493  // trigger.
2494  Rewriter.clear();
2495 
2496  // Now that we're done iterating through lists, clean up any instructions
2497  // which are now dead.
2498  while (!DeadInsts.empty())
2499  if (Instruction *Inst =
2500  dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
2502 
2503  // The Rewriter may not be used from this point on.
2504 
2505  // Loop-invariant instructions in the preheader that aren't used in the
2506  // loop may be sunk below the loop to reduce register pressure.
2507  sinkUnusedInvariants(L);
2508 
2509  // rewriteFirstIterationLoopExitValues does not rely on the computation of
2510  // trip count and therefore can further simplify exit values in addition to
2511  // rewriteLoopExitValues.
2512  rewriteFirstIterationLoopExitValues(L);
2513 
2514  // Clean up dead instructions.
2515  Changed |= DeleteDeadPHIs(L->getHeader(), TLI);
2516 
2517  // Check a post-condition.
2518  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2519  "Indvars did not preserve LCSSA!");
2520 
2521  // Verify that LFTR, and any other change have not interfered with SCEV's
2522  // ability to compute trip count.
2523 #ifndef NDEBUG
2524  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
2525  SE->forgetLoop(L);
2526  const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
2527  if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
2528  SE->getTypeSizeInBits(NewBECount->getType()))
2529  NewBECount = SE->getTruncateOrNoop(NewBECount,
2530  BackedgeTakenCount->getType());
2531  else
2532  BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
2533  NewBECount->getType());
2534  assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV");
2535  }
2536 #endif
2537 
2538  return Changed;
2539 }
2540 
2543  LPMUpdater &) {
2544  Function *F = L.getHeader()->getParent();
2545  const DataLayout &DL = F->getParent()->getDataLayout();
2546 
2547  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI);
2548  if (!IVS.run(&L))
2549  return PreservedAnalyses::all();
2550 
2551  auto PA = getLoopPassPreservedAnalyses();
2552  PA.preserveSet<CFGAnalyses>();
2553  return PA;
2554 }
2555 
2556 namespace {
2557 
2558 struct IndVarSimplifyLegacyPass : public LoopPass {
2559  static char ID; // Pass identification, replacement for typeid
2560 
2561  IndVarSimplifyLegacyPass() : LoopPass(ID) {
2563  }
2564 
2565  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
2566  if (skipLoop(L))
2567  return false;
2568 
2569  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2570  auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2571  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2572  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2573  auto *TLI = TLIP ? &TLIP->getTLI() : nullptr;
2574  auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
2575  auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
2576  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2577 
2578  IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI);
2579  return IVS.run(L);
2580  }
2581 
2582  void getAnalysisUsage(AnalysisUsage &AU) const override {
2583  AU.setPreservesCFG();
2585  }
2586 };
2587 
2588 } // end anonymous namespace
2589 
2591 
2592 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
2593  "Induction Variable Simplification", false, false)
2595 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
2596  "Induction Variable Simplification", false, false)
2597 
2599  return new IndVarSimplifyLegacyPass();
2600 }
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:81
uint64_t CallInst * C
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:67
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
bool hoistIVInc(Instruction *IncV, Instruction *InsertPos)
Utility for hoisting an IV increment.
Induction Variable Simplification
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:72
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1846
iterator_range< use_iterator > uses()
Definition: Value.h:354
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:157
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Value * getExactExistingExpansion(const SCEV *S, const Instruction *At, Loop *L)
Try to find existing LLVM IR value for S available at the point At.
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT)
Perform a quick domtree based check for loop invariance assuming that V is used within the loop...
MutableArrayRef< T > makeMutableArrayRef(T &OneElt)
Construct a MutableArrayRef from a single element.
Definition: ArrayRef.h:503
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
This header provides classes for managing a pipeline of passes over loops in LLVM IR...
Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:137
bool isRecursivelyLCSSAForm(DominatorTree &DT, const LoopInfo &LI) const
Return true if this Loop and all inner subloops are in LCSSA form.
Definition: LoopInfo.cpp:183
bool isLCSSAForm(DominatorTree &DT) const
Return true if the Loop is in LCSSA form.
Definition: LoopInfo.cpp:176
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
The main scalar evolution driver.
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:864
bool isZero() const
Return true if the expression is a constant zero.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:106
bool isHighCostExpansion(const SCEV *Expr, Loop *L, const Instruction *At=nullptr)
Return true for expressions that may incur non-trivial cost to evaluate at runtime.
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:891
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:713
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:901
uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
void setDebugType(const char *s)
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
findNearestCommonDominator - Find nearest common dominator basic block for basic block A and B...
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
BasicBlock * getSuccessor(unsigned i) const
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1560
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
STATISTIC(NumFunctions, "Total number of functions")
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
F(f)
static cl::opt< bool > UsePostIncrementRanges("indvars-post-increment-ranges", cl::Hidden, cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), cl::init(true))
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:177
Hexagon Common GEP
Value * getCondition() const
static ConstantRange makeAllowedICmpRegion(CmpInst::Predicate Pred, const ConstantRange &Other)
Produce the smallest range such that all values that may satisfy the given predicate with any value c...
This defines the Use class.
static bool hasConcreteDef(Value *V)
Return true if the given value is concrete.
ReplaceExitVal
bool hasLoopInvariantOperands(const Instruction *I) const
Return true if all the operands of the specified instruction are loop invariant.
Definition: LoopInfo.cpp:63
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1493
INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars", "Induction Variable Simplification", false, false) INITIALIZE_PASS_END(IndVarSimplifyLegacyPass
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:264
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1069
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:191
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:896
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
static Value * genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L, SCEVExpander &Rewriter, ScalarEvolution *SE)
Help linearFunctionTestReplace by generating a value that holds the RHS of the new loop test...
Interface for visiting interesting IV users that are recognized but not simplified by this utility...
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:134
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
bool isSigned() const
Definition: InstrTypes.h:1054
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:612
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE, etc.
Definition: InstrTypes.h:983
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:361
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:684
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:362
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:592
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal)
Convert APF to an integer, if possible.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:731
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:892
This file implements a class to represent arbitrary precision integral constant values and operations...
BlockT * getHeader() const
Definition: LoopInfo.h:100
Key
PAL metadata keys.
void findDbgValues(SmallVectorImpl< DbgValueInst *> &DbgValues, Value *V)
Finds the llvm.dbg.value intrinsics describing a value.
Definition: Local.cpp:1478
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:845
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
Value handle that is nullable, but tries to track the Value.
Definition: ValueHandle.h:182
This node represents a polynomial recurrence on the trip count of the specified loop.
This instruction compares its operands according to the predicate given to the constructor.
static Instruction * getInsertPointForUses(Instruction *User, Value *Def, DominatorTree *DT, LoopInfo *LI)
Determine the insertion point for this user.
static cl::opt< bool > VerifyIndvars("verify-indvars", cl::Hidden, cl::desc("Verify the ScalarEvolution result after running indvars"))
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Examine each PHI in the given block and delete it if it is dead.
const SCEV * getSizeOfExpr(Type *IntTy, Type *AllocTy)
Return an expression for sizeof AllocTy that is type IntTy.
ConstantRange intersectWith(const ConstantRange &CR) const
Return the range that results from the intersection of this range with another range.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:126
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:151
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:439
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1556
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:301
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:142
static BinaryOperator * CreateAdd(Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block...
Definition: IRBuilder.h:127
Value * getOperand(unsigned i) const
Definition: User.h:170
void getUniqueExitBlocks(SmallVectorImpl< BasicBlock *> &ExitBlocks) const
Return all unique successor blocks of this loop.
Definition: LoopInfo.cpp:394
void initializeIndVarSimplifyLegacyPassPass(PassRegistry &)
void clearInsertPoint()
Clear the current insertion point.
static MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition: Metadata.cpp:106
void clear()
Erase the contents of the InsertedExpressions map so that users trying to expand the same expression ...
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:410
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt...
Definition: PatternMatch.h:177
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:218
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:282
static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE, SCEVExpander &Rewriter)
Return true if this loop&#39;s backedge taken count expression can be safely and cheaply expanded into an...
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
Conditional or Unconditional Branch instruction.
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl< Value *> &Visited, unsigned Depth)
Recursive helper for hasConcreteDef().
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:630
Value * getIncomingValueForBlock(const BasicBlock *BB) const
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:221
const Instruction & front() const
Definition: BasicBlock.h:276
#define H(x, y, z)
Definition: MD5.cpp:57
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:264
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE, const TargetTransformInfo *TTI)
Update information about the induction variable that is extended by this sign or zero extend operatio...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
bool mayHaveSideEffects() const
Return true if the instruction may have side effects.
Definition: Instruction.h:536
const SCEV * getAddExpr(SmallVectorImpl< const SCEV *> &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:490
Represent the analysis usage information of a pass.
This file declares a class to represent arbitrary precision floating point values and provide a varie...
bool any_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:915
This instruction compares its operands according to the predicate given to the constructor.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:885
Utility class for integer operators which may exhibit overflow - Add, Sub, Mul, and Shl...
Definition: Operator.h:67
Value * expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I)
Insert code to directly compute the specified SCEV expression into the program.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values...
op_range operands()
Definition: User.h:238
self_iterator getIterator()
Definition: ilist_node.h:82
Class to represent integer types.
Definition: DerivedTypes.h:40
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:76
void getExitingBlocks(SmallVectorImpl< BlockT *> &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:35
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1382
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr)
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:429
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
static unsigned getIncomingValueNumForOperand(unsigned i)
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
static ICmpInst * getLoopTest(Loop *L)
Return the compare guarding the loop latch, or NULL for unrecognized tests.
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:900
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1552
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:57
signed greater than
Definition: InstrTypes.h:912
#define DEBUG_TYPE
const APFloat & getValueAPF() const
Definition: Constants.h:299
Value * CreateGEP(Value *Ptr, ArrayRef< Value *> IdxList, const Twine &Name="")
Definition: IRBuilder.h:1374
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:889
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:110
unsigned getSCEVType() const
Iterator for intrusive lists based on ilist_node.
unsigned getNumOperands() const
Definition: User.h:192
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
Type * getType() const
Return the LLVM type of this SCEV expression.
constexpr bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:301
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
const SCEV * getTruncateExpr(const SCEV *Op, Type *Ty)
static ValueAsMetadata * get(Value *V)
Definition: Metadata.cpp:349
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:899
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:861
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:244
Module.h This file contains the declarations for the Module class.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:64
Provides information about what library functions are available for the current target.
This class represents a range of values.
Definition: ConstantRange.h:47
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU...
Definition: DataLayout.h:243
signed less than
Definition: InstrTypes.h:914
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:382
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:611
static PHINode * getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT)
Return the loop header phi IFF IncV adds a loop invariant value to the phi.
bool isConditional() const
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:286
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Value handle that asserts if the Value is deleted.
Definition: ValueHandle.h:238
static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI)
This IV user cannot be widen.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
Definition: PatternMatch.h:772
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:924
signed less or equal
Definition: InstrTypes.h:915
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:55
Class for arbitrary precision integers.
Definition: APInt.h:69
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), Instruction *InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
iterator_range< user_iterator > users()
Definition: Value.h:399
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property...
Definition: Operator.h:96
static cl::opt< ReplaceExitVal > ReplaceExitValue("replexitval", cl::Hidden, cl::init(OnlyCheapRepl), cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), clEnumValN(OnlyCheapRepl, "cheap", "only replace exit value when the cost is cheap"), clEnumValN(AlwaysRepl, "always", "always replace exit value whenever possible")))
This class uses information about analyze scalars to rewrite expressions in canonical form...
Represents analyses that only rely on functions&#39; control flow.
Definition: PassManager.h:114
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:523
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:605
LoopT * getParentLoop() const
Definition: LoopInfo.h:101
Virtual Register Rewriter
Definition: VirtRegMap.cpp:222
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:959
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
Definition: LoopInfo.cpp:192
void emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:653
This class represents an analyzed expression in the program.
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:176
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:62
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:445
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:149
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:224
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:108
#define I(x, y, z)
Definition: MD5.cpp:58
user_iterator_impl< User > user_iterator
Definition: Value.h:368
bool mayReadFromMemory() const
Return true if this instruction may read memory.
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass&#39;s AnalysisUsage.
Definition: LoopUtils.cpp:1226
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:893
static bool needsLFTR(Loop *L, DominatorTree *DT)
linearFunctionTestReplace policy.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:320
bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, SmallVectorImpl< WeakTrackingVH > &Dead, SCEVExpander &Rewriter, IVVisitor *V=nullptr)
simplifyUsersOfIV - Simplify instructions that use this induction variable by using ScalarEvolution t...
int getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info=OK_AnyValue, OperandValueKind Opd2Info=OK_AnyValue, OperandValueProperties Opd1PropInfo=OP_None, OperandValueProperties Opd2PropInfo=OP_None, ArrayRef< const Value *> Args=ArrayRef< const Value *>()) const
This is an approximation of reciprocal throughput of a math/logic op.
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:897
unsigned replaceCongruentIVs(Loop *L, const DominatorTree *DT, SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetTransformInfo *TTI=nullptr)
replace congruent phis with their most canonical representative.
const SCEV * getBackedgeTakenCount(const Loop *L)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
This class represents a cast from signed integer to floating point.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
user_iterator user_begin()
Definition: Value.h:375
bool isOne() const
Return true if the expression is a constant one.
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:888
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:565
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction has no side ef...
Definition: Local.cpp:346
LLVM Value Representation.
Definition: Value.h:73
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:898
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:235
static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond)
Return true if this IV has any uses other than the (soon to be rewritten) loop exit test...
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:86
void disableCanonicalMode()
Disable the behavior of expanding expressions in canonical form rather than in a more literal form...
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:547
bool hasOneUse() const
Return true if there is exactly one user of this value.
Definition: Value.h:412
A container for analyses that lazily runs them and caches their results.
const SCEV * getTruncateOrZeroExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
This pass exposes codegen information to IR-level passes.
const TerminatorInst * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:138
This header defines various interfaces for pass management in LLVM.
void setIncomingValue(unsigned i, Value *V)
#define LLVM_DEBUG(X)
Definition: Debug.h:119
bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE)
Return true if the given expression is safe to expand in the sense that all materialized values are s...
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:156
static cl::opt< bool > DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), cl::desc("Disable Linear Function Test Replace optimization"))
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:890
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:50
bool use_empty() const
Definition: Value.h:322
Pass * createIndVarSimplifyPass()
signed greater or equal
Definition: InstrTypes.h:913
IntegerType * Int32Ty
const BasicBlock * getParent() const
Definition: Instruction.h:67
This class represents a constant integer value.
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
Definition: PatternMatch.h:974
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property...
Definition: Operator.h:90
user_iterator user_end()
Definition: Value.h:383
static PHINode * FindLoopCounter(Loop *L, const SCEV *BECount, ScalarEvolution *SE, DominatorTree *DT)
Find an affine IV in canonical form.