IndVarSimplify.cpp
//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value. This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable. If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
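//
//      For example (an illustrative sketch, not from a specific test case):
//        'for (i = 0; i != n; ++i) sum += 4;  use(sum)'
//      has the computable exit value 'sum == 4*n', so 'use(sum)' can be
//      rewritten to 'use(4*n)', leaving the loop dead.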
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
using namespace llvm;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened     , "Number of indvars widened");
STATISTIC(NumReplaced    , "Number of exit values replaced");
STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
  "verify-indvars", cl::Hidden,
  cl::desc("Verify the ScalarEvolution result after running indvars"));

enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, AlwaysRepl };

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
               clEnumValN(OnlyCheapRepl, "cheap",
                          "only replace exit value when the cost is cheap"),
               clEnumValN(AlwaysRepl, "always",
                          "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
  "indvars-post-increment-ranges", cl::Hidden,
  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
  cl::init(true));

static cl::opt<bool>
DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
            cl::desc("Disable Linear Function Test Replace optimization"));

namespace {
struct RewritePhi;

class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;

  SmallVector<WeakTrackingVH, 16> DeadInsts;
  bool Changed = false;

  bool isValidRewrite(Value *FromVal, Value *ToVal);

  void handleFloatingPointIV(Loop *L, PHINode *PH);
  void rewriteNonIntegerIVs(Loop *L);

  void simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);

  bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
  void rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
  void rewriteFirstIterationLoopExitValues(Loop *L);

  Value *linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
                                   PHINode *IndVar, SCEVExpander &Rewriter);

  void sinkUnusedInvariants(Loop *L);

  Value *expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, Loop *L,
                            Instruction *InsertPt, Type *Ty);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {}

  bool run(Loop *L);
};
}

/// Return true if the SCEV expansion generated by the rewriter can replace the
/// original value. SCEV guarantees that it produces the same value, but the way
/// it is produced may be illegal IR. Ideally, this function will only be
/// called for verification.
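///
/// As a schematic example (hypothetical IR, for illustration only): if %a and
/// %b point into different objects, an expansion that rewrites
/// 'getelementptr inbounds %a, %i' into address arithmetic based on %b changes
/// the underlying object of the address, which the inbounds rules forbid.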
bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
  // If an SCEV expression subsumed multiple pointers, its expansion could
  // reassociate the GEP changing the base pointer. This is illegal because the
  // final address produced by a GEP chain must be inbounds relative to its
  // underlying object. Otherwise basic alias analysis, among other things,
  // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
  // producing an expression involving multiple pointers. Until then, we must
  // bail out here.
  //
  // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
  // because it understands lcssa phis while SCEV does not.
  Value *FromPtr = FromVal;
  Value *ToPtr = ToVal;
  if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
    FromPtr = GEP->getPointerOperand();
  }
  if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
    ToPtr = GEP->getPointerOperand();
  }
  if (FromPtr != FromVal || ToPtr != ToVal) {
    // Quickly check the common case
    if (FromPtr == ToPtr)
      return true;

    // SCEV may have rewritten an expression that produces the GEP's pointer
    // operand. That's ok as long as the pointer operand has the same base
    // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
    // base of a recurrence. This handles the case in which SCEV expansion
    // converts a pointer type recurrence into a nonrecurrent pointer base
    // indexed by an integer recurrence.

    // If the GEP base pointer is a vector of pointers, abort.
    if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
      return false;

    const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
    const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
    if (FromBase == ToBase)
      return true;

    DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " << *FromBase
                 << " != " << *ToBase << "\n");

    return false;
  }
  return true;
}

/// Determine the insertion point for this user. By default, insert immediately
/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
/// common dominator for the incoming blocks.
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
                                          DominatorTree *DT, LoopInfo *LI) {
  PHINode *PHI = dyn_cast<PHINode>(User);
  if (!PHI)
    return User;

  Instruction *InsertPt = nullptr;
  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
    if (PHI->getIncomingValue(i) != Def)
      continue;

    BasicBlock *InsertBB = PHI->getIncomingBlock(i);
    if (!InsertPt) {
      InsertPt = InsertBB->getTerminator();
      continue;
    }
    InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
    InsertPt = InsertBB->getTerminator();
  }
  assert(InsertPt && "Missing phi operand");

  auto *DefI = dyn_cast<Instruction>(Def);
  if (!DefI)
    return InsertPt;

  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");

  auto *L = LI->getLoopFor(DefI->getParent());
  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));

  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
    if (LI->getLoopFor(DTN->getBlock()) == L)
      return DTN->getBlock()->getTerminator();

  llvm_unreachable("DefI dominates InsertPt!");
}

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t
  uint64_t UIntVal;
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}
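// A quick illustration: an APFloat holding 42.0 converts exactly to 42, while
// one holding 3.5 is rejected because the conversion is inexact.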

/// If the loop has a floating-point induction variable, insert a corresponding
/// integer induction variable if possible.
/// For example,
///   for (double i = 0; i < 10000; ++i)
///     bar(i)
/// is converted into
///   for (int i = 0; i < 10000; ++i)
///     bar((double)i);
///
void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return;

  // Check IV increment. Reject this PN if the increment operation is not
  // an add or if the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return;

  // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop. If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can. This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations. Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return;

  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
    BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                              Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison. The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value. In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
  }
  Changed = true;
}

void IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step. Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (BasicBlock::iterator I = Header->begin();
       PHINode *PN = dyn_cast<PHINode>(I); ++I)
    PHIs.push_back(PN);

  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // re-writing, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
}

namespace {
// Collect information about PHI nodes which can be transformed in
// rewriteLoopExitValues.
struct RewritePhi {
  PHINode *PN;
  unsigned Ith;   // Ith incoming value.
  Value *Val;     // Exit value after expansion.
  bool HighCost;  // High cost of expansion.

  RewritePhi(PHINode *P, unsigned I, Value *V, bool H)
      : PN(P), Ith(I), Val(V), HighCost(H) {}
};
}

Value *IndVarSimplify::expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S,
                                          Loop *L, Instruction *InsertPt,
                                          Type *ResultTy) {
  // Before expanding S into an expensive LLVM expression, see if we can use an
  // already existing value as the expansion for S.
  if (Value *ExistingValue = Rewriter.getExactExistingExpansion(S, InsertPt, L))
    if (ExistingValue->getType() == ResultTy)
      return ExistingValue;

  // We didn't find anything, fall back to using SCEVExpander.
  return Rewriter.expandCodeFor(S, ResultTy, InsertPt);
}

//===----------------------------------------------------------------------===//
// rewriteLoopExitValues - Optimize IV users outside the loop.
// As a side effect, reduces the amount of IV processing within the loop.
//===----------------------------------------------------------------------===//

/// Check to see if this loop has a computable loop-invariant execution count.
/// If so, this means that we can compute the final value of any expressions
/// that are recurrent in the loop, and substitute the exit values from the loop
/// into any instructions outside of the loop that use the final values of the
/// current expressions.
///
/// This is mostly redundant with the regular IndVarSimplify activities that
/// happen later, except that it's more powerful in some cases, because it's
/// able to brute-force evaluate arbitrary instructions as long as they have
/// constant operands at the beginning of the loop.
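///
/// Illustrative sketch (hypothetical IR): given an LCSSA phi
///   %sum.lcssa = phi i32 [ %sum.next, %loop ]
/// where %sum.next follows the add recurrence {0,+,3} in a loop with a known
/// trip count of 10, getSCEVAtScope folds the exit value to the constant 30,
/// and the phi's incoming value can be replaced by that expansion.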
void IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
  // Check a pre-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  SmallVector<RewritePhi, 8> RewritePhiSet;
  // Find all values that are computed inside the loop, but used outside of it.
  // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan
  // the exit blocks of the loop to find them.
  for (BasicBlock *ExitBB : ExitBlocks) {
    // If there are no PHI nodes in this exit block, then no values defined
    // inside the loop are used on this path, skip it.
    PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
    if (!PN) continue;

    unsigned NumPreds = PN->getNumIncomingValues();

    // Iterate over all of the PHI nodes.
    BasicBlock::iterator BBI = ExitBB->begin();
    while ((PN = dyn_cast<PHINode>(BBI++))) {
      if (PN->use_empty())
        continue; // dead use, don't replace it

      if (!SE->isSCEVable(PN->getType()))
        continue;

      // It's necessary to tell ScalarEvolution about this explicitly so that
      // it can walk the def-use list and forget all SCEVs, as it may not be
      // watching the PHI itself. Once the new exit value is in place, there
      // may not be a def-use connection between the loop and every instruction
      // which got a SCEVAddRecExpr for that loop.
      SE->forgetValue(PN);

      // Iterate over all of the values in all the PHI nodes.
      for (unsigned i = 0; i != NumPreds; ++i) {
        // If the value being merged in is not integer or is not defined
        // in the loop, skip it.
        Value *InVal = PN->getIncomingValue(i);
        if (!isa<Instruction>(InVal))
          continue;

        // If this pred is for a subloop, not L itself, skip it.
        if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
          continue; // The Block is in a subloop, skip it.

        // Check that InVal is defined in the loop.
        Instruction *Inst = cast<Instruction>(InVal);
        if (!L->contains(Inst))
          continue;

        // Okay, this instruction has a user outside of the current loop
        // and varies predictably *inside* the loop. Evaluate the value it
        // contains when the loop exits, if possible.
        const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
        if (!SE->isLoopInvariant(ExitValue, L) ||
            !isSafeToExpand(ExitValue, *SE))
          continue;

        // Computing the value outside of the loop brings no benefit if:
        //  - it is definitely used inside the loop in a way which can not be
        //    optimized away.
        //  - no use outside of the loop can take advantage of hoisting the
        //    computation out of the loop
        if (ExitValue->getSCEVType() >= scMulExpr) {
          unsigned NumHardInternalUses = 0;
          unsigned NumSoftExternalUses = 0;
          unsigned NumUses = 0;
          for (auto IB = Inst->user_begin(), IE = Inst->user_end();
               IB != IE && NumUses <= 6; ++IB) {
            Instruction *UseInstr = cast<Instruction>(*IB);
            unsigned Opc = UseInstr->getOpcode();
            NumUses++;
            if (L->contains(UseInstr)) {
              if (Opc == Instruction::Call || Opc == Instruction::Ret)
                NumHardInternalUses++;
            } else {
              if (Opc == Instruction::PHI) {
                // Do not count the Phi as a use. LCSSA may have inserted
                // plenty of trivial ones.
                NumUses--;
                for (auto PB = UseInstr->user_begin(),
                          PE = UseInstr->user_end();
                     PB != PE && NumUses <= 6; ++PB, ++NumUses) {
                  unsigned PhiOpc = cast<Instruction>(*PB)->getOpcode();
                  if (PhiOpc != Instruction::Call && PhiOpc != Instruction::Ret)
                    NumSoftExternalUses++;
                }
                continue;
              }
              if (Opc != Instruction::Call && Opc != Instruction::Ret)
                NumSoftExternalUses++;
            }
          }
          if (NumUses <= 6 && NumHardInternalUses && !NumSoftExternalUses)
            continue;
        }

        bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
        Value *ExitVal =
            expandSCEVIfNeeded(Rewriter, ExitValue, L, Inst, PN->getType());

        DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n'
                     << "  LoopVal = " << *Inst << "\n");

        if (!isValidRewrite(Inst, ExitVal)) {
          DeadInsts.push_back(ExitVal);
          continue;
        }

        // Collect all the candidate PHINodes to be rewritten.
        RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
      }
    }
  }

  bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);

  // Transformation.
  for (const RewritePhi &Phi : RewritePhiSet) {
    PHINode *PN = Phi.PN;
    Value *ExitVal = Phi.Val;

    // Only do the rewrite when the ExitValue can be expanded cheaply.
    // If LoopCanBeDel is true, rewrite exit value aggressively.
    if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
      DeadInsts.push_back(ExitVal);
      continue;
    }

    Changed = true;
    ++NumReplaced;
    Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
    PN->setIncomingValue(Phi.Ith, ExitVal);

    // If this instruction is dead now, delete it. Don't do it now to avoid
    // invalidating iterators.
    if (isInstructionTriviallyDead(Inst, TLI))
      DeadInsts.push_back(Inst);

    // Replace PN with ExitVal if that is legal and does not break LCSSA.
    if (PN->getNumIncomingValues() == 1 &&
        LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
      PN->replaceAllUsesWith(ExitVal);
      PN->eraseFromParent();
    }
  }

  // The insertion point instruction may have been deleted; clear it out
  // so that the rewriter doesn't trip over it later.
  Rewriter.clearInsertPoint();
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
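///
/// Sketch of the idea (hypothetical IR): if the header exits on a
/// loop-invariant %cond, then along that exit edge a header phi such as
///   %iv = phi i32 [ %start, %preheader ], [ %iv.next, %latch ]
/// still holds %start, so the matching LCSSA phi operand in the exit block
/// can be replaced with %start.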
void IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);
  auto *LoopHeader = L->getHeader();
  assert(LoopHeader && "Invalid loop");

  for (auto *ExitBB : ExitBlocks) {
    BasicBlock::iterator BBI = ExitBB->begin();
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    while (auto *PN = dyn_cast<PHINode>(BBI++)) {
      for (unsigned IncomingValIdx = 0, E = PN->getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN->getIncomingBlock(IncomingValIdx);

        // We currently only support loop exits from loop header. If the
        // incoming block is not loop header, we need to recursively check
        // all conditions starting from loop header are loop invariants.
        // Additional support might be added in the future.
        if (IncomingBB != LoopHeader)
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal =
            dyn_cast<PHINode>(PN->getIncomingValue(IncomingValIdx));

        // Only deal with PHIs.
        if (!ExitVal)
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == LoopHeader &&
                 "ExitVal must be in loop header");
          PN->setIncomingValue(IncomingValIdx,
                               ExitVal->getIncomingValue(PreheaderIdx));
        }
      }
    }
  }
}

/// Check whether it is possible to delete the loop after rewriting exit
/// value. If it is possible, ignore ReplaceExitValue and do rewriting
/// aggressively.
bool IndVarSimplify::canLoopBeDeleted(
    Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
  BasicBlock *Preheader = L->getLoopPreheader();
  // If there is no preheader, the loop will not be deleted.
  if (!Preheader)
    return false;

  // In the LoopDeletion pass, a Loop can be deleted when ExitingBlocks.size() > 1;
  // we obviate the multiple ExitingBlocks case here for simplicity.
  // TODO: If we see a testcase where a loop with multiple ExitingBlocks can be
  // deleted after exit value rewriting, we can enhance the logic here.
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);
  if (ExitBlocks.size() > 1 || ExitingBlocks.size() > 1)
    return false;

  BasicBlock *ExitBlock = ExitBlocks[0];
  BasicBlock::iterator BI = ExitBlock->begin();
  while (PHINode *P = dyn_cast<PHINode>(BI)) {
    Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);

    // If the Incoming value of P is found in RewritePhiSet, we know it
    // could be rewritten to use a loop invariant value in transformation
    // phase later. Skip it in the loop invariant check below.
    bool found = false;
    for (const RewritePhi &Phi : RewritePhiSet) {
      unsigned i = Phi.Ith;
      if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
        found = true;
        break;
      }
    }

    Instruction *I;
    if (!found && (I = dyn_cast<Instruction>(Incoming)))
      if (!L->hasLoopInvariantOperands(I))
        return false;

    ++BI;
  }

  for (auto *BB : L->blocks())
    if (any_of(*BB, [](Instruction &I) { return I.mayHaveSideEffects(); }))
      return false;

  return true;
}

//===----------------------------------------------------------------------===//
// IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

namespace {
// Collect information about induction variables that are used by sign/zero
// extend operations. This information is recorded by CollectExtend and provides
// the input to WidenIV.
struct WideIVInfo {
  PHINode *NarrowIV = nullptr;
  Type *WidestNativeType = nullptr; // Widest integer type created by [sz]ext.
  bool IsSigned = false;            // Was a sext user seen before a zext?
};
}

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
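///
/// For instance (illustrative): seeing 'sext i32 %iv to i64' on a target where
/// i64 is a legal integer type records WidestNativeType = i64 and
/// IsSigned = true in WI.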
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later). This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetics on the wider indvar are more
  // expensive than those on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable. We
  // could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}

namespace {

/// Record a link in the Narrow IV def-use chain along with the WideIV that
/// computes the same value as the Narrow IV def. This avoids caching Use*
/// pointers.
struct NarrowIVDefUse {
  Instruction *NarrowDef = nullptr;
  Instruction *NarrowUse = nullptr;
  Instruction *WideDef = nullptr;

  // True if the narrow def is never negative. Tracking this information lets
  // us use a sign extension instead of a zero extension or vice versa, when
  // profitable and legal.
  bool NeverNegative = false;

  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
                 bool NeverNegative)
      : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
        NeverNegative(NeverNegative) {}
};

/// The goal of this transform is to remove sign and zero extends without
/// creating any new induction variables. To do this, it creates a new phi of
/// the wider type and redirects all users, either removing extends or inserting
/// truncs whenever we stop propagating the type.
///
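/// A before/after sketch of the intended effect (hypothetical IR):
///   before: %iv = phi i32 ...;  %s = sext i32 %iv to i64;  use(%s)
///   after:  %iv.wide = phi i64 ...;                        use(%iv.wide)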
class WidenIV {
  // Parameters
  PHINode *OrigPhi;
  Type *WideType;

  // Context
  LoopInfo        *LI;
  Loop            *L;
  ScalarEvolution *SE;
  DominatorTree   *DT;

  // Does the module have any calls to the llvm.experimental.guard intrinsic
  // at all? If not we can avoid scanning instructions looking for guards.
  bool HasGuards;

  // Result
  PHINode *WidePhi;
  Instruction *WideInc;
  const SCEV *WideIncExpr;
  SmallVectorImpl<WeakTrackingVH> &DeadInsts;

  SmallPtrSet<Instruction *, 16> Widened;
  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;

  enum ExtendKind { ZeroExtended, SignExtended, Unknown };
  // A map tracking the kind of extension used to widen each narrow IV
  // and narrow IV user.
  // Key: pointer to a narrow IV or IV user.
  // Value: the kind of extension used to widen this Instruction.
  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;

  typedef std::pair<AssertingVH<Value>, AssertingVH<Instruction>> DefUserPair;
  // A map with control-dependent ranges for post increment IV uses. The key is
  // a pair of IV def and a use of this def denoting the context. The value is
  // a ConstantRange representing possible values of the def at the given
  // context.
  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;

  Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
                                              Instruction *UseI) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    return It == PostIncRangeInfos.end()
               ? Optional<ConstantRange>(None)
               : Optional<ConstantRange>(It->second);
  }

  void calculatePostIncRanges(PHINode *OrigPhi);
  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);
  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    if (It == PostIncRangeInfos.end())
      PostIncRangeInfos.insert({Key, R});
    else
      It->second = R.intersectWith(It->second);
  }

public:
  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
          DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
          bool HasGuards)
      : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
        L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
        HasGuards(HasGuards), WidePhi(nullptr), WideInc(nullptr),
        WideIncExpr(nullptr), DeadInsts(DI) {
    assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
    ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
  }

  PHINode *createWideIV(SCEVExpander &Rewriter);

protected:
  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
                          Instruction *Use);

  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
                                     const SCEVAddRecExpr *WideAR);
  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);

  ExtendKind getExtendKind(Instruction *I);

  typedef std::pair<const SCEVAddRecExpr *, ExtendKind> WidenedRecTy;

  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);

  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);

  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                              unsigned OpCode) const;

  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);

  bool widenLoopCompare(NarrowIVDefUse DU);

  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};
} // anonymous namespace

/// Perform a quick domtree based check for loop invariance assuming that V is
/// used within the loop. LoopInfo::isLoopInvariant() seems gratuitous for this
/// purpose.
static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT) {
  Instruction *Inst = dyn_cast<Instruction>(V);
  if (!Inst)
    return true;

  return DT->properlyDominates(Inst->getParent(), L->getHeader());
}

Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
                                 bool IsSigned, Instruction *Use) {
  // Set the debug location and conservative insertion point.
  IRBuilder<> Builder(Use);
  // Hoist the insertion point into loop preheaders as far as possible.
  for (const Loop *L = LI->getLoopFor(Use->getParent());
       L && L->getLoopPreheader() && isLoopInvariant(NarrowOper, L, DT);
       L = L->getParentLoop())
    Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());

  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
                    Builder.CreateZExt(NarrowOper, WideType);
}

/// Instantiate a wide operation to replace a narrow operation. This only needs
/// to handle operations that can evaluate to SCEVAddRec. It can safely return
/// 0 for any operation we decide not to clone.
Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
                                  const SCEVAddRecExpr *WideAR) {
  unsigned Opcode = DU.NarrowUse->getOpcode();
  switch (Opcode) {
  default:
    return nullptr;
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::Sub:
    return cloneArithmeticIVUser(DU, WideAR);

  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return cloneBitwiseIVUser(DU);
  }
}

Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");

  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
  // invariant and will be folded or hoisted. If it actually comes from a
  // widened IV, it should be removed during a future call to widenIVUse.
  bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      IsSigned, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      IsSigned, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
                                            const SCEVAddRecExpr *WideAR) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;

  // We're trying to find X such that
  //
  //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
  //
  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
  // and check using SCEV if any of them are correct.

  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
  // correct solution to X.
  auto GuessNonIVOperand = [&](bool SignExt) {
    const SCEV *WideLHS;
    const SCEV *WideRHS;

    auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
      if (SignExt)
        return SE->getSignExtendExpr(S, Ty);
      return SE->getZeroExtendExpr(S, Ty);
    };

    if (IVOpIdx == 0) {
      WideLHS = SE->getSCEV(WideDef);
      const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
      WideRHS = GetExtend(NarrowRHS, WideType);
    } else {
      const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
      WideLHS = GetExtend(NarrowLHS, WideType);
      WideRHS = SE->getSCEV(WideDef);
    }

    // WideUse is "WideDef `op.wide` X" as described in the comment.
    const SCEV *WideUse = nullptr;

    switch (NarrowUse->getOpcode()) {
    default:
      llvm_unreachable("No other possibility!");

    case Instruction::Add:
      WideUse = SE->getAddExpr(WideLHS, WideRHS);
      break;

    case Instruction::Mul:
      WideUse = SE->getMulExpr(WideLHS, WideRHS);
      break;

    case Instruction::UDiv:
      WideUse = SE->getUDivExpr(WideLHS, WideRHS);
      break;

    case Instruction::Sub:
      WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
      break;
    }

    return WideUse == WideAR;
  };

  bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
  if (!GuessNonIVOperand(SignExtend)) {
    SignExtend = !SignExtend;
    if (!GuessNonIVOperand(SignExtend))
      return nullptr;
  }

  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      SignExtend, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      SignExtend, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());

  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
  auto It = ExtendKindMap.find(I);
  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
  return It->second;
}

const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                                     unsigned OpCode) const {
  if (OpCode == Instruction::Add)
    return SE->getAddExpr(LHS, RHS);
  if (OpCode == Instruction::Sub)
    return SE->getMinusSCEV(LHS, RHS);
  if (OpCode == Instruction::Mul)
    return SE->getMulExpr(LHS, RHS);

  llvm_unreachable("Unsupported opcode.");
}

/// No-wrap operations can transfer sign extension of their result to their
/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the
/// operands is an AddRec for this loop, return the AddRec and the kind of
/// extension used.
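///
/// For instance (illustrative): for 'add nsw i32 %narrow, %k', the nsw flag
/// lets sign extension distribute over the add, so
/// sext(%narrow + %k) == sext(%narrow) + sext(%k), where sext(%narrow) is
/// already available as WideDef.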
WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
  // Handle the common case of add<nsw/nuw>
  const unsigned OpCode = DU.NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions supported yet.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return {nullptr, Unknown};

  // One operand (NarrowDef) has already been extended to WideDef. Now determine
  // if extending the other will lead to a recurrence.
  const unsigned ExtendOperIdx =
      DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
    cast<OverflowingBinaryOperator>(DU.NarrowUse);
  ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return {nullptr, Unknown};

  // When creating this SCEV expr, don't apply the current operation's NSW or
  // NUW flags. This instruction may be guarded by control flow that the no-wrap
  // behavior depends on. Non-control-equivalent instructions can be mapped to
  // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
  // semantics to those operations.
  const SCEV *lhs = SE->getSCEV(DU.WideDef);
  const SCEV *rhs = ExtendOperExpr;

  // Let's swap operands to the initial order for the case of non-commutative
  // operations, like SUB. See PR21014.
  if (ExtendOperIdx == 0)
    std::swap(lhs, rhs);
  const SCEVAddRecExpr *AddRec =
      dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));

  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};

  return {AddRec, ExtKind};
}

/// Is this instruction potentially interesting for further simplification after
/// widening its type? In other words, can the extend be safely hoisted out of
/// the loop with SCEV reducing the value to a recurrence on the same loop. If
/// so, return the extended recurrence and the kind of extension used. Otherwise
/// return {nullptr, Unknown}.
WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
  if (!SE->isSCEVable(DU.NarrowUse->getType()))
    return {nullptr, Unknown};

  const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
  if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
      SE->getTypeSizeInBits(WideType)) {
    // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
    // index. So don't follow this use.
    return {nullptr, Unknown};
  }

  const SCEV *WideExpr;
  ExtendKind ExtKind;
  if (DU.NeverNegative) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    if (isa<SCEVAddRecExpr>(WideExpr))
      ExtKind = SignExtended;
    else {
      WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
      ExtKind = ZeroExtended;
    }
  } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    ExtKind = SignExtended;
  } else {
    WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
    ExtKind = ZeroExtended;
  }
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};
  return {AddRec, ExtKind};
}

/// This IV user cannot be widened. Replace this use of the original narrow IV
/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
  DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef
               << " for user " << *DU.NarrowUse << "\n");
  IRBuilder<> Builder(
      getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI));
  Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
}

/// If the narrow use is a compare instruction, then widen the compare
/// (and possibly the other operand). The extend operation is hoisted into the
/// loop preheader as far as possible.
bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
  ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
  if (!Cmp)
    return false;

  // We can legally widen the comparison in the following two cases:
  //
  //  - The signedness of the IV extension and comparison match
  //
  //  - The narrow IV is always positive (and thus its sign extension is equal
  //    to its zero extension).  For instance, let's say we're zero extending
  //    %narrow for the following use
  //
  //      icmp slt i32 %narrow, %val   ... (A)
  //
  //    and %narrow is always positive.  Then
  //
  //      (A) == icmp slt i32 sext(%narrow), sext(%val)
  //          == icmp slt i32 zext(%narrow), sext(%val)
  bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
  if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
    return false;

  Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
  unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
  assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");

  // Widen the compare instruction.
  IRBuilder<> Builder(
      getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI));
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);

  // Widen the other operand of the compare, if necessary.
  if (CastWidth < IVWidth) {
    Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
    DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
  }
  return true;
}

/// Determine whether an individual user of the narrow IV can be widened. If so,
/// return the wide clone of the user.
Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
  assert(ExtendKindMap.count(DU.NarrowDef) &&
         "Should already know the kind of extension used to widen NarrowDef");

  // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
  if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
    if (LI->getLoopFor(UsePhi->getParent()) != L) {
      // For LCSSA phis, sink the truncate outside the loop.
      // After SimplifyCFG most loop exit targets have a single predecessor.
      // Otherwise fall back to a truncate within the loop.
      if (UsePhi->getNumOperands() != 1)
        truncateIVUse(DU, DT, LI);
      else {
        // Widening the PHI requires us to insert a trunc. The logical place
        // for this trunc is in the same BB as the PHI. This is not possible if
        // the BB is terminated by a catchswitch.
        if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
          return nullptr;

        PHINode *WidePhi =
            PHINode::Create(DU.WideDef->getType(), 1,
                            UsePhi->getName() + ".wide", UsePhi);
        WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
        IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
        Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
        UsePhi->replaceAllUsesWith(Trunc);
        DeadInsts.emplace_back(UsePhi);
        DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi
                     << " to " << *WidePhi << "\n");
      }
      return nullptr;
    }
  }

  // This narrow use can be widened by a sext if it's non-negative or its narrow
  // def was widened by a sext. Same for zext.
  auto canWidenBySExt = [&]() {
    return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
  };
  auto canWidenByZExt = [&]() {
    return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
  };

  // Our raison d'etre! Eliminate sign and zero extension.
  if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
      (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
    Value *NewDef = DU.WideDef;
    if (DU.NarrowUse->getType() != WideType) {
      unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
      unsigned IVWidth = SE->getTypeSizeInBits(WideType);
      if (CastWidth < IVWidth) {
        // The cast isn't as wide as the IV, so insert a Trunc.
        IRBuilder<> Builder(DU.NarrowUse);
        NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
      }
      else {
        // A wider extend was hidden behind a narrower one. This may induce
        // another round of IV widening in which the intermediate IV becomes
        // dead. It should be very rare.
        DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
                     << " not wide enough to subsume " << *DU.NarrowUse << "\n");
        DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
        NewDef = DU.NarrowUse;
      }
    }
    if (NewDef != DU.NarrowUse) {
      DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
                   << " replaced by " << *DU.WideDef << "\n");
      ++NumElimExt;
      DU.NarrowUse->replaceAllUsesWith(NewDef);
      DeadInsts.emplace_back(DU.NarrowUse);
    }
    // Now that the extend is gone, we want to expose its uses for potential
    // further simplification. We don't need to directly inform SimplifyIVUsers
    // of the new users, because their parent IV will be processed later as a
    // new loop phi. If we preserved IVUsers analysis, we would also want to
    // push the uses of WideDef here.

    // No further widening is needed. The deceased [sz]ext had done it for us.
    return nullptr;
  }

  // Does this user itself evaluate to a recurrence after widening?
  WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
  if (!WideAddRec.first)
    WideAddRec = getWideRecurrence(DU);

  assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
  if (!WideAddRec.first) {
    // If use is a loop condition, try to promote the condition instead of
    // truncating the IV first.
    if (widenLoopCompare(DU))
      return nullptr;

    // This user does not evaluate to a recurrence after widening, so don't
    // follow it. Instead insert a Trunc to kill off the original use,
    // eventually isolating the original narrow IV so it can be removed.
    truncateIVUse(DU, DT, LI);
    return nullptr;
  }
  // Assume block terminators cannot evaluate to a recurrence. We can't insert
  // a Trunc after a terminator if there happens to be a critical edge.
  assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
         "SCEV is not expected to evaluate a block terminator");

  // Reuse the IV increment that SCEVExpander created as long as it dominates
  // NarrowUse.
  Instruction *WideUse = nullptr;
  if (WideAddRec.first == WideIncExpr &&
      Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
    WideUse = WideInc;
  else {
    WideUse = cloneIVUser(DU, WideAddRec.first);
    if (!WideUse)
      return nullptr;
  }
  // Evaluation of WideAddRec ensured that the narrow expression could be
  // extended outside the loop without overflow. This suggests that the wide use
  // evaluates to the same expression as the extended narrow use, but doesn't
  // absolutely guarantee it. Hence the following failsafe check. In rare cases
  // where it fails, we simply throw away the newly created wide use.
  if (WideAddRec.first != SE->getSCEV(WideUse)) {
    DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
                 << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
                 << "\n");
    DeadInsts.emplace_back(WideUse);
    return nullptr;
  }

  ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
  // Returning WideUse pushes it on the worklist.
  return WideUse;
}
1462 
1463 /// Add eligible users of NarrowDef to NarrowIVUsers.
1464 ///
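/// Each pushed edge carries a NeverNegative flag: when the narrow def is
/// provably non-negative, either globally through SCEV or only at this use
/// via control-dependent range info, then (schematically) sext(%def) ==
/// zext(%def), so widenIVUse may fold either kind of extension into the
/// same wide IV.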
1465 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1466  const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1467  bool NonNegativeDef =
1468  SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1469  SE->getConstant(NarrowSCEV->getType(), 0));
1470  for (User *U : NarrowDef->users()) {
1471  Instruction *NarrowUser = cast<Instruction>(U);
1472 
1473  // Handle data flow merges and bizarre phi cycles.
1474  if (!Widened.insert(NarrowUser).second)
1475  continue;
1476 
1477  bool NonNegativeUse = false;
1478  if (!NonNegativeDef) {
1479  // We might have control-dependent range information for this context.
1480  if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
1481  NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
1482  }
1483 
1484  NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
1485  NonNegativeDef || NonNegativeUse);
1486  }
1487 }
1488 
1489 /// Process a single induction variable. First use the SCEVExpander to create a
1490 /// wide induction variable that evaluates to the same recurrence as the
1491 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's
1492 /// def-use chain. After widenIVUse has processed all interesting IV users, the
1493 /// narrow IV will be isolated for removal by DeleteDeadPHIs.
1494 ///
1495 /// It would be simpler to delete uses as they are processed, but we must avoid
1496 /// invalidating SCEV expressions.
1497 ///
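/// Schematically (names invented), starting from
///   %iv = phi i32 [ 0, %ph ], [ %iv.next, %latch ]
/// the expander materializes
///   %iv.wide = phi i64 [ 0, %ph ], [ %iv.wide.next, %latch ]
/// and the worklist then rewrites each interesting user of %iv (adds,
/// compares, extends) in terms of %iv.wide until the narrow phi is dead.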
1498 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
1499  // Is this phi an induction variable?
1500  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
1501  if (!AddRec)
1502  return nullptr;
1503 
1504  // Widen the induction variable expression.
1505  const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
1506  ? SE->getSignExtendExpr(AddRec, WideType)
1507  : SE->getZeroExtendExpr(AddRec, WideType);
1508 
1509  assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
1510  "Expect the new IV expression to preserve its type");
1511 
1512  // Can the IV be extended outside the loop without overflow?
1513  AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
1514  if (!AddRec || AddRec->getLoop() != L)
1515  return nullptr;
1516 
1517  // An AddRec must have loop-invariant operands. Since this AddRec is
1518  // materialized by a loop header phi, the expression cannot have any post-loop
1519  // operands, so they must dominate the loop header.
1520  assert(
1521  SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
1522  SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
1523  "Loop header phi recurrence inputs do not dominate the loop");
1524 
1525  // Iterate over IV uses (including transitive ones) looking for IV increments
1526  // of the form 'add nsw %iv, <const>'. For each increment and each use of
1527  // the increment, calculate control-dependent range information based on
1528  // dominating conditions inside of the loop (e.g. a range check inside of the
1529  // loop). Calculated ranges are stored in PostIncRangeInfos map.
1530  //
1531  // Control-dependent range information is later used to prove that a narrow
1532  // definition is not negative (see pushNarrowIVUsers). It's difficult to do
1533  // this on demand because when pushNarrowIVUsers needs this information some
1534  // of the dominating conditions might already be widened.
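  //
  // For instance (schematic, names invented): given an increment
  //   %j = add nsw i32 %i, 1
  // whose use is dominated by a check 'if (%i >= 0)', the taken branch
  // constrains %i to [0, INT_MAX], so %j lies in [1, INT_MAX] at that
  // use: provably non-negative in this context even if not globally.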
1535  if (UsePostIncrementRanges)
1536  calculatePostIncRanges(OrigPhi);
1537 
1538  // The rewriter provides a value for the desired IV expression. This may
1539  // either find an existing phi or materialize a new one. Either way, we
1540  // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
1541  // of the phi-SCC dominates the loop entry.
1542  Instruction *InsertPt = &L->getHeader()->front();
1543  WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
1544 
1545  // Remembering the WideIV increment generated by SCEVExpander allows
1546  // widenIVUse to reuse it when widening the narrow IV's increment. We don't
1547  // employ a general reuse mechanism because the call above is the only call to
1548  // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
1549  if (BasicBlock *LatchBlock = L->getLoopLatch()) {
1550  WideInc =
1551  cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
1552  WideIncExpr = SE->getSCEV(WideInc);
1553  // Propagate the debug location associated with the original loop increment
1554  // to the new (widened) increment.
1555  auto *OrigInc =
1556  cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
1557  WideInc->setDebugLoc(OrigInc->getDebugLoc());
1558  }
1559 
1560  DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
1561  ++NumWidened;
1562 
1563  // Traverse the def-use chain using a worklist starting at the original IV.
1564  assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state");
1565 
1566  Widened.insert(OrigPhi);
1567  pushNarrowIVUsers(OrigPhi, WidePhi);
1568 
1569  while (!NarrowIVUsers.empty()) {
1570  NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
1571 
1572  // Process a def-use edge. This may replace the use, so don't hold a
1573  // use_iterator across it.
1574  Instruction *WideUse = widenIVUse(DU, Rewriter);
1575 
1576  // Follow all def-use edges from the previous narrow use.
1577  if (WideUse)
1578  pushNarrowIVUsers(DU.NarrowUse, WideUse);
1579 
1580  // widenIVUse may have removed the def-use edge.
1581  if (DU.NarrowDef->use_empty())
1582  DeadInsts.emplace_back(DU.NarrowDef);
1583  }
1584  return WidePhi;
1585 }
1586 
1587 /// Calculates control-dependent range for the given def at the given context
1588 /// by looking at dominating conditions inside of the loop
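/// Roughly: for a def '%d = add nsw %x, C' with C >= 0, each dominating
/// branch on 'icmp pred %x, %rhs' yields an allowed region for %x (e.g.
/// '%x slt 100' taken constrains %x below 100), which is then shifted by
/// C under no-signed-wrap to bound %d at the given context.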
1589 void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
1590  Instruction *NarrowUser) {
1591  using namespace llvm::PatternMatch;
1592 
1593  Value *NarrowDefLHS;
1594  const APInt *NarrowDefRHS;
1595  if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
1596  m_APInt(NarrowDefRHS))) ||
1597  !NarrowDefRHS->isNonNegative())
1598  return;
1599 
1600  auto UpdateRangeFromCondition = [&] (Value *Condition,
1601  bool TrueDest) {
1602  CmpInst::Predicate Pred;
1603  Value *CmpRHS;
1604  if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
1605  m_Value(CmpRHS))))
1606  return;
1607 
1608  CmpInst::Predicate P =
1609  TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
1610 
1611  auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
1612  auto CmpConstrainedLHSRange =
1613  ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
1614  auto NarrowDefRange =
1615  CmpConstrainedLHSRange.addWithNoSignedWrap(*NarrowDefRHS);
1616 
1617  updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
1618  };
1619 
1620  auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
1621  if (!HasGuards)
1622  return;
1623 
1624  for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
1625  Ctx->getParent()->rend())) {
1626  Value *C = nullptr;
1627  if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
1628  UpdateRangeFromCondition(C, /*TrueDest=*/true);
1629  }
1630  };
1631 
1632  UpdateRangeFromGuards(NarrowUser);
1633 
1634  BasicBlock *NarrowUserBB = NarrowUser->getParent();
1635  // If NarrowUserBB is statically unreachable, asking dominator queries may
1636  // yield surprising results (e.g. the block may not have a dom tree node).
1637  if (!DT->isReachableFromEntry(NarrowUserBB))
1638  return;
1639 
1640  for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
1641  L->contains(DTB->getBlock());
1642  DTB = DTB->getIDom()) {
1643  auto *BB = DTB->getBlock();
1644  auto *TI = BB->getTerminator();
1645  UpdateRangeFromGuards(TI);
1646 
1647  auto *BI = dyn_cast<BranchInst>(TI);
1648  if (!BI || !BI->isConditional())
1649  continue;
1650 
1651  auto *TrueSuccessor = BI->getSuccessor(0);
1652  auto *FalseSuccessor = BI->getSuccessor(1);
1653 
1654  auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
1655  return BBE.isSingleEdge() &&
1656  DT->dominates(BBE, NarrowUser->getParent());
1657  };
1658 
1659  if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
1660  UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
1661 
1662  if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
1663  UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
1664  }
1665 }
1666 
1667 /// Calculates PostIncRangeInfos map for the given IV
1668 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
1669  SmallPtrSet<Instruction *, 16> Visited;
1670  SmallVector<Instruction *, 6> Worklist;
1671  Worklist.push_back(OrigPhi);
1672  Visited.insert(OrigPhi);
1673 
1674  while (!Worklist.empty()) {
1675  Instruction *NarrowDef = Worklist.pop_back_val();
1676 
1677  for (Use &U : NarrowDef->uses()) {
1678  auto *NarrowUser = cast<Instruction>(U.getUser());
1679 
1680  // Don't go looking outside the current loop.
1681  auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
1682  if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
1683  continue;
1684 
1685  if (!Visited.insert(NarrowUser).second)
1686  continue;
1687 
1688  Worklist.push_back(NarrowUser);
1689 
1690  calculatePostIncRange(NarrowDef, NarrowUser);
1691  }
1692  }
1693 }
1694 
1695 //===----------------------------------------------------------------------===//
1696 // Live IV Reduction - Minimize IVs live across the loop.
1697 //===----------------------------------------------------------------------===//
1698 
1699 
1700 //===----------------------------------------------------------------------===//
1701 // Simplification of IV users based on SCEV evaluation.
1702 //===----------------------------------------------------------------------===//
1703 
1704 namespace {
1705 class IndVarSimplifyVisitor : public IVVisitor {
1706  ScalarEvolution *SE;
1707  const TargetTransformInfo *TTI;
1708  PHINode *IVPhi;
1709 
1710 public:
1711  WideIVInfo WI;
1712 
1713  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
1714  const TargetTransformInfo *TTI,
1715  const DominatorTree *DTree)
1716  : SE(SCEV), TTI(TTI), IVPhi(IV) {
1717  DT = DTree;
1718  WI.NarrowIV = IVPhi;
1719  }
1720 
1721  // Implement the interface used by simplifyUsersOfIV.
1722  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
1723 };
1724 }
1725 
1726 /// Iteratively perform simplification on a worklist of IV users. Each
1727 /// successive simplification may push more users which may themselves be
1728 /// candidates for simplification.
1729 ///
1730 /// Sign/Zero extend elimination is interleaved with IV simplification.
1731 ///
1732 void IndVarSimplify::simplifyAndExtend(Loop *L,
1733  SCEVExpander &Rewriter,
1734  LoopInfo *LI) {
1735  SmallVector<WideIVInfo, 8> WideIVs;
1736 
1737  auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
1738  Intrinsic::getName(Intrinsic::experimental_guard));
1739  bool HasGuards = GuardDecl && !GuardDecl->use_empty();
1740 
1741  SmallVector<PHINode*, 8> LoopPhis;
1742  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1743  LoopPhis.push_back(cast<PHINode>(I));
1744  }
1745  // Each round of simplification iterates through the SimplifyIVUsers worklist
1746  // for all current phis, then determines whether any IVs can be
1747  // widened. Widening adds new phis to LoopPhis, inducing another round of
1748  // simplification on the wide IVs.
1749  while (!LoopPhis.empty()) {
1750  // Evaluate as many IV expressions as possible before widening any IVs. This
1751  // forces SCEV to set no-wrap flags before evaluating sign/zero
1752  // extension. The first time SCEV attempts to normalize sign/zero extension,
1753  // the result becomes final. So for the most predictable results, we delay
1754  // evaluation of sign/zero extension until needed, and avoid running
1755  // other SCEV based analysis prior to simplifyAndExtend.
1756  do {
1757  PHINode *CurrIV = LoopPhis.pop_back_val();
1758 
1759  // Information about sign/zero extensions of CurrIV.
1760  IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
1761 
1762  Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, &Visitor);
1763 
1764  if (Visitor.WI.WidestNativeType) {
1765  WideIVs.push_back(Visitor.WI);
1766  }
1767  } while(!LoopPhis.empty());
1768 
1769  for (; !WideIVs.empty(); WideIVs.pop_back()) {
1770  WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
1771  if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
1772  Changed = true;
1773  LoopPhis.push_back(WidePhi);
1774  }
1775  }
1776  }
1777 }
1778 
1779 //===----------------------------------------------------------------------===//
1780 // linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
1781 //===----------------------------------------------------------------------===//
1782 
1783 /// Return true if this loop's backedge taken count expression can be safely and
1784 /// cheaply expanded into an instruction sequence that can be used by
1785 /// linearFunctionTestReplace.
1786 ///
1787 /// TODO: This fails for pointer-type loop counters with greater than one byte
1788 /// strides, consequently preventing LFTR from running. For the purpose of LFTR
1789 /// we could skip this check in the case that the LFTR loop counter (chosen by
1790 /// FindLoopCounter) is also pointer type. Instead, we could directly convert
1791 /// the loop test to an inequality test by checking the target data's alignment
1792 /// of element types (given that the initial pointer value originates from or is
1793 /// used by an ABI-constrained operation, as opposed to inttoptr/ptrtoint).
1794 /// However, we don't yet have a strong motivation for converting loop tests
1795 /// into inequality tests.
1796 static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE,
1797  SCEVExpander &Rewriter) {
1798  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
1799  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
1800  BackedgeTakenCount->isZero())
1801  return false;
1802 
1803  if (!L->getExitingBlock())
1804  return false;
1805 
1806  // Can't rewrite non-branch yet.
1807  if (!isa<BranchInst>(L->getExitingBlock()->getTerminator()))
1808  return false;
1809 
1810  if (Rewriter.isHighCostExpansion(BackedgeTakenCount, L))
1811  return false;
1812 
1813  return true;
1814 }
1815 
1816 /// Return the loop header phi IFF IncV adds a loop invariant value to the phi.
1817 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
1818  Instruction *IncI = dyn_cast<Instruction>(IncV);
1819  if (!IncI)
1820  return nullptr;
1821 
1822  switch (IncI->getOpcode()) {
1823  case Instruction::Add:
1824  case Instruction::Sub:
1825  break;
1826  case Instruction::GetElementPtr:
1827  // An IV counter must preserve its type.
1828  if (IncI->getNumOperands() == 2)
1829  break;
1830  LLVM_FALLTHROUGH;
1831  default:
1832  return nullptr;
1833  }
1834 
1835  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
1836  if (Phi && Phi->getParent() == L->getHeader()) {
1837  if (isLoopInvariant(IncI->getOperand(1), L, DT))
1838  return Phi;
1839  return nullptr;
1840  }
1841  if (IncI->getOpcode() == Instruction::GetElementPtr)
1842  return nullptr;
1843 
1844  // Allow add/sub to be commuted.
1845  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
1846  if (Phi && Phi->getParent() == L->getHeader()) {
1847  if (isLoopInvariant(IncI->getOperand(0), L, DT))
1848  return Phi;
1849  }
1850  return nullptr;
1851 }
1852 
1853 /// Return the compare guarding the loop latch, or NULL for unrecognized tests.
1854 static ICmpInst *getLoopTest(Loop *L) {
1855  assert(L->getExitingBlock() && "expected loop exit");
1856 
1857  BasicBlock *LatchBlock = L->getLoopLatch();
1858  // Don't bother with LFTR if the loop is not properly simplified.
1859  if (!LatchBlock)
1860  return nullptr;
1861 
1862  BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
1863  assert(BI && "expected exit branch");
1864 
1865  return dyn_cast<ICmpInst>(BI->getCondition());
1866 }
1867 
1868 /// linearFunctionTestReplace policy. Return true unless we can show that the
1869 /// current exit test is already sufficiently canonical.
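/// A sufficiently canonical test looks like (names illustrative):
///   %exitcond = icmp ne i32 %iv.next, %limit
/// with a loop-invariant %limit and a simple counter phi behind %iv.next;
/// anything else (slt/ult forms, a non-counter LHS, ...) requests LFTR.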
1870 static bool needsLFTR(Loop *L, DominatorTree *DT) {
1871  // Do LFTR to simplify the exit condition to an ICMP.
1872  ICmpInst *Cond = getLoopTest(L);
1873  if (!Cond)
1874  return true;
1875 
1876  // Do LFTR to simplify the exit ICMP to EQ/NE
1877  ICmpInst::Predicate Pred = Cond->getPredicate();
1878  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
1879  return true;
1880 
1881  // Look for a loop invariant RHS
1882  Value *LHS = Cond->getOperand(0);
1883  Value *RHS = Cond->getOperand(1);
1884  if (!isLoopInvariant(RHS, L, DT)) {
1885  if (!isLoopInvariant(LHS, L, DT))
1886  return true;
1887  std::swap(LHS, RHS);
1888  }
1889  // Look for a simple IV counter LHS
1890  PHINode *Phi = dyn_cast<PHINode>(LHS);
1891  if (!Phi)
1892  Phi = getLoopPhiForCounter(LHS, L, DT);
1893 
1894  if (!Phi)
1895  return true;
1896 
1897  // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
1898  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
1899  if (Idx < 0)
1900  return true;
1901 
1902  // Do LFTR if the exit condition's IV is *not* a simple counter.
1903  Value *IncV = Phi->getIncomingValue(Idx);
1904  return Phi != getLoopPhiForCounter(IncV, L, DT);
1905 }
1906 
1907 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
1908 /// down to checking that all operands are constant and listing instructions
1909 /// that may hide undef.
1910 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
1911  unsigned Depth) {
1912  if (isa<Constant>(V))
1913  return !isa<UndefValue>(V);
1914 
1915  if (Depth >= 6)
1916  return false;
1917 
1918  // Conservatively handle non-constant non-instructions. For example, Arguments
1919  // may be undef.
1920  Instruction *I = dyn_cast<Instruction>(V);
1921  if (!I)
1922  return false;
1923 
1924  // Load and return values may be undef.
1925  if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
1926  return false;
1927 
1928  // Optimistically handle other instructions.
1929  for (Value *Op : I->operands()) {
1930  if (!Visited.insert(Op).second)
1931  continue;
1932  if (!hasConcreteDefImpl(Op, Visited, Depth+1))
1933  return false;
1934  }
1935  return true;
1936 }
1937 
1938 /// Return true if the given value is concrete. We must prove that undef can
1939 /// never reach it.
1940 ///
1941 /// TODO: If we decide that this is a good approach to checking for undef, we
1942 /// may factor it into a common location.
1943 static bool hasConcreteDef(Value *V) {
1944  SmallPtrSet<Value*, 8> Visited;
1945  Visited.insert(V);
1946  return hasConcreteDefImpl(V, Visited, 0);
1947 }
1948 
1949 /// Return true if this IV has any uses other than the (soon to be rewritten)
1950 /// loop exit test.
1951 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
1952  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
1953  Value *IncV = Phi->getIncomingValue(LatchIdx);
1954 
1955  for (User *U : Phi->users())
1956  if (U != Cond && U != IncV) return false;
1957 
1958  for (User *U : IncV->users())
1959  if (U != Cond && U != Phi) return false;
1960  return true;
1961 }
1962 
1963 /// Find an affine IV in canonical form.
1964 ///
1965 /// BECount may be an i8* pointer type. The pointer difference is already
1966 /// valid count without scaling the address stride, so it remains a pointer
1967 /// expression as far as SCEV is concerned.
1968 ///
1969 /// Currently only valid for LFTR. See the comments on hasConcreteDef above.
1970 ///
1971 /// FIXME: Accept -1 stride and set IVLimit = IVInit - BECount
1972 ///
1973 /// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride.
1974 /// This is difficult in general for SCEV because of potential overflow. But we
1975 /// could at least handle constant BECounts.
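/// Schematically, the preferred counter is a header phi like
///   %i = phi i64 [ 0, %ph ], [ %i.next, %latch ]
///   %i.next = add i64 %i, 1
/// i.e. affine with unit stride, of a legal integer width at least as wide
/// as BECount, and ideally starting at zero; an integer phi is never
/// matched against a pointer-typed BECount.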
1976 static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount,
1977  ScalarEvolution *SE, DominatorTree *DT) {
1978  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
1979 
1980  Value *Cond =
1981  cast<BranchInst>(L->getExitingBlock()->getTerminator())->getCondition();
1982 
1983  // Loop over all of the PHI nodes, looking for a simple counter.
1984  PHINode *BestPhi = nullptr;
1985  const SCEV *BestInit = nullptr;
1986  BasicBlock *LatchBlock = L->getLoopLatch();
1987  assert(LatchBlock && "needsLFTR should guarantee a loop latch");
1988  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
1989 
1990  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1991  PHINode *Phi = cast<PHINode>(I);
1992  if (!SE->isSCEVable(Phi->getType()))
1993  continue;
1994 
1995  // Avoid comparing an integer IV against a pointer Limit.
1996  if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
1997  continue;
1998 
1999  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2000  if (!AR || AR->getLoop() != L || !AR->isAffine())
2001  continue;
2002 
2003  // AR may be a pointer type, while BECount is an integer type.
2004  // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
2005  // AR may not be a narrower type, or we may never exit.
2006  uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
2007  if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
2008  continue;
2009 
2010  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
2011  if (!Step || !Step->isOne())
2012  continue;
2013 
2014  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
2015  Value *IncV = Phi->getIncomingValue(LatchIdx);
2016  if (getLoopPhiForCounter(IncV, L, DT) != Phi)
2017  continue;
2018 
2019  // Avoid reusing a potentially undef value to compute other values that may
2020  // have originally had a concrete definition.
2021  if (!hasConcreteDef(Phi)) {
2022  // We explicitly allow unknown phis as long as they are already used by
2023  // the loop test. In this case we assume that performing LFTR could not
2024  // increase the number of undef users.
2025  if (ICmpInst *Cond = getLoopTest(L)) {
2026  if (Phi != getLoopPhiForCounter(Cond->getOperand(0), L, DT) &&
2027  Phi != getLoopPhiForCounter(Cond->getOperand(1), L, DT)) {
2028  continue;
2029  }
2030  }
2031  }
2032  const SCEV *Init = AR->getStart();
2033 
2034  if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
2035  // Don't force a live loop counter if another IV can be used.
2036  if (AlmostDeadIV(Phi, LatchBlock, Cond))
2037  continue;
2038 
2039  // Prefer a count-from-zero counter. This is a more "canonical" form. It
2040  // also prefers integer to pointer IVs.
2041  if (BestInit->isZero() != Init->isZero()) {
2042  if (BestInit->isZero())
2043  continue;
2044  }
2045  // If two IVs both count from zero or both count from nonzero then the
2046  // narrower is likely a dead phi that has been widened. Use the wider phi
2047  // to allow the other to be eliminated.
2048  else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
2049  continue;
2050  }
2051  BestPhi = Phi;
2052  BestInit = Init;
2053  }
2054  return BestPhi;
2055 }
2056 
2057 /// Help linearFunctionTestReplace by generating a value that holds the RHS of
2058 /// the new loop test.
2059 static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
2060  SCEVExpander &Rewriter, ScalarEvolution *SE) {
2061  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2062  assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter");
2063  const SCEV *IVInit = AR->getStart();
2064 
2065  // IVInit may be a pointer while IVCount is an integer when FindLoopCounter
2066  // finds a valid pointer IV. Zero extend IVCount in order to materialize a
2067  // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
2068  // the existing GEPs whenever possible.
2069  if (IndVar->getType()->isPointerTy() && !IVCount->getType()->isPointerTy()) {
2070  // IVOffset will be the new GEP offset that is interpreted by GEP as a
2071  // signed value. IVCount on the other hand represents the loop trip count,
2072  // which is an unsigned value. FindLoopCounter only allows induction
2073  // variables that have a positive unit stride. This means we don't
2074  // have to handle the case of negative offsets (yet) and just need to zero
2075  // extend IVCount.
2076  Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
2077  const SCEV *IVOffset = SE->getTruncateOrZeroExtend(IVCount, OfsTy);
2078 
2079  // Expand the code for the iteration count.
2080  assert(SE->isLoopInvariant(IVOffset, L) &&
2081  "Computed iteration count is not loop invariant!");
2082  BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
2083  Value *GEPOffset = Rewriter.expandCodeFor(IVOffset, OfsTy, BI);
2084 
2085  Value *GEPBase = IndVar->getIncomingValueForBlock(L->getLoopPreheader());
2086  assert(AR->getStart() == SE->getSCEV(GEPBase) && "bad loop counter");
2087  // We could handle pointer IVs other than i8*, but we need to compensate for
2088  // gep index scaling. See canExpandBackedgeTakenCount comments.
2089  assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
2090  cast<PointerType>(GEPBase->getType())
2091  ->getElementType())->isOne() &&
2092  "unit stride pointer IV must be i8*");
2093 
2094  IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
2095  return Builder.CreateGEP(nullptr, GEPBase, GEPOffset, "lftr.limit");
2096  } else {
2097  // In any other case, convert both IVInit and IVCount to integers before
2098  // comparing. This may result in SCEV expansion of pointers, but in practice
2099  // SCEV will fold the pointer arithmetic away as such:
2100  // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
2101  //
2102  // Valid Cases: (1) both integers is most common; (2) both may be pointers
2103  // for simple memset-style loops.
2104  //
2105  // IVInit integer and IVCount pointer would only occur if a canonical IV
2106  // were generated on top of case #2, which is not expected.
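    //
    // Worked example (invented values): for 'for (i = 16; i != n; ++i)'
    // BECount is n - 17, the post-increment IVCount is n - 16, and
    // IVLimit = Start + IVCount = 16 + (n - 16) = n, so the rewritten
    // exit test compares the incremented IV against n.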
2107 
2108  const SCEV *IVLimit = nullptr;
2109  // For unit stride, IVCount = Start + BECount with 2's complement overflow.
2110  // For non-zero Start, compute IVCount here.
2111  if (AR->getStart()->isZero())
2112  IVLimit = IVCount;
2113  else {
2114  assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
2115  const SCEV *IVInit = AR->getStart();
2116 
2117  // For integer IVs, truncate the IV before computing IVInit + BECount.
2118  if (SE->getTypeSizeInBits(IVInit->getType())
2119  > SE->getTypeSizeInBits(IVCount->getType()))
2120  IVInit = SE->getTruncateExpr(IVInit, IVCount->getType());
2121 
2122  IVLimit = SE->getAddExpr(IVInit, IVCount);
2123  }
2124  // Expand the code for the iteration count.
2125  BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
2126  IRBuilder<> Builder(BI);
2127  assert(SE->isLoopInvariant(IVLimit, L) &&
2128  "Computed iteration count is not loop invariant!");
2129  // Ensure that we generate the same type as IndVar, or a smaller integer
2130  // type. In the presence of null pointer values, we have an integer type
2131  // SCEV expression (IVInit) for a pointer type IV value (IndVar).
2132  Type *LimitTy = IVCount->getType()->isPointerTy() ?
2133  IndVar->getType() : IVCount->getType();
2134  return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
2135  }
2136 }
2137 
2138 /// This method rewrites the exit condition of the loop to be a canonical !=
2139 /// comparison against the incremented loop induction variable. This pass is
2140 /// able to rewrite the exit tests of any loop where the SCEV analysis can
2141 /// determine a loop-invariant trip count of the loop, which is actually a much
2142 /// broader range than just linear tests.
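/// For example (schematic, assuming a unit-stride IV from 0 with a
/// provable trip count): an exit test 'icmp slt i32 %i, %n' guarding the
/// backedge becomes 'icmp ne i32 %i.next, %n', which later passes can
/// analyze and eliminate far more easily.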
2143 Value *IndVarSimplify::
2144 linearFunctionTestReplace(Loop *L,
2145  const SCEV *BackedgeTakenCount,
2146  PHINode *IndVar,
2147  SCEVExpander &Rewriter) {
2148  assert(canExpandBackedgeTakenCount(L, SE, Rewriter) && "precondition");
2149 
2150  // Initialize CmpIndVar and IVCount to their preincremented values.
2151  Value *CmpIndVar = IndVar;
2152  const SCEV *IVCount = BackedgeTakenCount;
2153 
2154  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
2155 
2156  // If the exiting block is the same as the backedge block, we prefer to
2157  // compare against the post-incremented value, otherwise we must compare
2158  // against the preincremented value.
2159  if (L->getExitingBlock() == L->getLoopLatch()) {
2160  // Add one to the "backedge-taken" count to get the trip count.
2161  // This addition may overflow, which is valid as long as the comparison is
2162  // truncated to BackedgeTakenCount->getType().
2163  IVCount = SE->getAddExpr(BackedgeTakenCount,
2164  SE->getOne(BackedgeTakenCount->getType()));
2165  // The BackedgeTaken expression contains the number of times that the
2166  // backedge branches to the loop header. This is one less than the
2167  // number of times the loop executes, so use the incremented indvar.
2168  CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
2169  }
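  // E.g. (invented values) for 'for (i = 0; i != n; ++i)' exiting at the
  // latch: BackedgeTakenCount is n - 1, IVCount becomes n, and the new
  // test will compare the post-incremented value, 'icmp ne %i.next, n'.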
2170 
2171  Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE);
2172  assert(ExitCnt->getType()->isPointerTy() ==
2173  IndVar->getType()->isPointerTy() &&
2174  "genLoopLimit missed a cast");
2175 
2176  // Insert a new icmp_ne or icmp_eq instruction before the branch.
2177  BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
2178  ICmpInst::Predicate P;
2179  if (L->contains(BI->getSuccessor(0)))
2180  P = ICmpInst::ICMP_NE;
2181  else
2182  P = ICmpInst::ICMP_EQ;
2183 
2184  DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
2185  << " LHS:" << *CmpIndVar << '\n'
2186  << " op:\t"
2187  << (P == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
2188  << " RHS:\t" << *ExitCnt << "\n"
2189  << " IVCount:\t" << *IVCount << "\n");
2190 
2191  IRBuilder<> Builder(BI);
2192 
2193  // The new loop exit condition should reuse the debug location of the
2194  // original loop exit condition.
2195  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
2196  Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
2197 
2198  // LFTR can ignore IV overflow and truncate to the width of
2199  // BECount. This avoids materializing the add(zext(add)) expression.
2200  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
2201  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
2202  if (CmpIndVarSize > ExitCntSize) {
2203  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2204  const SCEV *ARStart = AR->getStart();
2205  const SCEV *ARStep = AR->getStepRecurrence(*SE);
2206  // For constant IVCount, avoid truncation.
2207  if (isa<SCEVConstant>(ARStart) && isa<SCEVConstant>(IVCount)) {
2208  const APInt &Start = cast<SCEVConstant>(ARStart)->getAPInt();
2209  APInt Count = cast<SCEVConstant>(IVCount)->getAPInt();
2210  // Note that the post-inc value of BackedgeTakenCount may have overflowed
2211  // above such that IVCount is now zero.
2212  if (IVCount != BackedgeTakenCount && Count == 0) {
2213  Count = APInt::getMaxValue(Count.getBitWidth()).zext(CmpIndVarSize);
2214  ++Count;
2215  }
2216  else
2217  Count = Count.zext(CmpIndVarSize);
2218  APInt NewLimit;
2219  if (cast<SCEVConstant>(ARStep)->getValue()->isNegative())
2220  NewLimit = Start - Count;
2221  else
2222  NewLimit = Start + Count;
2223  ExitCnt = ConstantInt::get(CmpIndVar->getType(), NewLimit);
2224 
2225  DEBUG(dbgs() << " Widen RHS:\t" << *ExitCnt << "\n");
2226  } else {
2227  // We try to extend the trip count first. If that fails, we truncate the IV.
2228  // Zext(trunc(IV)) == IV implies equivalence of the following two:
2229  // Trunc(IV) == ExitCnt and IV == zext(ExitCnt). Similarly for sext. If
2230  // one of the two holds, extend the trip count, otherwise we truncate IV.
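      // E.g. a 64-bit CmpIndVar that SCEV can prove stays within [0, 2^32)
      // satisfies zext(trunc(IV)) == IV, so the 32-bit limit is simply
      // zero-extended and compared against the wide IV, and no truncation
      // of the IV itself is needed.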
2231  bool Extended = false;
2232  const SCEV *IV = SE->getSCEV(CmpIndVar);
2233  const SCEV *ZExtTrunc =
2234  SE->getZeroExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2235  ExitCnt->getType()),
2236  CmpIndVar->getType());
2237 
2238  if (ZExtTrunc == IV) {
2239  Extended = true;
2240  ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
2241  "wide.trip.count");
2242  } else {
2243  const SCEV *SExtTrunc =
2244  SE->getSignExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2245  ExitCnt->getType()),
2246  CmpIndVar->getType());
2247  if (SExtTrunc == IV) {
2248  Extended = true;
2249  ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
2250  "wide.trip.count");
2251  }
2252  }
2253 
2254  if (!Extended)
2255  CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
2256  "lftr.wideiv");
2257  }
2258  }
2259  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
2260  Value *OrigCond = BI->getCondition();
2261  // It's tempting to use replaceAllUsesWith here to fully replace the old
2262  // comparison, but that's not immediately safe, since users of the old
2263  // comparison may not be dominated by the new comparison. Instead, just
2264  // update the branch to use the new comparison; in the common case this
2265  // will make the old comparison dead.
2266  BI->setCondition(Cond);
2267  DeadInsts.push_back(OrigCond);
2268 
2269  ++NumLFTR;
2270  Changed = true;
2271  return Cond;
2272 }
2273 
2274 //===----------------------------------------------------------------------===//
2275 // sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
2276 //===----------------------------------------------------------------------===//
2277 
2278 /// If there's a single exit block, sink any loop-invariant values that
2279 /// were defined in the preheader but not used inside the loop into the
2280 /// exit block to reduce register pressure in the loop.
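/// For instance, a computation 'p = a + b' placed in the preheader whose
/// only users sit past the loop can be moved into the exit block: the
/// preheader dominates the single exit, so validity is unaffected and the
/// value no longer occupies a register across the whole loop.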
2281 void IndVarSimplify::sinkUnusedInvariants(Loop *L) {
2282  BasicBlock *ExitBlock = L->getExitBlock();
2283  if (!ExitBlock) return;
2284 
2285  BasicBlock *Preheader = L->getLoopPreheader();
2286  if (!Preheader) return;
2287 
2288  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
2289  BasicBlock::iterator I(Preheader->getTerminator());
2290  while (I != Preheader->begin()) {
2291  --I;
2292  // New instructions were inserted at the end of the preheader.
2293  if (isa<PHINode>(I))
2294  break;
2295 
2296  // Don't move instructions which might have side effects, since the side
2297  // effects need to complete before instructions inside the loop. Also don't
2298  // move instructions which might read memory, since the loop may modify
2299  // memory. Note that it's okay if the instruction might have undefined
2300  // behavior: LoopSimplify guarantees that the preheader dominates the exit
2301  // block.
2302  if (I->mayHaveSideEffects() || I->mayReadFromMemory())
2303  continue;
2304 
2305  // Skip debug info intrinsics.
2306  if (isa<DbgInfoIntrinsic>(I))
2307  continue;
2308 
2309  // Skip eh pad instructions.
2310  if (I->isEHPad())
2311  continue;
2312 
2313  // Don't sink alloca: we never want to sink static alloca's out of the
2314  // entry block, and correctly sinking dynamic alloca's requires
2315  // checks for stacksave/stackrestore intrinsics.
2316  // FIXME: Refactor this check somehow?
2317  if (isa<AllocaInst>(I))
2318  continue;
2319 
2320  // Determine if there is a use in or before the loop (direct or
2321  // otherwise).
2322  bool UsedInLoop = false;
2323  for (Use &U : I->uses()) {
2324  Instruction *User = cast<Instruction>(U.getUser());
2325  BasicBlock *UseBB = User->getParent();
2326  if (PHINode *P = dyn_cast<PHINode>(User)) {
2327  unsigned i =
2328  PHINode::getIncomingValueNumForOperand(U.getOperandNo());
2329  UseBB = P->getIncomingBlock(i);
2330  }
2331  if (UseBB == Preheader || L->contains(UseBB)) {
2332  UsedInLoop = true;
2333  break;
2334  }
2335  }
2336 
2337  // If there is, the def must remain in the preheader.
2338  if (UsedInLoop)
2339  continue;
2340 
2341  // Otherwise, sink it to the exit block.
2342  Instruction *ToMove = &*I;
2343  bool Done = false;
2344 
2345  if (I != Preheader->begin()) {
2346  // Skip debug info intrinsics.
2347  do {
2348  --I;
2349  } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
2350 
2351  if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
2352  Done = true;
2353  } else {
2354  Done = true;
2355  }
2356 
2357  ToMove->moveBefore(*ExitBlock, InsertPt);
2358  if (Done) break;
2359  InsertPt = ToMove->getIterator();
2360  }
2361 }
2362 
2363 //===----------------------------------------------------------------------===//
2364 // IndVarSimplify driver. Manage several subpasses of IV simplification.
2365 //===----------------------------------------------------------------------===//
2366 
2367 bool IndVarSimplify::run(Loop *L) {
2368  // We need (and expect!) the incoming loop to be in LCSSA.
2369  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2370  "LCSSA required to run indvars!");
2371 
2372  // If LoopSimplify form is not available, stay out of trouble. Some notes:
2373  // - LSR currently only supports LoopSimplify-form loops. Indvars'
2374  // canonicalization can be a pessimization without LSR to "clean up"
2375  // afterwards.
2376  // - We depend on having a preheader; in particular,
2377  // Loop::getCanonicalInductionVariable only supports loops with preheaders,
2378  // and we're in trouble if we can't find the induction variable even when
2379  // we've manually inserted one.
2380  // - LFTR relies on having a single backedge.
2381  if (!L->isLoopSimplifyForm())
2382  return false;
2383 
2384  // If there are any floating-point recurrences, attempt to
2385  // transform them to use integer recurrences.
2386  rewriteNonIntegerIVs(L);
2387 
2388  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
2389 
2390  // Create a rewriter object which we'll use to transform the code with.
2391  SCEVExpander Rewriter(*SE, DL, "indvars");
2392 #ifndef NDEBUG
2393  Rewriter.setDebugType(DEBUG_TYPE);
2394 #endif
2395 
2396  // Eliminate redundant IV users.
2397  //
2398  // Simplification works best when run before other consumers of SCEV. We
2399  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
2400  // other expressions involving loop IVs have been evaluated. This helps SCEV
2401  // set no-wrap flags before normalizing sign/zero extension.
2402  Rewriter.disableCanonicalMode();
2403  simplifyAndExtend(L, Rewriter, LI);
2404 
2405  // Check to see if this loop has a computable loop-invariant execution count.
2406  // If so, this means that we can compute the final value of any expressions
2407  // that are recurrent in the loop, and substitute the exit values from the
2408  // loop into any instructions outside of the loop that use the final values of
2409  // the current expressions.
2410  //
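  // E.g. (schematic) for 'for (i = 0; i != 10; ++i) s = i * 2;' followed
  // by a use of s, the exit value folds to the constant 18; rewriting the
  // use can leave the loop itself dead if computing s was its only effect.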
2411  if (ReplaceExitValue != NeverRepl &&
2412  !isa<SCEVCouldNotCompute>(BackedgeTakenCount))
2413  rewriteLoopExitValues(L, Rewriter);
2414 
2415  // Eliminate redundant IV cycles.
2416  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
2417 
2418  // If we have a trip count expression, rewrite the loop's exit condition
2419  // using it. We can currently only handle loops with a single exit.
2420  if (!DisableLFTR && canExpandBackedgeTakenCount(L, SE, Rewriter) &&
2421  needsLFTR(L, DT)) {
2422  PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT);
2423  if (IndVar) {
2424  // Check preconditions for proper SCEVExpander operation. SCEV does not
2425  // express SCEVExpander's dependencies, such as LoopSimplify. Instead any
2426  // pass that uses the SCEVExpander must do it. This does not work well for
2427  // loop passes because SCEVExpander makes assumptions about all loops,
2428  // while LoopPassManager only forces the current loop to be simplified.
2429  //
2430  // FIXME: SCEV expansion has no way to bail out, so the caller must
2431  // explicitly check any assumptions made by SCEV. Brittle.
2432  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount);
2433  if (!AR || AR->getLoop()->getLoopPreheader())
2434  (void)linearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
2435  Rewriter);
2436  }
2437  }
2438  // Clear the rewriter cache, because values that are in the rewriter's cache
2439  // can be deleted in the loop below, causing the AssertingVH in the cache to
2440  // trigger.
2441  Rewriter.clear();
2442 
2443  // Now that we're done iterating through lists, clean up any instructions
2444  // which are now dead.
2445  while (!DeadInsts.empty())
2446  if (Instruction *Inst =
2447  dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
2448  Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
2449 
2450  // The Rewriter may not be used from this point on.
2451 
2452  // Loop-invariant instructions in the preheader that aren't used in the
2453  // loop may be sunk below the loop to reduce register pressure.
2454  sinkUnusedInvariants(L);
2455 
2456  // rewriteFirstIterationLoopExitValues does not rely on the computation of
2457  // trip count and therefore can further simplify exit values in addition to
2458  // rewriteLoopExitValues.
2459  rewriteFirstIterationLoopExitValues(L);
2460 
2461  // Clean up dead instructions.
2462  Changed |= DeleteDeadPHIs(L->getHeader(), TLI);
2463 
2464  // Check a post-condition.
2465  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2466  "Indvars did not preserve LCSSA!");
2467 
2468  // Verify that LFTR and any other changes have not interfered with SCEV's
2469  // ability to compute trip count.
2470 #ifndef NDEBUG
2471  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
2472  SE->forgetLoop(L);
2473  const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
2474  if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
2475  SE->getTypeSizeInBits(NewBECount->getType()))
2476  NewBECount = SE->getTruncateOrNoop(NewBECount,
2477  BackedgeTakenCount->getType());
2478  else
2479  BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
2480  NewBECount->getType());
2481  assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV");
2482  }
2483 #endif
2484 
2485  return Changed;
2486 }
2487 
2488 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2489  LoopStandardAnalysisResults &AR,
2490  LPMUpdater &) {
2491  Function *F = L.getHeader()->getParent();
2492  const DataLayout &DL = F->getParent()->getDataLayout();
2493 
2494  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI);
2495  if (!IVS.run(&L))
2496  return PreservedAnalyses::all();
2497 
2498  auto PA = getLoopPassPreservedAnalyses();
2499  PA.preserveSet<CFGAnalyses>();
2500  return PA;
2501 }
2502 
2503 namespace {
2504 struct IndVarSimplifyLegacyPass : public LoopPass {
2505  static char ID; // Pass identification, replacement for typeid
2506  IndVarSimplifyLegacyPass() : LoopPass(ID) {
2507  initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
2508  }
2509 
2510  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
2511  if (skipLoop(L))
2512  return false;
2513 
2514  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2515  auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2516  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2517  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2518  auto *TLI = TLIP ? &TLIP->getTLI() : nullptr;
2519  auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
2520  auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
2521  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2522 
2523  IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI);
2524  return IVS.run(L);
2525  }
2526 
2527  void getAnalysisUsage(AnalysisUsage &AU) const override {
2528  AU.setPreservesCFG();
2529  getLoopAnalysisUsage(AU);
2530  }
2531 };
2532 }
2533 
2534 char IndVarSimplifyLegacyPass::ID = 0;
2535 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
2536  "Induction Variable Simplification", false, false)
2537 INITIALIZE_PASS_DEPENDENCY(LoopPass)
2538 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
2539  "Induction Variable Simplification", false, false)
2540 
2541 Pass *llvm::createIndVarSimplifyPass() {
2542  return new IndVarSimplifyLegacyPass();
2543 }
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:81
uint64_t CallInst * C
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:69
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:109
bool hoistIVInc(Instruction *IncV, Instruction *InsertPos)
Utility for hoisting an IV increment.
Induction Variable Simplification
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:72
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1634
iterator_range< use_iterator > uses()
Definition: Value.h:350
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:157
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Value * getExactExistingExpansion(const SCEV *S, const Instruction *At, Loop *L)
Try to find existing LLVM IR value for S available at the point At.
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT)
Perform a quick domtree based check for loop invariance assuming that V is used within the loop...
MutableArrayRef< T > makeMutableArrayRef(T &OneElt)
Construct a MutableArrayRef from a single element.
Definition: ArrayRef.h:503
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
This header provides classes for managing a pipeline of passes over loops in LLVM IR...
This is the interface for a simple mod/ref and alias analysis over globals.
Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:136
bool isRecursivelyLCSSAForm(DominatorTree &DT, const LoopInfo &LI) const
Return true if this Loop and all inner subloops are in LCSSA form.
Definition: LoopInfo.cpp:182
bool isLCSSAForm(DominatorTree &DT) const
Return true if the Loop is in LCSSA form.
Definition: LoopInfo.cpp:175
The main scalar evolution driver.
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:865
bool isZero() const
Return true if the expression is a constant zero.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:106
bool isHighCostExpansion(const SCEV *Expr, Loop *L, const Instruction *At=nullptr)
Return true for expressions that may incur non-trivial cost to evaluate at runtime.
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:866
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:697
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:876
uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
void setDebugType(const char *s)
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
findNearestCommonDominator - Find nearest common dominator basic block for basic block A and B...
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
BasicBlock * getSuccessor(unsigned i) const
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1386
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
STATISTIC(NumFunctions, "Total number of functions")
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
F(f)
static cl::opt< bool > UsePostIncrementRanges("indvars-post-increment-ranges", cl::Hidden, cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), cl::init(true))
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:177
Hexagon Common GEP
Value * getCondition() const
static ConstantRange makeAllowedICmpRegion(CmpInst::Predicate Pred, const ConstantRange &Other)
Produce the smallest range such that all values that may satisfy the given predicate with any value c...
static bool hasConcreteDef(Value *V)
Return true if the given value is concrete.
ReplaceExitVal
bool hasLoopInvariantOperands(const Instruction *I) const
Return true if all the operands of the specified instruction are loop invariant.
Definition: LoopInfo.cpp:62
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1488
INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars", "Induction Variable Simplification", false, false) INITIALIZE_PASS_END(IndVarSimplifyLegacyPass
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:252
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1069
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:191
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:871
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
static Value * genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L, SCEVExpander &Rewriter, ScalarEvolution *SE)
Help linearFunctionTestReplace by generating a value that holds the RHS of the new loop test...
Interface for visiting interesting IV users that are recognized but not simplified by this utility...
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr it the function does no...
Definition: BasicBlock.cpp:116
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
bool isSigned() const
Determine if this instruction is using a signed comparison.
Definition: InstrTypes.h:1001
This is the interface for a SCEV-based alias analysis.
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:591
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE, etc.
Definition: InstrTypes.h:958
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:361
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:631
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:362
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:560
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal)
Convert APF to an integer, if possible.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:664
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:867
BlockT * getHeader() const
Definition: LoopInfo.h:107
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:820
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
Value handle that is nullable, but tries to track the Value.
Definition: ValueHandle.h:182
bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, SmallVectorImpl< WeakTrackingVH > &Dead, IVVisitor *V=nullptr)
simplifyUsersOfIV - Simplify instructions that use this induction variable by using ScalarEvolution t...
This node represents a polynomial recurrence on the trip count of the specified loop.
This instruction compares its operands according to the predicate given to the constructor.
static Instruction * getInsertPointForUses(Instruction *User, Value *Def, DominatorTree *DT, LoopInfo *LI)
Determine the insertion point for this user.
static cl::opt< bool > VerifyIndvars("verify-indvars", cl::Hidden, cl::desc("Verify the ScalarEvolution result after running indvars"))
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Examine each PHI in the given block and delete it if it is dead.
const SCEV * getSizeOfExpr(Type *IntTy, Type *AllocTy)
Return an expression for sizeof AllocTy that is type IntTy.
ConstantRange intersectWith(const ConstantRange &CR) const
Return the range that results from the intersection of this range with another range.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:125
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:152
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:428
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1383
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:290
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:140
static BinaryOperator * CreateAdd(Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block...
Definition: IRBuilder.h:128
Value * getOperand(unsigned i) const
Definition: User.h:154
void getUniqueExitBlocks(SmallVectorImpl< BasicBlock *> &ExitBlocks) const
Return all unique successor blocks of this loop.
Definition: LoopInfo.cpp:360
void initializeIndVarSimplifyLegacyPassPass(PassRegistry &)
void clearInsertPoint()
Clear the current insertion point.
void clear()
Erase the contents of the InsertedExpressions map so that users trying to expand the same expression ...
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:406
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt...
Definition: PatternMatch.h:260
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:200
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:281
static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE, SCEVExpander &Rewriter)
Return true if this loop&#39;s backedge taken count expression can be safely and cheaply expanded into an...
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
Conditional or Unconditional Branch instruction.
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl< Value *> &Visited, unsigned Depth)
Recursive helper for hasConcreteDef().
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:626
Value * getIncomingValueForBlock(const BasicBlock *BB) const
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:221
const Instruction & front() const
Definition: BasicBlock.h:264
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:264
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE, const TargetTransformInfo *TTI)
Update information about the induction variable that is extended by this sign or zero extend operation.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:363
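insert().second is the usual cycle-breaker in recursive walks such as hasConcreteDefImpl above; a sketch:
  #include "llvm/ADT/SmallPtrSet.h"

  static void walk(llvm::Value *V,
                   llvm::SmallPtrSetImpl<llvm::Value *> &Visited) {
    if (!Visited.insert(V).second)
      return; // already visited: stop here to avoid infinite recursion
    // ... recurse into V's operands ...
  }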
bool mayHaveSideEffects() const
Return true if the instruction may have side effects.
Definition: Instruction.h:504
const SCEV * getAddExpr(SmallVectorImpl< const SCEV *> &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
const std::vector< BlockT * > & getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:156
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:382
Represent the analysis usage information of a pass.
bool any_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:823
This instruction compares its operands according to the predicate given to the constructor.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:860
Utility class for integer arithmetic operators which may exhibit overflow - Add, Sub, and Mul.
Definition: Operator.h:67
Value * expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I)
Insert code to directly compute the specified SCEV expression into the program.
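A sketch of the guard-then-expand idiom; S, Ty, the insertion point IP, SE (a ScalarEvolution &), and DL (a DataLayout &) are assumed from context:
  #include "llvm/Analysis/ScalarEvolutionExpander.h"

  if (llvm::isSafeToExpand(S, SE)) {
    llvm::SCEVExpander Rewriter(SE, DL, "indvars");
    llvm::Value *V = Rewriter.expandCodeFor(S, Ty, IP); // emits IR before IP
    (void)V;
  }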
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values...
op_range operands()
Definition: User.h:222
self_iterator getIterator()
Definition: ilist_node.h:82
Class to represent integer types.
Definition: DerivedTypes.h:40
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:76
void getExitingBlocks(SmallVectorImpl< BlockT *> &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:35
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1320
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr)
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:370
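Typically invoked on the old IV computation once its last use is rewritten (OldVal and TLI are assumed names):
  #include "llvm/Transforms/Utils/Local.h"

  if (auto *OldInst = llvm::dyn_cast<llvm::Instruction>(OldVal))
    // Deletes OldInst and, recursively, any operands that become dead.
    llvm::RecursivelyDeleteTriviallyDeadInstructions(OldInst, TLI);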
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
static unsigned getIncomingValueNumForOperand(unsigned i)
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
static ICmpInst * getLoopTest(Loop *L)
Return the compare guarding the loop latch, or NULL for unrecognized tests.
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:875
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1380
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:56
signed greater than
Definition: InstrTypes.h:887
#define DEBUG_TYPE
const APFloat & getValueAPF() const
Definition: Constants.h:294
Value * CreateGEP(Value *Ptr, ArrayRef< Value *> IdxList, const Twine &Name="")
Definition: IRBuilder.h:1223
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:864
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
This class provides an interface for updating the loop pass manager based on mutations to the loop nest.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within this loop.
Definition: LoopInfo.h:117
unsigned getSCEVType() const
Iterator for intrusive lists based on ilist_node.
unsigned getNumOperands() const
Definition: User.h:176
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements...
Definition: SmallPtrSet.h:410
Type * getType() const
Return the LLVM type of this SCEV expression.
constexpr bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:301
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
const SCEV * getTruncateExpr(const SCEV *Op, Type *Ty)
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:874
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:864
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:239
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other instructions.
Definition: Instruction.h:63
Provides information about what library functions are available for the current target.
This class represents a range of values.
Definition: ConstantRange.h:47
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU...
Definition: DataLayout.h:238
signed less than
Definition: InstrTypes.h:889
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:385
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:560
static PHINode * getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT)
Return the loop header phi IFF IncV adds a loop invariant value to the phi.
bool isConditional() const
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will have.
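A sketch of building a fresh IV phi in a loop header; WideTy, Header, Preheader, Latch, StartVal, and NextVal are assumed names drawn from the usual preheader/latch structure:
  #include "llvm/IR/Instructions.h"

  llvm::PHINode *IV = llvm::PHINode::Create(WideTy, /*NumReservedValues=*/2,
                                            "indvar", &Header->front());
  IV->addIncoming(StartVal, Preheader); // value on entry to the loop
  IV->addIncoming(NextVal, Latch);      // value computed by the previous trip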
void setPreservesCFG()
This function should be called by a pass iff it does not add or remove basic blocks from the function or modify terminator instructions in any way.
Definition: Pass.cpp:285
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Value handle that asserts if the Value is deleted.
Definition: ValueHandle.h:238
static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI)
This IV user cannot be widened.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
Definition: PatternMatch.h:637
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:923
signed less or equal
Definition: InstrTypes.h:890
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not have a module.
Definition: Instruction.cpp:57
Class for arbitrary precision integers.
Definition: APInt.h:69
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), Instruction *InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
iterator_range< user_iterator > users()
Definition: Value.h:395
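users() is what makes use-list walks read naturally; a sketch (V is an assumed Value*):
  for (llvm::User *U : V->users())
    if (auto *UserInst = llvm::dyn_cast<llvm::Instruction>(U)) {
      // inspect or rewrite UserInst
    }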
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property...
Definition: Operator.h:96
static cl::opt< ReplaceExitVal > ReplaceExitValue("replexitval", cl::Hidden, cl::init(OnlyCheapRepl), cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), clEnumValN(OnlyCheapRepl, "cheap", "only replace exit value when the cost is cheap"), clEnumValN(AlwaysRepl, "always", "always replace exit value whenever possible")))
This class uses information about analyzed scalars to rewrite expressions in canonical form...
Represents analyses that only rely on functions' control flow.
Definition: PassManager.h:114
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:523
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:601
LoopT * getParentLoop() const
Definition: LoopInfo.h:108
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:934
bool isLoopSimplifyForm() const
Return true if the loop is in the form that the LoopSimplify pass transforms loops to, which is sometimes called normal form.
Definition: LoopInfo.cpp:191
void emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:656
This class represents an analyzed expression in the program.
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:176
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:61
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:404
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:218
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:108
user_iterator_impl< User > user_iterator
Definition: Value.h:364
bool mayReadFromMemory() const
Return true if this instruction may read memory.
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:1023
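In a legacy loop pass this helper is typically combined with setPreservesCFG; a sketch of the usual override, not necessarily this file's exact code:
  void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
    AU.setPreservesCFG();           // this pass does not restructure the CFG
    llvm::getLoopAnalysisUsage(AU); // standard loop-pass dependencies
  }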
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:868
static bool needsLFTR(Loop *L, DominatorTree *DT)
linearFunctionTestReplace policy.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
int getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info=OK_AnyValue, OperandValueKind Opd2Info=OK_AnyValue, OperandValueProperties Opd1PropInfo=OP_None, OperandValueProperties Opd2PropInfo=OP_None, ArrayRef< const Value *> Args=ArrayRef< const Value *>()) const
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:872
unsigned replaceCongruentIVs(Loop *L, const DominatorTree *DT, SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetTransformInfo *TTI=nullptr)
Replace congruent phis with their most canonical representative.
const SCEV * getBackedgeTakenCount(const Loop *L)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCouldNotCompute object.
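The usual guard before relying on the trip count (L and SE are assumed):
  const llvm::SCEV *BECount = SE->getBackedgeTakenCount(L);
  if (llvm::isa<llvm::SCEVCouldNotCompute>(BECount))
    return false; // no predictable trip count; skip trip-count-based rewrites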
This class represents a cast from signed integer to floating point.
user_iterator user_begin()
Definition: Value.h:371
bool isOne() const
Return true if the expression is a constant one.
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:863
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:545
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction has no side effects.
Definition: Local.cpp:293
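A dead-code sweep in the spirit of this pass's cleanup loop; DeadInsts is assumed to be a SmallVector<WeakTrackingVH, 16> and TLI may be null:
  while (!DeadInsts.empty())
    if (auto *Inst = llvm::dyn_cast_or_null<llvm::Instruction>(
            &*DeadInsts.pop_back_val()))
      // The explicit check illustrates this entry; the recursive helper
      // re-verifies deadness internally before deleting anything.
      if (llvm::isInstructionTriviallyDead(Inst, TLI))
        llvm::RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);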
LLVM Value Representation.
Definition: Value.h:73
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:873
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:235
static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond)
Return true if this IV has any uses other than the (soon to be rewritten) loop exit test...
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos lives in, right before MovePos.
Definition: Instruction.cpp:88
#define DEBUG(X)
Definition: Debug.h:118
void disableCanonicalMode()
Disable the behavior of expanding expressions in canonical form rather than in a more literal form...
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:507
bool hasOneUse() const
Return true if there is exactly one user of this value.
Definition: Value.h:408
A container for analyses that lazily runs them and caches their results.
const SCEV * getTruncateOrZeroExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
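A one-line sketch: normalize a SCEV to the widened IV type (SE, NarrowVal, and WideTy are assumed):
  // No-op if the types already match; otherwise truncates or zero-extends.
  const llvm::SCEV *Conv =
      SE->getTruncateOrZeroExtend(SE->getSCEV(NarrowVal), WideTy);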
This pass exposes codegen information to IR-level passes.
const TerminatorInst * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed, or null if the block is not well formed.
Definition: BasicBlock.cpp:120
void setIncomingValue(unsigned i, Value *V)
bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE)
Return true if the given expression is safe to expand in the sense that all materialized values are safe to speculate anywhere their operands are defined.
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:163
static cl::opt< bool > DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), cl::desc("Disable Linear Function Test Replace optimization"))
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:865
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:50
bool use_empty() const
Definition: Value.h:322
Pass * createIndVarSimplifyPass()
signed greater or equal
Definition: InstrTypes.h:888
const BasicBlock * getParent() const
Definition: Instruction.h:66
This class represents a constant integer value.
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
Definition: PatternMatch.h:837
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property...
Definition: Operator.h:90
user_iterator user_end()
Definition: Value.h:379
static PHINode * FindLoopCounter(Loop *L, const SCEV *BECount, ScalarEvolution *SE, DominatorTree *DT)
Find an affine IV in canonical form.