LLVM  8.0.0svn
IndVarSimplify.cpp
Go to the documentation of this file.
1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This transformation analyzes and transforms the induction variables (and
11 // computations derived from them) into simpler forms suitable for subsequent
12 // analysis and transformation.
13 //
14 // If the trip count of a loop is computable, this pass also makes the following
15 // changes:
16 // 1. The exit condition for the loop is canonicalized to compare the
17 // induction value against the exit value. This turns loops like:
18 // 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
19 // 2. Any use outside of the loop of an expression derived from the indvar
20 // is changed to compute the derived value outside of the loop, eliminating
21 // the dependence on the exit value of the induction variable. If the only
22 // purpose of the loop is to compute the exit value of some derived
23 // expression, this transformation will make the loop dead.
24 //
25 //===----------------------------------------------------------------------===//
26 
28 #include "llvm/ADT/APFloat.h"
29 #include "llvm/ADT/APInt.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/DenseMap.h"
32 #include "llvm/ADT/None.h"
33 #include "llvm/ADT/Optional.h"
34 #include "llvm/ADT/STLExtras.h"
35 #include "llvm/ADT/SmallPtrSet.h"
36 #include "llvm/ADT/SmallVector.h"
37 #include "llvm/ADT/Statistic.h"
39 #include "llvm/Analysis/LoopInfo.h"
40 #include "llvm/Analysis/LoopPass.h"
47 #include "llvm/IR/BasicBlock.h"
48 #include "llvm/IR/Constant.h"
49 #include "llvm/IR/ConstantRange.h"
50 #include "llvm/IR/Constants.h"
51 #include "llvm/IR/DataLayout.h"
52 #include "llvm/IR/DerivedTypes.h"
53 #include "llvm/IR/Dominators.h"
54 #include "llvm/IR/Function.h"
55 #include "llvm/IR/IRBuilder.h"
56 #include "llvm/IR/InstrTypes.h"
57 #include "llvm/IR/Instruction.h"
58 #include "llvm/IR/Instructions.h"
59 #include "llvm/IR/IntrinsicInst.h"
60 #include "llvm/IR/Intrinsics.h"
61 #include "llvm/IR/Module.h"
62 #include "llvm/IR/Operator.h"
63 #include "llvm/IR/PassManager.h"
64 #include "llvm/IR/PatternMatch.h"
65 #include "llvm/IR/Type.h"
66 #include "llvm/IR/Use.h"
67 #include "llvm/IR/User.h"
68 #include "llvm/IR/Value.h"
69 #include "llvm/IR/ValueHandle.h"
70 #include "llvm/Pass.h"
71 #include "llvm/Support/Casting.h"
73 #include "llvm/Support/Compiler.h"
74 #include "llvm/Support/Debug.h"
78 #include "llvm/Transforms/Scalar.h"
83 #include <cassert>
84 #include <cstdint>
85 #include <utility>
86 
87 using namespace llvm;
88 
89 #define DEBUG_TYPE "indvars"
90 
91 STATISTIC(NumWidened , "Number of indvars widened");
92 STATISTIC(NumReplaced , "Number of exit values replaced");
93 STATISTIC(NumLFTR , "Number of loop exit tests replaced");
94 STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
95 STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
96 
97 // Trip count verification can be enabled by default under NDEBUG if we
98 // implement a strong expression equivalence checker in SCEV. Until then, we
99 // use the verify-indvars flag, which may assert in some cases.
101  "verify-indvars", cl::Hidden,
102  cl::desc("Verify the ScalarEvolution result after running indvars"));
103 
105 
107  "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
108  cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
109  cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
110  clEnumValN(OnlyCheapRepl, "cheap",
111  "only replace exit value when the cost is cheap"),
112  clEnumValN(AlwaysRepl, "always",
113  "always replace exit value whenever possible")));
114 
116  "indvars-post-increment-ranges", cl::Hidden,
117  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
118  cl::init(true));
119 
120 static cl::opt<bool>
121 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
122  cl::desc("Disable Linear Function Test Replace optimization"));
123 
124 namespace {
125 
126 struct RewritePhi;
127 
128 class IndVarSimplify {
129  LoopInfo *LI;
130  ScalarEvolution *SE;
131  DominatorTree *DT;
132  const DataLayout &DL;
133  TargetLibraryInfo *TLI;
134  const TargetTransformInfo *TTI;
135 
137 
138  bool isValidRewrite(Value *FromVal, Value *ToVal);
139 
140  bool handleFloatingPointIV(Loop *L, PHINode *PH);
141  bool rewriteNonIntegerIVs(Loop *L);
142 
143  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
144 
145  bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
146  bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
147  bool rewriteFirstIterationLoopExitValues(Loop *L);
148 
149  bool linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
150  PHINode *IndVar, SCEVExpander &Rewriter);
151 
152  bool sinkUnusedInvariants(Loop *L);
153 
154 public:
155  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
156  const DataLayout &DL, TargetLibraryInfo *TLI,
157  TargetTransformInfo *TTI)
158  : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {}
159 
160  bool run(Loop *L);
161 };
162 
163 } // end anonymous namespace
164 
165 /// Return true if the SCEV expansion generated by the rewriter can replace the
166 /// original value. SCEV guarantees that it produces the same value, but the way
167 /// it is produced may be illegal IR. Ideally, this function will only be
168 /// called for verification.
169 bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
170  // If an SCEV expression subsumed multiple pointers, its expansion could
171  // reassociate the GEP changing the base pointer. This is illegal because the
172  // final address produced by a GEP chain must be inbounds relative to its
173  // underlying object. Otherwise basic alias analysis, among other things,
174  // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
175  // producing an expression involving multiple pointers. Until then, we must
176  // bail out here.
177  //
178  // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
179  // because it understands lcssa phis while SCEV does not.
180  Value *FromPtr = FromVal;
181  Value *ToPtr = ToVal;
182  if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
183  FromPtr = GEP->getPointerOperand();
184  }
185  if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
186  ToPtr = GEP->getPointerOperand();
187  }
188  if (FromPtr != FromVal || ToPtr != ToVal) {
189  // Quickly check the common case
190  if (FromPtr == ToPtr)
191  return true;
192 
193  // SCEV may have rewritten an expression that produces the GEP's pointer
194  // operand. That's ok as long as the pointer operand has the same base
195  // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
196  // base of a recurrence. This handles the case in which SCEV expansion
197  // converts a pointer type recurrence into a nonrecurrent pointer base
198  // indexed by an integer recurrence.
199 
200  // If the GEP base pointer is a vector of pointers, abort.
201  if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
202  return false;
203 
204  const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
205  const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
206  if (FromBase == ToBase)
207  return true;
208 
209  LLVM_DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " << *FromBase
210  << " != " << *ToBase << "\n");
211 
212  return false;
213  }
214  return true;
215 }
216 
217 /// Determine the insertion point for this user. By default, insert immediately
218 /// before the user. SCEVExpander or LICM will hoist loop invariants out of the
219 /// loop. For PHI nodes, there may be multiple uses, so compute the nearest
220 /// common dominator for the incoming blocks.
222  DominatorTree *DT, LoopInfo *LI) {
223  PHINode *PHI = dyn_cast<PHINode>(User);
224  if (!PHI)
225  return User;
226 
227  Instruction *InsertPt = nullptr;
228  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
229  if (PHI->getIncomingValue(i) != Def)
230  continue;
231 
232  BasicBlock *InsertBB = PHI->getIncomingBlock(i);
233  if (!InsertPt) {
234  InsertPt = InsertBB->getTerminator();
235  continue;
236  }
237  InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
238  InsertPt = InsertBB->getTerminator();
239  }
240  assert(InsertPt && "Missing phi operand");
241 
242  auto *DefI = dyn_cast<Instruction>(Def);
243  if (!DefI)
244  return InsertPt;
245 
246  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");
247 
248  auto *L = LI->getLoopFor(DefI->getParent());
249  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));
250 
251  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
252  if (LI->getLoopFor(DTN->getBlock()) == L)
253  return DTN->getBlock()->getTerminator();
254 
255  llvm_unreachable("DefI dominates InsertPt!");
256 }
257 
258 //===----------------------------------------------------------------------===//
259 // rewriteNonIntegerIVs and helpers. Prefer integer IVs.
260 //===----------------------------------------------------------------------===//
261 
262 /// Convert APF to an integer, if possible.
263 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
264  bool isExact = false;
265  // See if we can convert this to an int64_t
266  uint64_t UIntVal;
267  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
268  APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
269  !isExact)
270  return false;
271  IntVal = UIntVal;
272  return true;
273 }
274 
275 /// If the loop has floating induction variable then insert corresponding
276 /// integer induction variable if possible.
277 /// For example,
278 /// for(double i = 0; i < 10000; ++i)
279 /// bar(i)
280 /// is converted into
281 /// for(int i = 0; i < 10000; ++i)
282 /// bar((double)i);
283 bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
284  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
285  unsigned BackEdge = IncomingEdge^1;
286 
287  // Check incoming value.
288  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
289 
290  int64_t InitValue;
291  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
292  return false;
293 
294  // Check IV increment. Reject this PN if increment operation is not
295  // an add or increment value can not be represented by an integer.
296  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
297  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;
298 
299  // If this is not an add of the PHI with a constantfp, or if the constant fp
300  // is not an integer, bail out.
301  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
302  int64_t IncValue;
303  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
304  !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
305  return false;
306 
307  // Check Incr uses. One user is PN and the other user is an exit condition
308  // used by the conditional terminator.
309  Value::user_iterator IncrUse = Incr->user_begin();
310  Instruction *U1 = cast<Instruction>(*IncrUse++);
311  if (IncrUse == Incr->user_end()) return false;
312  Instruction *U2 = cast<Instruction>(*IncrUse++);
313  if (IncrUse != Incr->user_end()) return false;
314 
315  // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
316  // only used by a branch, we can't transform it.
318  if (!Compare)
319  Compare = dyn_cast<FCmpInst>(U2);
320  if (!Compare || !Compare->hasOneUse() ||
321  !isa<BranchInst>(Compare->user_back()))
322  return false;
323 
324  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
325 
326  // We need to verify that the branch actually controls the iteration count
327  // of the loop. If not, the new IV can overflow and no one will notice.
328  // The branch block must be in the loop and one of the successors must be out
329  // of the loop.
330  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
331  if (!L->contains(TheBr->getParent()) ||
332  (L->contains(TheBr->getSuccessor(0)) &&
333  L->contains(TheBr->getSuccessor(1))))
334  return false;
335 
336  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
337  // transform it.
338  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
339  int64_t ExitValue;
340  if (ExitValueVal == nullptr ||
341  !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
342  return false;
343 
344  // Find new predicate for integer comparison.
346  switch (Compare->getPredicate()) {
347  default: return false; // Unknown comparison.
348  case CmpInst::FCMP_OEQ:
349  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
350  case CmpInst::FCMP_ONE:
351  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
352  case CmpInst::FCMP_OGT:
353  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
354  case CmpInst::FCMP_OGE:
355  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
356  case CmpInst::FCMP_OLT:
357  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
358  case CmpInst::FCMP_OLE:
359  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
360  }
361 
362  // We convert the floating point induction variable to a signed i32 value if
363  // we can. This is only safe if the comparison will not overflow in a way
364  // that won't be trapped by the integer equivalent operations. Check for this
365  // now.
366  // TODO: We could use i64 if it is native and the range requires it.
367 
368  // The start/stride/exit values must all fit in signed i32.
369  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
370  return false;
371 
372  // If not actually striding (add x, 0.0), avoid touching the code.
373  if (IncValue == 0)
374  return false;
375 
376  // Positive and negative strides have different safety conditions.
377  if (IncValue > 0) {
378  // If we have a positive stride, we require the init to be less than the
379  // exit value.
380  if (InitValue >= ExitValue)
381  return false;
382 
383  uint32_t Range = uint32_t(ExitValue-InitValue);
384  // Check for infinite loop, either:
385  // while (i <= Exit) or until (i > Exit)
386  if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
387  if (++Range == 0) return false; // Range overflows.
388  }
389 
390  unsigned Leftover = Range % uint32_t(IncValue);
391 
392  // If this is an equality comparison, we require that the strided value
393  // exactly land on the exit value, otherwise the IV condition will wrap
394  // around and do things the fp IV wouldn't.
395  if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
396  Leftover != 0)
397  return false;
398 
399  // If the stride would wrap around the i32 before exiting, we can't
400  // transform the IV.
401  if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
402  return false;
403  } else {
404  // If we have a negative stride, we require the init to be greater than the
405  // exit value.
406  if (InitValue <= ExitValue)
407  return false;
408 
409  uint32_t Range = uint32_t(InitValue-ExitValue);
410  // Check for infinite loop, either:
411  // while (i >= Exit) or until (i < Exit)
412  if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
413  if (++Range == 0) return false; // Range overflows.
414  }
415 
416  unsigned Leftover = Range % uint32_t(-IncValue);
417 
418  // If this is an equality comparison, we require that the strided value
419  // exactly land on the exit value, otherwise the IV condition will wrap
420  // around and do things the fp IV wouldn't.
421  if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
422  Leftover != 0)
423  return false;
424 
425  // If the stride would wrap around the i32 before exiting, we can't
426  // transform the IV.
427  if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
428  return false;
429  }
430 
432 
433  // Insert new integer induction variable.
434  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
435  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
436  PN->getIncomingBlock(IncomingEdge));
437 
438  Value *NewAdd =
439  BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
440  Incr->getName()+".int", Incr);
441  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
442 
443  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
444  ConstantInt::get(Int32Ty, ExitValue),
445  Compare->getName());
446 
447  // In the following deletions, PN may become dead and may be deleted.
448  // Use a WeakTrackingVH to observe whether this happens.
449  WeakTrackingVH WeakPH = PN;
450 
451  // Delete the old floating point exit comparison. The branch starts using the
452  // new comparison.
453  NewCompare->takeName(Compare);
454  Compare->replaceAllUsesWith(NewCompare);
456 
457  // Delete the old floating point increment.
458  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
460 
461  // If the FP induction variable still has uses, this is because something else
462  // in the loop uses its value. In order to canonicalize the induction
463  // variable, we chose to eliminate the IV and rewrite it in terms of an
464  // int->fp cast.
465  //
466  // We give preference to sitofp over uitofp because it is faster on most
467  // platforms.
468  if (WeakPH) {
469  Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
470  &*PN->getParent()->getFirstInsertionPt());
471  PN->replaceAllUsesWith(Conv);
473  }
474  return true;
475 }
476 
477 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
478  // First step. Check to see if there are any floating-point recurrences.
479  // If there are, change them into integer recurrences, permitting analysis by
480  // the SCEV routines.
481  BasicBlock *Header = L->getHeader();
482 
484  for (PHINode &PN : Header->phis())
485  PHIs.push_back(&PN);
486 
487  bool Changed = false;
488  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
489  if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
490  Changed |= handleFloatingPointIV(L, PN);
491 
492  // If the loop previously had floating-point IV, ScalarEvolution
493  // may not have been able to compute a trip count. Now that we've done some
494  // re-writing, the trip count may be computable.
495  if (Changed)
496  SE->forgetLoop(L);
497  return Changed;
498 }
499 
500 namespace {
501 
502 // Collect information about PHI nodes which can be transformed in
503 // rewriteLoopExitValues.
504 struct RewritePhi {
505  PHINode *PN;
506 
507  // Ith incoming value.
508  unsigned Ith;
509 
510  // Exit value after expansion.
511  Value *Val;
512 
513  // High Cost when expansion.
514  bool HighCost;
515 
516  RewritePhi(PHINode *P, unsigned I, Value *V, bool H)
517  : PN(P), Ith(I), Val(V), HighCost(H) {}
518 };
519 
520 } // end anonymous namespace
521 
522 //===----------------------------------------------------------------------===//
523 // rewriteLoopExitValues - Optimize IV users outside the loop.
524 // As a side effect, reduces the amount of IV processing within the loop.
525 //===----------------------------------------------------------------------===//
526 
527 /// Check to see if this loop has a computable loop-invariant execution count.
528 /// If so, this means that we can compute the final value of any expressions
529 /// that are recurrent in the loop, and substitute the exit values from the loop
530 /// into any instructions outside of the loop that use the final values of the
531 /// current expressions.
532 ///
533 /// This is mostly redundant with the regular IndVarSimplify activities that
534 /// happen later, except that it's more powerful in some cases, because it's
535 /// able to brute-force evaluate arbitrary instructions as long as they have
536 /// constant operands at the beginning of the loop.
537 bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
538  // Check a pre-condition.
539  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
540  "Indvars did not preserve LCSSA!");
541 
542  SmallVector<BasicBlock*, 8> ExitBlocks;
543  L->getUniqueExitBlocks(ExitBlocks);
544 
545  SmallVector<RewritePhi, 8> RewritePhiSet;
546  // Find all values that are computed inside the loop, but used outside of it.
547  // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan
548  // the exit blocks of the loop to find them.
549  for (BasicBlock *ExitBB : ExitBlocks) {
550  // If there are no PHI nodes in this exit block, then no values defined
551  // inside the loop are used on this path, skip it.
552  PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
553  if (!PN) continue;
554 
555  unsigned NumPreds = PN->getNumIncomingValues();
556 
557  // Iterate over all of the PHI nodes.
558  BasicBlock::iterator BBI = ExitBB->begin();
559  while ((PN = dyn_cast<PHINode>(BBI++))) {
560  if (PN->use_empty())
561  continue; // dead use, don't replace it
562 
563  if (!SE->isSCEVable(PN->getType()))
564  continue;
565 
566  // It's necessary to tell ScalarEvolution about this explicitly so that
567  // it can walk the def-use list and forget all SCEVs, as it may not be
568  // watching the PHI itself. Once the new exit value is in place, there
569  // may not be a def-use connection between the loop and every instruction
570  // which got a SCEVAddRecExpr for that loop.
571  SE->forgetValue(PN);
572 
573  // Iterate over all of the values in all the PHI nodes.
574  for (unsigned i = 0; i != NumPreds; ++i) {
575  // If the value being merged in is not integer or is not defined
576  // in the loop, skip it.
577  Value *InVal = PN->getIncomingValue(i);
578  if (!isa<Instruction>(InVal))
579  continue;
580 
581  // If this pred is for a subloop, not L itself, skip it.
582  if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
583  continue; // The Block is in a subloop, skip it.
584 
585  // Check that InVal is defined in the loop.
586  Instruction *Inst = cast<Instruction>(InVal);
587  if (!L->contains(Inst))
588  continue;
589 
590  // Okay, this instruction has a user outside of the current loop
591  // and varies predictably *inside* the loop. Evaluate the value it
592  // contains when the loop exits, if possible.
593  const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
594  if (!SE->isLoopInvariant(ExitValue, L) ||
595  !isSafeToExpand(ExitValue, *SE))
596  continue;
597 
598  // Computing the value outside of the loop brings no benefit if :
599  // - it is definitely used inside the loop in a way which can not be
600  // optimized away.
601  // - no use outside of the loop can take advantage of hoisting the
602  // computation out of the loop
603  if (ExitValue->getSCEVType()>=scMulExpr) {
604  bool HasHardInternalUses = false;
605  bool HasSoftExternalUses = false;
606  for (auto *IB : Inst->users()) {
607  Instruction *UseInstr = cast<Instruction>(IB);
608  unsigned Opc = UseInstr->getOpcode();
609  if (L->contains(UseInstr)) {
610  if (Opc == Instruction::Call)
611  HasHardInternalUses = true;
612  } else {
613  if (Opc == Instruction::PHI) {
614  // Do not count the Phi as a use. LCSSA may have inserted
615  // plenty of trivial ones.
616  for (auto *PB : UseInstr->users()) {
617  unsigned PhiOpc = cast<Instruction>(PB)->getOpcode();
618  if (PhiOpc != Instruction::Call &&
619  PhiOpc != Instruction::Ret) {
620  HasSoftExternalUses = true;
621  break;
622  }
623  }
624  continue;
625  }
626  if (Opc != Instruction::Call && Opc != Instruction::Ret) {
627  HasSoftExternalUses = true;
628  break;
629  }
630  }
631  }
632  if (HasHardInternalUses && !HasSoftExternalUses)
633  continue;
634  }
635 
636  bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
637  Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
638 
639  LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
640  << '\n'
641  << " LoopVal = " << *Inst << "\n");
642 
643  if (!isValidRewrite(Inst, ExitVal)) {
644  DeadInsts.push_back(ExitVal);
645  continue;
646  }
647 
648 #ifndef NDEBUG
649  // If we reuse an instruction from a loop which is neither L nor one of
650  // its containing loops, we end up breaking LCSSA form for this loop by
651  // creating a new use of its instruction.
652  if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
653  if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
654  if (EVL != L)
655  assert(EVL->contains(L) && "LCSSA breach detected!");
656 #endif
657 
658  // Collect all the candidate PHINodes to be rewritten.
659  RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
660  }
661  }
662  }
663 
664  bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);
665 
666  bool Changed = false;
667  // Transformation.
668  for (const RewritePhi &Phi : RewritePhiSet) {
669  PHINode *PN = Phi.PN;
670  Value *ExitVal = Phi.Val;
671 
672  // Only do the rewrite when the ExitValue can be expanded cheaply.
673  // If LoopCanBeDel is true, rewrite exit value aggressively.
674  if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
675  DeadInsts.push_back(ExitVal);
676  continue;
677  }
678 
679  Changed = true;
680  ++NumReplaced;
681  Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
682  PN->setIncomingValue(Phi.Ith, ExitVal);
683 
684  // If this instruction is dead now, delete it. Don't do it now to avoid
685  // invalidating iterators.
686  if (isInstructionTriviallyDead(Inst, TLI))
687  DeadInsts.push_back(Inst);
688 
689  // Replace PN with ExitVal if that is legal and does not break LCSSA.
690  if (PN->getNumIncomingValues() == 1 &&
691  LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
692  PN->replaceAllUsesWith(ExitVal);
693  PN->eraseFromParent();
694  }
695  }
696 
697  // The insertion point instruction may have been deleted; clear it out
698  // so that the rewriter doesn't trip over it later.
699  Rewriter.clearInsertPoint();
700  return Changed;
701 }
702 
703 //===---------------------------------------------------------------------===//
704 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
705 // they will exit at the first iteration.
706 //===---------------------------------------------------------------------===//
707 
708 /// Check to see if this loop has loop invariant conditions which lead to loop
709 /// exits. If so, we know that if the exit path is taken, it is at the first
710 /// loop iteration. This lets us predict exit values of PHI nodes that live in
711 /// loop header.
712 bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
713  // Verify the input to the pass is already in LCSSA form.
714  assert(L->isLCSSAForm(*DT));
715 
716  SmallVector<BasicBlock *, 8> ExitBlocks;
717  L->getUniqueExitBlocks(ExitBlocks);
718  auto *LoopHeader = L->getHeader();
719  assert(LoopHeader && "Invalid loop");
720 
721  bool MadeAnyChanges = false;
722  for (auto *ExitBB : ExitBlocks) {
723  // If there are no more PHI nodes in this exit block, then no more
724  // values defined inside the loop are used on this path.
725  for (PHINode &PN : ExitBB->phis()) {
726  for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
727  IncomingValIdx != E; ++IncomingValIdx) {
728  auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
729 
730  // We currently only support loop exits from loop header. If the
731  // incoming block is not loop header, we need to recursively check
732  // all conditions starting from loop header are loop invariants.
733  // Additional support might be added in the future.
734  if (IncomingBB != LoopHeader)
735  continue;
736 
737  // Get condition that leads to the exit path.
738  auto *TermInst = IncomingBB->getTerminator();
739 
740  Value *Cond = nullptr;
741  if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
742  // Must be a conditional branch, otherwise the block
743  // should not be in the loop.
744  Cond = BI->getCondition();
745  } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
746  Cond = SI->getCondition();
747  else
748  continue;
749 
750  if (!L->isLoopInvariant(Cond))
751  continue;
752 
753  auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
754 
755  // Only deal with PHIs.
756  if (!ExitVal)
757  continue;
758 
759  // If ExitVal is a PHI on the loop header, then we know its
760  // value along this exit because the exit can only be taken
761  // on the first iteration.
762  auto *LoopPreheader = L->getLoopPreheader();
763  assert(LoopPreheader && "Invalid loop");
764  int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
765  if (PreheaderIdx != -1) {
766  assert(ExitVal->getParent() == LoopHeader &&
767  "ExitVal must be in loop header");
768  MadeAnyChanges = true;
769  PN.setIncomingValue(IncomingValIdx,
770  ExitVal->getIncomingValue(PreheaderIdx));
771  }
772  }
773  }
774  }
775  return MadeAnyChanges;
776 }
777 
778 /// Check whether it is possible to delete the loop after rewriting exit
779 /// value. If it is possible, ignore ReplaceExitValue and do rewriting
780 /// aggressively.
781 bool IndVarSimplify::canLoopBeDeleted(
782  Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
783  BasicBlock *Preheader = L->getLoopPreheader();
784  // If there is no preheader, the loop will not be deleted.
785  if (!Preheader)
786  return false;
787 
788  // In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1.
789  // We obviate multiple ExitingBlocks case for simplicity.
790  // TODO: If we see testcase with multiple ExitingBlocks can be deleted
791  // after exit value rewriting, we can enhance the logic here.
792  SmallVector<BasicBlock *, 4> ExitingBlocks;
793  L->getExitingBlocks(ExitingBlocks);
794  SmallVector<BasicBlock *, 8> ExitBlocks;
795  L->getUniqueExitBlocks(ExitBlocks);
796  if (ExitBlocks.size() > 1 || ExitingBlocks.size() > 1)
797  return false;
798 
799  BasicBlock *ExitBlock = ExitBlocks[0];
800  BasicBlock::iterator BI = ExitBlock->begin();
801  while (PHINode *P = dyn_cast<PHINode>(BI)) {
802  Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);
803 
804  // If the Incoming value of P is found in RewritePhiSet, we know it
805  // could be rewritten to use a loop invariant value in transformation
806  // phase later. Skip it in the loop invariant check below.
807  bool found = false;
808  for (const RewritePhi &Phi : RewritePhiSet) {
809  unsigned i = Phi.Ith;
810  if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
811  found = true;
812  break;
813  }
814  }
815 
816  Instruction *I;
817  if (!found && (I = dyn_cast<Instruction>(Incoming)))
818  if (!L->hasLoopInvariantOperands(I))
819  return false;
820 
821  ++BI;
822  }
823 
824  for (auto *BB : L->blocks())
825  if (llvm::any_of(*BB, [](Instruction &I) {
826  return I.mayHaveSideEffects();
827  }))
828  return false;
829 
830  return true;
831 }
832 
833 //===----------------------------------------------------------------------===//
834 // IV Widening - Extend the width of an IV to cover its widest uses.
835 //===----------------------------------------------------------------------===//
836 
837 namespace {
838 
839 // Collect information about induction variables that are used by sign/zero
840 // extend operations. This information is recorded by CollectExtend and provides
841 // the input to WidenIV.
842 struct WideIVInfo {
843  PHINode *NarrowIV = nullptr;
844 
845  // Widest integer type created [sz]ext
846  Type *WidestNativeType = nullptr;
847 
848  // Was a sext user seen before a zext?
849  bool IsSigned = false;
850 };
851 
852 } // end anonymous namespace
853 
854 /// Update information about the induction variable that is extended by this
855 /// sign or zero extend operation. This is used to determine the final width of
856 /// the IV before actually widening it.
857 static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
858  const TargetTransformInfo *TTI) {
859  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
860  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
861  return;
862 
863  Type *Ty = Cast->getType();
864  uint64_t Width = SE->getTypeSizeInBits(Ty);
865  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
866  return;
867 
868  // Check that `Cast` actually extends the induction variable (we rely on this
869  // later). This takes care of cases where `Cast` is extending a truncation of
870  // the narrow induction variable, and thus can end up being narrower than the
871  // "narrow" induction variable.
872  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
873  if (NarrowIVWidth >= Width)
874  return;
875 
876  // Cast is either an sext or zext up to this point.
877  // We should not widen an indvar if arithmetics on the wider indvar are more
878  // expensive than those on the narrower indvar. We check only the cost of ADD
879  // because at least an ADD is required to increment the induction variable. We
880  // could compute more comprehensively the cost of all instructions on the
881  // induction variable when necessary.
882  if (TTI &&
885  Cast->getOperand(0)->getType())) {
886  return;
887  }
888 
889  if (!WI.WidestNativeType) {
890  WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
891  WI.IsSigned = IsSigned;
892  return;
893  }
894 
895  // We extend the IV to satisfy the sign of its first user, arbitrarily.
896  if (WI.IsSigned != IsSigned)
897  return;
898 
899  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
900  WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
901 }
902 
903 namespace {
904 
905 /// Record a link in the Narrow IV def-use chain along with the WideIV that
906 /// computes the same value as the Narrow IV def. This avoids caching Use*
907 /// pointers.
908 struct NarrowIVDefUse {
909  Instruction *NarrowDef = nullptr;
910  Instruction *NarrowUse = nullptr;
911  Instruction *WideDef = nullptr;
912 
913  // True if the narrow def is never negative. Tracking this information lets
914  // us use a sign extension instead of a zero extension or vice versa, when
915  // profitable and legal.
916  bool NeverNegative = false;
917 
918  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
919  bool NeverNegative)
920  : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
921  NeverNegative(NeverNegative) {}
922 };
923 
924 /// The goal of this transform is to remove sign and zero extends without
925 /// creating any new induction variables. To do this, it creates a new phi of
926 /// the wider type and redirects all users, either removing extends or inserting
927 /// truncs whenever we stop propagating the type.
928 class WidenIV {
929  // Parameters
930  PHINode *OrigPhi;
931  Type *WideType;
932 
933  // Context
934  LoopInfo *LI;
935  Loop *L;
936  ScalarEvolution *SE;
937  DominatorTree *DT;
938 
939  // Does the module have any calls to the llvm.experimental.guard intrinsic
940  // at all? If not we can avoid scanning instructions looking for guards.
941  bool HasGuards;
942 
943  // Result
944  PHINode *WidePhi = nullptr;
945  Instruction *WideInc = nullptr;
946  const SCEV *WideIncExpr = nullptr;
948 
950  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
951 
952  enum ExtendKind { ZeroExtended, SignExtended, Unknown };
953 
954  // A map tracking the kind of extension used to widen each narrow IV
955  // and narrow IV user.
956  // Key: pointer to a narrow IV or IV user.
957  // Value: the kind of extension used to widen this Instruction.
958  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;
959 
960  using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;
961 
962  // A map with control-dependent ranges for post increment IV uses. The key is
963  // a pair of IV def and a use of this def denoting the context. The value is
964  // a ConstantRange representing possible values of the def at the given
965  // context.
966  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;
967 
968  Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
969  Instruction *UseI) {
970  DefUserPair Key(Def, UseI);
971  auto It = PostIncRangeInfos.find(Key);
972  return It == PostIncRangeInfos.end()
974  : Optional<ConstantRange>(It->second);
975  }
976 
977  void calculatePostIncRanges(PHINode *OrigPhi);
978  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);
979 
980  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
981  DefUserPair Key(Def, UseI);
982  auto It = PostIncRangeInfos.find(Key);
983  if (It == PostIncRangeInfos.end())
984  PostIncRangeInfos.insert({Key, R});
985  else
986  It->second = R.intersectWith(It->second);
987  }
988 
989 public:
990  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
992  bool HasGuards)
993  : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
994  L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
995  HasGuards(HasGuards), DeadInsts(DI) {
996  assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
997  ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
998  }
999 
1000  PHINode *createWideIV(SCEVExpander &Rewriter);
1001 
1002 protected:
1003  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
1004  Instruction *Use);
1005 
1006  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
1007  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
1008  const SCEVAddRecExpr *WideAR);
1009  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);
1010 
1011  ExtendKind getExtendKind(Instruction *I);
1012 
1013  using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;
1014 
1015  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);
1016 
1017  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);
1018 
1019  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
1020  unsigned OpCode) const;
1021 
1022  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
1023 
1024  bool widenLoopCompare(NarrowIVDefUse DU);
1025  bool widenWithVariantLoadUse(NarrowIVDefUse DU);
1026  void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU);
1027 
1028  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
1029 };
1030 
1031 } // end anonymous namespace
1032 
1033 /// Perform a quick domtree based check for loop invariance assuming that V is
1034 /// used within the loop. LoopInfo::isLoopInvariant() seems gratuitous for this
1035 /// purpose.
1036 static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT) {
1037  Instruction *Inst = dyn_cast<Instruction>(V);
1038  if (!Inst)
1039  return true;
1040 
1041  return DT->properlyDominates(Inst->getParent(), L->getHeader());
1042 }
1043 
1044 Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
1045  bool IsSigned, Instruction *Use) {
1046  // Set the debug location and conservative insertion point.
1047  IRBuilder<> Builder(Use);
1048  // Hoist the insertion point into loop preheaders as far as possible.
1049  for (const Loop *L = LI->getLoopFor(Use->getParent());
1050  L && L->getLoopPreheader() && isLoopInvariant(NarrowOper, L, DT);
1051  L = L->getParentLoop())
1053 
1054  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
1055  Builder.CreateZExt(NarrowOper, WideType);
1056 }
1057 
1058 /// Instantiate a wide operation to replace a narrow operation. This only needs
1059 /// to handle operations that can evaluation to SCEVAddRec. It can safely return
1060 /// 0 for any operation we decide not to clone.
1061 Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
1062  const SCEVAddRecExpr *WideAR) {
1063  unsigned Opcode = DU.NarrowUse->getOpcode();
1064  switch (Opcode) {
1065  default:
1066  return nullptr;
1067  case Instruction::Add:
1068  case Instruction::Mul:
1069  case Instruction::UDiv:
1070  case Instruction::Sub:
1071  return cloneArithmeticIVUser(DU, WideAR);
1072 
1073  case Instruction::And:
1074  case Instruction::Or:
1075  case Instruction::Xor:
1076  case Instruction::Shl:
1077  case Instruction::LShr:
1078  case Instruction::AShr:
1079  return cloneBitwiseIVUser(DU);
1080  }
1081 }
1082 
1083 Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
1084  Instruction *NarrowUse = DU.NarrowUse;
1085  Instruction *NarrowDef = DU.NarrowDef;
1086  Instruction *WideDef = DU.WideDef;
1087 
1088  LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");
1089 
1090  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
1091  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
1092  // invariant and will be folded or hoisted. If it actually comes from a
1093  // widened IV, it should be removed during a future call to widenIVUse.
1094  bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
1095  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1096  ? WideDef
1097  : createExtendInst(NarrowUse->getOperand(0), WideType,
1098  IsSigned, NarrowUse);
1099  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1100  ? WideDef
1101  : createExtendInst(NarrowUse->getOperand(1), WideType,
1102  IsSigned, NarrowUse);
1103 
1104  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1105  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1106  NarrowBO->getName());
1107  IRBuilder<> Builder(NarrowUse);
1108  Builder.Insert(WideBO);
1109  WideBO->copyIRFlags(NarrowBO);
1110  return WideBO;
1111 }
1112 
1113 Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
1114  const SCEVAddRecExpr *WideAR) {
1115  Instruction *NarrowUse = DU.NarrowUse;
1116  Instruction *NarrowDef = DU.NarrowDef;
1117  Instruction *WideDef = DU.WideDef;
1118 
1119  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1120 
1121  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;
1122 
1123  // We're trying to find X such that
1124  //
1125  // Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
1126  //
1127  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
1128  // and check using SCEV if any of them are correct.
1129 
1130  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
1131  // correct solution to X.
1132  auto GuessNonIVOperand = [&](bool SignExt) {
1133  const SCEV *WideLHS;
1134  const SCEV *WideRHS;
1135 
1136  auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
1137  if (SignExt)
1138  return SE->getSignExtendExpr(S, Ty);
1139  return SE->getZeroExtendExpr(S, Ty);
1140  };
1141 
1142  if (IVOpIdx == 0) {
1143  WideLHS = SE->getSCEV(WideDef);
1144  const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
1145  WideRHS = GetExtend(NarrowRHS, WideType);
1146  } else {
1147  const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
1148  WideLHS = GetExtend(NarrowLHS, WideType);
1149  WideRHS = SE->getSCEV(WideDef);
1150  }
1151 
1152  // WideUse is "WideDef `op.wide` X" as described in the comment.
1153  const SCEV *WideUse = nullptr;
1154 
1155  switch (NarrowUse->getOpcode()) {
1156  default:
1157  llvm_unreachable("No other possibility!");
1158 
1159  case Instruction::Add:
1160  WideUse = SE->getAddExpr(WideLHS, WideRHS);
1161  break;
1162 
1163  case Instruction::Mul:
1164  WideUse = SE->getMulExpr(WideLHS, WideRHS);
1165  break;
1166 
1167  case Instruction::UDiv:
1168  WideUse = SE->getUDivExpr(WideLHS, WideRHS);
1169  break;
1170 
1171  case Instruction::Sub:
1172  WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
1173  break;
1174  }
1175 
1176  return WideUse == WideAR;
1177  };
1178 
1179  bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
1180  if (!GuessNonIVOperand(SignExtend)) {
1181  SignExtend = !SignExtend;
1182  if (!GuessNonIVOperand(SignExtend))
1183  return nullptr;
1184  }
1185 
1186  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1187  ? WideDef
1188  : createExtendInst(NarrowUse->getOperand(0), WideType,
1189  SignExtend, NarrowUse);
1190  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1191  ? WideDef
1192  : createExtendInst(NarrowUse->getOperand(1), WideType,
1193  SignExtend, NarrowUse);
1194 
1195  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1196  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1197  NarrowBO->getName());
1198 
1199  IRBuilder<> Builder(NarrowUse);
1200  Builder.Insert(WideBO);
1201  WideBO->copyIRFlags(NarrowBO);
1202  return WideBO;
1203 }
1204 
1205 WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
1206  auto It = ExtendKindMap.find(I);
1207  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
1208  return It->second;
1209 }
1210 
1211 const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
1212  unsigned OpCode) const {
1213  if (OpCode == Instruction::Add)
1214  return SE->getAddExpr(LHS, RHS);
1215  if (OpCode == Instruction::Sub)
1216  return SE->getMinusSCEV(LHS, RHS);
1217  if (OpCode == Instruction::Mul)
1218  return SE->getMulExpr(LHS, RHS);
1219 
1220  llvm_unreachable("Unsupported opcode.");
1221 }
1222 
1223 /// No-wrap operations can transfer sign extension of their result to their
1224 /// operands. Generate the SCEV value for the widened operation without
1225 /// actually modifying the IR yet. If the expression after extending the
1226 /// operands is an AddRec for this loop, return the AddRec and the kind of
1227 /// extension used.
1228 WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
1229  // Handle the common case of add<nsw/nuw>
1230  const unsigned OpCode = DU.NarrowUse->getOpcode();
1231  // Only Add/Sub/Mul instructions supported yet.
1232  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1233  OpCode != Instruction::Mul)
1234  return {nullptr, Unknown};
1235 
1236  // One operand (NarrowDef) has already been extended to WideDef. Now determine
1237  // if extending the other will lead to a recurrence.
1238  const unsigned ExtendOperIdx =
1239  DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
1240  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
1241 
1242  const SCEV *ExtendOperExpr = nullptr;
1243  const OverflowingBinaryOperator *OBO =
1244  cast<OverflowingBinaryOperator>(DU.NarrowUse);
1245  ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
1246  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
1247  ExtendOperExpr = SE->getSignExtendExpr(
1248  SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
1249  else if(ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
1250  ExtendOperExpr = SE->getZeroExtendExpr(
1251  SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
1252  else
1253  return {nullptr, Unknown};
1254 
1255  // When creating this SCEV expr, don't apply the current operations NSW or NUW
1256  // flags. This instruction may be guarded by control flow that the no-wrap
1257  // behavior depends on. Non-control-equivalent instructions can be mapped to
1258  // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
1259  // semantics to those operations.
1260  const SCEV *lhs = SE->getSCEV(DU.WideDef);
1261  const SCEV *rhs = ExtendOperExpr;
1262 
1263  // Let's swap operands to the initial order for the case of non-commutative
1264  // operations, like SUB. See PR21014.
1265  if (ExtendOperIdx == 0)
1266  std::swap(lhs, rhs);
1267  const SCEVAddRecExpr *AddRec =
1268  dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));
1269 
1270  if (!AddRec || AddRec->getLoop() != L)
1271  return {nullptr, Unknown};
1272 
1273  return {AddRec, ExtKind};
1274 }
1275 
1276 /// Is this instruction potentially interesting for further simplification after
1277 /// widening it's type? In other words, can the extend be safely hoisted out of
1278 /// the loop with SCEV reducing the value to a recurrence on the same loop. If
1279 /// so, return the extended recurrence and the kind of extension used. Otherwise
1280 /// return {nullptr, Unknown}.
1281 WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
1282  if (!SE->isSCEVable(DU.NarrowUse->getType()))
1283  return {nullptr, Unknown};
1284 
1285  const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
1286  if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
1287  SE->getTypeSizeInBits(WideType)) {
1288  // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
1289  // index. So don't follow this use.
1290  return {nullptr, Unknown};
1291  }
1292 
1293  const SCEV *WideExpr;
1294  ExtendKind ExtKind;
1295  if (DU.NeverNegative) {
1296  WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1297  if (isa<SCEVAddRecExpr>(WideExpr))
1298  ExtKind = SignExtended;
1299  else {
1300  WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1301  ExtKind = ZeroExtended;
1302  }
1303  } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
1304  WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1305  ExtKind = SignExtended;
1306  } else {
1307  WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1308  ExtKind = ZeroExtended;
1309  }
1310  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
1311  if (!AddRec || AddRec->getLoop() != L)
1312  return {nullptr, Unknown};
1313  return {AddRec, ExtKind};
1314 }
1315 
1316 /// This IV user cannot be widen. Replace this use of the original narrow IV
1317 /// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
1318 static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
1319  LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
1320  << *DU.NarrowUse << "\n");
1321  IRBuilder<> Builder(
1322  getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI));
1323  Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
1324  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
1325 }
1326 
1327 /// If the narrow use is a compare instruction, then widen the compare
1328 // (and possibly the other operand). The extend operation is hoisted into the
1329 // loop preheader as far as possible.
1330 bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
1331  ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
1332  if (!Cmp)
1333  return false;
1334 
1335  // We can legally widen the comparison in the following two cases:
1336  //
1337  // - The signedness of the IV extension and comparison match
1338  //
1339  // - The narrow IV is always positive (and thus its sign extension is equal
1340  // to its zero extension). For instance, let's say we're zero extending
1341  // %narrow for the following use
1342  //
1343  // icmp slt i32 %narrow, %val ... (A)
1344  //
1345  // and %narrow is always positive. Then
1346  //
1347  // (A) == icmp slt i32 sext(%narrow), sext(%val)
1348  // == icmp slt i32 zext(%narrow), sext(%val)
1349  bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
1350  if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
1351  return false;
1352 
1353  Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
1354  unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
1355  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1356  assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");
1357 
1358  // Widen the compare instruction.
1359  IRBuilder<> Builder(
1360  getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI));
1361  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1362 
1363  // Widen the other operand of the compare, if necessary.
1364  if (CastWidth < IVWidth) {
1365  Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
1366  DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
1367  }
1368  return true;
1369 }
1370 
1371 /// If the narrow use is an instruction whose two operands are the defining
1372 /// instruction of DU and a load instruction, then we have the following:
1373 /// if the load is hoisted outside the loop, then we do not reach this function
1374 /// as scalar evolution analysis works fine in widenIVUse with variables
1375 /// hoisted outside the loop and efficient code is subsequently generated by
1376 /// not emitting truncate instructions. But when the load is not hoisted
1377 /// (whether due to limitation in alias analysis or due to a true legality),
1378 /// then scalar evolution can not proceed with loop variant values and
1379 /// inefficient code is generated. This function handles the non-hoisted load
1380 /// special case by making the optimization generate the same type of code for
1381 /// hoisted and non-hoisted load (widen use and eliminate sign extend
1382 /// instruction). This special case is important especially when the induction
1383 /// variables are affecting addressing mode in code generation.
1384 bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) {
1385  Instruction *NarrowUse = DU.NarrowUse;
1386  Instruction *NarrowDef = DU.NarrowDef;
1387  Instruction *WideDef = DU.WideDef;
1388 
1389  // Handle the common case of add<nsw/nuw>
1390  const unsigned OpCode = NarrowUse->getOpcode();
1391  // Only Add/Sub/Mul instructions are supported.
1392  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1393  OpCode != Instruction::Mul)
1394  return false;
1395 
1396  // The operand that is not defined by NarrowDef of DU. Let's call it the
1397  // other operand.
1398  unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
1399  assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
1400  "bad DU");
1401 
1402  const SCEV *ExtendOperExpr = nullptr;
1403  const OverflowingBinaryOperator *OBO =
1404  cast<OverflowingBinaryOperator>(NarrowUse);
1405  ExtendKind ExtKind = getExtendKind(NarrowDef);
1406  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
1407  ExtendOperExpr = SE->getSignExtendExpr(
1408  SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1409  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
1410  ExtendOperExpr = SE->getZeroExtendExpr(
1411  SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1412  else
1413  return false;
1414 
1415  // We are interested in the other operand being a load instruction.
1416  // But, we should look into relaxing this restriction later on.
1417  auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx));
1418  if (I && I->getOpcode() != Instruction::Load)
1419  return false;
1420 
1421  // Verifying that Defining operand is an AddRec
1422  const SCEV *Op1 = SE->getSCEV(WideDef);
1423  const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
1424  if (!AddRecOp1 || AddRecOp1->getLoop() != L)
1425  return false;
1426  // Verifying that other operand is an Extend.
1427  if (ExtKind == SignExtended) {
1428  if (!isa<SCEVSignExtendExpr>(ExtendOperExpr))
1429  return false;
1430  } else {
1431  if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr))
1432  return false;
1433  }
1434 
1435  if (ExtKind == SignExtended) {
1436  for (Use &U : NarrowUse->uses()) {
1437  SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1438  if (!User || User->getType() != WideType)
1439  return false;
1440  }
1441  } else { // ExtKind == ZeroExtended
1442  for (Use &U : NarrowUse->uses()) {
1443  ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1444  if (!User || User->getType() != WideType)
1445  return false;
1446  }
1447  }
1448 
1449  return true;
1450 }
1451 
1452 /// Special Case for widening with variant Loads (see
1453 /// WidenIV::widenWithVariantLoadUse). This is the code generation part.
1454 void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) {
1455  Instruction *NarrowUse = DU.NarrowUse;
1456  Instruction *NarrowDef = DU.NarrowDef;
1457  Instruction *WideDef = DU.WideDef;
1458 
1459  ExtendKind ExtKind = getExtendKind(NarrowDef);
1460 
1461  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1462 
1463  // Generating a widening use instruction.
1464  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1465  ? WideDef
1466  : createExtendInst(NarrowUse->getOperand(0), WideType,
1467  ExtKind, NarrowUse);
1468  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1469  ? WideDef
1470  : createExtendInst(NarrowUse->getOperand(1), WideType,
1471  ExtKind, NarrowUse);
1472 
1473  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1474  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1475  NarrowBO->getName());
1476  IRBuilder<> Builder(NarrowUse);
1477  Builder.Insert(WideBO);
1478  WideBO->copyIRFlags(NarrowBO);
1479 
1480  if (ExtKind == SignExtended)
1481  ExtendKindMap[NarrowUse] = SignExtended;
1482  else
1483  ExtendKindMap[NarrowUse] = ZeroExtended;
1484 
1485  // Update the Use.
1486  if (ExtKind == SignExtended) {
1487  for (Use &U : NarrowUse->uses()) {
1488  SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1489  if (User && User->getType() == WideType) {
1490  LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1491  << *WideBO << "\n");
1492  ++NumElimExt;
1493  User->replaceAllUsesWith(WideBO);
1494  DeadInsts.emplace_back(User);
1495  }
1496  }
1497  } else { // ExtKind == ZeroExtended
1498  for (Use &U : NarrowUse->uses()) {
1499  ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1500  if (User && User->getType() == WideType) {
1501  LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1502  << *WideBO << "\n");
1503  ++NumElimExt;
1504  User->replaceAllUsesWith(WideBO);
1505  DeadInsts.emplace_back(User);
1506  }
1507  }
1508  }
1509 }
1510 
1511 /// Determine whether an individual user of the narrow IV can be widened. If so,
1512 /// return the wide clone of the user.
1513 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
1514  assert(ExtendKindMap.count(DU.NarrowDef) &&
1515  "Should already know the kind of extension used to widen NarrowDef");
1516 
1517  // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1518  if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
1519  if (LI->getLoopFor(UsePhi->getParent()) != L) {
1520  // For LCSSA phis, sink the truncate outside the loop.
1521  // After SimplifyCFG most loop exit targets have a single predecessor.
1522  // Otherwise fall back to a truncate within the loop.
1523  if (UsePhi->getNumOperands() != 1)
1524  truncateIVUse(DU, DT, LI);
1525  else {
1526  // Widening the PHI requires us to insert a trunc. The logical place
1527  // for this trunc is in the same BB as the PHI. This is not possible if
1528  // the BB is terminated by a catchswitch.
1529  if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
1530  return nullptr;
1531 
1532  PHINode *WidePhi =
1533  PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
1534  UsePhi);
1535  WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
1536  IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
1537  Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
1538  UsePhi->replaceAllUsesWith(Trunc);
1539  DeadInsts.emplace_back(UsePhi);
1540  LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
1541  << *WidePhi << "\n");
1542  }
1543  return nullptr;
1544  }
1545  }
1546 
1547  // This narrow use can be widened by a sext if it's non-negative or its narrow
1548  // def was widended by a sext. Same for zext.
1549  auto canWidenBySExt = [&]() {
1550  return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
1551  };
1552  auto canWidenByZExt = [&]() {
1553  return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
1554  };
1555 
1556  // Our raison d'etre! Eliminate sign and zero extension.
1557  if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
1558  (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
1559  Value *NewDef = DU.WideDef;
1560  if (DU.NarrowUse->getType() != WideType) {
1561  unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
1562  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1563  if (CastWidth < IVWidth) {
1564  // The cast isn't as wide as the IV, so insert a Trunc.
1565  IRBuilder<> Builder(DU.NarrowUse);
1566  NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
1567  }
1568  else {
1569  // A wider extend was hidden behind a narrower one. This may induce
1570  // another round of IV widening in which the intermediate IV becomes
1571  // dead. It should be very rare.
1572  LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1573  << " not wide enough to subsume " << *DU.NarrowUse
1574  << "\n");
1575  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1576  NewDef = DU.NarrowUse;
1577  }
1578  }
1579  if (NewDef != DU.NarrowUse) {
1580  LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1581  << " replaced by " << *DU.WideDef << "\n");
1582  ++NumElimExt;
1583  DU.NarrowUse->replaceAllUsesWith(NewDef);
1584  DeadInsts.emplace_back(DU.NarrowUse);
1585  }
1586  // Now that the extend is gone, we want to expose it's uses for potential
1587  // further simplification. We don't need to directly inform SimplifyIVUsers
1588  // of the new users, because their parent IV will be processed later as a
1589  // new loop phi. If we preserved IVUsers analysis, we would also want to
1590  // push the uses of WideDef here.
1591 
1592  // No further widening is needed. The deceased [sz]ext had done it for us.
1593  return nullptr;
1594  }
1595 
1596  // Does this user itself evaluate to a recurrence after widening?
1597  WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
1598  if (!WideAddRec.first)
1599  WideAddRec = getWideRecurrence(DU);
1600 
1601  assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
1602  if (!WideAddRec.first) {
1603  // If use is a loop condition, try to promote the condition instead of
1604  // truncating the IV first.
1605  if (widenLoopCompare(DU))
1606  return nullptr;
1607 
1608  // We are here about to generate a truncate instruction that may hurt
1609  // performance because the scalar evolution expression computed earlier
1610  // in WideAddRec.first does not indicate a polynomial induction expression.
1611  // In that case, look at the operands of the use instruction to determine
1612  // if we can still widen the use instead of truncating its operand.
1613  if (widenWithVariantLoadUse(DU)) {
1614  widenWithVariantLoadUseCodegen(DU);
1615  return nullptr;
1616  }
1617 
1618  // This user does not evaluate to a recurrence after widening, so don't
1619  // follow it. Instead insert a Trunc to kill off the original use,
1620  // eventually isolating the original narrow IV so it can be removed.
1621  truncateIVUse(DU, DT, LI);
1622  return nullptr;
1623  }
1624  // Assume block terminators cannot evaluate to a recurrence. We can't to
1625  // insert a Trunc after a terminator if there happens to be a critical edge.
1626  assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
1627  "SCEV is not expected to evaluate a block terminator");
1628 
1629  // Reuse the IV increment that SCEVExpander created as long as it dominates
1630  // NarrowUse.
1631  Instruction *WideUse = nullptr;
1632  if (WideAddRec.first == WideIncExpr &&
1633  Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
1634  WideUse = WideInc;
1635  else {
1636  WideUse = cloneIVUser(DU, WideAddRec.first);
1637  if (!WideUse)
1638  return nullptr;
1639  }
1640  // Evaluation of WideAddRec ensured that the narrow expression could be
1641  // extended outside the loop without overflow. This suggests that the wide use
1642  // evaluates to the same expression as the extended narrow use, but doesn't
1643  // absolutely guarantee it. Hence the following failsafe check. In rare cases
1644  // where it fails, we simply throw away the newly created wide use.
1645  if (WideAddRec.first != SE->getSCEV(WideUse)) {
1646  LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
1647  << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
1648  << "\n");
1649  DeadInsts.emplace_back(WideUse);
1650  return nullptr;
1651  }
1652 
1653  ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
1654  // Returning WideUse pushes it on the worklist.
1655  return WideUse;
1656 }
1657 
1658 /// Add eligible users of NarrowDef to NarrowIVUsers.
1659 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1660  const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1661  bool NonNegativeDef =
1662  SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1663  SE->getConstant(NarrowSCEV->getType(), 0));
1664  for (User *U : NarrowDef->users()) {
1665  Instruction *NarrowUser = cast<Instruction>(U);
1666 
1667  // Handle data flow merges and bizarre phi cycles.
1668  if (!Widened.insert(NarrowUser).second)
1669  continue;
1670 
1671  bool NonNegativeUse = false;
1672  if (!NonNegativeDef) {
1673  // We might have a control-dependent range information for this context.
1674  if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
1675  NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
1676  }
1677 
1678  NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
1679  NonNegativeDef || NonNegativeUse);
1680  }
1681 }
1682 
1683 /// Process a single induction variable. First use the SCEVExpander to create a
1684 /// wide induction variable that evaluates to the same recurrence as the
1685 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's
1686 /// def-use chain. After widenIVUse has processed all interesting IV users, the
1687 /// narrow IV will be isolated for removal by DeleteDeadPHIs.
1688 ///
1689 /// It would be simpler to delete uses as they are processed, but we must avoid
1690 /// invalidating SCEV expressions.
1691 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
1692  // Is this phi an induction variable?
1693  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
1694  if (!AddRec)
1695  return nullptr;
1696 
1697  // Widen the induction variable expression.
1698  const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
1699  ? SE->getSignExtendExpr(AddRec, WideType)
1700  : SE->getZeroExtendExpr(AddRec, WideType);
1701 
1702  assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
1703  "Expect the new IV expression to preserve its type");
1704 
1705  // Can the IV be extended outside the loop without overflow?
1706  AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
1707  if (!AddRec || AddRec->getLoop() != L)
1708  return nullptr;
1709 
1710  // An AddRec must have loop-invariant operands. Since this AddRec is
1711  // materialized by a loop header phi, the expression cannot have any post-loop
1712  // operands, so they must dominate the loop header.
1713  assert(
1714  SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
1715  SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
1716  "Loop header phi recurrence inputs do not dominate the loop");
1717 
1718  // Iterate over IV uses (including transitive ones) looking for IV increments
1719  // of the form 'add nsw %iv, <const>'. For each increment and each use of
1720  // the increment calculate control-dependent range information basing on
1721  // dominating conditions inside of the loop (e.g. a range check inside of the
1722  // loop). Calculated ranges are stored in PostIncRangeInfos map.
1723  //
1724  // Control-dependent range information is later used to prove that a narrow
1725  // definition is not negative (see pushNarrowIVUsers). It's difficult to do
1726  // this on demand because when pushNarrowIVUsers needs this information some
1727  // of the dominating conditions might be already widened.
1728  if (UsePostIncrementRanges)
1729  calculatePostIncRanges(OrigPhi);
1730 
1731  // The rewriter provides a value for the desired IV expression. This may
1732  // either find an existing phi or materialize a new one. Either way, we
1733  // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
1734  // of the phi-SCC dominates the loop entry.
1735  Instruction *InsertPt = &L->getHeader()->front();
1736  WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
1737 
1738  // Remembering the WideIV increment generated by SCEVExpander allows
1739  // widenIVUse to reuse it when widening the narrow IV's increment. We don't
1740  // employ a general reuse mechanism because the call above is the only call to
1741  // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
1742  if (BasicBlock *LatchBlock = L->getLoopLatch()) {
1743  WideInc =
1744  cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
1745  WideIncExpr = SE->getSCEV(WideInc);
1746  // Propagate the debug location associated with the original loop increment
1747  // to the new (widened) increment.
1748  auto *OrigInc =
1749  cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
1750  WideInc->setDebugLoc(OrigInc->getDebugLoc());
1751  }
1752 
1753  LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
1754  ++NumWidened;
1755 
1756  // Traverse the def-use chain using a worklist starting at the original IV.
1757  assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" );
1758 
1759  Widened.insert(OrigPhi);
1760  pushNarrowIVUsers(OrigPhi, WidePhi);
1761 
1762  while (!NarrowIVUsers.empty()) {
1763  NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
1764 
1765  // Process a def-use edge. This may replace the use, so don't hold a
1766  // use_iterator across it.
1767  Instruction *WideUse = widenIVUse(DU, Rewriter);
1768 
1769  // Follow all def-use edges from the previous narrow use.
1770  if (WideUse)
1771  pushNarrowIVUsers(DU.NarrowUse, WideUse);
1772 
1773  // widenIVUse may have removed the def-use edge.
1774  if (DU.NarrowDef->use_empty())
1775  DeadInsts.emplace_back(DU.NarrowDef);
1776  }
1777 
1778  // Attach any debug information to the new PHI. Since OrigPhi and WidePHI
1779  // evaluate the same recurrence, we can just copy the debug info over.
1781  llvm::findDbgValues(DbgValues, OrigPhi);
1782  auto *MDPhi = MetadataAsValue::get(WidePhi->getContext(),
1783  ValueAsMetadata::get(WidePhi));
1784  for (auto &DbgValue : DbgValues)
1785  DbgValue->setOperand(0, MDPhi);
1786  return WidePhi;
1787 }
1788 
1789 /// Calculates control-dependent range for the given def at the given context
1790 /// by looking at dominating conditions inside of the loop
1791 void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
1792  Instruction *NarrowUser) {
1793  using namespace llvm::PatternMatch;
1794 
1795  Value *NarrowDefLHS;
1796  const APInt *NarrowDefRHS;
1797  if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
1798  m_APInt(NarrowDefRHS))) ||
1799  !NarrowDefRHS->isNonNegative())
1800  return;
1801 
1802  auto UpdateRangeFromCondition = [&] (Value *Condition,
1803  bool TrueDest) {
1804  CmpInst::Predicate Pred;
1805  Value *CmpRHS;
1806  if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
1807  m_Value(CmpRHS))))
1808  return;
1809 
1811  TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
1812 
1813  auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
1814  auto CmpConstrainedLHSRange =
1815  ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
1816  auto NarrowDefRange =
1817  CmpConstrainedLHSRange.addWithNoSignedWrap(*NarrowDefRHS);
1818 
1819  updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
1820  };
1821 
1822  auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
1823  if (!HasGuards)
1824  return;
1825 
1826  for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
1827  Ctx->getParent()->rend())) {
1828  Value *C = nullptr;
1829  if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
1830  UpdateRangeFromCondition(C, /*TrueDest=*/true);
1831  }
1832  };
1833 
1834  UpdateRangeFromGuards(NarrowUser);
1835 
1836  BasicBlock *NarrowUserBB = NarrowUser->getParent();
1837  // If NarrowUserBB is statically unreachable asking dominator queries may
1838  // yield surprising results. (e.g. the block may not have a dom tree node)
1839  if (!DT->isReachableFromEntry(NarrowUserBB))
1840  return;
1841 
1842  for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
1843  L->contains(DTB->getBlock());
1844  DTB = DTB->getIDom()) {
1845  auto *BB = DTB->getBlock();
1846  auto *TI = BB->getTerminator();
1847  UpdateRangeFromGuards(TI);
1848 
1849  auto *BI = dyn_cast<BranchInst>(TI);
1850  if (!BI || !BI->isConditional())
1851  continue;
1852 
1853  auto *TrueSuccessor = BI->getSuccessor(0);
1854  auto *FalseSuccessor = BI->getSuccessor(1);
1855 
1856  auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
1857  return BBE.isSingleEdge() &&
1858  DT->dominates(BBE, NarrowUser->getParent());
1859  };
1860 
1861  if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
1862  UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
1863 
1864  if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
1865  UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
1866  }
1867 }
1868 
1869 /// Calculates PostIncRangeInfos map for the given IV
1870 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
1873  Worklist.push_back(OrigPhi);
1874  Visited.insert(OrigPhi);
1875 
1876  while (!Worklist.empty()) {
1877  Instruction *NarrowDef = Worklist.pop_back_val();
1878 
1879  for (Use &U : NarrowDef->uses()) {
1880  auto *NarrowUser = cast<Instruction>(U.getUser());
1881 
1882  // Don't go looking outside the current loop.
1883  auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
1884  if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
1885  continue;
1886 
1887  if (!Visited.insert(NarrowUser).second)
1888  continue;
1889 
1890  Worklist.push_back(NarrowUser);
1891 
1892  calculatePostIncRange(NarrowDef, NarrowUser);
1893  }
1894  }
1895 }
1896 
1897 //===----------------------------------------------------------------------===//
1898 // Live IV Reduction - Minimize IVs live across the loop.
1899 //===----------------------------------------------------------------------===//
1900 
1901 //===----------------------------------------------------------------------===//
1902 // Simplification of IV users based on SCEV evaluation.
1903 //===----------------------------------------------------------------------===//
1904 
1905 namespace {
1906 
1907 class IndVarSimplifyVisitor : public IVVisitor {
1908  ScalarEvolution *SE;
1909  const TargetTransformInfo *TTI;
1910  PHINode *IVPhi;
1911 
1912 public:
1913  WideIVInfo WI;
1914 
1915  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
1916  const TargetTransformInfo *TTI,
1917  const DominatorTree *DTree)
1918  : SE(SCEV), TTI(TTI), IVPhi(IV) {
1919  DT = DTree;
1920  WI.NarrowIV = IVPhi;
1921  }
1922 
1923  // Implement the interface used by simplifyUsersOfIV.
1924  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
1925 };
1926 
1927 } // end anonymous namespace
1928 
1929 /// Iteratively perform simplification on a worklist of IV users. Each
1930 /// successive simplification may push more users which may themselves be
1931 /// candidates for simplification.
1932 ///
1933 /// Sign/Zero extend elimination is interleaved with IV simplification.
1934 bool IndVarSimplify::simplifyAndExtend(Loop *L,
1935  SCEVExpander &Rewriter,
1936  LoopInfo *LI) {
1938 
1939  auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
1940  Intrinsic::getName(Intrinsic::experimental_guard));
1941  bool HasGuards = GuardDecl && !GuardDecl->use_empty();
1942 
1943  SmallVector<PHINode*, 8> LoopPhis;
1944  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1945  LoopPhis.push_back(cast<PHINode>(I));
1946  }
1947  // Each round of simplification iterates through the SimplifyIVUsers worklist
1948  // for all current phis, then determines whether any IVs can be
1949  // widened. Widening adds new phis to LoopPhis, inducing another round of
1950  // simplification on the wide IVs.
1951  bool Changed = false;
1952  while (!LoopPhis.empty()) {
1953  // Evaluate as many IV expressions as possible before widening any IVs. This
1954  // forces SCEV to set no-wrap flags before evaluating sign/zero
1955  // extension. The first time SCEV attempts to normalize sign/zero extension,
1956  // the result becomes final. So for the most predictable results, we delay
1957  // evaluation of sign/zero extend evaluation until needed, and avoid running
1958  // other SCEV based analysis prior to simplifyAndExtend.
1959  do {
1960  PHINode *CurrIV = LoopPhis.pop_back_val();
1961 
1962  // Information about sign/zero extensions of CurrIV.
1963  IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
1964 
1965  Changed |=
1966  simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor);
1967 
1968  if (Visitor.WI.WidestNativeType) {
1969  WideIVs.push_back(Visitor.WI);
1970  }
1971  } while(!LoopPhis.empty());
1972 
1973  for (; !WideIVs.empty(); WideIVs.pop_back()) {
1974  WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
1975  if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
1976  Changed = true;
1977  LoopPhis.push_back(WidePhi);
1978  }
1979  }
1980  }
1981  return Changed;
1982 }
1983 
1984 //===----------------------------------------------------------------------===//
1985 // linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
1986 //===----------------------------------------------------------------------===//
1987 
1988 /// Return true if this loop's backedge taken count expression can be safely and
1989 /// cheaply expanded into an instruction sequence that can be used by
1990 /// linearFunctionTestReplace.
1991 ///
1992 /// TODO: This fails for pointer-type loop counters with greater than one byte
1993 /// strides, consequently preventing LFTR from running. For the purpose of LFTR
1994 /// we could skip this check in the case that the LFTR loop counter (chosen by
1995 /// FindLoopCounter) is also pointer type. Instead, we could directly convert
1996 /// the loop test to an inequality test by checking the target data's alignment
1997 /// of element types (given that the initial pointer value originates from or is
1998 /// used by ABI constrained operation, as opposed to inttoptr/ptrtoint).
1999 /// However, we don't yet have a strong motivation for converting loop tests
2000 /// into inequality tests.
2002  SCEVExpander &Rewriter) {
2003  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
2004  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
2005  BackedgeTakenCount->isZero())
2006  return false;
2007 
2008  if (!L->getExitingBlock())
2009  return false;
2010 
2011  // Can't rewrite non-branch yet.
2012  if (!isa<BranchInst>(L->getExitingBlock()->getTerminator()))
2013  return false;
2014 
2015  if (Rewriter.isHighCostExpansion(BackedgeTakenCount, L))
2016  return false;
2017 
2018  return true;
2019 }
2020 
2021 /// Return the loop header phi IFF IncV adds a loop invariant value to the phi.
2023  Instruction *IncI = dyn_cast<Instruction>(IncV);
2024  if (!IncI)
2025  return nullptr;
2026 
2027  switch (IncI->getOpcode()) {
2028  case Instruction::Add:
2029  case Instruction::Sub:
2030  break;
2031  case Instruction::GetElementPtr:
2032  // An IV counter must preserve its type.
2033  if (IncI->getNumOperands() == 2)
2034  break;
2036  default:
2037  return nullptr;
2038  }
2039 
2040  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
2041  if (Phi && Phi->getParent() == L->getHeader()) {
2042  if (isLoopInvariant(IncI->getOperand(1), L, DT))
2043  return Phi;
2044  return nullptr;
2045  }
2046  if (IncI->getOpcode() == Instruction::GetElementPtr)
2047  return nullptr;
2048 
2049  // Allow add/sub to be commuted.
2050  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
2051  if (Phi && Phi->getParent() == L->getHeader()) {
2052  if (isLoopInvariant(IncI->getOperand(0), L, DT))
2053  return Phi;
2054  }
2055  return nullptr;
2056 }
2057 
2058 /// Return the compare guarding the loop latch, or NULL for unrecognized tests.
2060  assert(L->getExitingBlock() && "expected loop exit");
2061 
2062  BasicBlock *LatchBlock = L->getLoopLatch();
2063  // Don't bother with LFTR if the loop is not properly simplified.
2064  if (!LatchBlock)
2065  return nullptr;
2066 
2068  assert(BI && "expected exit branch");
2069 
2070  return dyn_cast<ICmpInst>(BI->getCondition());
2071 }
2072 
2073 /// linearFunctionTestReplace policy. Return true unless we can show that the
2074 /// current exit test is already sufficiently canonical.
2075 static bool needsLFTR(Loop *L, DominatorTree *DT) {
2076  // Do LFTR to simplify the exit condition to an ICMP.
2077  ICmpInst *Cond = getLoopTest(L);
2078  if (!Cond)
2079  return true;
2080 
2081  // Do LFTR to simplify the exit ICMP to EQ/NE
2082  ICmpInst::Predicate Pred = Cond->getPredicate();
2083  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
2084  return true;
2085 
2086  // Look for a loop invariant RHS
2087  Value *LHS = Cond->getOperand(0);
2088  Value *RHS = Cond->getOperand(1);
2089  if (!isLoopInvariant(RHS, L, DT)) {
2090  if (!isLoopInvariant(LHS, L, DT))
2091  return true;
2092  std::swap(LHS, RHS);
2093  }
2094  // Look for a simple IV counter LHS
2095  PHINode *Phi = dyn_cast<PHINode>(LHS);
2096  if (!Phi)
2097  Phi = getLoopPhiForCounter(LHS, L, DT);
2098 
2099  if (!Phi)
2100  return true;
2101 
2102  // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
2103  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
2104  if (Idx < 0)
2105  return true;
2106 
2107  // Do LFTR if the exit condition's IV is *not* a simple counter.
2108  Value *IncV = Phi->getIncomingValue(Idx);
2109  return Phi != getLoopPhiForCounter(IncV, L, DT);
2110 }
2111 
2112 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
2113 /// down to checking that all operands are constant and listing instructions
2114 /// that may hide undef.
2116  unsigned Depth) {
2117  if (isa<Constant>(V))
2118  return !isa<UndefValue>(V);
2119 
2120  if (Depth >= 6)
2121  return false;
2122 
2123  // Conservatively handle non-constant non-instructions. For example, Arguments
2124  // may be undef.
2125  Instruction *I = dyn_cast<Instruction>(V);
2126  if (!I)
2127  return false;
2128 
2129  // Load and return values may be undef.
2130  if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
2131  return false;
2132 
2133  // Optimistically handle other instructions.
2134  for (Value *Op : I->operands()) {
2135  if (!Visited.insert(Op).second)
2136  continue;
2137  if (!hasConcreteDefImpl(Op, Visited, Depth+1))
2138  return false;
2139  }
2140  return true;
2141 }
2142 
2143 /// Return true if the given value is concrete. We must prove that undef can
2144 /// never reach it.
2145 ///
2146 /// TODO: If we decide that this is a good approach to checking for undef, we
2147 /// may factor it into a common location.
2148 static bool hasConcreteDef(Value *V) {
2149  SmallPtrSet<Value*, 8> Visited;
2150  Visited.insert(V);
2151  return hasConcreteDefImpl(V, Visited, 0);
2152 }
2153 
2154 /// Return true if this IV has any uses other than the (soon to be rewritten)
2155 /// loop exit test.
2156 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
2157  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
2158  Value *IncV = Phi->getIncomingValue(LatchIdx);
2159 
2160  for (User *U : Phi->users())
2161  if (U != Cond && U != IncV) return false;
2162 
2163  for (User *U : IncV->users())
2164  if (U != Cond && U != Phi) return false;
2165  return true;
2166 }
2167 
2168 /// Find an affine IV in canonical form.
2169 ///
2170 /// BECount may be an i8* pointer type. The pointer difference is already
2171 /// valid count without scaling the address stride, so it remains a pointer
2172 /// expression as far as SCEV is concerned.
2173 ///
2174 /// Currently only valid for LFTR. See the comments on hasConcreteDef below.
2175 ///
2176 /// FIXME: Accept -1 stride and set IVLimit = IVInit - BECount
2177 ///
2178 /// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride.
2179 /// This is difficult in general for SCEV because of potential overflow. But we
2180 /// could at least handle constant BECounts.
2181 static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount,
2182  ScalarEvolution *SE, DominatorTree *DT) {
2183  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
2184 
2185  Value *Cond =
2186  cast<BranchInst>(L->getExitingBlock()->getTerminator())->getCondition();
2187 
2188  // Loop over all of the PHI nodes, looking for a simple counter.
2189  PHINode *BestPhi = nullptr;
2190  const SCEV *BestInit = nullptr;
2191  BasicBlock *LatchBlock = L->getLoopLatch();
2192  assert(LatchBlock && "needsLFTR should guarantee a loop latch");
2193  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2194 
2195  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
2196  PHINode *Phi = cast<PHINode>(I);
2197  if (!SE->isSCEVable(Phi->getType()))
2198  continue;
2199 
2200  // Avoid comparing an integer IV against a pointer Limit.
2201  if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
2202  continue;
2203 
2204  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2205  if (!AR || AR->getLoop() != L || !AR->isAffine())
2206  continue;
2207 
2208  // AR may be a pointer type, while BECount is an integer type.
2209  // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
2210  // AR may not be a narrower type, or we may never exit.
2211  uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
2212  if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
2213  continue;
2214 
2215  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
2216  if (!Step || !Step->isOne())
2217  continue;
2218 
2219  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
2220  Value *IncV = Phi->getIncomingValue(LatchIdx);
2221  if (getLoopPhiForCounter(IncV, L, DT) != Phi)
2222  continue;
2223 
2224  // Avoid reusing a potentially undef value to compute other values that may
2225  // have originally had a concrete definition.
2226  if (!hasConcreteDef(Phi)) {
2227  // We explicitly allow unknown phis as long as they are already used by
2228  // the loop test. In this case we assume that performing LFTR could not
2229  // increase the number of undef users.
2230  if (ICmpInst *Cond = getLoopTest(L)) {
2231  if (Phi != getLoopPhiForCounter(Cond->getOperand(0), L, DT) &&
2232  Phi != getLoopPhiForCounter(Cond->getOperand(1), L, DT)) {
2233  continue;
2234  }
2235  }
2236  }
2237  const SCEV *Init = AR->getStart();
2238 
2239  if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
2240  // Don't force a live loop counter if another IV can be used.
2241  if (AlmostDeadIV(Phi, LatchBlock, Cond))
2242  continue;
2243 
2244  // Prefer to count-from-zero. This is a more "canonical" counter form. It
2245  // also prefers integer to pointer IVs.
2246  if (BestInit->isZero() != Init->isZero()) {
2247  if (BestInit->isZero())
2248  continue;
2249  }
2250  // If two IVs both count from zero or both count from nonzero then the
2251  // narrower is likely a dead phi that has been widened. Use the wider phi
2252  // to allow the other to be eliminated.
2253  else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
2254  continue;
2255  }
2256  BestPhi = Phi;
2257  BestInit = Init;
2258  }
2259  return BestPhi;
2260 }
2261 
2262 /// Help linearFunctionTestReplace by generating a value that holds the RHS of
2263 /// the new loop test.
2264 static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
2265  SCEVExpander &Rewriter, ScalarEvolution *SE) {
2266  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2267  assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter");
2268  const SCEV *IVInit = AR->getStart();
2269 
2270  // IVInit may be a pointer while IVCount is an integer when FindLoopCounter
2271  // finds a valid pointer IV. Sign extend BECount in order to materialize a
2272  // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
2273  // the existing GEPs whenever possible.
2274  if (IndVar->getType()->isPointerTy() && !IVCount->getType()->isPointerTy()) {
2275  // IVOffset will be the new GEP offset that is interpreted by GEP as a
2276  // signed value. IVCount on the other hand represents the loop trip count,
2277  // which is an unsigned value. FindLoopCounter only allows induction
2278  // variables that have a positive unit stride of one. This means we don't
2279  // have to handle the case of negative offsets (yet) and just need to zero
2280  // extend IVCount.
2281  Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
2282  const SCEV *IVOffset = SE->getTruncateOrZeroExtend(IVCount, OfsTy);
2283 
2284  // Expand the code for the iteration count.
2285  assert(SE->isLoopInvariant(IVOffset, L) &&
2286  "Computed iteration count is not loop invariant!");
2287  BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
2288  Value *GEPOffset = Rewriter.expandCodeFor(IVOffset, OfsTy, BI);
2289 
2290  Value *GEPBase = IndVar->getIncomingValueForBlock(L->getLoopPreheader());
2291  assert(AR->getStart() == SE->getSCEV(GEPBase) && "bad loop counter");
2292  // We could handle pointer IVs other than i8*, but we need to compensate for
2293  // gep index scaling. See canExpandBackedgeTakenCount comments.
2295  cast<PointerType>(GEPBase->getType())
2296  ->getElementType())->isOne() &&
2297  "unit stride pointer IV must be i8*");
2298 
2299  IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
2300  return Builder.CreateGEP(nullptr, GEPBase, GEPOffset, "lftr.limit");
2301  } else {
2302  // In any other case, convert both IVInit and IVCount to integers before
2303  // comparing. This may result in SCEV expansion of pointers, but in practice
2304  // SCEV will fold the pointer arithmetic away as such:
2305  // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
2306  //
2307  // Valid Cases: (1) both integers is most common; (2) both may be pointers
2308  // for simple memset-style loops.
2309  //
2310  // IVInit integer and IVCount pointer would only occur if a canonical IV
2311  // were generated on top of case #2, which is not expected.
2312 
2313  const SCEV *IVLimit = nullptr;
2314  // For unit stride, IVCount = Start + BECount with 2's complement overflow.
2315  // For non-zero Start, compute IVCount here.
2316  if (AR->getStart()->isZero())
2317  IVLimit = IVCount;
2318  else {
2319  assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
2320  const SCEV *IVInit = AR->getStart();
2321 
2322  // For integer IVs, truncate the IV before computing IVInit + BECount.
2323  if (SE->getTypeSizeInBits(IVInit->getType())
2324  > SE->getTypeSizeInBits(IVCount->getType()))
2325  IVInit = SE->getTruncateExpr(IVInit, IVCount->getType());
2326 
2327  IVLimit = SE->getAddExpr(IVInit, IVCount);
2328  }
2329  // Expand the code for the iteration count.
2330  BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
2331  IRBuilder<> Builder(BI);
2332  assert(SE->isLoopInvariant(IVLimit, L) &&
2333  "Computed iteration count is not loop invariant!");
2334  // Ensure that we generate the same type as IndVar, or a smaller integer
2335  // type. In the presence of null pointer values, we have an integer type
2336  // SCEV expression (IVInit) for a pointer type IV value (IndVar).
2337  Type *LimitTy = IVCount->getType()->isPointerTy() ?
2338  IndVar->getType() : IVCount->getType();
2339  return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
2340  }
2341 }
2342 
2343 /// This method rewrites the exit condition of the loop to be a canonical !=
2344 /// comparison against the incremented loop induction variable. This pass is
2345 /// able to rewrite the exit tests of any loop where the SCEV analysis can
2346 /// determine a loop-invariant trip count of the loop, which is actually a much
2347 /// broader range than just linear tests.
2348 bool IndVarSimplify::
2349 linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
2350  PHINode *IndVar, SCEVExpander &Rewriter) {
2351  assert(canExpandBackedgeTakenCount(L, SE, Rewriter) && "precondition");
2352 
2353  // Initialize CmpIndVar and IVCount to their preincremented values.
2354  Value *CmpIndVar = IndVar;
2355  const SCEV *IVCount = BackedgeTakenCount;
2356 
2357  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
2358 
2359  // If the exiting block is the same as the backedge block, we prefer to
2360  // compare against the post-incremented value, otherwise we must compare
2361  // against the preincremented value.
2362  if (L->getExitingBlock() == L->getLoopLatch()) {
2363  // Add one to the "backedge-taken" count to get the trip count.
2364  // This addition may overflow, which is valid as long as the comparison is
2365  // truncated to BackedgeTakenCount->getType().
2366  IVCount = SE->getAddExpr(BackedgeTakenCount,
2367  SE->getOne(BackedgeTakenCount->getType()));
2368  // The BackedgeTaken expression contains the number of times that the
2369  // backedge branches to the loop header. This is one less than the
2370  // number of times the loop executes, so use the incremented indvar.
2371  CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
2372  }
2373 
2374  Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE);
2375  assert(ExitCnt->getType()->isPointerTy() ==
2376  IndVar->getType()->isPointerTy() &&
2377  "genLoopLimit missed a cast");
2378 
2379  // Insert a new icmp_ne or icmp_eq instruction before the branch.
2380  BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
2382  if (L->contains(BI->getSuccessor(0)))
2383  P = ICmpInst::ICMP_NE;
2384  else
2385  P = ICmpInst::ICMP_EQ;
2386 
2387  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
2388  << " LHS:" << *CmpIndVar << '\n'
2389  << " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
2390  << "\n"
2391  << " RHS:\t" << *ExitCnt << "\n"
2392  << " IVCount:\t" << *IVCount << "\n");
2393 
2394  IRBuilder<> Builder(BI);
2395 
2396  // The new loop exit condition should reuse the debug location of the
2397  // original loop exit condition.
2398  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
2399  Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
2400 
2401  // LFTR can ignore IV overflow and truncate to the width of
2402  // BECount. This avoids materializing the add(zext(add)) expression.
2403  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
2404  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
2405  if (CmpIndVarSize > ExitCntSize) {
2406  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2407  const SCEV *ARStart = AR->getStart();
2408  const SCEV *ARStep = AR->getStepRecurrence(*SE);
2409  // For constant IVCount, avoid truncation.
2410  if (isa<SCEVConstant>(ARStart) && isa<SCEVConstant>(IVCount)) {
2411  const APInt &Start = cast<SCEVConstant>(ARStart)->getAPInt();
2412  APInt Count = cast<SCEVConstant>(IVCount)->getAPInt();
2413  // Note that the post-inc value of BackedgeTakenCount may have overflowed
2414  // above such that IVCount is now zero.
2415  if (IVCount != BackedgeTakenCount && Count == 0) {
2416  Count = APInt::getMaxValue(Count.getBitWidth()).zext(CmpIndVarSize);
2417  ++Count;
2418  }
2419  else
2420  Count = Count.zext(CmpIndVarSize);
2421  APInt NewLimit;
2422  if (cast<SCEVConstant>(ARStep)->getValue()->isNegative())
2423  NewLimit = Start - Count;
2424  else
2425  NewLimit = Start + Count;
2426  ExitCnt = ConstantInt::get(CmpIndVar->getType(), NewLimit);
2427 
2428  LLVM_DEBUG(dbgs() << " Widen RHS:\t" << *ExitCnt << "\n");
2429  } else {
2430  // We try to extend trip count first. If that doesn't work we truncate IV.
2431  // Zext(trunc(IV)) == IV implies equivalence of the following two:
2432  // Trunc(IV) == ExitCnt and IV == zext(ExitCnt). Similarly for sext. If
2433  // one of the two holds, extend the trip count, otherwise we truncate IV.
2434  bool Extended = false;
2435  const SCEV *IV = SE->getSCEV(CmpIndVar);
2436  const SCEV *ZExtTrunc =
2437  SE->getZeroExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2438  ExitCnt->getType()),
2439  CmpIndVar->getType());
2440 
2441  if (ZExtTrunc == IV) {
2442  Extended = true;
2443  ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
2444  "wide.trip.count");
2445  } else {
2446  const SCEV *SExtTrunc =
2447  SE->getSignExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2448  ExitCnt->getType()),
2449  CmpIndVar->getType());
2450  if (SExtTrunc == IV) {
2451  Extended = true;
2452  ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
2453  "wide.trip.count");
2454  }
2455  }
2456 
2457  if (!Extended)
2458  CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
2459  "lftr.wideiv");
2460  }
2461  }
2462  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
2463  Value *OrigCond = BI->getCondition();
2464  // It's tempting to use replaceAllUsesWith here to fully replace the old
2465  // comparison, but that's not immediately safe, since users of the old
2466  // comparison may not be dominated by the new comparison. Instead, just
2467  // update the branch to use the new comparison; in the common case this
2468  // will make old comparison dead.
2469  BI->setCondition(Cond);
2470  DeadInsts.push_back(OrigCond);
2471 
2472  ++NumLFTR;
2473  return true;
2474 }
2475 
2476 //===----------------------------------------------------------------------===//
2477 // sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
2478 //===----------------------------------------------------------------------===//
2479 
2480 /// If there's a single exit block, sink any loop-invariant values that
2481 /// were defined in the preheader but not used inside the loop into the
2482 /// exit block to reduce register pressure in the loop.
2483 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
2484  BasicBlock *ExitBlock = L->getExitBlock();
2485  if (!ExitBlock) return false;
2486 
2487  BasicBlock *Preheader = L->getLoopPreheader();
2488  if (!Preheader) return false;
2489 
2490  bool MadeAnyChanges = false;
2491  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
2492  BasicBlock::iterator I(Preheader->getTerminator());
2493  while (I != Preheader->begin()) {
2494  --I;
2495  // New instructions were inserted at the end of the preheader.
2496  if (isa<PHINode>(I))
2497  break;
2498 
2499  // Don't move instructions which might have side effects, since the side
2500  // effects need to complete before instructions inside the loop. Also don't
2501  // move instructions which might read memory, since the loop may modify
2502  // memory. Note that it's okay if the instruction might have undefined
2503  // behavior: LoopSimplify guarantees that the preheader dominates the exit
2504  // block.
2505  if (I->mayHaveSideEffects() || I->mayReadFromMemory())
2506  continue;
2507 
2508  // Skip debug info intrinsics.
2509  if (isa<DbgInfoIntrinsic>(I))
2510  continue;
2511 
2512  // Skip eh pad instructions.
2513  if (I->isEHPad())
2514  continue;
2515 
2516  // Don't sink alloca: we never want to sink static alloca's out of the
2517  // entry block, and correctly sinking dynamic alloca's requires
2518  // checks for stacksave/stackrestore intrinsics.
2519  // FIXME: Refactor this check somehow?
2520  if (isa<AllocaInst>(I))
2521  continue;
2522 
2523  // Determine if there is a use in or before the loop (direct or
2524  // otherwise).
2525  bool UsedInLoop = false;
2526  for (Use &U : I->uses()) {
2527  Instruction *User = cast<Instruction>(U.getUser());
2528  BasicBlock *UseBB = User->getParent();
2529  if (PHINode *P = dyn_cast<PHINode>(User)) {
2530  unsigned i =
2531  PHINode::getIncomingValueNumForOperand(U.getOperandNo());
2532  UseBB = P->getIncomingBlock(i);
2533  }
2534  if (UseBB == Preheader || L->contains(UseBB)) {
2535  UsedInLoop = true;
2536  break;
2537  }
2538  }
2539 
2540  // If there is, the def must remain in the preheader.
2541  if (UsedInLoop)
2542  continue;
2543 
2544  // Otherwise, sink it to the exit block.
2545  Instruction *ToMove = &*I;
2546  bool Done = false;
2547 
2548  if (I != Preheader->begin()) {
2549  // Skip debug info intrinsics.
2550  do {
2551  --I;
2552  } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
2553 
2554  if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
2555  Done = true;
2556  } else {
2557  Done = true;
2558  }
2559 
2560  MadeAnyChanges = true;
2561  ToMove->moveBefore(*ExitBlock, InsertPt);
2562  if (Done) break;
2563  InsertPt = ToMove->getIterator();
2564  }
2565 
2566  return MadeAnyChanges;
2567 }
2568 
2569 //===----------------------------------------------------------------------===//
2570 // IndVarSimplify driver. Manage several subpasses of IV simplification.
2571 //===----------------------------------------------------------------------===//
2572 
2573 bool IndVarSimplify::run(Loop *L) {
2574  // We need (and expect!) the incoming loop to be in LCSSA.
2575  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2576  "LCSSA required to run indvars!");
2577  bool Changed = false;
2578 
2579  // If LoopSimplify form is not available, stay out of trouble. Some notes:
2580  // - LSR currently only supports LoopSimplify-form loops. Indvars'
2581  // canonicalization can be a pessimization without LSR to "clean up"
2582  // afterwards.
2583  // - We depend on having a preheader; in particular,
2584  // Loop::getCanonicalInductionVariable only supports loops with preheaders,
2585  // and we're in trouble if we can't find the induction variable even when
2586  // we've manually inserted one.
2587  // - LFTR relies on having a single backedge.
2588  if (!L->isLoopSimplifyForm())
2589  return false;
2590 
2591  // If there are any floating-point recurrences, attempt to
2592  // transform them to use integer recurrences.
2593  Changed |= rewriteNonIntegerIVs(L);
2594 
2595  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
2596 
2597  // Create a rewriter object which we'll use to transform the code with.
2598  SCEVExpander Rewriter(*SE, DL, "indvars");
2599 #ifndef NDEBUG
2600  Rewriter.setDebugType(DEBUG_TYPE);
2601 #endif
2602 
2603  // Eliminate redundant IV users.
2604  //
2605  // Simplification works best when run before other consumers of SCEV. We
2606  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
2607  // other expressions involving loop IVs have been evaluated. This helps SCEV
2608  // set no-wrap flags before normalizing sign/zero extension.
2609  Rewriter.disableCanonicalMode();
2610  Changed |= simplifyAndExtend(L, Rewriter, LI);
2611 
2612  // Check to see if this loop has a computable loop-invariant execution count.
2613  // If so, this means that we can compute the final value of any expressions
2614  // that are recurrent in the loop, and substitute the exit values from the
2615  // loop into any instructions outside of the loop that use the final values of
2616  // the current expressions.
2617  //
2618  if (ReplaceExitValue != NeverRepl &&
2619  !isa<SCEVCouldNotCompute>(BackedgeTakenCount))
2620  Changed |= rewriteLoopExitValues(L, Rewriter);
2621 
2622  // Eliminate redundant IV cycles.
2623  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
2624 
2625  // If we have a trip count expression, rewrite the loop's exit condition
2626  // using it. We can currently only handle loops with a single exit.
2627  if (!DisableLFTR && canExpandBackedgeTakenCount(L, SE, Rewriter) &&
2628  needsLFTR(L, DT)) {
2629  PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT);
2630  if (IndVar) {
2631  // Check preconditions for proper SCEVExpander operation. SCEV does not
2632  // express SCEVExpander's dependencies, such as LoopSimplify. Instead any
2633  // pass that uses the SCEVExpander must do it. This does not work well for
2634  // loop passes because SCEVExpander makes assumptions about all loops,
2635  // while LoopPassManager only forces the current loop to be simplified.
2636  //
2637  // FIXME: SCEV expansion has no way to bail out, so the caller must
2638  // explicitly check any assumptions made by SCEV. Brittle.
2639  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount);
2640  if (!AR || AR->getLoop()->getLoopPreheader())
2641  Changed |= linearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
2642  Rewriter);
2643  }
2644  }
2645  // Clear the rewriter cache, because values that are in the rewriter's cache
2646  // can be deleted in the loop below, causing the AssertingVH in the cache to
2647  // trigger.
2648  Rewriter.clear();
2649 
2650  // Now that we're done iterating through lists, clean up any instructions
2651  // which are now dead.
2652  while (!DeadInsts.empty())
2653  if (Instruction *Inst =
2654  dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
2655  Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
2656 
2657  // The Rewriter may not be used from this point on.
2658 
2659  // Loop-invariant instructions in the preheader that aren't used in the
2660  // loop may be sunk below the loop to reduce register pressure.
2661  Changed |= sinkUnusedInvariants(L);
2662 
2663  // rewriteFirstIterationLoopExitValues does not rely on the computation of
2664  // trip count and therefore can further simplify exit values in addition to
2665  // rewriteLoopExitValues.
2666  Changed |= rewriteFirstIterationLoopExitValues(L);
2667 
2668  // Clean up dead instructions.
2669  Changed |= DeleteDeadPHIs(L->getHeader(), TLI);
2670 
2671  // Check a post-condition.
2672  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2673  "Indvars did not preserve LCSSA!");
2674 
2675  // Verify that LFTR, and any other change have not interfered with SCEV's
2676  // ability to compute trip count.
2677 #ifndef NDEBUG
2678  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
2679  SE->forgetLoop(L);
2680  const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
2681  if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
2682  SE->getTypeSizeInBits(NewBECount->getType()))
2683  NewBECount = SE->getTruncateOrNoop(NewBECount,
2684  BackedgeTakenCount->getType());
2685  else
2686  BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
2687  NewBECount->getType());
2688  assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV");
2689  }
2690 #endif
2691 
2692  return Changed;
2693 }
2694 
2697  LPMUpdater &) {
2698  Function *F = L.getHeader()->getParent();
2699  const DataLayout &DL = F->getParent()->getDataLayout();
2700 
2701  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI);
2702  if (!IVS.run(&L))
2703  return PreservedAnalyses::all();
2704 
2705  auto PA = getLoopPassPreservedAnalyses();
2706  PA.preserveSet<CFGAnalyses>();
2707  return PA;
2708 }
2709 
2710 namespace {
2711 
2712 struct IndVarSimplifyLegacyPass : public LoopPass {
2713  static char ID; // Pass identification, replacement for typeid
2714 
2715  IndVarSimplifyLegacyPass() : LoopPass(ID) {
2717  }
2718 
2719  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
2720  if (skipLoop(L))
2721  return false;
2722 
2723  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2724  auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2725  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2726  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2727  auto *TLI = TLIP ? &TLIP->getTLI() : nullptr;
2728  auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
2729  auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
2730  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2731 
2732  IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI);
2733  return IVS.run(L);
2734  }
2735 
2736  void getAnalysisUsage(AnalysisUsage &AU) const override {
2737  AU.setPreservesCFG();
2739  }
2740 };
2741 
2742 } // end anonymous namespace
2743 
2745 
2746 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
2747  "Induction Variable Simplification", false, false)
2749 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
2750  "Induction Variable Simplification", false, false)
2751 
2753  return new IndVarSimplifyLegacyPass();
2754 }
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:81
uint64_t CallInst * C
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:68
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
bool hoistIVInc(Instruction *IncV, Instruction *InsertPos)
Utility for hoisting an IV increment.
Induction Variable Simplification
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:72
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1858
iterator_range< use_iterator > uses()
Definition: Value.h:355
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:225
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT)
Perform a quick domtree based check for loop invariance assuming that V is used within the loop...
MutableArrayRef< T > makeMutableArrayRef(T &OneElt)
Construct a MutableArrayRef from a single element.
Definition: ArrayRef.h:503
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
This header provides classes for managing a pipeline of passes over loops in LLVM IR...
Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
#define LLVM_FALLTHROUGH
Definition: Compiler.h:86
bool isRecursivelyLCSSAForm(DominatorTree &DT, const LoopInfo &LI) const
Return true if this Loop and all inner subloops are in LCSSA form.
Definition: LoopInfo.cpp:184
bool isLCSSAForm(DominatorTree &DT) const
Return true if the Loop is in LCSSA form.
Definition: LoopInfo.cpp:177
This class represents zero extension of integer types.
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
The main scalar evolution driver.
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:858
bool isZero() const
Return true if the expression is a constant zero.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:174
bool isHighCostExpansion(const SCEV *Expr, Loop *L, const Instruction *At=nullptr)
Return true for expressions that may incur non-trivial cost to evaluate at runtime.
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:691
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:714
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:701
uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
void setDebugType(const char *s)
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
findNearestCommonDominator - Find nearest common dominator basic block for basic block A and B...
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
BasicBlock * getSuccessor(unsigned i) const
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1572
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
STATISTIC(NumFunctions, "Total number of functions")
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
F(f)
static cl::opt< bool > UsePostIncrementRanges("indvars-post-increment-ranges", cl::Hidden, cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), cl::init(true))
This class represents a sign extension of integer types.
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:177
Hexagon Common GEP
Value * getCondition() const
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:138
static ConstantRange makeAllowedICmpRegion(CmpInst::Predicate Pred, const ConstantRange &Other)
Produce the smallest range such that all values that may satisfy the given predicate with any value c...
This defines the Use class.
static bool hasConcreteDef(Value *V)
Return true if the given value is concrete.
ReplaceExitVal
bool hasLoopInvariantOperands(const Instruction *I) const
Return true if all the operands of the specified instruction are loop invariant.
Definition: LoopInfo.cpp:64
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1503
INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars", "Induction Variable Simplification", false, false) INITIALIZE_PASS_END(IndVarSimplifyLegacyPass
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:263
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1069
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:196
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:696
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
static Value * genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L, SCEVExpander &Rewriter, ScalarEvolution *SE)
Help linearFunctionTestReplace by generating a value that holds the RHS of the new loop test...
Interface for visiting interesting IV users that are recognized but not simplified by this utility...
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:134
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
bool isSigned() const
Definition: InstrTypes.h:854
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:627
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE, etc.
Definition: InstrTypes.h:783
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:364
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:684
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:369
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:392
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal)
Convert APF to an integer, if possible.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:743
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:692
This file implements a class to represent arbitrary precision integral constant values and operations...
BlockT * getHeader() const
Definition: LoopInfo.h:100
Key
PAL metadata keys.
void findDbgValues(SmallVectorImpl< DbgValueInst *> &DbgValues, Value *V)
Finds the llvm.dbg.value intrinsics describing a value.
Definition: Local.cpp:1513
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:645
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
Value handle that is nullable, but tries to track the Value.
Definition: ValueHandle.h:182
This node represents a polynomial recurrence on the trip count of the specified loop.
This instruction compares its operands according to the predicate given to the constructor.
static Instruction * getInsertPointForUses(Instruction *User, Value *Def, DominatorTree *DT, LoopInfo *LI)
Determine the insertion point for this user.
static cl::opt< bool > VerifyIndvars("verify-indvars", cl::Hidden, cl::desc("Verify the ScalarEvolution result after running indvars"))
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Examine each PHI in the given block and delete it if it is dead.
const SCEV * getSizeOfExpr(Type *IntTy, Type *AllocTy)
Return an expression for sizeof AllocTy that is type IntTy.
ConstantRange intersectWith(const ConstantRange &CR) const
Return the range that results from the intersection of this range with another range.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:126
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:151
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:439
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1568
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:301
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:145
static BinaryOperator * CreateAdd(Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block...
Definition: IRBuilder.h:127
Value * getOperand(unsigned i) const
Definition: User.h:170
void initializeIndVarSimplifyLegacyPassPass(PassRegistry &)
void clearInsertPoint()
Clear the current insertion point.
static MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition: Metadata.cpp:106
void clear()
Erase the contents of the InsertedExpressions map so that users trying to expand the same expression ...
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:410
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:154
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt...
Definition: PatternMatch.h:177
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:219
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:304
static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE, SCEVExpander &Rewriter)
Return true if this loop&#39;s backedge taken count expression can be safely and cheaply expanded into an...
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
Conditional or Unconditional Branch instruction.
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl< Value *> &Visited, unsigned Depth)
Recursive helper for hasConcreteDef().
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:630
Value * getIncomingValueForBlock(const BasicBlock *BB) const
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:224
const Instruction & front() const
Definition: BasicBlock.h:275
#define H(x, y, z)
Definition: MD5.cpp:57
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:264
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE, const TargetTransformInfo *TTI)
Update information about the induction variable that is extended by this sign or zero extend operatio...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
bool mayHaveSideEffects() const
Return true if the instruction may have side effects.
Definition: Instruction.h:558
const SCEV * getAddExpr(SmallVectorImpl< const SCEV *> &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:499
Represent the analysis usage information of a pass.
This file declares a class to represent arbitrary precision floating point values and provide a varie...
bool any_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1049
This instruction compares its operands according to the predicate given to the constructor.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:685
Utility class for integer operators which may exhibit overflow - Add, Sub, Mul, and Shl...
Definition: Operator.h:67
Value * expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I)
Insert code to directly compute the specified SCEV expression into the program.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values...
op_range operands()
Definition: User.h:238
self_iterator getIterator()
Definition: ilist_node.h:82
Class to represent integer types.
Definition: DerivedTypes.h:40
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:430
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:76
void getExitingBlocks(SmallVectorImpl< BlockT *> &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:35
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1392
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:160
static unsigned getIncomingValueNumForOperand(unsigned i)
size_t size() const
Definition: SmallVector.h:53
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
static ICmpInst * getLoopTest(Loop *L)
Return the compare guarding the loop latch, or NULL for unrecognized tests.
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:700
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1564
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:58
signed greater than
Definition: InstrTypes.h:712
#define DEBUG_TYPE
const APFloat & getValueAPF() const
Definition: Constants.h:299
Value * CreateGEP(Value *Ptr, ArrayRef< Value *> IdxList, const Twine &Name="")
Definition: IRBuilder.h:1386
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:689
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:110
unsigned getSCEVType() const
Iterator for intrusive lists based on ilist_node.
unsigned getNumOperands() const
Definition: User.h:192
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
Type * getType() const
Return the LLVM type of this SCEV expression.
constexpr bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:309
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
const SCEV * getTruncateExpr(const SCEV *Op, Type *Ty)
static ValueAsMetadata * get(Value *V)
Definition: Metadata.cpp:349
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:699
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:249
Module.h This file contains the declarations for the Module class.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:64
Provides information about what library functions are available for the current target.
This class represents a range of values.
Definition: ConstantRange.h:47
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU...
Definition: DataLayout.h:243
signed less than
Definition: InstrTypes.h:714
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:381
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:621
static PHINode * getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT)
Return the loop header phi IFF IncV adds a loop invariant value to the phi.
bool isConditional() const
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:286
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Value handle that asserts if the Value is deleted.
Definition: ValueHandle.h:238
static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI)
This IV user cannot be widen.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
Definition: PatternMatch.h:788
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:941
signed less or equal
Definition: InstrTypes.h:715
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:56
Class for arbitrary precision integers.
Definition: APInt.h:70
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), Instruction *InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
iterator_range< user_iterator > users()
Definition: Value.h:400
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property...
Definition: Operator.h:96
static cl::opt< ReplaceExitVal > ReplaceExitValue("replexitval", cl::Hidden, cl::init(OnlyCheapRepl), cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), clEnumValN(OnlyCheapRepl, "cheap", "only replace exit value when the cost is cheap"), clEnumValN(AlwaysRepl, "always", "always replace exit value whenever possible")))
This class uses information about analyze scalars to rewrite expressions in canonical form...
Represents analyses that only rely on functions&#39; control flow.
Definition: PassManager.h:115
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:530
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:605
LoopT * getParentLoop() const
Definition: LoopInfo.h:101
Virtual Register Rewriter
Definition: VirtRegMap.cpp:222
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:759
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
Definition: LoopInfo.cpp:193
void emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:652
This class represents an analyzed expression in the program.
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:176
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:56
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:459
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:149
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:224
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
#define I(x, y, z)
Definition: MD5.cpp:58
user_iterator_impl< User > user_iterator
Definition: Value.h:369
bool mayReadFromMemory() const
Return true if this instruction may read memory.
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass&#39;s AnalysisUsage.
Definition: LoopUtils.cpp:128
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:693
static bool needsLFTR(Loop *L, DominatorTree *DT)
linearFunctionTestReplace policy.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:319
bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, SmallVectorImpl< WeakTrackingVH > &Dead, SCEVExpander &Rewriter, IVVisitor *V=nullptr)
simplifyUsersOfIV - Simplify instructions that use this induction variable by using ScalarEvolution t...
int getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info=OK_AnyValue, OperandValueKind Opd2Info=OK_AnyValue, OperandValueProperties Opd1PropInfo=OP_None, OperandValueProperties Opd2PropInfo=OP_None, ArrayRef< const Value *> Args=ArrayRef< const Value *>()) const
This is an approximation of reciprocal throughput of a math/logic op.
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:697
unsigned replaceCongruentIVs(Loop *L, const DominatorTree *DT, SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetTransformInfo *TTI=nullptr)
replace congruent phis with their most canonical representative.
const SCEV * getBackedgeTakenCount(const Loop *L)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
This class represents a cast from signed integer to floating point.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
user_iterator user_begin()
Definition: Value.h:376
bool isOne() const
Return true if the expression is a constant one.
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:688
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:566
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction has no side ef...
Definition: Local.cpp:348
LLVM Value Representation.
Definition: Value.h:73
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:698
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond)
Return true if this IV has any uses other than the (soon to be rewritten) loop exit test...
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:87
void disableCanonicalMode()
Disable the behavior of expanding expressions in canonical form rather than in a more literal form...
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:569
void getUniqueExitBlocks(SmallVectorImpl< BlockT *> &ExitBlocks) const
Return all unique successor blocks of this loop.
Definition: LoopInfoImpl.h:100
bool hasOneUse() const
Return true if there is exactly one user of this value.
Definition: Value.h:413
A container for analyses that lazily runs them and caches their results.
const SCEV * getTruncateOrZeroExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
This pass exposes codegen information to IR-level passes.
This header defines various interfaces for pass management in LLVM.
void setIncomingValue(unsigned i, Value *V)
#define LLVM_DEBUG(X)
Definition: Debug.h:123
bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE)
Return true if the given expression is safe to expand in the sense that all materialized values are s...
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:156
static cl::opt< bool > DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), cl::desc("Disable Linear Function Test Replace optimization"))
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:690
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:50
bool use_empty() const
Definition: Value.h:323
Pass * createIndVarSimplifyPass()
signed greater or equal
Definition: InstrTypes.h:713
IntegerType * Int32Ty
const BasicBlock * getParent() const
Definition: Instruction.h:67
This class represents a constant integer value.
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
Definition: PatternMatch.h:990
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property...
Definition: Operator.h:90
static PHINode * FindLoopCounter(Loop *L, const SCEV *BECount, ScalarEvolution *SE, DominatorTree *DT)
Find an affine IV in canonical form.