IndVarSimplify.cpp (LLVM 3.7.0)
1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This transformation analyzes and transforms the induction variables (and
11 // computations derived from them) into simpler forms suitable for subsequent
12 // analysis and transformation.
13 //
14 // If the trip count of a loop is computable, this pass also makes the following
15 // changes:
16 // 1. The exit condition for the loop is canonicalized to compare the
17 // induction value against the exit value. This turns loops like:
18 // 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
19 // 2. Any use outside of the loop of an expression derived from the indvar
20 // is changed to compute the derived value outside of the loop, eliminating
21 // the dependence on the exit value of the induction variable. If the only
22 // purpose of the loop is to compute the exit value of some derived
23 // expression, this transformation will make the loop dead.
24 //
25 //===----------------------------------------------------------------------===//
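//
// [Editor's note] Illustrative sketch of change (2) above; this example is an
// editorial addition, not part of the original source. Given a loop whose only
// live result is a value derived from the indvar:
//
//   int sum = 0;
//   for (int i = 0; i != n; ++i)
//     sum += 4;
//   use(sum);
//
// the use of 'sum' after the loop is rewritten to the loop-invariant
// expression 'n * 4', and the now-unused loop becomes dead.
//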
26 
27 #include "llvm/Transforms/Scalar.h"
28 #include "llvm/ADT/DenseMap.h"
29 #include "llvm/ADT/SmallVector.h"
30 #include "llvm/ADT/Statistic.h"
31 #include "llvm/Analysis/LoopInfo.h"
32 #include "llvm/Analysis/LoopPass.h"
33 #include "llvm/Analysis/ScalarEvolutionExpander.h"
34 #include "llvm/Analysis/TargetLibraryInfo.h"
35 #include "llvm/Analysis/TargetTransformInfo.h"
36 #include "llvm/IR/BasicBlock.h"
37 #include "llvm/IR/CFG.h"
38 #include "llvm/IR/Constants.h"
39 #include "llvm/IR/DataLayout.h"
40 #include "llvm/IR/Dominators.h"
41 #include "llvm/IR/Instructions.h"
42 #include "llvm/IR/IntrinsicInst.h"
43 #include "llvm/IR/LLVMContext.h"
44 #include "llvm/IR/PatternMatch.h"
45 #include "llvm/IR/Type.h"
46 #include "llvm/Support/CommandLine.h"
47 #include "llvm/Support/Debug.h"
48 #include "llvm/Support/raw_ostream.h"
49 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
50 #include "llvm/Transforms/Utils/Local.h"
51 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
52 using namespace llvm;
53 
54 #define DEBUG_TYPE "indvars"
55 
56 STATISTIC(NumWidened , "Number of indvars widened");
57 STATISTIC(NumReplaced , "Number of exit values replaced");
58 STATISTIC(NumLFTR , "Number of loop exit tests replaced");
59 STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
60 STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
61 
62 // Trip count verification can be enabled by default under NDEBUG if we
63 // implement a strong expression equivalence checker in SCEV. Until then, we
64 // use the verify-indvars flag, which may assert in some cases.
66  "verify-indvars", cl::Hidden,
67  cl::desc("Verify the ScalarEvolution result after running indvars"));
68 
69 static cl::opt<bool> ReduceLiveIVs("liv-reduce", cl::Hidden,
70  cl::desc("Reduce live induction variables."));
71 
72 enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, AlwaysRepl };
73 
74 static cl::opt<ReplaceExitVal> ReplaceExitValue(
75  "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
76  cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
77  cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
78  clEnumValN(OnlyCheapRepl, "cheap",
79  "only replace exit value when the cost is cheap"),
80  clEnumValN(AlwaysRepl, "always",
81  "always replace exit value whenever possible"),
82  clEnumValEnd));
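83 
// [Editor's note] These options are ordinary cl::opt flags, so (assuming the
// usual 'opt' driver) they can be exercised from the command line, e.g.
// 'opt -indvars -replexitval=never -verify-indvars -S input.ll'. The exact
// invocation is an editorial assumption and is not stated in this file.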
83 
84 namespace {
85 struct RewritePhi;
86 }
87 
88 namespace {
89  class IndVarSimplify : public LoopPass {
90  LoopInfo *LI;
91  ScalarEvolution *SE;
92  DominatorTree *DT;
93  TargetLibraryInfo *TLI;
94  const TargetTransformInfo *TTI;
95 
96  SmallVector<WeakVH, 16> DeadInsts;
97  bool Changed;
98  public:
99 
100  static char ID; // Pass identification, replacement for typeid
101  IndVarSimplify()
102  : LoopPass(ID), LI(nullptr), SE(nullptr), DT(nullptr), Changed(false) {
103  initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry());
104  }
105 
106  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
107 
108  void getAnalysisUsage(AnalysisUsage &AU) const override {
109  AU.addRequired<DominatorTreeWrapperPass>();
110  AU.addRequired<LoopInfoWrapperPass>();
111  AU.addRequired<ScalarEvolution>();
112  AU.addRequiredID(LoopSimplifyID);
113  AU.addRequiredID(LCSSAID);
114  AU.addPreserved<ScalarEvolution>();
115  AU.addPreservedID(LoopSimplifyID);
116  AU.addPreservedID(LCSSAID);
117  AU.setPreservesCFG();
118  }
119 
120  private:
121  void releaseMemory() override {
122  DeadInsts.clear();
123  }
124 
125  bool isValidRewrite(Value *FromVal, Value *ToVal);
126 
127  void HandleFloatingPointIV(Loop *L, PHINode *PH);
128  void RewriteNonIntegerIVs(Loop *L);
129 
130  void SimplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LPPassManager &LPM);
131 
132  bool CanLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
133  void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
134 
135  Value *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
136  PHINode *IndVar, SCEVExpander &Rewriter);
137 
138  void SinkUnusedInvariants(Loop *L);
139 
140  Value *ExpandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, Loop *L,
141  Instruction *InsertPt, Type *Ty,
142  bool &IsHighCostExpansion);
143  };
144 }
145 
146 char IndVarSimplify::ID = 0;
147 INITIALIZE_PASS_BEGIN(IndVarSimplify, "indvars",
148  "Induction Variable Simplification", false, false)
152 INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
154 INITIALIZE_PASS_END(IndVarSimplify, "indvars",
155  "Induction Variable Simplification", false, false)
156 
157 Pass *llvm::createIndVarSimplifyPass() {
158  return new IndVarSimplify();
159 }
160 
161 /// isValidRewrite - Return true if the SCEV expansion generated by the
162 /// rewriter can replace the original value. SCEV guarantees that it
163 /// produces the same value, but the way it is produced may be illegal IR.
164 /// Ideally, this function will only be called for verification.
165 bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
166  // If an SCEV expression subsumed multiple pointers, its expansion could
167  // reassociate the GEP changing the base pointer. This is illegal because the
168  // final address produced by a GEP chain must be inbounds relative to its
169  // underlying object. Otherwise basic alias analysis, among other things,
170  // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
171  // producing an expression involving multiple pointers. Until then, we must
172  // bail out here.
173  //
174  // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
175  // because it understands lcssa phis while SCEV does not.
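  //
  // [Editor's note] Hypothetical illustration of the hazard described above
  // (not from the original source): if FromVal is 'getelementptr inbounds %A,
  // %i' but the expansion ToVal reassociates to 'getelementptr %B, %j', the
  // two addresses no longer share an underlying object, so the rewrite is
  // rejected below even though SCEV proves the numeric values equal.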
176  Value *FromPtr = FromVal;
177  Value *ToPtr = ToVal;
178  if (GEPOperator *GEP = dyn_cast<GEPOperator>(FromVal)) {
179  FromPtr = GEP->getPointerOperand();
180  }
181  if (GEPOperator *GEP = dyn_cast<GEPOperator>(ToVal)) {
182  ToPtr = GEP->getPointerOperand();
183  }
184  if (FromPtr != FromVal || ToPtr != ToVal) {
185  // Quickly check the common case
186  if (FromPtr == ToPtr)
187  return true;
188 
189  // SCEV may have rewritten an expression that produces the GEP's pointer
190  // operand. That's ok as long as the pointer operand has the same base
191  // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
192  // base of a recurrence. This handles the case in which SCEV expansion
193  // converts a pointer type recurrence into a nonrecurrent pointer base
194  // indexed by an integer recurrence.
195 
196  // If the GEP base pointer is a vector of pointers, abort.
197  if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
198  return false;
199 
200  const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
201  const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
202  if (FromBase == ToBase)
203  return true;
204 
205  DEBUG(dbgs() << "INDVARS: GEP rewrite bail out "
206  << *FromBase << " != " << *ToBase << "\n");
207 
208  return false;
209  }
210  return true;
211 }
212 
213 /// Determine the insertion point for this user. By default, insert immediately
214 /// before the user. SCEVExpander or LICM will hoist loop invariants out of the
215 /// loop. For PHI nodes, there may be multiple uses, so compute the nearest
216 /// common dominator for the incoming blocks.
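/// [Editor's note] Editorial example: if Def reaches a phi through incoming
/// blocks B1 and B2, the insertion point computed below is the terminator of
/// the nearest common dominator of B1 and B2; the assert afterwards checks
/// that Def still dominates that point.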
217 static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
218  DominatorTree *DT) {
219  PHINode *PHI = dyn_cast<PHINode>(User);
220  if (!PHI)
221  return User;
222 
223  Instruction *InsertPt = nullptr;
224  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
225  if (PHI->getIncomingValue(i) != Def)
226  continue;
227 
228  BasicBlock *InsertBB = PHI->getIncomingBlock(i);
229  if (!InsertPt) {
230  InsertPt = InsertBB->getTerminator();
231  continue;
232  }
233  InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
234  InsertPt = InsertBB->getTerminator();
235  }
236  assert(InsertPt && "Missing phi operand");
237  assert((!isa<Instruction>(Def) ||
238  DT->dominates(cast<Instruction>(Def), InsertPt)) &&
239  "def does not dominate all uses");
240  return InsertPt;
241 }
242 
243 //===----------------------------------------------------------------------===//
244 // RewriteNonIntegerIVs and helpers. Prefer integer IVs.
245 //===----------------------------------------------------------------------===//
246 
247 /// ConvertToSInt - Convert APF to an integer, if possible.
248 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
249  bool isExact = false;
250  // See if we can convert this to an int64_t
251  uint64_t UIntVal;
252  if (APF.convertToInteger(&UIntVal, 64, true, APFloat::rmTowardZero,
253  &isExact) != APFloat::opOK || !isExact)
254  return false;
255  IntVal = UIntVal;
256  return true;
257 }
258 
259 /// HandleFloatingPointIV - If the loop has a floating-point induction
260 /// variable, insert a corresponding integer induction variable if possible.
261 /// For example,
262 /// for(double i = 0; i < 10000; ++i)
263 /// bar(i)
264 /// is converted into
265 /// for(int i = 0; i < 10000; ++i)
266 /// bar((double)i);
267 ///
268 void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
269  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
270  unsigned BackEdge = IncomingEdge^1;
271 
272  // Check incoming value.
273  ConstantFP *InitValueVal =
274  dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
275 
276  int64_t InitValue;
277  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
278  return;
279 
280  // Check IV increment. Reject this PN if the increment operation is not
281  // an add or the increment value cannot be represented by an integer.
282  BinaryOperator *Incr =
283  dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
284  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return;
285 
286  // If this is not an add of the PHI with a constantfp, or if the constant fp
287  // is not an integer, bail out.
288  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
289  int64_t IncValue;
290  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
291  !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
292  return;
293 
294  // Check Incr uses. One user is PN and the other user is an exit condition
295  // used by the conditional terminator.
296  Value::user_iterator IncrUse = Incr->user_begin();
297  Instruction *U1 = cast<Instruction>(*IncrUse++);
298  if (IncrUse == Incr->user_end()) return;
299  Instruction *U2 = cast<Instruction>(*IncrUse++);
300  if (IncrUse != Incr->user_end()) return;
301 
302  // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
303  // only used by a branch, we can't transform it.
304  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
305  if (!Compare)
306  Compare = dyn_cast<FCmpInst>(U2);
307  if (!Compare || !Compare->hasOneUse() ||
308  !isa<BranchInst>(Compare->user_back()))
309  return;
310 
311  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
312 
313  // We need to verify that the branch actually controls the iteration count
314  // of the loop. If not, the new IV can overflow and no one will notice.
315  // The branch block must be in the loop and one of the successors must be out
316  // of the loop.
317  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
318  if (!L->contains(TheBr->getParent()) ||
319  (L->contains(TheBr->getSuccessor(0)) &&
320  L->contains(TheBr->getSuccessor(1))))
321  return;
322 
323 
324  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
325  // transform it.
326  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
327  int64_t ExitValue;
328  if (ExitValueVal == nullptr ||
329  !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
330  return;
331 
332  // Find new predicate for integer comparison.
333  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
334  switch (Compare->getPredicate()) {
335  default: return; // Unknown comparison.
336  case CmpInst::FCMP_OEQ:
337  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
338  case CmpInst::FCMP_ONE:
339  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
340  case CmpInst::FCMP_OGT:
341  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
342  case CmpInst::FCMP_OGE:
343  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
344  case CmpInst::FCMP_OLT:
345  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
346  case CmpInst::FCMP_OLE:
347  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
348  }
349 
350  // We convert the floating point induction variable to a signed i32 value if
351  // we can. This is only safe if the comparison will not overflow in a way
352  // that won't be trapped by the integer equivalent operations. Check for this
353  // now.
354  // TODO: We could use i64 if it is native and the range requires it.
355 
356  // The start/stride/exit values must all fit in signed i32.
357  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
358  return;
359 
360  // If not actually striding (add x, 0.0), avoid touching the code.
361  if (IncValue == 0)
362  return;
363 
364  // Positive and negative strides have different safety conditions.
365  if (IncValue > 0) {
366  // If we have a positive stride, we require the init to be less than the
367  // exit value.
368  if (InitValue >= ExitValue)
369  return;
370 
371  uint32_t Range = uint32_t(ExitValue-InitValue);
372  // Check for infinite loop, either:
373  // while (i <= Exit) or until (i > Exit)
374  if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
375  if (++Range == 0) return; // Range overflows.
376  }
377 
378  unsigned Leftover = Range % uint32_t(IncValue);
379 
380  // If this is an equality comparison, we require that the strided value
381  // exactly land on the exit value, otherwise the IV condition will wrap
382  // around and do things the fp IV wouldn't.
383  if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
384  Leftover != 0)
385  return;
386 
387  // If the stride would wrap around the i32 before exiting, we can't
388  // transform the IV.
389  if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
390  return;
391 
392  } else {
393  // If we have a negative stride, we require the init to be greater than the
394  // exit value.
395  if (InitValue <= ExitValue)
396  return;
397 
398  uint32_t Range = uint32_t(InitValue-ExitValue);
399  // Check for infinite loop, either:
400  // while (i >= Exit) or until (i < Exit)
401  if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
402  if (++Range == 0) return; // Range overflows.
403  }
404 
405  unsigned Leftover = Range % uint32_t(-IncValue);
406 
407  // If this is an equality comparison, we require that the strided value
408  // exactly land on the exit value, otherwise the IV condition will wrap
409  // around and do things the fp IV wouldn't.
410  if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
411  Leftover != 0)
412  return;
413 
414  // If the stride would wrap around the i32 before exiting, we can't
415  // transform the IV.
416  if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
417  return;
418  }
419 
420  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
421 
422  // Insert new integer induction variable.
423  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
424  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
425  PN->getIncomingBlock(IncomingEdge));
426 
427  Value *NewAdd =
428  BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
429  Incr->getName()+".int", Incr);
430  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
431 
432  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
433  ConstantInt::get(Int32Ty, ExitValue),
434  Compare->getName());
435 
436  // In the following deletions, PN may become dead and may be deleted.
437  // Use a WeakVH to observe whether this happens.
438  WeakVH WeakPH = PN;
439 
440  // Delete the old floating point exit comparison. The branch starts using the
441  // new comparison.
442  NewCompare->takeName(Compare);
443  Compare->replaceAllUsesWith(NewCompare);
444  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);
445 
446  // Delete the old floating point increment.
447  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
448  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);
449 
450  // If the FP induction variable still has uses, this is because something else
451  // in the loop uses its value. In order to canonicalize the induction
452  // variable, we chose to eliminate the IV and rewrite it in terms of an
453  // int->fp cast.
454  //
455  // We give preference to sitofp over uitofp because it is faster on most
456  // platforms.
457  if (WeakPH) {
458  Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
459  PN->getParent()->getFirstInsertionPt());
460  PN->replaceAllUsesWith(Conv);
461  RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
462  }
463  Changed = true;
464 }
465 
466 void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
467  // First step. Check to see if there are any floating-point recurrences.
468  // If there are, change them into integer recurrences, permitting analysis by
469  // the SCEV routines.
470  //
471  BasicBlock *Header = L->getHeader();
472 
473  SmallVector<WeakVH, 8> PHIs;
474  for (BasicBlock::iterator I = Header->begin();
475  PHINode *PN = dyn_cast<PHINode>(I); ++I)
476  PHIs.push_back(PN);
477 
478  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
479  if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
480  HandleFloatingPointIV(L, PN);
481 
482  // If the loop previously had a floating-point IV, ScalarEvolution
483  // may not have been able to compute a trip count. Now that we've done some
484  // re-writing, the trip count may be computable.
485  if (Changed)
486  SE->forgetLoop(L);
487 }
488 
489 namespace {
490 // Collect information about PHI nodes which can be transformed in
491 // RewriteLoopExitValues.
492 struct RewritePhi {
493  PHINode *PN;
494  unsigned Ith; // Ith incoming value.
495  Value *Val; // Exit value after expansion.
496  bool HighCost; // Whether the expansion is high cost.
497  bool SafePhi; // LCSSASafePhiForRAUW.
498 
499  RewritePhi(PHINode *P, unsigned I, Value *V, bool H, bool S)
500  : PN(P), Ith(I), Val(V), HighCost(H), SafePhi(S) {}
501 };
502 }
503 
504 Value *IndVarSimplify::ExpandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S,
505  Loop *L, Instruction *InsertPt,
506  Type *ResultTy,
507  bool &IsHighCostExpansion) {
508  using namespace llvm::PatternMatch;
509 
510  if (!Rewriter.isHighCostExpansion(S, L)) {
511  IsHighCostExpansion = false;
512  return Rewriter.expandCodeFor(S, ResultTy, InsertPt);
513  }
514 
515  // Before expanding S into an expensive LLVM expression, see if we can use an
516  // already existing value as the expansion for S. There is potential to make
517  // this significantly smarter, but this simple heuristic already gets some
518  // interesting cases.
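  //
  // [Editor's note] Sketch of the reuse below on hypothetical IR (not from the
  // original source): if a latch ends in
  //   %cmp = icmp ne i64 %iv.next, %n
  //   br i1 %cmp, label %loop, label %exit
  // and SE->getSCEV(%iv.next) equals S, then %iv.next is returned directly
  // instead of expanding S into fresh (and expensive) instructions.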
519 
520  SmallVector<BasicBlock *, 4> Latches;
521  L->getLoopLatches(Latches);
522 
523  for (BasicBlock *BB : Latches) {
524  ICmpInst::Predicate Pred;
525  Instruction *LHS, *RHS;
526  BasicBlock *TrueBB, *FalseBB;
527 
528  if (!match(BB->getTerminator(),
529  m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)),
530  TrueBB, FalseBB)))
531  continue;
532 
533  if (SE->getSCEV(LHS) == S && DT->dominates(LHS, InsertPt)) {
534  IsHighCostExpansion = false;
535  return LHS;
536  }
537 
538  if (SE->getSCEV(RHS) == S && DT->dominates(RHS, InsertPt)) {
539  IsHighCostExpansion = false;
540  return RHS;
541  }
542  }
543 
544  // We didn't find anything, fall back to using SCEVExpander.
545  assert(Rewriter.isHighCostExpansion(S, L) && "this should not have changed!");
546  IsHighCostExpansion = true;
547  return Rewriter.expandCodeFor(S, ResultTy, InsertPt);
548 }
549 
550 //===----------------------------------------------------------------------===//
551 // RewriteLoopExitValues - Optimize IV users outside the loop.
552 // As a side effect, reduces the amount of IV processing within the loop.
553 //===----------------------------------------------------------------------===//
554 
555 /// RewriteLoopExitValues - Check to see if this loop has a computable
556 /// loop-invariant execution count. If so, this means that we can compute the
557 /// final value of any expressions that are recurrent in the loop, and
558 /// substitute the exit values from the loop into any instructions outside of
559 /// the loop that use the final values of the current expressions.
560 ///
561 /// This is mostly redundant with the regular IndVarSimplify activities that
562 /// happen later, except that it's more powerful in some cases, because it's
563 /// able to brute-force evaluate arbitrary instructions as long as they have
564 /// constant operands at the beginning of the loop.
565 void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
566  // Verify that the input to the pass is already in LCSSA form.
567  assert(L->isLCSSAForm(*DT));
568 
569  SmallVector<BasicBlock*, 8> ExitBlocks;
570  L->getUniqueExitBlocks(ExitBlocks);
571 
572  SmallVector<RewritePhi, 8> RewritePhiSet;
573  // Find all values that are computed inside the loop, but used outside of it.
574  // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan
575  // the exit blocks of the loop to find them.
576  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
577  BasicBlock *ExitBB = ExitBlocks[i];
578 
579  // If there are no PHI nodes in this exit block, then no values defined
580  // inside the loop are used on this path, skip it.
581  PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
582  if (!PN) continue;
583 
584  unsigned NumPreds = PN->getNumIncomingValues();
585 
586  // We would like to be able to RAUW single-incoming value PHI nodes. We
587  // have to be certain this is safe even when this is an LCSSA PHI node.
588  // While the computed exit value is no longer varying in *this* loop, the
589  // exit block may be an exit block for an outer containing loop as well,
590  // the exit value may be varying in the outer loop, and thus it may still
591  // require an LCSSA PHI node. The safe case is when this is
592  // single-predecessor PHI node (LCSSA) and the exit block containing it is
593  // part of the enclosing loop, or this is the outer most loop of the nest.
594  // In either case the exit value could (at most) be varying in the same
595  // loop body as the phi node itself. Thus if it is in turn used outside of
596  // an enclosing loop it will only be via a separate LCSSA node.
597  bool LCSSASafePhiForRAUW =
598  NumPreds == 1 &&
599  (!L->getParentLoop() || L->getParentLoop() == LI->getLoopFor(ExitBB));
600 
601  // Iterate over all of the PHI nodes.
602  BasicBlock::iterator BBI = ExitBB->begin();
603  while ((PN = dyn_cast<PHINode>(BBI++))) {
604  if (PN->use_empty())
605  continue; // dead use, don't replace it
606 
607  // SCEV only supports integer expressions for now.
608  if (!PN->getType()->isIntegerTy() && !PN->getType()->isPointerTy())
609  continue;
610 
611  // It's necessary to tell ScalarEvolution about this explicitly so that
612  // it can walk the def-use list and forget all SCEVs, as it may not be
613  // watching the PHI itself. Once the new exit value is in place, there
614  // may not be a def-use connection between the loop and every instruction
615  // which got a SCEVAddRecExpr for that loop.
616  SE->forgetValue(PN);
617 
618  // Iterate over all of the values in all the PHI nodes.
619  for (unsigned i = 0; i != NumPreds; ++i) {
620  // If the value being merged in is not integer or is not defined
621  // in the loop, skip it.
622  Value *InVal = PN->getIncomingValue(i);
623  if (!isa<Instruction>(InVal))
624  continue;
625 
626  // If this pred is for a subloop, not L itself, skip it.
627  if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
628  continue; // The Block is in a subloop, skip it.
629 
630  // Check that InVal is defined in the loop.
631  Instruction *Inst = cast<Instruction>(InVal);
632  if (!L->contains(Inst))
633  continue;
634 
635  // Okay, this instruction has a user outside of the current loop
636  // and varies predictably *inside* the loop. Evaluate the value it
637  // contains when the loop exits, if possible.
638  const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
639  if (!SE->isLoopInvariant(ExitValue, L) ||
640  !isSafeToExpand(ExitValue, *SE))
641  continue;
642 
643  // Computing the value outside of the loop brings no benefit if:
644  // - it is definitely used inside the loop in a way which cannot be
645  // optimized away.
646  // - no use outside of the loop can take advantage of hoisting the
647  // computation out of the loop
648  if (ExitValue->getSCEVType()>=scMulExpr) {
649  unsigned NumHardInternalUses = 0;
650  unsigned NumSoftExternalUses = 0;
651  unsigned NumUses = 0;
652  for (auto IB = Inst->user_begin(), IE = Inst->user_end();
653  IB != IE && NumUses <= 6; ++IB) {
654  Instruction *UseInstr = cast<Instruction>(*IB);
655  unsigned Opc = UseInstr->getOpcode();
656  NumUses++;
657  if (L->contains(UseInstr)) {
658  if (Opc == Instruction::Call || Opc == Instruction::Ret)
659  NumHardInternalUses++;
660  } else {
661  if (Opc == Instruction::PHI) {
662  // Do not count the Phi as a use. LCSSA may have inserted
663  // plenty of trivial ones.
664  NumUses--;
665  for (auto PB = UseInstr->user_begin(),
666  PE = UseInstr->user_end();
667  PB != PE && NumUses <= 6; ++PB, ++NumUses) {
668  unsigned PhiOpc = cast<Instruction>(*PB)->getOpcode();
669  if (PhiOpc != Instruction::Call && PhiOpc != Instruction::Ret)
670  NumSoftExternalUses++;
671  }
672  continue;
673  }
674  if (Opc != Instruction::Call && Opc != Instruction::Ret)
675  NumSoftExternalUses++;
676  }
677  }
678  if (NumUses <= 6 && NumHardInternalUses && !NumSoftExternalUses)
679  continue;
680  }
681 
682  bool HighCost = false;
683  Value *ExitVal = ExpandSCEVIfNeeded(Rewriter, ExitValue, L, Inst,
684  PN->getType(), HighCost);
685 
686  DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n'
687  << " LoopVal = " << *Inst << "\n");
688 
689  if (!isValidRewrite(Inst, ExitVal)) {
690  DeadInsts.push_back(ExitVal);
691  continue;
692  }
693 
694  // Collect all the candidate PHINodes to be rewritten.
695  RewritePhiSet.push_back(
696  RewritePhi(PN, i, ExitVal, HighCost, LCSSASafePhiForRAUW));
697  }
698  }
699  }
700 
701  bool LoopCanBeDel = CanLoopBeDeleted(L, RewritePhiSet);
702 
703  // Transformation.
704  for (const RewritePhi &Phi : RewritePhiSet) {
705  PHINode *PN = Phi.PN;
706  Value *ExitVal = Phi.Val;
707 
708  // Only do the rewrite when the ExitValue can be expanded cheaply.
709  // If LoopCanBeDel is true, rewrite exit value aggressively.
710  if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
711  DeadInsts.push_back(ExitVal);
712  continue;
713  }
714 
715  Changed = true;
716  ++NumReplaced;
717  Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
718  PN->setIncomingValue(Phi.Ith, ExitVal);
719 
720  // If this instruction is dead now, delete it. Don't do it now to avoid
721  // invalidating iterators.
722  if (isInstructionTriviallyDead(Inst, TLI))
723  DeadInsts.push_back(Inst);
724 
725  // If we determined that this PHI is safe to replace even if an LCSSA
726  // PHI, do so.
727  if (Phi.SafePhi) {
728  PN->replaceAllUsesWith(ExitVal);
729  PN->eraseFromParent();
730  }
731  }
732 
733  // The insertion point instruction may have been deleted; clear it out
734  // so that the rewriter doesn't trip over it later.
735  Rewriter.clearInsertPoint();
736 }
737 
738 /// CanLoopBeDeleted - Check whether it is possible to delete the loop after
739 /// rewriting the exit value. If it is possible, ignore ReplaceExitValue and
740 /// do the rewriting aggressively.
741 bool IndVarSimplify::CanLoopBeDeleted(
742  Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
743 
744  BasicBlock *Preheader = L->getLoopPreheader();
745  // If there is no preheader, the loop will not be deleted.
746  if (!Preheader)
747  return false;
748 
749  // In the LoopDeletion pass, a loop can be deleted when ExitingBlocks.size() > 1.
750  // We avoid the multiple-ExitingBlocks case here for simplicity.
751  // TODO: If we see a testcase where a loop with multiple ExitingBlocks can be
752  // deleted after exit value rewriting, we can enhance the logic here.
753  SmallVector<BasicBlock *, 4> ExitingBlocks;
754  L->getExitingBlocks(ExitingBlocks);
755  SmallVector<BasicBlock *, 8> ExitBlocks;
756  L->getUniqueExitBlocks(ExitBlocks);
757  if (ExitBlocks.size() > 1 || ExitingBlocks.size() > 1)
758  return false;
759 
760  BasicBlock *ExitBlock = ExitBlocks[0];
761  BasicBlock::iterator BI = ExitBlock->begin();
762  while (PHINode *P = dyn_cast<PHINode>(BI)) {
763  Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);
764 
765  // If the Incoming value of P is found in RewritePhiSet, we know it
766  // could be rewritten to use a loop invariant value in transformation
767  // phase later. Skip it in the loop invariant check below.
768  bool found = false;
769  for (const RewritePhi &Phi : RewritePhiSet) {
770  unsigned i = Phi.Ith;
771  if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
772  found = true;
773  break;
774  }
775  }
776 
777  Instruction *I;
778  if (!found && (I = dyn_cast<Instruction>(Incoming)))
779  if (!L->hasLoopInvariantOperands(I))
780  return false;
781 
782  ++BI;
783  }
784 
785  for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
786  LI != LE; ++LI) {
787  for (BasicBlock::iterator BI = (*LI)->begin(), BE = (*LI)->end(); BI != BE;
788  ++BI) {
789  if (BI->mayHaveSideEffects())
790  return false;
791  }
792  }
793 
794  return true;
795 }
796 
797 //===----------------------------------------------------------------------===//
798 // IV Widening - Extend the width of an IV to cover its widest uses.
799 //===----------------------------------------------------------------------===//
800 
801 namespace {
802  // Collect information about induction variables that are used by sign/zero
803  // extend operations. This information is recorded by CollectExtend and
804  // provides the input to WidenIV.
805  struct WideIVInfo {
806  PHINode *NarrowIV;
807  Type *WidestNativeType; // Widest integer type created [sz]ext
808  bool IsSigned; // Was a sext user seen before a zext?
809 
810  WideIVInfo() : NarrowIV(nullptr), WidestNativeType(nullptr),
811  IsSigned(false) {}
812  };
813 }
814 
815 /// visitCast - Update information about the induction variable that is
816 /// extended by this sign or zero extend operation. This is used to determine
817 /// the final width of the IV before actually widening it.
818 static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
819  const TargetTransformInfo *TTI) {
820  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
821  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
822  return;
823 
824  Type *Ty = Cast->getType();
825  uint64_t Width = SE->getTypeSizeInBits(Ty);
826  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
827  return;
828 
829  // Cast is either an sext or zext up to this point.
830  // We should not widen an indvar if arithmetic on the wider indvar is more
831  // expensive than that on the narrower indvar. We check only the cost of ADD
832  // because at least an ADD is required to increment the induction variable. We
833  // could compute the cost of all instructions on the induction variable more
834  // comprehensively when necessary.
835  if (TTI &&
836  TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
837  TTI->getArithmeticInstrCost(Instruction::Add,
838  Cast->getOperand(0)->getType())) {
839  return;
840  }
841 
842  if (!WI.WidestNativeType) {
843  WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
844  WI.IsSigned = IsSigned;
845  return;
846  }
847 
848  // We extend the IV to satisfy the sign of its first user, arbitrarily.
849  if (WI.IsSigned != IsSigned)
850  return;
851 
852  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
853  WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
854 }
855 
856 namespace {
857 
858 /// NarrowIVDefUse - Record a link in the Narrow IV def-use chain along with the
859 /// WideIV that computes the same value as the Narrow IV def. This avoids
860 /// caching Use* pointers.
861 struct NarrowIVDefUse {
862  Instruction *NarrowDef;
863  Instruction *NarrowUse;
864  Instruction *WideDef;
865 
866  NarrowIVDefUse(): NarrowDef(nullptr), NarrowUse(nullptr), WideDef(nullptr) {}
867 
868  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD):
869  NarrowDef(ND), NarrowUse(NU), WideDef(WD) {}
870 };
871 
872 /// WidenIV - The goal of this transform is to remove sign and zero extends
873 /// without creating any new induction variables. To do this, it creates a new
874 /// phi of the wider type and redirects all users, either removing extends or
875 /// inserting truncs whenever we stop propagating the type.
876 ///
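/// [Editor's note] Illustrative sketch on hypothetical IR (not from the
/// original source): given the narrow IV
///   %i = phi i32 [ 0, %preheader ], [ %i.next, %latch ]
///   %i.ext = sext i32 %i to i64   ; e.g. used as a GEP index
/// WidenIV materializes an i64 phi following the same recurrence and replaces
/// %i.ext with it, leaving the i32 IV and its sext to be deleted as dead.
///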
877 class WidenIV {
878  // Parameters
879  PHINode *OrigPhi;
880  Type *WideType;
881  bool IsSigned;
882 
883  // Context
884  LoopInfo *LI;
885  Loop *L;
886  ScalarEvolution *SE;
887  DominatorTree *DT;
888 
889  // Result
890  PHINode *WidePhi;
891  Instruction *WideInc;
892  const SCEV *WideIncExpr;
893  SmallVectorImpl<WeakVH> &DeadInsts;
894 
895  SmallPtrSet<Instruction *, 16> Widened;
896  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
897 
898 public:
899  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo,
900  ScalarEvolution *SEv, DominatorTree *DTree,
901  SmallVectorImpl<WeakVH> &DI) :
902  OrigPhi(WI.NarrowIV),
903  WideType(WI.WidestNativeType),
904  IsSigned(WI.IsSigned),
905  LI(LInfo),
906  L(LI->getLoopFor(OrigPhi->getParent())),
907  SE(SEv),
908  DT(DTree),
909  WidePhi(nullptr),
910  WideInc(nullptr),
911  WideIncExpr(nullptr),
912  DeadInsts(DI) {
913  assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
914  }
915 
916  PHINode *CreateWideIV(SCEVExpander &Rewriter);
917 
918 protected:
919  Value *getExtend(Value *NarrowOper, Type *WideType, bool IsSigned,
920  Instruction *Use);
921 
922  Instruction *CloneIVUser(NarrowIVDefUse DU);
923 
924  const SCEVAddRecExpr *GetWideRecurrence(Instruction *NarrowUse);
925 
926  const SCEVAddRecExpr* GetExtendedOperandRecurrence(NarrowIVDefUse DU);
927 
928  const SCEV *GetSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
929  unsigned OpCode) const;
930 
931  Instruction *WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
932 
933  bool WidenLoopCompare(NarrowIVDefUse DU);
934 
935  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
936 };
937 } // anonymous namespace
938 
939 /// isLoopInvariant - Perform a quick domtree based check for loop invariance
940 /// assuming that V is used within the loop. LoopInfo::isLoopInvariant() seems
941 /// gratuitous for this purpose.
942 static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT) {
943  Instruction *Inst = dyn_cast<Instruction>(V);
944  if (!Inst)
945  return true;
946 
947  return DT->properlyDominates(Inst->getParent(), L->getHeader());
948 }
949 
950 Value *WidenIV::getExtend(Value *NarrowOper, Type *WideType, bool IsSigned,
951  Instruction *Use) {
952  // Set the debug location and conservative insertion point.
953  IRBuilder<> Builder(Use);
954  // Hoist the insertion point into loop preheaders as far as possible.
955  for (const Loop *L = LI->getLoopFor(Use->getParent());
956  L && L->getLoopPreheader() && isLoopInvariant(NarrowOper, L, DT);
957  L = L->getParentLoop())
958  Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
959 
960  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
961  Builder.CreateZExt(NarrowOper, WideType);
962 }
963 
964 /// CloneIVUser - Instantiate a wide operation to replace a narrow
965 /// operation. This only needs to handle operations that can evaluate to
966 /// SCEVAddRec. It can safely return 0 for any operation we decide not to clone.
967 Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) {
968  unsigned Opcode = DU.NarrowUse->getOpcode();
969  switch (Opcode) {
970  default:
971  return nullptr;
972  case Instruction::Add:
973  case Instruction::Mul:
974  case Instruction::UDiv:
975  case Instruction::Sub:
976  case Instruction::And:
977  case Instruction::Or:
978  case Instruction::Xor:
979  case Instruction::Shl:
980  case Instruction::LShr:
981  case Instruction::AShr:
982  DEBUG(dbgs() << "Cloning IVUser: " << *DU.NarrowUse << "\n");
983 
984  // Replace NarrowDef operands with WideDef. Otherwise, we don't know
985  // anything about the narrow operand yet so must insert a [sz]ext. It is
986  // probably loop invariant and will be folded or hoisted. If it actually
987  // comes from a widened IV, it should be removed during a future call to
988  // WidenIVUse.
989  Value *LHS = (DU.NarrowUse->getOperand(0) == DU.NarrowDef) ? DU.WideDef :
990  getExtend(DU.NarrowUse->getOperand(0), WideType, IsSigned, DU.NarrowUse);
991  Value *RHS = (DU.NarrowUse->getOperand(1) == DU.NarrowDef) ? DU.WideDef :
992  getExtend(DU.NarrowUse->getOperand(1), WideType, IsSigned, DU.NarrowUse);
993 
994  BinaryOperator *NarrowBO = cast<BinaryOperator>(DU.NarrowUse);
995  BinaryOperator *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(),
996  LHS, RHS,
997  NarrowBO->getName());
998  IRBuilder<> Builder(DU.NarrowUse);
999  Builder.Insert(WideBO);
1000  if (const OverflowingBinaryOperator *OBO =
1001  dyn_cast<OverflowingBinaryOperator>(NarrowBO)) {
1002  if (OBO->hasNoUnsignedWrap()) WideBO->setHasNoUnsignedWrap();
1003  if (OBO->hasNoSignedWrap()) WideBO->setHasNoSignedWrap();
1004  }
1005  return WideBO;
1006  }
1007 }
1008 
1009 const SCEV *WidenIV::GetSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
1010  unsigned OpCode) const {
1011  if (OpCode == Instruction::Add)
1012  return SE->getAddExpr(LHS, RHS);
1013  if (OpCode == Instruction::Sub)
1014  return SE->getMinusSCEV(LHS, RHS);
1015  if (OpCode == Instruction::Mul)
1016  return SE->getMulExpr(LHS, RHS);
1017 
1018  llvm_unreachable("Unsupported opcode.");
1019 }
1020 
1021 /// No-wrap operations can transfer sign extension of their result to their
1022 /// operands. Generate the SCEV value for the widened operation without
1023 /// actually modifying the IR yet. If the expression after extending the
1024 /// operands is an AddRec for this loop, return it.
1025 const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
1026 
1027  // Handle the common case of add<nsw/nuw>
1028  const unsigned OpCode = DU.NarrowUse->getOpcode();
1029  // Only Add/Sub/Mul instructions supported yet.
1030  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1031  OpCode != Instruction::Mul)
1032  return nullptr;
1033 
1034  // One operand (NarrowDef) has already been extended to WideDef. Now determine
1035  // if extending the other will lead to a recurrence.
1036  const unsigned ExtendOperIdx =
1037  DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
1038  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
1039 
1040  const SCEV *ExtendOperExpr = nullptr;
1041  const OverflowingBinaryOperator *OBO =
1042  cast<OverflowingBinaryOperator>(DU.NarrowUse);
1043  if (IsSigned && OBO->hasNoSignedWrap())
1044  ExtendOperExpr = SE->getSignExtendExpr(
1045  SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
1046  else if(!IsSigned && OBO->hasNoUnsignedWrap())
1047  ExtendOperExpr = SE->getZeroExtendExpr(
1048  SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
1049  else
1050  return nullptr;
1051 
1052  // When creating this SCEV expr, don't apply the current operation's NSW or NUW
1053  // flags. This instruction may be guarded by control flow that the no-wrap
1054  // behavior depends on. Non-control-equivalent instructions can be mapped to
1055  // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
1056  // semantics to those operations.
1057  const SCEV *lhs = SE->getSCEV(DU.WideDef);
1058  const SCEV *rhs = ExtendOperExpr;
1059 
1060  // Let's swap operands to the initial order for the case of non-commutative
1061  // operations, like SUB. See PR21014.
1062  if (ExtendOperIdx == 0)
1063  std::swap(lhs, rhs);
1064  const SCEVAddRecExpr *AddRec =
1065  dyn_cast<SCEVAddRecExpr>(GetSCEVByOpCode(lhs, rhs, OpCode));
1066 
1067  if (!AddRec || AddRec->getLoop() != L)
1068  return nullptr;
1069  return AddRec;
1070 }
1071 
1072 /// GetWideRecurrence - Is this instruction potentially interesting for further
1073 /// simplification after widening its type? In other words, can the
1074 /// extend be safely hoisted out of the loop with SCEV reducing the value to a
1075 /// recurrence on the same loop. If so, return the sign or zero extended
1076 /// recurrence. Otherwise return NULL.
1077 const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
1078  if (!SE->isSCEVable(NarrowUse->getType()))
1079  return nullptr;
1080 
1081  const SCEV *NarrowExpr = SE->getSCEV(NarrowUse);
1082  if (SE->getTypeSizeInBits(NarrowExpr->getType())
1083  >= SE->getTypeSizeInBits(WideType)) {
1084  // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
1085  // index. So don't follow this use.
1086  return nullptr;
1087  }
1088 
1089  const SCEV *WideExpr = IsSigned ?
1090  SE->getSignExtendExpr(NarrowExpr, WideType) :
1091  SE->getZeroExtendExpr(NarrowExpr, WideType);
1092  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
1093  if (!AddRec || AddRec->getLoop() != L)
1094  return nullptr;
1095  return AddRec;
1096 }
1097 
1098 /// This IV user cannot be widened. Replace this use of the original narrow IV
1099 /// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
1100 static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT) {
1101  DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef
1102  << " for user " << *DU.NarrowUse << "\n");
1103  IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT));
1104  Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
1105  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
1106 }
1107 
1108 /// If the narrow use is a compare instruction, then widen the compare
1109 /// (and possibly the other operand). The extend operation is hoisted into the
1110 /// loop preheader as far as possible.
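/// [Editor's note] For instance, on hypothetical IR (not from the original
/// source), a narrow test
///   %c = icmp slt i32 %i, %limit
/// becomes
///   %limit.wide = sext i32 %limit to i64
///   %c = icmp slt i64 %i.wide, %limit.wide
/// provided the signedness of the compare matches that of the widened IV.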
1111 bool WidenIV::WidenLoopCompare(NarrowIVDefUse DU) {
1112  ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
1113  if (!Cmp)
1114  return false;
1115 
1116  // Sign of IV user and compare must match.
1117  if (IsSigned != CmpInst::isSigned(Cmp->getPredicate()))
1118  return false;
1119 
1120  Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
1121  unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
1122  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1123  assert (CastWidth <= IVWidth && "Unexpected width while widening compare.");
1124 
1125  // Widen the compare instruction.
1126  IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT));
1127  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1128 
1129  // Widen the other operand of the compare, if necessary.
1130  if (CastWidth < IVWidth) {
1131  Value *ExtOp = getExtend(Op, WideType, IsSigned, Cmp);
1132  DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
1133  }
1134  return true;
1135 }
1136 
1137 /// WidenIVUse - Determine whether an individual user of the narrow IV can be
1138 /// widened. If so, return the wide clone of the user.
1139 Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
1140 
1141  // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1142  if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
1143  if (LI->getLoopFor(UsePhi->getParent()) != L) {
1144  // For LCSSA phis, sink the truncate outside the loop.
1145  // After SimplifyCFG most loop exit targets have a single predecessor.
1146  // Otherwise fall back to a truncate within the loop.
1147  if (UsePhi->getNumOperands() != 1)
1148  truncateIVUse(DU, DT);
1149  else {
1150  PHINode *WidePhi =
1151  PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
1152  UsePhi);
1153  WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
1154  IRBuilder<> Builder(WidePhi->getParent()->getFirstInsertionPt());
1155  Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
1156  UsePhi->replaceAllUsesWith(Trunc);
1157  DeadInsts.emplace_back(UsePhi);
1158  DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi
1159  << " to " << *WidePhi << "\n");
1160  }
1161  return nullptr;
1162  }
1163  }
1164  // Our raison d'etre! Eliminate sign and zero extension.
1165  if (IsSigned ? isa<SExtInst>(DU.NarrowUse) : isa<ZExtInst>(DU.NarrowUse)) {
1166  Value *NewDef = DU.WideDef;
1167  if (DU.NarrowUse->getType() != WideType) {
1168  unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
1169  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1170  if (CastWidth < IVWidth) {
1171  // The cast isn't as wide as the IV, so insert a Trunc.
1172  IRBuilder<> Builder(DU.NarrowUse);
1173  NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
1174  }
1175  else {
1176  // A wider extend was hidden behind a narrower one. This may induce
1177  // another round of IV widening in which the intermediate IV becomes
1178  // dead. It should be very rare.
1179  DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1180  << " not wide enough to subsume " << *DU.NarrowUse << "\n");
1181  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1182  NewDef = DU.NarrowUse;
1183  }
1184  }
1185  if (NewDef != DU.NarrowUse) {
1186  DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1187  << " replaced by " << *DU.WideDef << "\n");
1188  ++NumElimExt;
1189  DU.NarrowUse->replaceAllUsesWith(NewDef);
1190  DeadInsts.emplace_back(DU.NarrowUse);
1191  }
1192  // Now that the extend is gone, we want to expose its uses for potential
1193  // further simplification. We don't need to directly inform SimplifyIVUsers
1194  // of the new users, because their parent IV will be processed later as a
1195  // new loop phi. If we preserved IVUsers analysis, we would also want to
1196  // push the uses of WideDef here.
1197 
1198  // No further widening is needed. The deceased [sz]ext had done it for us.
1199  return nullptr;
1200  }
1201 
1202  // Does this user itself evaluate to a recurrence after widening?
1203  const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(DU.NarrowUse);
1204  if (!WideAddRec)
1205  WideAddRec = GetExtendedOperandRecurrence(DU);
1206 
1207  if (!WideAddRec) {
1208  // If use is a loop condition, try to promote the condition instead of
1209  // truncating the IV first.
1210  if (WidenLoopCompare(DU))
1211  return nullptr;
1212 
1213  // This user does not evaluate to a recurrence after widening, so don't
1214  // follow it. Instead insert a Trunc to kill off the original use,
1215  // eventually isolating the original narrow IV so it can be removed.
1216  truncateIVUse(DU, DT);
1217  return nullptr;
1218  }
1219  // Assume block terminators cannot evaluate to a recurrence. We can't
1220  // insert a Trunc after a terminator if there happens to be a critical edge.
1221  assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
1222  "SCEV is not expected to evaluate a block terminator");
1223 
1224  // Reuse the IV increment that SCEVExpander created as long as it dominates
1225  // NarrowUse.
1226  Instruction *WideUse = nullptr;
1227  if (WideAddRec == WideIncExpr
1228  && Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
1229  WideUse = WideInc;
1230  else {
1231  WideUse = CloneIVUser(DU);
1232  if (!WideUse)
1233  return nullptr;
1234  }
1235  // Evaluation of WideAddRec ensured that the narrow expression could be
1236  // extended outside the loop without overflow. This suggests that the wide use
1237  // evaluates to the same expression as the extended narrow use, but doesn't
1238  // absolutely guarantee it. Hence the following failsafe check. In rare cases
1239  // where it fails, we simply throw away the newly created wide use.
1240  if (WideAddRec != SE->getSCEV(WideUse)) {
1241  DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse
1242  << ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec << "\n");
1243  DeadInsts.emplace_back(WideUse);
1244  return nullptr;
1245  }
1246 
1247  // Returning WideUse pushes it on the worklist.
1248  return WideUse;
1249 }
1250 
1251 /// pushNarrowIVUsers - Add eligible users of NarrowDef to NarrowIVUsers.
1252 ///
1253 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1254  for (User *U : NarrowDef->users()) {
1255  Instruction *NarrowUser = cast<Instruction>(U);
1256 
1257  // Handle data flow merges and bizarre phi cycles.
1258  if (!Widened.insert(NarrowUser).second)
1259  continue;
1260 
1261  NarrowIVUsers.push_back(NarrowIVDefUse(NarrowDef, NarrowUser, WideDef));
1262  }
1263 }
1264 
1265 /// CreateWideIV - Process a single induction variable. First use the
1266 /// SCEVExpander to create a wide induction variable that evaluates to the same
1267 /// recurrence as the original narrow IV. Then use a worklist to forward
1268 /// traverse the narrow IV's def-use chain. After WidenIVUse has processed all
1269 /// interesting IV users, the narrow IV will be isolated for removal by
1270 /// DeleteDeadPHIs.
1271 ///
1272 /// It would be simpler to delete uses as they are processed, but we must avoid
1273 /// invalidating SCEV expressions.
1274 ///
1275 PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
1276  // Is this phi an induction variable?
1277  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
1278  if (!AddRec)
1279  return nullptr;
1280 
1281  // Widen the induction variable expression.
1282  const SCEV *WideIVExpr = IsSigned ?
1283  SE->getSignExtendExpr(AddRec, WideType) :
1284  SE->getZeroExtendExpr(AddRec, WideType);
1285 
1286  assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
1287  "Expect the new IV expression to preserve its type");
1288 
1289  // Can the IV be extended outside the loop without overflow?
1290  AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
1291  if (!AddRec || AddRec->getLoop() != L)
1292  return nullptr;
1293 
1294  // An AddRec must have loop-invariant operands. Since this AddRec is
1295  // materialized by a loop header phi, the expression cannot have any post-loop
1296  // operands, so they must dominate the loop header.
1297  assert(SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
1298  SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader())
1299  && "Loop header phi recurrence inputs do not dominate the loop");
1300 
1301  // The rewriter provides a value for the desired IV expression. This may
1302  // either find an existing phi or materialize a new one. Either way, we
1303  // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
1304  // of the phi-SCC dominates the loop entry.
1305  Instruction *InsertPt = L->getHeader()->begin();
1306  WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
1307 
1308  // Remembering the WideIV increment generated by SCEVExpander allows
1309  // WidenIVUse to reuse it when widening the narrow IV's increment. We don't
1310  // employ a general reuse mechanism because the call above is the only call to
1311  // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
1312  if (BasicBlock *LatchBlock = L->getLoopLatch()) {
1313  WideInc =
1314  cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
1315  WideIncExpr = SE->getSCEV(WideInc);
1316  }
1317 
1318  DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
1319  ++NumWidened;
1320 
1321  // Traverse the def-use chain using a worklist starting at the original IV.
1322  assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" );
1323 
1324  Widened.insert(OrigPhi);
1325  pushNarrowIVUsers(OrigPhi, WidePhi);
1326 
1327  while (!NarrowIVUsers.empty()) {
1328  NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
1329 
1330  // Process a def-use edge. This may replace the use, so don't hold a
1331  // use_iterator across it.
1332  Instruction *WideUse = WidenIVUse(DU, Rewriter);
1333 
1334  // Follow all def-use edges from the previous narrow use.
1335  if (WideUse)
1336  pushNarrowIVUsers(DU.NarrowUse, WideUse);
1337 
1338  // WidenIVUse may have removed the def-use edge.
1339  if (DU.NarrowDef->use_empty())
1340  DeadInsts.emplace_back(DU.NarrowDef);
1341  }
1342  return WidePhi;
1343 }
1344 
1345 //===----------------------------------------------------------------------===//
1346 // Live IV Reduction - Minimize IVs live across the loop.
1347 //===----------------------------------------------------------------------===//
1348 
1349 
1350 //===----------------------------------------------------------------------===//
1351 // Simplification of IV users based on SCEV evaluation.
1352 //===----------------------------------------------------------------------===//
1353 
1354 namespace {
1355  class IndVarSimplifyVisitor : public IVVisitor {
1356  ScalarEvolution *SE;
1357  const TargetTransformInfo *TTI;
1358  PHINode *IVPhi;
1359 
1360  public:
1361  WideIVInfo WI;
1362 
1363  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
1364  const TargetTransformInfo *TTI,
1365  const DominatorTree *DTree)
1366  : SE(SCEV), TTI(TTI), IVPhi(IV) {
1367  DT = DTree;
1368  WI.NarrowIV = IVPhi;
1369  if (ReduceLiveIVs)
1370  setSplitOverflowIntrinsics();
1371  }
1372 
1373  // Implement the interface used by simplifyUsersOfIV.
1374  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
1375  };
1376 }
1377 
1378 /// SimplifyAndExtend - Iteratively perform simplification on a worklist of IV
1379 /// users. Each successive simplification may push more users which may
1380 /// themselves be candidates for simplification.
1381 ///
1382 /// Sign/Zero extend elimination is interleaved with IV simplification.
1383 ///
1384 void IndVarSimplify::SimplifyAndExtend(Loop *L,
1385  SCEVExpander &Rewriter,
1386  LPPassManager &LPM) {
1387  SmallVector<WideIVInfo, 8> WideIVs;
1388 
1389  SmallVector<PHINode*, 8> LoopPhis;
1390  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1391  LoopPhis.push_back(cast<PHINode>(I));
1392  }
1393  // Each round of simplification iterates through the SimplifyIVUsers worklist
1394  // for all current phis, then determines whether any IVs can be
1395  // widened. Widening adds new phis to LoopPhis, inducing another round of
1396  // simplification on the wide IVs.
1397  while (!LoopPhis.empty()) {
1398  // Evaluate as many IV expressions as possible before widening any IVs. This
1399  // forces SCEV to set no-wrap flags before evaluating sign/zero
1400  // extension. The first time SCEV attempts to normalize sign/zero extension,
1401  // the result becomes final. So for the most predictable results, we delay
1402  // evaluation of sign/zero extend evaluation until needed, and avoid running
1403  // other SCEV based analysis prior to SimplifyAndExtend.
1404  do {
1405  PHINode *CurrIV = LoopPhis.pop_back_val();
1406 
1407  // Information about sign/zero extensions of CurrIV.
1408  IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
1409 
1410  Changed |= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &Visitor);
1411 
1412  if (Visitor.WI.WidestNativeType) {
1413  WideIVs.push_back(Visitor.WI);
1414  }
1415  } while(!LoopPhis.empty());
1416 
1417  for (; !WideIVs.empty(); WideIVs.pop_back()) {
1418  WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts);
1419  if (PHINode *WidePhi = Widener.CreateWideIV(Rewriter)) {
1420  Changed = true;
1421  LoopPhis.push_back(WidePhi);
1422  }
1423  }
1424  }
1425 }
1426 
1427 //===----------------------------------------------------------------------===//
1428 // LinearFunctionTestReplace and its kin. Rewrite the loop exit condition.
1429 //===----------------------------------------------------------------------===//
1430 
1431 /// canExpandBackedgeTakenCount - Return true if this loop's backedge taken
1432 /// count expression can be safely and cheaply expanded into an instruction
1433 /// sequence that can be used by LinearFunctionTestReplace.
1434 ///
1435 /// TODO: This fails for pointer-type loop counters with strides greater than
1436 /// one byte, consequently preventing LFTR from running. For the purpose of LFTR
1437 /// we could skip this check in the case that the LFTR loop counter (chosen by
1438 /// FindLoopCounter) is also pointer type. Instead, we could directly convert
1439 /// the loop test to an inequality test by checking the target data's alignment
1440 /// of element types (given that the initial pointer value originates from or is
1441 /// used by an ABI-constrained operation, as opposed to inttoptr/ptrtoint).
1442 /// However, we don't yet have a strong motivation for converting loop tests
1443 /// into inequality tests.
1444 static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE,
1445  SCEVExpander &Rewriter) {
1446  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
1447  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
1448  BackedgeTakenCount->isZero())
1449  return false;
1450 
1451  if (!L->getExitingBlock())
1452  return false;
1453 
1454  // Can't rewrite non-branch yet.
1455  if (!isa<BranchInst>(L->getExitingBlock()->getTerminator()))
1456  return false;
1457 
1458  if (Rewriter.isHighCostExpansion(BackedgeTakenCount, L))
1459  return false;
1460 
1461  return true;
1462 }
1463 
1464 /// getLoopPhiForCounter - Return the loop header phi IFF IncV adds a loop
1465 /// invariant value to the phi.
1466 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
1467  Instruction *IncI = dyn_cast<Instruction>(IncV);
1468  if (!IncI)
1469  return nullptr;
1470 
1471  switch (IncI->getOpcode()) {
1472  case Instruction::Add:
1473  case Instruction::Sub:
1474  break;
1475  case Instruction::GetElementPtr:
1476  // An IV counter must preserve its type.
1477  if (IncI->getNumOperands() == 2)
1478  break;
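  // GEPs with more than one index fall through and are rejected.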
1479  default:
1480  return nullptr;
1481  }
1482 
1483  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
1484  if (Phi && Phi->getParent() == L->getHeader()) {
1485  if (isLoopInvariant(IncI->getOperand(1), L, DT))
1486  return Phi;
1487  return nullptr;
1488  }
1489  if (IncI->getOpcode() == Instruction::GetElementPtr)
1490  return nullptr;
1491 
1492  // Allow add/sub to be commuted.
1493  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
1494  if (Phi && Phi->getParent() == L->getHeader()) {
1495  if (isLoopInvariant(IncI->getOperand(0), L, DT))
1496  return Phi;
1497  }
1498  return nullptr;
1499 }
1500 
1501 /// Return the compare guarding the loop latch, or NULL for unrecognized tests.
1502 static ICmpInst *getLoopTest(Loop *L) {
1503  assert(L->getExitingBlock() && "expected loop exit");
1504 
1505  BasicBlock *LatchBlock = L->getLoopLatch();
1506  // Don't bother with LFTR if the loop is not properly simplified.
1507  if (!LatchBlock)
1508  return nullptr;
1509 
1510  BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
1511  assert(BI && "expected exit branch");
1512 
1513  return dyn_cast<ICmpInst>(BI->getCondition());
1514 }
1515 
1516 /// needsLFTR - LinearFunctionTestReplace policy. Return true unless we can show
1517 /// that the current exit test is already sufficiently canonical.
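// For example, a latch test of the form 'icmp ne %iv.next, %limit' with a
// loop-invariant %limit and a simple counter phi is already canonical and is
// left alone, whereas a test using slt/ult, a loop-varying RHS, or a phi that
// is not a simple counter triggers LFTR. (Illustrative, not from this file.)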
1518 static bool needsLFTR(Loop *L, DominatorTree *DT) {
1519  // Do LFTR to simplify the exit condition to an ICMP.
1520  ICmpInst *Cond = getLoopTest(L);
1521  if (!Cond)
1522  return true;
1523 
1524  // Do LFTR to simplify the exit ICMP to EQ/NE
1525  ICmpInst::Predicate Pred = Cond->getPredicate();
1526  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
1527  return true;
1528 
1529  // Look for a loop invariant RHS
1530  Value *LHS = Cond->getOperand(0);
1531  Value *RHS = Cond->getOperand(1);
1532  if (!isLoopInvariant(RHS, L, DT)) {
1533  if (!isLoopInvariant(LHS, L, DT))
1534  return true;
1535  std::swap(LHS, RHS);
1536  }
1537  // Look for a simple IV counter LHS
1538  PHINode *Phi = dyn_cast<PHINode>(LHS);
1539  if (!Phi)
1540  Phi = getLoopPhiForCounter(LHS, L, DT);
1541 
1542  if (!Phi)
1543  return true;
1544 
1545  // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
1546  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
1547  if (Idx < 0)
1548  return true;
1549 
1550  // Do LFTR if the exit condition's IV is *not* a simple counter.
1551  Value *IncV = Phi->getIncomingValue(Idx);
1552  return Phi != getLoopPhiForCounter(IncV, L, DT);
1553 }
1554 
1555 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
1556 /// down to checking that all operands are constant and listing instructions
1557 /// that may hide undef.
1558 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value *> &Visited,
1559  unsigned Depth) {
1560  if (isa<Constant>(V))
1561  return !isa<UndefValue>(V);
1562 
1563  if (Depth >= 6)
1564  return false;
1565 
1566  // Conservatively handle non-constant non-instructions. For example, Arguments
1567  // may be undef.
1568  Instruction *I = dyn_cast<Instruction>(V);
1569  if (!I)
1570  return false;
1571 
1572  // Loaded values and call/invoke return values may be undef.
1573  if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
1574  return false;
1575 
1576  // Optimistically handle other instructions.
1577  for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) {
1578  if (!Visited.insert(*OI).second)
1579  continue;
1580  if (!hasConcreteDefImpl(*OI, Visited, Depth+1))
1581  return false;
1582  }
1583  return true;
1584 }
1585 
1586 /// Return true if the given value is concrete. We must prove that undef can
1587 /// never reach it.
1588 ///
1589 /// TODO: If we decide that this is a good approach to checking for undef, we
1590 /// may factor it into a common location.
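// For example, a phi such as 'phi i32 [ undef, %entry ], [ %x, %latch ]' is
// not concrete, while a counter phi 'phi i32 [ 0, %entry ], [ %inc, %latch ]'
// whose increment adds a constant is. (Illustrative, not from this file.)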
1591 static bool hasConcreteDef(Value *V) {
1592  SmallPtrSet<Value*, 8> Visited;
1593  Visited.insert(V);
1594  return hasConcreteDefImpl(V, Visited, 0);
1595 }
1596 
1597 /// AlmostDeadIV - Return true if this IV has no uses other than the (soon to
1598 /// be rewritten) loop exit test.
1599 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
1600  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
1601  Value *IncV = Phi->getIncomingValue(LatchIdx);
1602 
1603  for (User *U : Phi->users())
1604  if (U != Cond && U != IncV) return false;
1605 
1606  for (User *U : IncV->users())
1607  if (U != Cond && U != Phi) return false;
1608  return true;
1609 }
1610 
1611 /// FindLoopCounter - Find an affine IV in canonical form.
1612 ///
1613 /// BECount may be an i8* pointer type. The pointer difference is already a
1614 /// valid count without scaling the address stride, so it remains a pointer
1615 /// expression as far as SCEV is concerned.
1616 ///
1617 /// Currently only valid for LFTR. See the comments on hasConcreteDef below.
1618 ///
1619 /// FIXME: Accept -1 stride and set IVLimit = IVInit - BECount
1620 ///
1621 /// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride.
1622 /// This is difficult in general for SCEV because of potential overflow. But we
1623 /// could at least handle constant BECounts.
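// For example, a header phi whose recurrence is the affine {0,+,1} expression
// for this loop (or a unit-stride i8* phi) and whose latch increment feeds
// back into the phi qualifies as a counter, while a phi with stride 2 or a
// recurrence belonging to another loop is skipped.
// (Illustrative, not from this file.)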
1624 static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount,
1625  ScalarEvolution *SE, DominatorTree *DT) {
1626  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
1627 
1628  Value *Cond =
1629  cast<BranchInst>(L->getExitingBlock()->getTerminator())->getCondition();
1630 
1631  // Loop over all of the PHI nodes, looking for a simple counter.
1632  PHINode *BestPhi = nullptr;
1633  const SCEV *BestInit = nullptr;
1634  BasicBlock *LatchBlock = L->getLoopLatch();
1635  assert(LatchBlock && "needsLFTR should guarantee a loop latch");
1636 
1637  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1638  PHINode *Phi = cast<PHINode>(I);
1639  if (!SE->isSCEVable(Phi->getType()))
1640  continue;
1641 
1642  // Avoid comparing an integer IV against a pointer Limit.
1643  if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
1644  continue;
1645 
1646  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
1647  if (!AR || AR->getLoop() != L || !AR->isAffine())
1648  continue;
1649 
1650  // AR may be a pointer type, while BECount is an integer type.
1651  // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
1652  // AR must not be narrower than BECount, or the loop may never exit.
1653  uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
1654  if (PhiWidth < BCWidth ||
1655  !L->getHeader()->getModule()->getDataLayout().isLegalInteger(PhiWidth))
1656  continue;
1657 
1658  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
1659  if (!Step || !Step->isOne())
1660  continue;
1661 
1662  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
1663  Value *IncV = Phi->getIncomingValue(LatchIdx);
1664  if (getLoopPhiForCounter(IncV, L, DT) != Phi)
1665  continue;
1666 
1667  // Avoid reusing a potentially undef value to compute other values that may
1668  // have originally had a concrete definition.
1669  if (!hasConcreteDef(Phi)) {
1670  // We explicitly allow unknown phis as long as they are already used by
1671  // the loop test. In this case we assume that performing LFTR could not
1672  // increase the number of undef users.
1673  if (ICmpInst *Cond = getLoopTest(L)) {
1674  if (Phi != getLoopPhiForCounter(Cond->getOperand(0), L, DT)
1675  && Phi != getLoopPhiForCounter(Cond->getOperand(1), L, DT)) {
1676  continue;
1677  }
1678  }
1679  }
1680  const SCEV *Init = AR->getStart();
1681 
1682  if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
1683  // Don't force a live loop counter if another IV can be used.
1684  if (AlmostDeadIV(Phi, LatchBlock, Cond))
1685  continue;
1686 
1687  // Prefer to count-from-zero. This is a more "canonical" counter form. It
1688  // also prefers integer to pointer IVs.
1689  if (BestInit->isZero() != Init->isZero()) {
1690  if (BestInit->isZero())
1691  continue;
1692  }
1693  // If two IVs both count from zero or both count from nonzero then the
1694  // narrower is likely a dead phi that has been widened. Use the wider phi
1695  // to allow the other to be eliminated.
1696  else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
1697  continue;
1698  }
1699  BestPhi = Phi;
1700  BestInit = Init;
1701  }
1702  return BestPhi;
1703 }
1704 
1705 /// genLoopLimit - Help LinearFunctionTestReplace by generating a value that
1706 /// holds the RHS of the new loop test.
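// For example (a sketch with assumed values): for an integer IV {5,+,1} and
// IVCount == n, the expanded limit is IVInit + IVCount == 5 + n; for a counter
// starting at zero the limit is simply IVCount; for a unit-stride i8* IV the
// limit is a GEP of the preheader base value by the zero-extended count.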
1707 static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
1708  SCEVExpander &Rewriter, ScalarEvolution *SE) {
1709  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
1710  assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter");
1711  const SCEV *IVInit = AR->getStart();
1712 
1713  // IVInit may be a pointer while IVCount is an integer when FindLoopCounter
1714  // finds a valid pointer IV. Sign extend BECount in order to materialize a
1715  // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
1716  // the existing GEPs whenever possible.
1717  if (IndVar->getType()->isPointerTy()
1718  && !IVCount->getType()->isPointerTy()) {
1719 
1720  // IVOffset will be the new GEP offset that is interpreted by GEP as a
1721  // signed value. IVCount on the other hand represents the loop trip count,
1722  // which is an unsigned value. FindLoopCounter only allows induction
1723  // variables that have a positive stride of one. This means we don't
1724  // have to handle the case of negative offsets (yet) and just need to zero
1725  // extend IVCount.
1726  Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
1727  const SCEV *IVOffset = SE->getTruncateOrZeroExtend(IVCount, OfsTy);
1728 
1729  // Expand the code for the iteration count.
1730  assert(SE->isLoopInvariant(IVOffset, L) &&
1731  "Computed iteration count is not loop invariant!");
1732  BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
1733  Value *GEPOffset = Rewriter.expandCodeFor(IVOffset, OfsTy, BI);
1734 
1735  Value *GEPBase = IndVar->getIncomingValueForBlock(L->getLoopPreheader());
1736  assert(AR->getStart() == SE->getSCEV(GEPBase) && "bad loop counter");
1737  // We could handle pointer IVs other than i8*, but we need to compensate for
1738  // gep index scaling. See canExpandBackedgeTakenCount comments.
1739  assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
1740  cast<PointerType>(GEPBase->getType())->getElementType())->isOne()
1741  && "unit stride pointer IV must be i8*");
1742 
1743  IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
1744  return Builder.CreateGEP(nullptr, GEPBase, GEPOffset, "lftr.limit");
1745  }
1746  else {
1747  // In any other case, convert both IVInit and IVCount to integers before
1748  // comparing. This may result in SCEV expansion of pointers, but in practice
1749  // SCEV will fold the pointer arithmetic away as such:
1750  // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
1751  //
1752  // Valid Cases: (1) both integers is most common; (2) both may be pointers
1753  // for simple memset-style loops.
1754  //
1755  // IVInit integer and IVCount pointer would only occur if a canonical IV
1756  // were generated on top of case #2, which is not expected.
1757 
1758  const SCEV *IVLimit = nullptr;
1759  // For unit stride, IVLimit = Start + IVCount with 2's complement overflow.
1760  // For non-zero Start, compute IVLimit here; for zero Start it is just IVCount.
1761  if (AR->getStart()->isZero())
1762  IVLimit = IVCount;
1763  else {
1764  assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
1765  const SCEV *IVInit = AR->getStart();
1766 
1767  // For integer IVs, truncate the IV before computing IVInit + BECount.
1768  if (SE->getTypeSizeInBits(IVInit->getType())
1769  > SE->getTypeSizeInBits(IVCount->getType()))
1770  IVInit = SE->getTruncateExpr(IVInit, IVCount->getType());
1771 
1772  IVLimit = SE->getAddExpr(IVInit, IVCount);
1773  }
1774  // Expand the code for the iteration count.
1775  BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
1776  IRBuilder<> Builder(BI);
1777  assert(SE->isLoopInvariant(IVLimit, L) &&
1778  "Computed iteration count is not loop invariant!");
1779  // Ensure that we generate the same type as IndVar, or a smaller integer
1780  // type. In the presence of null pointer values, we have an integer type
1781  // SCEV expression (IVInit) for a pointer type IV value (IndVar).
1782  Type *LimitTy = IVCount->getType()->isPointerTy() ?
1783  IndVar->getType() : IVCount->getType();
1784  return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
1785  }
1786 }
1787 
1788 /// LinearFunctionTestReplace - This method rewrites the exit condition of the
1789 /// loop to be a canonical != comparison against the incremented loop induction
1790 /// variable. This pass can rewrite the exit test of any loop for which SCEV
1791 /// can determine a loop-invariant trip count, which covers a much broader
1792 /// class of loops than just those with linear tests.
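// For example (an illustrative sketch, not from the original source): a latch
// test equivalent to 'for (i = 0; i < n; ++i)' becomes an inequality test of
// the incremented IV against the expanded trip count, roughly 'i.next != n',
// with the limit value materialized by genLoopLimit above.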
1793 Value *IndVarSimplify::
1794 LinearFunctionTestReplace(Loop *L,
1795  const SCEV *BackedgeTakenCount,
1796  PHINode *IndVar,
1797  SCEVExpander &Rewriter) {
1798  assert(canExpandBackedgeTakenCount(L, SE, Rewriter) && "precondition");
1799 
1800  // Initialize CmpIndVar and IVCount to their preincremented values.
1801  Value *CmpIndVar = IndVar;
1802  const SCEV *IVCount = BackedgeTakenCount;
1803 
1804  // If the exiting block is the same as the backedge block, we prefer to
1805  // compare against the post-incremented value, otherwise we must compare
1806  // against the preincremented value.
1807  if (L->getExitingBlock() == L->getLoopLatch()) {
1808  // Add one to the "backedge-taken" count to get the trip count.
1809  // This addition may overflow, which is valid as long as the comparison is
1810  // truncated to BackedgeTakenCount->getType().
1811  IVCount = SE->getAddExpr(BackedgeTakenCount,
1812  SE->getConstant(BackedgeTakenCount->getType(), 1));
1813  // The BackedgeTaken expression contains the number of times that the
1814  // backedge branches to the loop header. This is one less than the
1815  // number of times the loop executes, so use the incremented indvar.
1816  CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
1817  }
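  // For instance, with BECount == n-1 for a loop that runs n times and exits
  // from its latch, IVCount becomes n and the comparison uses the incremented
  // IV, yielding a test of the form 'iv.next != n'. (Illustrative sketch.)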
1818 
1819  Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE);
1820  assert(ExitCnt->getType()->isPointerTy() == IndVar->getType()->isPointerTy()
1821  && "genLoopLimit missed a cast");
1822 
1823  // Insert a new icmp_ne or icmp_eq instruction before the branch.
1824  BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
1825  ICmpInst::Predicate P;
1826  if (L->contains(BI->getSuccessor(0)))
1827  P = ICmpInst::ICMP_NE;
1828  else
1829  P = ICmpInst::ICMP_EQ;
1830 
1831  DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
1832  << " LHS:" << *CmpIndVar << '\n'
1833  << " op:\t"
1834  << (P == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
1835  << " RHS:\t" << *ExitCnt << "\n"
1836  << " IVCount:\t" << *IVCount << "\n");
1837 
1838  IRBuilder<> Builder(BI);
1839 
1840  // LFTR can ignore IV overflow and truncate to the width of
1841  // BECount. This avoids materializing the add(zext(add)) expression.
1842  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
1843  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
1844  if (CmpIndVarSize > ExitCntSize) {
1845  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
1846  const SCEV *ARStart = AR->getStart();
1847  const SCEV *ARStep = AR->getStepRecurrence(*SE);
1848  // For constant IVCount, avoid truncation.
1849  if (isa<SCEVConstant>(ARStart) && isa<SCEVConstant>(IVCount)) {
1850  const APInt &Start = cast<SCEVConstant>(ARStart)->getValue()->getValue();
1851  APInt Count = cast<SCEVConstant>(IVCount)->getValue()->getValue();
1852  // Note that the post-inc value of BackedgeTakenCount may have overflowed
1853  // above such that IVCount is now zero.
1854  if (IVCount != BackedgeTakenCount && Count == 0) {
1855  Count = APInt::getMaxValue(Count.getBitWidth()).zext(CmpIndVarSize);
1856  ++Count;
1857  }
1858  else
1859  Count = Count.zext(CmpIndVarSize);
1860  APInt NewLimit;
1861  if (cast<SCEVConstant>(ARStep)->getValue()->isNegative())
1862  NewLimit = Start - Count;
1863  else
1864  NewLimit = Start + Count;
1865  ExitCnt = ConstantInt::get(CmpIndVar->getType(), NewLimit);
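  // For instance (a sketch with assumed constants): for a 64-bit IV starting
  // at 10 and a 32-bit constant IVCount of 100, the limit is widened to the
  // 64-bit constant 110 rather than truncating CmpIndVar; if adding 1 to
  // BackedgeTakenCount wrapped IVCount to 0, the count is treated as 2^32
  // before widening.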
1866 
1867  DEBUG(dbgs() << " Widen RHS:\t" << *ExitCnt << "\n");
1868  } else {
1869  CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
1870  "lftr.wideiv");
1871  }
1872  }
1873  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
1874  Value *OrigCond = BI->getCondition();
1875  // It's tempting to use replaceAllUsesWith here to fully replace the old
1876  // comparison, but that's not immediately safe, since users of the old
1877  // comparison may not be dominated by the new comparison. Instead, just
1878  // update the branch to use the new comparison; in the common case this
1879  // will make the old comparison dead.
1880  BI->setCondition(Cond);
1881  DeadInsts.push_back(OrigCond);
1882 
1883  ++NumLFTR;
1884  Changed = true;
1885  return Cond;
1886 }
1887 
1888 //===----------------------------------------------------------------------===//
1889 // SinkUnusedInvariants. A late subpass to cleanup loop preheaders.
1890 //===----------------------------------------------------------------------===//
1891 
1892 /// If there's a single exit block, sink any loop-invariant values that
1893 /// were defined in the preheader but not used inside the loop into the
1894 /// exit block to reduce register pressure in the loop.
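// For example (an illustrative sketch, not from the original source): a value
// like 'int64_t scaled = a * b;' computed in the preheader but only used after
// the loop is moved into the exit block, shortening its live range across the
// loop body.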
1895 void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
1896  BasicBlock *ExitBlock = L->getExitBlock();
1897  if (!ExitBlock) return;
1898 
1899  BasicBlock *Preheader = L->getLoopPreheader();
1900  if (!Preheader) return;
1901 
1902  Instruction *InsertPt = ExitBlock->getFirstInsertionPt();
1903  BasicBlock::iterator I = Preheader->getTerminator();
1904  while (I != Preheader->begin()) {
1905  --I;
1906  // New instructions were inserted at the end of the preheader.
1907  if (isa<PHINode>(I))
1908  break;
1909 
1910  // Don't move instructions which might have side effects, since the side
1911  // effects need to complete before instructions inside the loop. Also don't
1912  // move instructions which might read memory, since the loop may modify
1913  // memory. Note that it's okay if the instruction might have undefined
1914  // behavior: LoopSimplify guarantees that the preheader dominates the exit
1915  // block.
1916  if (I->mayHaveSideEffects() || I->mayReadFromMemory())
1917  continue;
1918 
1919  // Skip debug info intrinsics.
1920  if (isa<DbgInfoIntrinsic>(I))
1921  continue;
1922 
1923  // Skip landingpad instructions.
1924  if (isa<LandingPadInst>(I))
1925  continue;
1926 
1927  // Don't sink allocas: we never want to sink static allocas out of the
1928  // entry block, and correctly sinking dynamic allocas requires
1929  // checks for stacksave/stackrestore intrinsics.
1930  // FIXME: Refactor this check somehow?
1931  if (isa<AllocaInst>(I))
1932  continue;
1933 
1934  // Determine if there is a use in or before the loop (direct or
1935  // otherwise).
1936  bool UsedInLoop = false;
1937  for (Use &U : I->uses()) {
1938  Instruction *User = cast<Instruction>(U.getUser());
1939  BasicBlock *UseBB = User->getParent();
1940  if (PHINode *P = dyn_cast<PHINode>(User)) {
1941  unsigned i =
1942  PHINode::getIncomingValueNumForOperand(U.getOperandNo());
1943  UseBB = P->getIncomingBlock(i);
1944  }
1945  if (UseBB == Preheader || L->contains(UseBB)) {
1946  UsedInLoop = true;
1947  break;
1948  }
1949  }
1950 
1951  // If there is, the def must remain in the preheader.
1952  if (UsedInLoop)
1953  continue;
1954 
1955  // Otherwise, sink it to the exit block.
1956  Instruction *ToMove = I;
1957  bool Done = false;
1958 
1959  if (I != Preheader->begin()) {
1960  // Skip debug info intrinsics.
1961  do {
1962  --I;
1963  } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
1964 
1965  if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
1966  Done = true;
1967  } else {
1968  Done = true;
1969  }
1970 
1971  ToMove->moveBefore(InsertPt);
1972  if (Done) break;
1973  InsertPt = ToMove;
1974  }
1975 }
1976 
1977 //===----------------------------------------------------------------------===//
1978 // IndVarSimplify driver. Manage several subpasses of IV simplification.
1979 //===----------------------------------------------------------------------===//
1980 
1981 bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
1982  if (skipOptnoneFunction(L))
1983  return false;
1984 
1985  // If LoopSimplify form is not available, stay out of trouble. Some notes:
1986  // - LSR currently only supports LoopSimplify-form loops. Indvars'
1987  // canonicalization can be a pessimization without LSR to "clean up"
1988  // afterwards.
1989  // - We depend on having a preheader; in particular,
1990  // Loop::getCanonicalInductionVariable only supports loops with preheaders,
1991  // and we're in trouble if we can't find the induction variable even when
1992  // we've manually inserted one.
1993  if (!L->isLoopSimplifyForm())
1994  return false;
1995 
1996  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1997  SE = &getAnalysis<ScalarEvolution>();
1998  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1999  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2000  TLI = TLIP ? &TLIP->getTLI() : nullptr;
2001  auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
2002  TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
2003  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2004 
2005  DeadInsts.clear();
2006  Changed = false;
2007 
2008  // If there are any floating-point recurrences, attempt to
2009  // transform them to use integer recurrences.
2010  RewriteNonIntegerIVs(L);
2011 
2012  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
2013 
2014  // Create a rewriter object which we'll use to transform the code with.
2015  SCEVExpander Rewriter(*SE, DL, "indvars");
2016 #ifndef NDEBUG
2017  Rewriter.setDebugType(DEBUG_TYPE);
2018 #endif
2019 
2020  // Eliminate redundant IV users.
2021  //
2022  // Simplification works best when run before other consumers of SCEV. We
2023  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
2024  // other expressions involving loop IVs have been evaluated. This helps SCEV
2025  // set no-wrap flags before normalizing sign/zero extension.
2026  Rewriter.disableCanonicalMode();
2027  SimplifyAndExtend(L, Rewriter, LPM);
2028 
2029  // Check to see if this loop has a computable loop-invariant execution count.
2030  // If so, this means that we can compute the final value of any expressions
2031  // that are recurrent in the loop, and substitute the exit values from the
2032  // loop into any instructions outside of the loop that use the final values of
2033  // the current expressions.
2034  //
2035  if (ReplaceExitValue != NeverRepl &&
2036  !isa<SCEVCouldNotCompute>(BackedgeTakenCount))
2037  RewriteLoopExitValues(L, Rewriter);
2038 
2039  // Eliminate redundant IV cycles.
2040  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
2041 
2042  // If we have a trip count expression, rewrite the loop's exit condition
2043  // using it. We can currently only handle loops with a single exit.
2044  if (canExpandBackedgeTakenCount(L, SE, Rewriter) && needsLFTR(L, DT)) {
2045  PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT);
2046  if (IndVar) {
2047  // Check preconditions for proper SCEVExpander operation. SCEV does not
2048  // express SCEVExpander's dependencies, such as LoopSimplify. Instead any
2049  // pass that uses the SCEVExpander must do it. This does not work well for
2050  // loop passes because SCEVExpander makes assumptions about all loops,
2051  // while LoopPassManager only forces the current loop to be simplified.
2052  //
2053  // FIXME: SCEV expansion has no way to bail out, so the caller must
2054  // explicitly check any assumptions made by SCEV. Brittle.
2055  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount);
2056  if (!AR || AR->getLoop()->getLoopPreheader())
2057  (void)LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
2058  Rewriter);
2059  }
2060  }
2061  // Clear the rewriter cache, because values that are in the rewriter's cache
2062  // can be deleted in the loop below, causing the AssertingVH in the cache to
2063  // trigger.
2064  Rewriter.clear();
2065 
2066  // Now that we're done iterating through lists, clean up any instructions
2067  // which are now dead.
2068  while (!DeadInsts.empty())
2069  if (Instruction *Inst =
2070  dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
2071  RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
2072 
2073  // The Rewriter may not be used from this point on.
2074 
2075  // Loop-invariant instructions in the preheader that aren't used in the
2076  // loop may be sunk below the loop to reduce register pressure.
2077  SinkUnusedInvariants(L);
2078 
2079  // Clean up dead instructions.
2080  Changed |= DeleteDeadPHIs(L->getHeader(), TLI);
2081  // Check a post-condition.
2082  assert(L->isLCSSAForm(*DT) &&
2083  "Indvars did not leave the loop in lcssa form!");
2084 
2085  // Verify that LFTR and any other changes have not interfered with SCEV's
2086  // ability to compute the trip count.
2087 #ifndef NDEBUG
2088  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
2089  SE->forgetLoop(L);
2090  const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
2091  if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
2092  SE->getTypeSizeInBits(NewBECount->getType()))
2093  NewBECount = SE->getTruncateOrNoop(NewBECount,
2094  BackedgeTakenCount->getType());
2095  else
2096  BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
2097  NewBECount->getType());
2098  assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV");
2099  }
2100 #endif
2101 
2102  return Changed;
2103 }