LLVM  4.0.0
IndVarSimplify.cpp
Go to the documentation of this file.
1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This transformation analyzes and transforms the induction variables (and
11 // computations derived from them) into simpler forms suitable for subsequent
12 // analysis and transformation.
13 //
14 // If the trip count of a loop is computable, this pass also makes the following
15 // changes:
16 // 1. The exit condition for the loop is canonicalized to compare the
17 // induction value against the exit value. This turns loops like:
18 // 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
19 // 2. Any use outside of the loop of an expression derived from the indvar
20 // is changed to compute the derived value outside of the loop, eliminating
21 // the dependence on the exit value of the induction variable. If the only
22 // purpose of the loop is to compute the exit value of some derived
23 // expression, this transformation will make the loop dead.
24 //
25 //===----------------------------------------------------------------------===//
26 
28 #include "llvm/ADT/SmallVector.h"
29 #include "llvm/ADT/Statistic.h"
31 #include "llvm/Analysis/LoopInfo.h"
32 #include "llvm/Analysis/LoopPass.h"
37 #include "llvm/IR/BasicBlock.h"
38 #include "llvm/IR/CFG.h"
39 #include "llvm/IR/Constants.h"
40 #include "llvm/IR/DataLayout.h"
41 #include "llvm/IR/Dominators.h"
42 #include "llvm/IR/Instructions.h"
43 #include "llvm/IR/IntrinsicInst.h"
44 #include "llvm/IR/LLVMContext.h"
45 #include "llvm/IR/PatternMatch.h"
46 #include "llvm/IR/Type.h"
48 #include "llvm/Support/Debug.h"
50 #include "llvm/Transforms/Scalar.h"
56 using namespace llvm;
57 
58 #define DEBUG_TYPE "indvars"
59 
60 STATISTIC(NumWidened , "Number of indvars widened");
61 STATISTIC(NumReplaced , "Number of exit values replaced");
62 STATISTIC(NumLFTR , "Number of loop exit tests replaced");
63 STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
64 STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
65 
66 // Trip count verification can be enabled by default under NDEBUG if we
67 // implement a strong expression equivalence checker in SCEV. Until then, we
68 // use the verify-indvars flag, which may assert in some cases.
70  "verify-indvars", cl::Hidden,
71  cl::desc("Verify the ScalarEvolution result after running indvars"));
72 
74 
76  "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
77  cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
78  cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
79  clEnumValN(OnlyCheapRepl, "cheap",
80  "only replace exit value when the cost is cheap"),
81  clEnumValN(AlwaysRepl, "always",
82  "always replace exit value whenever possible")));
83 
85  "indvars-post-increment-ranges", cl::Hidden,
86  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
87  cl::init(true));
88 
89 namespace {
90 struct RewritePhi;
91 
92 class IndVarSimplify {
93  LoopInfo *LI;
94  ScalarEvolution *SE;
95  DominatorTree *DT;
96  const DataLayout &DL;
97  TargetLibraryInfo *TLI;
98  const TargetTransformInfo *TTI;
99 
100  SmallVector<WeakVH, 16> DeadInsts;
101  bool Changed = false;
102 
103  bool isValidRewrite(Value *FromVal, Value *ToVal);
104 
105  void handleFloatingPointIV(Loop *L, PHINode *PH);
106  void rewriteNonIntegerIVs(Loop *L);
107 
108  void simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
109 
110  bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
111  void rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
112  void rewriteFirstIterationLoopExitValues(Loop *L);
113 
114  Value *linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
115  PHINode *IndVar, SCEVExpander &Rewriter);
116 
117  void sinkUnusedInvariants(Loop *L);
118 
119  Value *expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, Loop *L,
120  Instruction *InsertPt, Type *Ty);
121 
122 public:
123  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
124  const DataLayout &DL, TargetLibraryInfo *TLI,
125  TargetTransformInfo *TTI)
126  : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {}
127 
128  bool run(Loop *L);
129 };
130 }
131 
132 /// Return true if the SCEV expansion generated by the rewriter can replace the
133 /// original value. SCEV guarantees that it produces the same value, but the way
134 /// it is produced may be illegal IR. Ideally, this function will only be
135 /// called for verification.
136 bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
137  // If an SCEV expression subsumed multiple pointers, its expansion could
138  // reassociate the GEP changing the base pointer. This is illegal because the
139  // final address produced by a GEP chain must be inbounds relative to its
140  // underlying object. Otherwise basic alias analysis, among other things,
141  // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
142  // producing an expression involving multiple pointers. Until then, we must
143  // bail out here.
144  //
145  // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
146  // because it understands lcssa phis while SCEV does not.
147  Value *FromPtr = FromVal;
148  Value *ToPtr = ToVal;
149  if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
150  FromPtr = GEP->getPointerOperand();
151  }
152  if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
153  ToPtr = GEP->getPointerOperand();
154  }
155  if (FromPtr != FromVal || ToPtr != ToVal) {
156  // Quickly check the common case
157  if (FromPtr == ToPtr)
158  return true;
159 
160  // SCEV may have rewritten an expression that produces the GEP's pointer
161  // operand. That's ok as long as the pointer operand has the same base
162  // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
163  // base of a recurrence. This handles the case in which SCEV expansion
164  // converts a pointer type recurrence into a nonrecurrent pointer base
165  // indexed by an integer recurrence.
166 
167  // If the GEP base pointer is a vector of pointers, abort.
168  if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
169  return false;
170 
171  const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
172  const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
173  if (FromBase == ToBase)
174  return true;
175 
176  DEBUG(dbgs() << "INDVARS: GEP rewrite bail out "
177  << *FromBase << " != " << *ToBase << "\n");
178 
179  return false;
180  }
181  return true;
182 }
183 
184 /// Determine the insertion point for this user. By default, insert immediately
185 /// before the user. SCEVExpander or LICM will hoist loop invariants out of the
186 /// loop. For PHI nodes, there may be multiple uses, so compute the nearest
187 /// common dominator for the incoming blocks.
189  DominatorTree *DT, LoopInfo *LI) {
190  PHINode *PHI = dyn_cast<PHINode>(User);
191  if (!PHI)
192  return User;
193 
194  Instruction *InsertPt = nullptr;
195  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
196  if (PHI->getIncomingValue(i) != Def)
197  continue;
198 
199  BasicBlock *InsertBB = PHI->getIncomingBlock(i);
200  if (!InsertPt) {
201  InsertPt = InsertBB->getTerminator();
202  continue;
203  }
204  InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
205  InsertPt = InsertBB->getTerminator();
206  }
207  assert(InsertPt && "Missing phi operand");
208 
209  auto *DefI = dyn_cast<Instruction>(Def);
210  if (!DefI)
211  return InsertPt;
212 
213  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");
214 
215  auto *L = LI->getLoopFor(DefI->getParent());
216  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));
217 
218  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
219  if (LI->getLoopFor(DTN->getBlock()) == L)
220  return DTN->getBlock()->getTerminator();
221 
222  llvm_unreachable("DefI dominates InsertPt!");
223 }
224 
225 //===----------------------------------------------------------------------===//
226 // rewriteNonIntegerIVs and helpers. Prefer integer IVs.
227 //===----------------------------------------------------------------------===//
228 
229 /// Convert APF to an integer, if possible.
230 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
231  bool isExact = false;
232  // See if we can convert this to an int64_t
233  uint64_t UIntVal;
234  if (APF.convertToInteger(&UIntVal, 64, true, APFloat::rmTowardZero,
235  &isExact) != APFloat::opOK || !isExact)
236  return false;
237  IntVal = UIntVal;
238  return true;
239 }
240 
241 /// If the loop has floating induction variable then insert corresponding
242 /// integer induction variable if possible.
243 /// For example,
244 /// for(double i = 0; i < 10000; ++i)
245 /// bar(i)
246 /// is converted into
247 /// for(int i = 0; i < 10000; ++i)
248 /// bar((double)i);
249 ///
250 void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
251  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
252  unsigned BackEdge = IncomingEdge^1;
253 
254  // Check incoming value.
255  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
256 
257  int64_t InitValue;
258  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
259  return;
260 
261  // Check IV increment. Reject this PN if increment operation is not
262  // an add or increment value can not be represented by an integer.
263  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
264  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return;
265 
266  // If this is not an add of the PHI with a constantfp, or if the constant fp
267  // is not an integer, bail out.
268  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
269  int64_t IncValue;
270  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
271  !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
272  return;
273 
274  // Check Incr uses. One user is PN and the other user is an exit condition
275  // used by the conditional terminator.
276  Value::user_iterator IncrUse = Incr->user_begin();
277  Instruction *U1 = cast<Instruction>(*IncrUse++);
278  if (IncrUse == Incr->user_end()) return;
279  Instruction *U2 = cast<Instruction>(*IncrUse++);
280  if (IncrUse != Incr->user_end()) return;
281 
282  // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
283  // only used by a branch, we can't transform it.
285  if (!Compare)
286  Compare = dyn_cast<FCmpInst>(U2);
287  if (!Compare || !Compare->hasOneUse() ||
288  !isa<BranchInst>(Compare->user_back()))
289  return;
290 
291  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
292 
293  // We need to verify that the branch actually controls the iteration count
294  // of the loop. If not, the new IV can overflow and no one will notice.
295  // The branch block must be in the loop and one of the successors must be out
296  // of the loop.
297  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
298  if (!L->contains(TheBr->getParent()) ||
299  (L->contains(TheBr->getSuccessor(0)) &&
300  L->contains(TheBr->getSuccessor(1))))
301  return;
302 
303 
304  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
305  // transform it.
306  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
307  int64_t ExitValue;
308  if (ExitValueVal == nullptr ||
309  !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
310  return;
311 
312  // Find new predicate for integer comparison.
314  switch (Compare->getPredicate()) {
315  default: return; // Unknown comparison.
316  case CmpInst::FCMP_OEQ:
317  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
318  case CmpInst::FCMP_ONE:
319  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
320  case CmpInst::FCMP_OGT:
321  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
322  case CmpInst::FCMP_OGE:
323  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
324  case CmpInst::FCMP_OLT:
325  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
326  case CmpInst::FCMP_OLE:
327  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
328  }
329 
330  // We convert the floating point induction variable to a signed i32 value if
331  // we can. This is only safe if the comparison will not overflow in a way
332  // that won't be trapped by the integer equivalent operations. Check for this
333  // now.
334  // TODO: We could use i64 if it is native and the range requires it.
335 
336  // The start/stride/exit values must all fit in signed i32.
337  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
338  return;
339 
340  // If not actually striding (add x, 0.0), avoid touching the code.
341  if (IncValue == 0)
342  return;
343 
344  // Positive and negative strides have different safety conditions.
345  if (IncValue > 0) {
346  // If we have a positive stride, we require the init to be less than the
347  // exit value.
348  if (InitValue >= ExitValue)
349  return;
350 
351  uint32_t Range = uint32_t(ExitValue-InitValue);
352  // Check for infinite loop, either:
353  // while (i <= Exit) or until (i > Exit)
354  if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
355  if (++Range == 0) return; // Range overflows.
356  }
357 
358  unsigned Leftover = Range % uint32_t(IncValue);
359 
360  // If this is an equality comparison, we require that the strided value
361  // exactly land on the exit value, otherwise the IV condition will wrap
362  // around and do things the fp IV wouldn't.
363  if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
364  Leftover != 0)
365  return;
366 
367  // If the stride would wrap around the i32 before exiting, we can't
368  // transform the IV.
369  if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
370  return;
371 
372  } else {
373  // If we have a negative stride, we require the init to be greater than the
374  // exit value.
375  if (InitValue <= ExitValue)
376  return;
377 
378  uint32_t Range = uint32_t(InitValue-ExitValue);
379  // Check for infinite loop, either:
380  // while (i >= Exit) or until (i < Exit)
381  if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
382  if (++Range == 0) return; // Range overflows.
383  }
384 
385  unsigned Leftover = Range % uint32_t(-IncValue);
386 
387  // If this is an equality comparison, we require that the strided value
388  // exactly land on the exit value, otherwise the IV condition will wrap
389  // around and do things the fp IV wouldn't.
390  if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
391  Leftover != 0)
392  return;
393 
394  // If the stride would wrap around the i32 before exiting, we can't
395  // transform the IV.
396  if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
397  return;
398  }
399 
401 
402  // Insert new integer induction variable.
403  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
404  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
405  PN->getIncomingBlock(IncomingEdge));
406 
407  Value *NewAdd =
408  BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
409  Incr->getName()+".int", Incr);
410  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
411 
412  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
413  ConstantInt::get(Int32Ty, ExitValue),
414  Compare->getName());
415 
416  // In the following deletions, PN may become dead and may be deleted.
417  // Use a WeakVH to observe whether this happens.
418  WeakVH WeakPH = PN;
419 
420  // Delete the old floating point exit comparison. The branch starts using the
421  // new comparison.
422  NewCompare->takeName(Compare);
423  Compare->replaceAllUsesWith(NewCompare);
425 
426  // Delete the old floating point increment.
427  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
429 
430  // If the FP induction variable still has uses, this is because something else
431  // in the loop uses its value. In order to canonicalize the induction
432  // variable, we chose to eliminate the IV and rewrite it in terms of an
433  // int->fp cast.
434  //
435  // We give preference to sitofp over uitofp because it is faster on most
436  // platforms.
437  if (WeakPH) {
438  Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
439  &*PN->getParent()->getFirstInsertionPt());
440  PN->replaceAllUsesWith(Conv);
442  }
443  Changed = true;
444 }
445 
446 void IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
447  // First step. Check to see if there are any floating-point recurrences.
448  // If there are, change them into integer recurrences, permitting analysis by
449  // the SCEV routines.
450  //
451  BasicBlock *Header = L->getHeader();
452 
454  for (BasicBlock::iterator I = Header->begin();
455  PHINode *PN = dyn_cast<PHINode>(I); ++I)
456  PHIs.push_back(PN);
457 
458  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
459  if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
460  handleFloatingPointIV(L, PN);
461 
462  // If the loop previously had floating-point IV, ScalarEvolution
463  // may not have been able to compute a trip count. Now that we've done some
464  // re-writing, the trip count may be computable.
465  if (Changed)
466  SE->forgetLoop(L);
467 }
468 
469 namespace {
470 // Collect information about PHI nodes which can be transformed in
471 // rewriteLoopExitValues.
472 struct RewritePhi {
473  PHINode *PN;
474  unsigned Ith; // Ith incoming value.
475  Value *Val; // Exit value after expansion.
476  bool HighCost; // High Cost when expansion.
477 
478  RewritePhi(PHINode *P, unsigned I, Value *V, bool H)
479  : PN(P), Ith(I), Val(V), HighCost(H) {}
480 };
481 }
482 
483 Value *IndVarSimplify::expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S,
484  Loop *L, Instruction *InsertPt,
485  Type *ResultTy) {
486  // Before expanding S into an expensive LLVM expression, see if we can use an
487  // already existing value as the expansion for S.
488  if (Value *ExistingValue = Rewriter.getExactExistingExpansion(S, InsertPt, L))
489  if (ExistingValue->getType() == ResultTy)
490  return ExistingValue;
491 
492  // We didn't find anything, fall back to using SCEVExpander.
493  return Rewriter.expandCodeFor(S, ResultTy, InsertPt);
494 }
495 
496 //===----------------------------------------------------------------------===//
497 // rewriteLoopExitValues - Optimize IV users outside the loop.
498 // As a side effect, reduces the amount of IV processing within the loop.
499 //===----------------------------------------------------------------------===//
500 
501 /// Check to see if this loop has a computable loop-invariant execution count.
502 /// If so, this means that we can compute the final value of any expressions
503 /// that are recurrent in the loop, and substitute the exit values from the loop
504 /// into any instructions outside of the loop that use the final values of the
505 /// current expressions.
506 ///
507 /// This is mostly redundant with the regular IndVarSimplify activities that
508 /// happen later, except that it's more powerful in some cases, because it's
509 /// able to brute-force evaluate arbitrary instructions as long as they have
510 /// constant operands at the beginning of the loop.
511 void IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
512  // Check a pre-condition.
513  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
514  "Indvars did not preserve LCSSA!");
515 
516  SmallVector<BasicBlock*, 8> ExitBlocks;
517  L->getUniqueExitBlocks(ExitBlocks);
518 
519  SmallVector<RewritePhi, 8> RewritePhiSet;
520  // Find all values that are computed inside the loop, but used outside of it.
521  // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan
522  // the exit blocks of the loop to find them.
523  for (BasicBlock *ExitBB : ExitBlocks) {
524  // If there are no PHI nodes in this exit block, then no values defined
525  // inside the loop are used on this path, skip it.
526  PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
527  if (!PN) continue;
528 
529  unsigned NumPreds = PN->getNumIncomingValues();
530 
531  // Iterate over all of the PHI nodes.
532  BasicBlock::iterator BBI = ExitBB->begin();
533  while ((PN = dyn_cast<PHINode>(BBI++))) {
534  if (PN->use_empty())
535  continue; // dead use, don't replace it
536 
537  if (!SE->isSCEVable(PN->getType()))
538  continue;
539 
540  // It's necessary to tell ScalarEvolution about this explicitly so that
541  // it can walk the def-use list and forget all SCEVs, as it may not be
542  // watching the PHI itself. Once the new exit value is in place, there
543  // may not be a def-use connection between the loop and every instruction
544  // which got a SCEVAddRecExpr for that loop.
545  SE->forgetValue(PN);
546 
547  // Iterate over all of the values in all the PHI nodes.
548  for (unsigned i = 0; i != NumPreds; ++i) {
549  // If the value being merged in is not integer or is not defined
550  // in the loop, skip it.
551  Value *InVal = PN->getIncomingValue(i);
552  if (!isa<Instruction>(InVal))
553  continue;
554 
555  // If this pred is for a subloop, not L itself, skip it.
556  if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
557  continue; // The Block is in a subloop, skip it.
558 
559  // Check that InVal is defined in the loop.
560  Instruction *Inst = cast<Instruction>(InVal);
561  if (!L->contains(Inst))
562  continue;
563 
564  // Okay, this instruction has a user outside of the current loop
565  // and varies predictably *inside* the loop. Evaluate the value it
566  // contains when the loop exits, if possible.
567  const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
568  if (!SE->isLoopInvariant(ExitValue, L) ||
569  !isSafeToExpand(ExitValue, *SE))
570  continue;
571 
572  // Computing the value outside of the loop brings no benefit if :
573  // - it is definitely used inside the loop in a way which can not be
574  // optimized away.
575  // - no use outside of the loop can take advantage of hoisting the
576  // computation out of the loop
577  if (ExitValue->getSCEVType()>=scMulExpr) {
578  unsigned NumHardInternalUses = 0;
579  unsigned NumSoftExternalUses = 0;
580  unsigned NumUses = 0;
581  for (auto IB = Inst->user_begin(), IE = Inst->user_end();
582  IB != IE && NumUses <= 6; ++IB) {
583  Instruction *UseInstr = cast<Instruction>(*IB);
584  unsigned Opc = UseInstr->getOpcode();
585  NumUses++;
586  if (L->contains(UseInstr)) {
587  if (Opc == Instruction::Call || Opc == Instruction::Ret)
588  NumHardInternalUses++;
589  } else {
590  if (Opc == Instruction::PHI) {
591  // Do not count the Phi as a use. LCSSA may have inserted
592  // plenty of trivial ones.
593  NumUses--;
594  for (auto PB = UseInstr->user_begin(),
595  PE = UseInstr->user_end();
596  PB != PE && NumUses <= 6; ++PB, ++NumUses) {
597  unsigned PhiOpc = cast<Instruction>(*PB)->getOpcode();
598  if (PhiOpc != Instruction::Call && PhiOpc != Instruction::Ret)
599  NumSoftExternalUses++;
600  }
601  continue;
602  }
603  if (Opc != Instruction::Call && Opc != Instruction::Ret)
604  NumSoftExternalUses++;
605  }
606  }
607  if (NumUses <= 6 && NumHardInternalUses && !NumSoftExternalUses)
608  continue;
609  }
610 
611  bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
612  Value *ExitVal =
613  expandSCEVIfNeeded(Rewriter, ExitValue, L, Inst, PN->getType());
614 
615  DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n'
616  << " LoopVal = " << *Inst << "\n");
617 
618  if (!isValidRewrite(Inst, ExitVal)) {
619  DeadInsts.push_back(ExitVal);
620  continue;
621  }
622 
623  // Collect all the candidate PHINodes to be rewritten.
624  RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
625  }
626  }
627  }
628 
629  bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);
630 
631  // Transformation.
632  for (const RewritePhi &Phi : RewritePhiSet) {
633  PHINode *PN = Phi.PN;
634  Value *ExitVal = Phi.Val;
635 
636  // Only do the rewrite when the ExitValue can be expanded cheaply.
637  // If LoopCanBeDel is true, rewrite exit value aggressively.
638  if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
639  DeadInsts.push_back(ExitVal);
640  continue;
641  }
642 
643  Changed = true;
644  ++NumReplaced;
645  Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
646  PN->setIncomingValue(Phi.Ith, ExitVal);
647 
648  // If this instruction is dead now, delete it. Don't do it now to avoid
649  // invalidating iterators.
650  if (isInstructionTriviallyDead(Inst, TLI))
651  DeadInsts.push_back(Inst);
652 
653  // Replace PN with ExitVal if that is legal and does not break LCSSA.
654  if (PN->getNumIncomingValues() == 1 &&
655  LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
656  PN->replaceAllUsesWith(ExitVal);
657  PN->eraseFromParent();
658  }
659  }
660 
661  // The insertion point instruction may have been deleted; clear it out
662  // so that the rewriter doesn't trip over it later.
663  Rewriter.clearInsertPoint();
664 }
665 
666 //===---------------------------------------------------------------------===//
667 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
668 // they will exit at the first iteration.
669 //===---------------------------------------------------------------------===//
670 
671 /// Check to see if this loop has loop invariant conditions which lead to loop
672 /// exits. If so, we know that if the exit path is taken, it is at the first
673 /// loop iteration. This lets us predict exit values of PHI nodes that live in
674 /// loop header.
675 void IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
676  // Verify the input to the pass is already in LCSSA form.
677  assert(L->isLCSSAForm(*DT));
678 
679  SmallVector<BasicBlock *, 8> ExitBlocks;
680  L->getUniqueExitBlocks(ExitBlocks);
681  auto *LoopHeader = L->getHeader();
682  assert(LoopHeader && "Invalid loop");
683 
684  for (auto *ExitBB : ExitBlocks) {
685  BasicBlock::iterator BBI = ExitBB->begin();
686  // If there are no more PHI nodes in this exit block, then no more
687  // values defined inside the loop are used on this path.
688  while (auto *PN = dyn_cast<PHINode>(BBI++)) {
689  for (unsigned IncomingValIdx = 0, E = PN->getNumIncomingValues();
690  IncomingValIdx != E; ++IncomingValIdx) {
691  auto *IncomingBB = PN->getIncomingBlock(IncomingValIdx);
692 
693  // We currently only support loop exits from loop header. If the
694  // incoming block is not loop header, we need to recursively check
695  // all conditions starting from loop header are loop invariants.
696  // Additional support might be added in the future.
697  if (IncomingBB != LoopHeader)
698  continue;
699 
700  // Get condition that leads to the exit path.
701  auto *TermInst = IncomingBB->getTerminator();
702 
703  Value *Cond = nullptr;
704  if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
705  // Must be a conditional branch, otherwise the block
706  // should not be in the loop.
707  Cond = BI->getCondition();
708  } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
709  Cond = SI->getCondition();
710  else
711  continue;
712 
713  if (!L->isLoopInvariant(Cond))
714  continue;
715 
716  auto *ExitVal =
717  dyn_cast<PHINode>(PN->getIncomingValue(IncomingValIdx));
718 
719  // Only deal with PHIs.
720  if (!ExitVal)
721  continue;
722 
723  // If ExitVal is a PHI on the loop header, then we know its
724  // value along this exit because the exit can only be taken
725  // on the first iteration.
726  auto *LoopPreheader = L->getLoopPreheader();
727  assert(LoopPreheader && "Invalid loop");
728  int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
729  if (PreheaderIdx != -1) {
730  assert(ExitVal->getParent() == LoopHeader &&
731  "ExitVal must be in loop header");
732  PN->setIncomingValue(IncomingValIdx,
733  ExitVal->getIncomingValue(PreheaderIdx));
734  }
735  }
736  }
737  }
738 }
739 
740 /// Check whether it is possible to delete the loop after rewriting exit
741 /// value. If it is possible, ignore ReplaceExitValue and do rewriting
742 /// aggressively.
743 bool IndVarSimplify::canLoopBeDeleted(
744  Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
745 
746  BasicBlock *Preheader = L->getLoopPreheader();
747  // If there is no preheader, the loop will not be deleted.
748  if (!Preheader)
749  return false;
750 
751  // In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1.
752  // We obviate multiple ExitingBlocks case for simplicity.
753  // TODO: If we see testcase with multiple ExitingBlocks can be deleted
754  // after exit value rewriting, we can enhance the logic here.
755  SmallVector<BasicBlock *, 4> ExitingBlocks;
756  L->getExitingBlocks(ExitingBlocks);
757  SmallVector<BasicBlock *, 8> ExitBlocks;
758  L->getUniqueExitBlocks(ExitBlocks);
759  if (ExitBlocks.size() > 1 || ExitingBlocks.size() > 1)
760  return false;
761 
762  BasicBlock *ExitBlock = ExitBlocks[0];
763  BasicBlock::iterator BI = ExitBlock->begin();
764  while (PHINode *P = dyn_cast<PHINode>(BI)) {
765  Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);
766 
767  // If the Incoming value of P is found in RewritePhiSet, we know it
768  // could be rewritten to use a loop invariant value in transformation
769  // phase later. Skip it in the loop invariant check below.
770  bool found = false;
771  for (const RewritePhi &Phi : RewritePhiSet) {
772  unsigned i = Phi.Ith;
773  if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
774  found = true;
775  break;
776  }
777  }
778 
779  Instruction *I;
780  if (!found && (I = dyn_cast<Instruction>(Incoming)))
781  if (!L->hasLoopInvariantOperands(I))
782  return false;
783 
784  ++BI;
785  }
786 
787  for (auto *BB : L->blocks())
788  if (any_of(*BB, [](Instruction &I) { return I.mayHaveSideEffects(); }))
789  return false;
790 
791  return true;
792 }
793 
794 //===----------------------------------------------------------------------===//
795 // IV Widening - Extend the width of an IV to cover its widest uses.
796 //===----------------------------------------------------------------------===//
797 
798 namespace {
799 // Collect information about induction variables that are used by sign/zero
800 // extend operations. This information is recorded by CollectExtend and provides
801 // the input to WidenIV.
802 struct WideIVInfo {
803  PHINode *NarrowIV = nullptr;
804  Type *WidestNativeType = nullptr; // Widest integer type created [sz]ext
805  bool IsSigned = false; // Was a sext user seen before a zext?
806 };
807 }
808 
809 /// Update information about the induction variable that is extended by this
810 /// sign or zero extend operation. This is used to determine the final width of
811 /// the IV before actually widening it.
812 static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
813  const TargetTransformInfo *TTI) {
814  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
815  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
816  return;
817 
818  Type *Ty = Cast->getType();
819  uint64_t Width = SE->getTypeSizeInBits(Ty);
820  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
821  return;
822 
823  // Check that `Cast` actually extends the induction variable (we rely on this
824  // later). This takes care of cases where `Cast` is extending a truncation of
825  // the narrow induction variable, and thus can end up being narrower than the
826  // "narrow" induction variable.
827  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
828  if (NarrowIVWidth >= Width)
829  return;
830 
831  // Cast is either an sext or zext up to this point.
832  // We should not widen an indvar if arithmetics on the wider indvar are more
833  // expensive than those on the narrower indvar. We check only the cost of ADD
834  // because at least an ADD is required to increment the induction variable. We
835  // could compute more comprehensively the cost of all instructions on the
836  // induction variable when necessary.
837  if (TTI &&
840  Cast->getOperand(0)->getType())) {
841  return;
842  }
843 
844  if (!WI.WidestNativeType) {
845  WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
846  WI.IsSigned = IsSigned;
847  return;
848  }
849 
850  // We extend the IV to satisfy the sign of its first user, arbitrarily.
851  if (WI.IsSigned != IsSigned)
852  return;
853 
854  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
855  WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
856 }
857 
858 namespace {
859 
860 /// Record a link in the Narrow IV def-use chain along with the WideIV that
861 /// computes the same value as the Narrow IV def. This avoids caching Use*
862 /// pointers.
863 struct NarrowIVDefUse {
864  Instruction *NarrowDef = nullptr;
865  Instruction *NarrowUse = nullptr;
866  Instruction *WideDef = nullptr;
867 
868  // True if the narrow def is never negative. Tracking this information lets
869  // us use a sign extension instead of a zero extension or vice versa, when
870  // profitable and legal.
871  bool NeverNegative = false;
872 
873  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
874  bool NeverNegative)
875  : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
876  NeverNegative(NeverNegative) {}
877 };
878 
879 /// The goal of this transform is to remove sign and zero extends without
880 /// creating any new induction variables. To do this, it creates a new phi of
881 /// the wider type and redirects all users, either removing extends or inserting
882 /// truncs whenever we stop propagating the type.
883 ///
884 class WidenIV {
885  // Parameters
886  PHINode *OrigPhi;
887  Type *WideType;
888 
889  // Context
890  LoopInfo *LI;
891  Loop *L;
892  ScalarEvolution *SE;
893  DominatorTree *DT;
894 
895  // Does the module have any calls to the llvm.experimental.guard intrinsic
896  // at all? If not we can avoid scanning instructions looking for guards.
897  bool HasGuards;
898 
899  // Result
900  PHINode *WidePhi;
901  Instruction *WideInc;
902  const SCEV *WideIncExpr;
903  SmallVectorImpl<WeakVH> &DeadInsts;
904 
906  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
907 
908  enum ExtendKind { ZeroExtended, SignExtended, Unknown };
909  // A map tracking the kind of extension used to widen each narrow IV
910  // and narrow IV user.
911  // Key: pointer to a narrow IV or IV user.
912  // Value: the kind of extension used to widen this Instruction.
913  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;
914 
915  typedef std::pair<AssertingVH<Value>, AssertingVH<Instruction>> DefUserPair;
916  // A map with control-dependent ranges for post increment IV uses. The key is
917  // a pair of IV def and a use of this def denoting the context. The value is
918  // a ConstantRange representing possible values of the def at the given
919  // context.
920  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;
921 
922  Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
923  Instruction *UseI) {
924  DefUserPair Key(Def, UseI);
925  auto It = PostIncRangeInfos.find(Key);
926  return It == PostIncRangeInfos.end()
928  : Optional<ConstantRange>(It->second);
929  }
930 
931  void calculatePostIncRanges(PHINode *OrigPhi);
932  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);
933  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
934  DefUserPair Key(Def, UseI);
935  auto It = PostIncRangeInfos.find(Key);
936  if (It == PostIncRangeInfos.end())
937  PostIncRangeInfos.insert({Key, R});
938  else
939  It->second = R.intersectWith(It->second);
940  }
941 
942 public:
943  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo,
944  ScalarEvolution *SEv, DominatorTree *DTree,
945  SmallVectorImpl<WeakVH> &DI, bool HasGuards) :
946  OrigPhi(WI.NarrowIV),
947  WideType(WI.WidestNativeType),
948  LI(LInfo),
949  L(LI->getLoopFor(OrigPhi->getParent())),
950  SE(SEv),
951  DT(DTree),
952  HasGuards(HasGuards),
953  WidePhi(nullptr),
954  WideInc(nullptr),
955  WideIncExpr(nullptr),
956  DeadInsts(DI) {
957  assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
958  ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
959  }
960 
961  PHINode *createWideIV(SCEVExpander &Rewriter);
962 
963 protected:
964  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
965  Instruction *Use);
966 
967  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
968  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
969  const SCEVAddRecExpr *WideAR);
970  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);
971 
972  ExtendKind getExtendKind(Instruction *I);
973 
974  typedef std::pair<const SCEVAddRecExpr *, ExtendKind> WidenedRecTy;
975 
976  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);
977 
978  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);
979 
980  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
981  unsigned OpCode) const;
982 
983  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
984 
985  bool widenLoopCompare(NarrowIVDefUse DU);
986 
987  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
988 };
989 } // anonymous namespace
990 
991 /// Perform a quick domtree based check for loop invariance assuming that V is
992 /// used within the loop. LoopInfo::isLoopInvariant() seems gratuitous for this
993 /// purpose.
994 static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT) {
995  Instruction *Inst = dyn_cast<Instruction>(V);
996  if (!Inst)
997  return true;
998 
999  return DT->properlyDominates(Inst->getParent(), L->getHeader());
1000 }
1001 
1002 Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
1003  bool IsSigned, Instruction *Use) {
1004  // Set the debug location and conservative insertion point.
1005  IRBuilder<> Builder(Use);
1006  // Hoist the insertion point into loop preheaders as far as possible.
1007  for (const Loop *L = LI->getLoopFor(Use->getParent());
1008  L && L->getLoopPreheader() && isLoopInvariant(NarrowOper, L, DT);
1009  L = L->getParentLoop())
1010  Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
1011 
1012  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
1013  Builder.CreateZExt(NarrowOper, WideType);
1014 }
1015 
1016 /// Instantiate a wide operation to replace a narrow operation. This only needs
1017 /// to handle operations that can evaluation to SCEVAddRec. It can safely return
1018 /// 0 for any operation we decide not to clone.
1019 Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
1020  const SCEVAddRecExpr *WideAR) {
1021  unsigned Opcode = DU.NarrowUse->getOpcode();
1022  switch (Opcode) {
1023  default:
1024  return nullptr;
1025  case Instruction::Add:
1026  case Instruction::Mul:
1027  case Instruction::UDiv:
1028  case Instruction::Sub:
1029  return cloneArithmeticIVUser(DU, WideAR);
1030 
1031  case Instruction::And:
1032  case Instruction::Or:
1033  case Instruction::Xor:
1034  case Instruction::Shl:
1035  case Instruction::LShr:
1036  case Instruction::AShr:
1037  return cloneBitwiseIVUser(DU);
1038  }
1039 }
1040 
1041 Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
1042  Instruction *NarrowUse = DU.NarrowUse;
1043  Instruction *NarrowDef = DU.NarrowDef;
1044  Instruction *WideDef = DU.WideDef;
1045 
1046  DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");
1047 
1048  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
1049  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
1050  // invariant and will be folded or hoisted. If it actually comes from a
1051  // widened IV, it should be removed during a future call to widenIVUse.
1052  bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
1053  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1054  ? WideDef
1055  : createExtendInst(NarrowUse->getOperand(0), WideType,
1056  IsSigned, NarrowUse);
1057  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1058  ? WideDef
1059  : createExtendInst(NarrowUse->getOperand(1), WideType,
1060  IsSigned, NarrowUse);
1061 
1062  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1063  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1064  NarrowBO->getName());
1065  IRBuilder<> Builder(NarrowUse);
1066  Builder.Insert(WideBO);
1067  WideBO->copyIRFlags(NarrowBO);
1068  return WideBO;
1069 }
1070 
1071 Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
1072  const SCEVAddRecExpr *WideAR) {
1073  Instruction *NarrowUse = DU.NarrowUse;
1074  Instruction *NarrowDef = DU.NarrowDef;
1075  Instruction *WideDef = DU.WideDef;
1076 
1077  DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1078 
1079  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;
1080 
1081  // We're trying to find X such that
1082  //
1083  // Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
1084  //
1085  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
1086  // and check using SCEV if any of them are correct.
1087 
1088  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
1089  // correct solution to X.
1090  auto GuessNonIVOperand = [&](bool SignExt) {
1091  const SCEV *WideLHS;
1092  const SCEV *WideRHS;
1093 
1094  auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
1095  if (SignExt)
1096  return SE->getSignExtendExpr(S, Ty);
1097  return SE->getZeroExtendExpr(S, Ty);
1098  };
1099 
1100  if (IVOpIdx == 0) {
1101  WideLHS = SE->getSCEV(WideDef);
1102  const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
1103  WideRHS = GetExtend(NarrowRHS, WideType);
1104  } else {
1105  const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
1106  WideLHS = GetExtend(NarrowLHS, WideType);
1107  WideRHS = SE->getSCEV(WideDef);
1108  }
1109 
1110  // WideUse is "WideDef `op.wide` X" as described in the comment.
1111  const SCEV *WideUse = nullptr;
1112 
1113  switch (NarrowUse->getOpcode()) {
1114  default:
1115  llvm_unreachable("No other possibility!");
1116 
1117  case Instruction::Add:
1118  WideUse = SE->getAddExpr(WideLHS, WideRHS);
1119  break;
1120 
1121  case Instruction::Mul:
1122  WideUse = SE->getMulExpr(WideLHS, WideRHS);
1123  break;
1124 
1125  case Instruction::UDiv:
1126  WideUse = SE->getUDivExpr(WideLHS, WideRHS);
1127  break;
1128 
1129  case Instruction::Sub:
1130  WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
1131  break;
1132  }
1133 
1134  return WideUse == WideAR;
1135  };
1136 
1137  bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
1138  if (!GuessNonIVOperand(SignExtend)) {
1139  SignExtend = !SignExtend;
1140  if (!GuessNonIVOperand(SignExtend))
1141  return nullptr;
1142  }
1143 
1144  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1145  ? WideDef
1146  : createExtendInst(NarrowUse->getOperand(0), WideType,
1147  SignExtend, NarrowUse);
1148  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1149  ? WideDef
1150  : createExtendInst(NarrowUse->getOperand(1), WideType,
1151  SignExtend, NarrowUse);
1152 
1153  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1154  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1155  NarrowBO->getName());
1156 
1157  IRBuilder<> Builder(NarrowUse);
1158  Builder.Insert(WideBO);
1159  WideBO->copyIRFlags(NarrowBO);
1160  return WideBO;
1161 }
1162 
1163 WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
1164  auto It = ExtendKindMap.find(I);
1165  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
1166  return It->second;
1167 }
1168 
1169 const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
1170  unsigned OpCode) const {
1171  if (OpCode == Instruction::Add)
1172  return SE->getAddExpr(LHS, RHS);
1173  if (OpCode == Instruction::Sub)
1174  return SE->getMinusSCEV(LHS, RHS);
1175  if (OpCode == Instruction::Mul)
1176  return SE->getMulExpr(LHS, RHS);
1177 
1178  llvm_unreachable("Unsupported opcode.");
1179 }
1180 
1181 /// No-wrap operations can transfer sign extension of their result to their
1182 /// operands. Generate the SCEV value for the widened operation without
1183 /// actually modifying the IR yet. If the expression after extending the
1184 /// operands is an AddRec for this loop, return the AddRec and the kind of
1185 /// extension used.
1186 WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
1187 
1188  // Handle the common case of add<nsw/nuw>
1189  const unsigned OpCode = DU.NarrowUse->getOpcode();
1190  // Only Add/Sub/Mul instructions supported yet.
1191  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1192  OpCode != Instruction::Mul)
1193  return {nullptr, Unknown};
1194 
1195  // One operand (NarrowDef) has already been extended to WideDef. Now determine
1196  // if extending the other will lead to a recurrence.
1197  const unsigned ExtendOperIdx =
1198  DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
1199  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
1200 
1201  const SCEV *ExtendOperExpr = nullptr;
1202  const OverflowingBinaryOperator *OBO =
1203  cast<OverflowingBinaryOperator>(DU.NarrowUse);
1204  ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
1205  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
1206  ExtendOperExpr = SE->getSignExtendExpr(
1207  SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
1208  else if(ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
1209  ExtendOperExpr = SE->getZeroExtendExpr(
1210  SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
1211  else
1212  return {nullptr, Unknown};
1213 
1214  // When creating this SCEV expr, don't apply the current operations NSW or NUW
1215  // flags. This instruction may be guarded by control flow that the no-wrap
1216  // behavior depends on. Non-control-equivalent instructions can be mapped to
1217  // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
1218  // semantics to those operations.
1219  const SCEV *lhs = SE->getSCEV(DU.WideDef);
1220  const SCEV *rhs = ExtendOperExpr;
1221 
1222  // Let's swap operands to the initial order for the case of non-commutative
1223  // operations, like SUB. See PR21014.
1224  if (ExtendOperIdx == 0)
1225  std::swap(lhs, rhs);
1226  const SCEVAddRecExpr *AddRec =
1227  dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));
1228 
1229  if (!AddRec || AddRec->getLoop() != L)
1230  return {nullptr, Unknown};
1231 
1232  return {AddRec, ExtKind};
1233 }
1234 
1235 /// Is this instruction potentially interesting for further simplification after
1236 /// widening it's type? In other words, can the extend be safely hoisted out of
1237 /// the loop with SCEV reducing the value to a recurrence on the same loop. If
1238 /// so, return the extended recurrence and the kind of extension used. Otherwise
1239 /// return {nullptr, Unknown}.
1240 WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
1241  if (!SE->isSCEVable(DU.NarrowUse->getType()))
1242  return {nullptr, Unknown};
1243 
1244  const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
1245  if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
1246  SE->getTypeSizeInBits(WideType)) {
1247  // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
1248  // index. So don't follow this use.
1249  return {nullptr, Unknown};
1250  }
1251 
1252  const SCEV *WideExpr;
1253  ExtendKind ExtKind;
1254  if (DU.NeverNegative) {
1255  WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1256  if (isa<SCEVAddRecExpr>(WideExpr))
1257  ExtKind = SignExtended;
1258  else {
1259  WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1260  ExtKind = ZeroExtended;
1261  }
1262  } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
1263  WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1264  ExtKind = SignExtended;
1265  } else {
1266  WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1267  ExtKind = ZeroExtended;
1268  }
1269  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
1270  if (!AddRec || AddRec->getLoop() != L)
1271  return {nullptr, Unknown};
1272  return {AddRec, ExtKind};
1273 }
1274 
1275 /// This IV user cannot be widen. Replace this use of the original narrow IV
1276 /// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
1277 static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
1278  DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef
1279  << " for user " << *DU.NarrowUse << "\n");
1280  IRBuilder<> Builder(
1281  getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI));
1282  Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
1283  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
1284 }
1285 
1286 /// If the narrow use is a compare instruction, then widen the compare
1287 // (and possibly the other operand). The extend operation is hoisted into the
1288 // loop preheader as far as possible.
1289 bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
1290  ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
1291  if (!Cmp)
1292  return false;
1293 
1294  // We can legally widen the comparison in the following two cases:
1295  //
1296  // - The signedness of the IV extension and comparison match
1297  //
1298  // - The narrow IV is always positive (and thus its sign extension is equal
1299  // to its zero extension). For instance, let's say we're zero extending
1300  // %narrow for the following use
1301  //
1302  // icmp slt i32 %narrow, %val ... (A)
1303  //
1304  // and %narrow is always positive. Then
1305  //
1306  // (A) == icmp slt i32 sext(%narrow), sext(%val)
1307  // == icmp slt i32 zext(%narrow), sext(%val)
1308  bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
1309  if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
1310  return false;
1311 
1312  Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
1313  unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
1314  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1315  assert (CastWidth <= IVWidth && "Unexpected width while widening compare.");
1316 
1317  // Widen the compare instruction.
1318  IRBuilder<> Builder(
1319  getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI));
1320  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1321 
1322  // Widen the other operand of the compare, if necessary.
1323  if (CastWidth < IVWidth) {
1324  Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
1325  DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
1326  }
1327  return true;
1328 }
1329 
1330 /// Determine whether an individual user of the narrow IV can be widened. If so,
1331 /// return the wide clone of the user.
///
/// \param DU       the narrow def-use edge to process; DU.NarrowDef must already
///                 have an entry in ExtendKindMap.
/// \param Rewriter expander asked to hoist the wide IV increment when that
///                 increment can be reused for this user.
/// \returns the wide instruction standing in for DU.NarrowUse, or nullptr when
///          the use was dealt with in place (extend eliminated, compare
///          widened, truncate inserted, LCSSA phi widened) or was not cloned.
1332 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
1333  assert(ExtendKindMap.count(DU.NarrowDef) &&
1334  "Should already know the kind of extension used to widen NarrowDef");
1335 
1336  // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1337  if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
1338  if (LI->getLoopFor(UsePhi->getParent()) != L) {
1339  // For LCSSA phis, sink the truncate outside the loop.
1340  // After SimplifyCFG most loop exit targets have a single predecessor.
1341  // Otherwise fall back to a truncate within the loop.
1342  if (UsePhi->getNumOperands() != 1)
1343  truncateIVUse(DU, DT, LI);
1344  else {
1345  // Widening the PHI requires us to insert a trunc. The logical place
1346  // for this trunc is in the same BB as the PHI. This is not possible if
1347  // the BB is terminated by a catchswitch.
1348  if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
1349  return nullptr;
1350 
 // Create a wide single-entry phi next to the narrow one and replace the
 // narrow phi's uses with a truncate of it; the narrow phi is then dead.
1351  PHINode *WidePhi =
1352  PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
1353  UsePhi);
1354  WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
1355  IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
1356  Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
1357  UsePhi->replaceAllUsesWith(Trunc);
1358  DeadInsts.emplace_back(UsePhi);
1359  DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi
1360  << " to " << *WidePhi << "\n");
1361  }
1362  return nullptr;
1363  }
1364  }
1365 
1366  // This narrow use can be widened by a sext if it's non-negative or its narrow
1367  // def was widened by a sext. Same for zext.
1368  auto canWidenBySExt = [&]() {
1369  return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
1370  };
1371  auto canWidenByZExt = [&]() {
1372  return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
1373  };
1374 
1375  // Our raison d'etre! Eliminate sign and zero extension.
1376  if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
1377  (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
1378  Value *NewDef = DU.WideDef;
1379  if (DU.NarrowUse->getType() != WideType) {
1380  unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
1381  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1382  if (CastWidth < IVWidth) {
1383  // The cast isn't as wide as the IV, so insert a Trunc.
1384  IRBuilder<> Builder(DU.NarrowUse);
1385  NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
1386  }
1387  else {
1388  // A wider extend was hidden behind a narrower one. This may induce
1389  // another round of IV widening in which the intermediate IV becomes
1390  // dead. It should be very rare.
1391  DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1392  << " not wide enough to subsume " << *DU.NarrowUse << "\n");
1393  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1394  NewDef = DU.NarrowUse;
1395  }
1396  }
 // When the extend itself was kept (the wider-extend case above), NewDef is
 // the extend and there is nothing to replace or delete.
1397  if (NewDef != DU.NarrowUse) {
1398  DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1399  << " replaced by " << *DU.WideDef << "\n");
1400  ++NumElimExt;
1401  DU.NarrowUse->replaceAllUsesWith(NewDef);
1402  DeadInsts.emplace_back(DU.NarrowUse);
1403  }
1404  // Now that the extend is gone, we want to expose its uses for potential
1405  // further simplification. We don't need to directly inform SimplifyIVUsers
1406  // of the new users, because their parent IV will be processed later as a
1407  // new loop phi. If we preserved IVUsers analysis, we would also want to
1408  // push the uses of WideDef here.
1409 
1410  // No further widening is needed. The deceased [sz]ext had done it for us.
1411  return nullptr;
1412  }
1413 
1414  // Does this user itself evaluate to a recurrence after widening?
 // The no-wrap based query is tried first; getWideRecurrence is the fallback.
1415  WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
1416  if (!WideAddRec.first)
1417  WideAddRec = getWideRecurrence(DU);
1418 
1419  assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
1420  if (!WideAddRec.first) {
1421  // If use is a loop condition, try to promote the condition instead of
1422  // truncating the IV first.
1423  if (widenLoopCompare(DU))
1424  return nullptr;
1425 
1426  // This user does not evaluate to a recurrence after widening, so don't
1427  // follow it. Instead insert a Trunc to kill off the original use,
1428  // eventually isolating the original narrow IV so it can be removed.
1429  truncateIVUse(DU, DT, LI);
1430  return nullptr;
1431  }
1432  // Assume block terminators cannot evaluate to a recurrence. We can't
1433  // insert a Trunc after a terminator if there happens to be a critical edge.
1434  assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
1435  "SCEV is not expected to evaluate a block terminator");
1436 
1437  // Reuse the IV increment that SCEVExpander created as long as it dominates
1438  // NarrowUse.
1439  Instruction *WideUse = nullptr;
1440  if (WideAddRec.first == WideIncExpr &&
1441  Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
1442  WideUse = WideInc;
1443  else {
1444  WideUse = cloneIVUser(DU, WideAddRec.first);
1445  if (!WideUse)
1446  return nullptr;
1447  }
1448  // Evaluation of WideAddRec ensured that the narrow expression could be
1449  // extended outside the loop without overflow. This suggests that the wide use
1450  // evaluates to the same expression as the extended narrow use, but doesn't
1451  // absolutely guarantee it. Hence the following failsafe check. In rare cases
1452  // where it fails, we simply throw away the newly created wide use.
1453  if (WideAddRec.first != SE->getSCEV(WideUse)) {
1454  DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse
1455  << ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first << "\n");
1456  DeadInsts.emplace_back(WideUse);
1457  return nullptr;
1458  }
1459 
 // Remember how NarrowUse was extended so that its own users can be widened.
1460  ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
1461  // Returning WideUse pushes it on the worklist.
1462  return WideUse;
1463 }
1464 
1465 /// Add eligible users of NarrowDef to NarrowIVUsers.
1466 ///
1467 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1468  const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1469  bool NonNegativeDef =
1470  SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1471  SE->getConstant(NarrowSCEV->getType(), 0));
1472  for (User *U : NarrowDef->users()) {
1473  Instruction *NarrowUser = cast<Instruction>(U);
1474 
1475  // Handle data flow merges and bizarre phi cycles.
1476  if (!Widened.insert(NarrowUser).second)
1477  continue;
1478 
1479  bool NonNegativeUse = false;
1480  if (!NonNegativeDef) {
1481  // We might have a control-dependent range information for this context.
1482  if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
1483  NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
1484  }
1485 
1486  NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
1487  NonNegativeDef || NonNegativeUse);
1488  }
1489 }
1490 
1491 /// Process a single induction variable. First use the SCEVExpander to create a
1492 /// wide induction variable that evaluates to the same recurrence as the
1493 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's
1494 /// def-use chain. After widenIVUse has processed all interesting IV users, the
1495 /// narrow IV will be isolated for removal by DeleteDeadPHIs.
1496 ///
1497 /// It would be simpler to delete uses as they are processed, but we must avoid
1498 /// invalidating SCEV expressions.
1499 ///
/// \param Rewriter expander used to materialize the wide IV and passed on to
///                 widenIVUse for each def-use edge.
/// \returns the wide phi, or nullptr when the original phi is not an AddRec or
///          its extension to WideType is not an AddRec of this loop.
1500 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
1501  // Is this phi an induction variable?
1502  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
1503  if (!AddRec)
1504  return nullptr;
1505 
1506  // Widen the induction variable expression.
1507  const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
1508  ? SE->getSignExtendExpr(AddRec, WideType)
1509  : SE->getZeroExtendExpr(AddRec, WideType);
1510 
1511  assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
1512  "Expect the new IV expression to preserve its type");
1513 
1514  // Can the IV be extended outside the loop without overflow?
1515  AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
1516  if (!AddRec || AddRec->getLoop() != L)
1517  return nullptr;
1518 
1519  // An AddRec must have loop-invariant operands. Since this AddRec is
1520  // materialized by a loop header phi, the expression cannot have any post-loop
1521  // operands, so they must dominate the loop header.
1522  assert(
1523  SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
1524  SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
1525  "Loop header phi recurrence inputs do not dominate the loop");
1526 
1527  // Iterate over IV uses (including transitive ones) looking for IV increments
1528  // of the form 'add nsw %iv, <const>'. For each increment and each use of
1529  // the increment calculate control-dependent range information basing on
1530  // dominating conditions inside of the loop (e.g. a range check inside of the
1531  // loop). Calculated ranges are stored in PostIncRangeInfos map.
1532  //
1533  // Control-dependent range information is later used to prove that a narrow
1534  // definition is not negative (see pushNarrowIVUsers). It's difficult to do
1535  // this on demand because when pushNarrowIVUsers needs this information some
1536  // of the dominating conditions might be already widened.
 // NOTE(review): the embedded listing numbers jump from 1536 to 1538 here, so
 // a source line may have been lost from this copy (possibly a condition
 // guarding the call below) -- confirm against the upstream file.
1538  calculatePostIncRanges(OrigPhi);
1539 
1540  // The rewriter provides a value for the desired IV expression. This may
1541  // either find an existing phi or materialize a new one. Either way, we
1542  // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
1543  // of the phi-SCC dominates the loop entry.
1544  Instruction *InsertPt = &L->getHeader()->front();
1545  WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
1546 
1547  // Remembering the WideIV increment generated by SCEVExpander allows
1548  // widenIVUse to reuse it when widening the narrow IV's increment. We don't
1549  // employ a general reuse mechanism because the call above is the only call to
1550  // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
1551  if (BasicBlock *LatchBlock = L->getLoopLatch()) {
1552  WideInc =
1553  cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
1554  WideIncExpr = SE->getSCEV(WideInc);
1555  // Propagate the debug location associated with the original loop increment
1556  // to the new (widened) increment.
1557  auto *OrigInc =
1558  cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
1559  WideInc->setDebugLoc(OrigInc->getDebugLoc());
1560  }
1561 
1562  DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
1563  ++NumWidened;
1564 
1565  // Traverse the def-use chain using a worklist starting at the original IV.
1566  assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" );
1567 
 // Mark the original phi as visited so that cycles back to it are not pushed.
1568  Widened.insert(OrigPhi);
1569  pushNarrowIVUsers(OrigPhi, WidePhi);
1570 
1571  while (!NarrowIVUsers.empty()) {
1572  NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
1573 
1574  // Process a def-use edge. This may replace the use, so don't hold a
1575  // use_iterator across it.
1576  Instruction *WideUse = widenIVUse(DU, Rewriter);
1577 
1578  // Follow all def-use edges from the previous narrow use.
1579  if (WideUse)
1580  pushNarrowIVUsers(DU.NarrowUse, WideUse);
1581 
1582  // widenIVUse may have removed the def-use edge.
1583  if (DU.NarrowDef->use_empty())
1584  DeadInsts.emplace_back(DU.NarrowDef);
1585  }
1586  return WidePhi;
1587 }
1588 
1589 /// Calculates control-dependent range for the given def at the given context
1590 /// by looking at dominating conditions inside of the loop
1591 void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
1592  Instruction *NarrowUser) {
1593  using namespace llvm::PatternMatch;
1594 
1595  Value *NarrowDefLHS;
1596  const APInt *NarrowDefRHS;
1597  if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
1598  m_APInt(NarrowDefRHS))) ||
1599  !NarrowDefRHS->isNonNegative())
1600  return;
1601 
1602  auto UpdateRangeFromCondition = [&] (Value *Condition,
1603  bool TrueDest) {
1604  CmpInst::Predicate Pred;
1605  Value *CmpRHS;
1606  if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
1607  m_Value(CmpRHS))))
1608  return;
1609 
1611  TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
1612 
1613  auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
1614  auto CmpConstrainedLHSRange =
1615  ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
1616  auto NarrowDefRange =
1617  CmpConstrainedLHSRange.addWithNoSignedWrap(*NarrowDefRHS);
1618 
1619  updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
1620  };
1621 
1622  auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
1623  if (!HasGuards)
1624  return;
1625 
1626  for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
1627  Ctx->getParent()->rend())) {
1628  Value *C = nullptr;
1629  if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
1630  UpdateRangeFromCondition(C, /*TrueDest=*/true);
1631  }
1632  };
1633 
1634  UpdateRangeFromGuards(NarrowUser);
1635 
1636  BasicBlock *NarrowUserBB = NarrowUser->getParent();
1637  // If NarrowUserBB is statically unreachable asking dominator queries may
1638  // yield surprising results. (e.g. the block may not have a dom tree node)
1639  if (!DT->isReachableFromEntry(NarrowUserBB))
1640  return;
1641 
1642  for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
1643  L->contains(DTB->getBlock());
1644  DTB = DTB->getIDom()) {
1645  auto *BB = DTB->getBlock();
1646  auto *TI = BB->getTerminator();
1647  UpdateRangeFromGuards(TI);
1648 
1649  auto *BI = dyn_cast<BranchInst>(TI);
1650  if (!BI || !BI->isConditional())
1651  continue;
1652 
1653  auto *TrueSuccessor = BI->getSuccessor(0);
1654  auto *FalseSuccessor = BI->getSuccessor(1);
1655 
1656  auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
1657  return BBE.isSingleEdge() &&
1658  DT->dominates(BBE, NarrowUser->getParent());
1659  };
1660 
1661  if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
1662  UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
1663 
1664  if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
1665  UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
1666  }
1667 }
1668 
1669 /// Calculates PostIncRangeInfos map for the given IV
1670 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
1673  Worklist.push_back(OrigPhi);
1674  Visited.insert(OrigPhi);
1675 
1676  while (!Worklist.empty()) {
1677  Instruction *NarrowDef = Worklist.pop_back_val();
1678 
1679  for (Use &U : NarrowDef->uses()) {
1680  auto *NarrowUser = cast<Instruction>(U.getUser());
1681 
1682  // Don't go looking outside the current loop.
1683  auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
1684  if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
1685  continue;
1686 
1687  if (!Visited.insert(NarrowUser).second)
1688  continue;
1689 
1690  Worklist.push_back(NarrowUser);
1691 
1692  calculatePostIncRange(NarrowDef, NarrowUser);
1693  }
1694  }
1695 }
1696 
1697 //===----------------------------------------------------------------------===//
1698 // Live IV Reduction - Minimize IVs live across the loop.
1699 //===----------------------------------------------------------------------===//
1700 
1701 
1702 //===----------------------------------------------------------------------===//
1703 // Simplification of IV users based on SCEV evaluation.
1704 //===----------------------------------------------------------------------===//
1705 
1706 namespace {
1707 class IndVarSimplifyVisitor : public IVVisitor {
1708  ScalarEvolution *SE;
1709  const TargetTransformInfo *TTI;
1710  PHINode *IVPhi;
1711 
1712 public:
1713  WideIVInfo WI;
1714 
1715  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
1716  const TargetTransformInfo *TTI,
1717  const DominatorTree *DTree)
1718  : SE(SCEV), TTI(TTI), IVPhi(IV) {
1719  DT = DTree;
1720  WI.NarrowIV = IVPhi;
1721  }
1722 
1723  // Implement the interface used by simplifyUsersOfIV.
1724  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
1725 };
1726 }
1727 
1728 /// Iteratively perform simplification on a worklist of IV users. Each
1729 /// successive simplification may push more users which may themselves be
1730 /// candidates for simplification.
1731 ///
1732 /// Sign/Zero extend elimination is interleaved with IV simplification.
1733 ///
1734 void IndVarSimplify::simplifyAndExtend(Loop *L,
1735  SCEVExpander &Rewriter,
1736  LoopInfo *LI) {
1738 
1739  auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
1740  Intrinsic::getName(Intrinsic::experimental_guard));
1741  bool HasGuards = GuardDecl && !GuardDecl->use_empty();
1742 
1743  SmallVector<PHINode*, 8> LoopPhis;
1744  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1745  LoopPhis.push_back(cast<PHINode>(I));
1746  }
1747  // Each round of simplification iterates through the SimplifyIVUsers worklist
1748  // for all current phis, then determines whether any IVs can be
1749  // widened. Widening adds new phis to LoopPhis, inducing another round of
1750  // simplification on the wide IVs.
1751  while (!LoopPhis.empty()) {
1752  // Evaluate as many IV expressions as possible before widening any IVs. This
1753  // forces SCEV to set no-wrap flags before evaluating sign/zero
1754  // extension. The first time SCEV attempts to normalize sign/zero extension,
1755  // the result becomes final. So for the most predictable results, we delay
1756  // evaluation of sign/zero extend evaluation until needed, and avoid running
1757  // other SCEV based analysis prior to simplifyAndExtend.
1758  do {
1759  PHINode *CurrIV = LoopPhis.pop_back_val();
1760 
1761  // Information about sign/zero extensions of CurrIV.
1762  IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
1763 
1764  Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, &Visitor);
1765 
1766  if (Visitor.WI.WidestNativeType) {
1767  WideIVs.push_back(Visitor.WI);
1768  }
1769  } while(!LoopPhis.empty());
1770 
1771  for (; !WideIVs.empty(); WideIVs.pop_back()) {
1772  WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
1773  if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
1774  Changed = true;
1775  LoopPhis.push_back(WidePhi);
1776  }
1777  }
1778  }
1779 }
1780 
1781 //===----------------------------------------------------------------------===//
1782 // linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
1783 //===----------------------------------------------------------------------===//
1784 
1785 /// Return true if this loop's backedge taken count expression can be safely and
1786 /// cheaply expanded into an instruction sequence that can be used by
1787 /// linearFunctionTestReplace.
1788 ///
1789 /// TODO: This fails for pointer-type loop counters with greater than one byte
1790 /// strides, consequently preventing LFTR from running. For the purpose of LFTR
1791 /// we could skip this check in the case that the LFTR loop counter (chosen by
1792 /// FindLoopCounter) is also pointer type. Instead, we could directly convert
1793 /// the loop test to an inequality test by checking the target data's alignment
1794 /// of element types (given that the initial pointer value originates from or is
1795 /// used by ABI constrained operation, as opposed to inttoptr/ptrtoint).
1796 /// However, we don't yet have a strong motivation for converting loop tests
1797 /// into inequality tests.
1799  SCEVExpander &Rewriter) {
1800  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
1801  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
1802  BackedgeTakenCount->isZero())
1803  return false;
1804 
1805  if (!L->getExitingBlock())
1806  return false;
1807 
1808  // Can't rewrite non-branch yet.
1809  if (!isa<BranchInst>(L->getExitingBlock()->getTerminator()))
1810  return false;
1811 
1812  if (Rewriter.isHighCostExpansion(BackedgeTakenCount, L))
1813  return false;
1814 
1815  return true;
1816 }
1817 
1818 /// Return the loop header phi IFF IncV adds a loop invariant value to the phi.
1820  Instruction *IncI = dyn_cast<Instruction>(IncV);
1821  if (!IncI)
1822  return nullptr;
1823 
1824  switch (IncI->getOpcode()) {
1825  case Instruction::Add:
1826  case Instruction::Sub:
1827  break;
1828  case Instruction::GetElementPtr:
1829  // An IV counter must preserve its type.
1830  if (IncI->getNumOperands() == 2)
1831  break;
1832  default:
1833  return nullptr;
1834  }
1835 
1836  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
1837  if (Phi && Phi->getParent() == L->getHeader()) {
1838  if (isLoopInvariant(IncI->getOperand(1), L, DT))
1839  return Phi;
1840  return nullptr;
1841  }
1842  if (IncI->getOpcode() == Instruction::GetElementPtr)
1843  return nullptr;
1844 
1845  // Allow add/sub to be commuted.
1846  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
1847  if (Phi && Phi->getParent() == L->getHeader()) {
1848  if (isLoopInvariant(IncI->getOperand(0), L, DT))
1849  return Phi;
1850  }
1851  return nullptr;
1852 }
1853 
1854 /// Return the compare guarding the loop latch, or NULL for unrecognized tests.
1856  assert(L->getExitingBlock() && "expected loop exit");
1857 
1858  BasicBlock *LatchBlock = L->getLoopLatch();
1859  // Don't bother with LFTR if the loop is not properly simplified.
1860  if (!LatchBlock)
1861  return nullptr;
1862 
1863  BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
1864  assert(BI && "expected exit branch");
1865 
1866  return dyn_cast<ICmpInst>(BI->getCondition());
1867 }
1868 
1869 /// linearFunctionTestReplace policy. Return true unless we can show that the
1870 /// current exit test is already sufficiently canonical.
1871 static bool needsLFTR(Loop *L, DominatorTree *DT) {
1872  // Do LFTR to simplify the exit condition to an ICMP.
1873  ICmpInst *Cond = getLoopTest(L);
1874  if (!Cond)
1875  return true;
1876 
1877  // Do LFTR to simplify the exit ICMP to EQ/NE
1878  ICmpInst::Predicate Pred = Cond->getPredicate();
1879  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
1880  return true;
1881 
1882  // Look for a loop invariant RHS
1883  Value *LHS = Cond->getOperand(0);
1884  Value *RHS = Cond->getOperand(1);
1885  if (!isLoopInvariant(RHS, L, DT)) {
1886  if (!isLoopInvariant(LHS, L, DT))
1887  return true;
1888  std::swap(LHS, RHS);
1889  }
1890  // Look for a simple IV counter LHS
1891  PHINode *Phi = dyn_cast<PHINode>(LHS);
1892  if (!Phi)
1893  Phi = getLoopPhiForCounter(LHS, L, DT);
1894 
1895  if (!Phi)
1896  return true;
1897 
1898  // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
1899  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
1900  if (Idx < 0)
1901  return true;
1902 
1903  // Do LFTR if the exit condition's IV is *not* a simple counter.
1904  Value *IncV = Phi->getIncomingValue(Idx);
1905  return Phi != getLoopPhiForCounter(IncV, L, DT);
1906 }
1907 
1908 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
1909 /// down to checking that all operands are constant and listing instructions
1910 /// that may hide undef.
1912  unsigned Depth) {
1913  if (isa<Constant>(V))
1914  return !isa<UndefValue>(V);
1915 
1916  if (Depth >= 6)
1917  return false;
1918 
1919  // Conservatively handle non-constant non-instructions. For example, Arguments
1920  // may be undef.
1921  Instruction *I = dyn_cast<Instruction>(V);
1922  if (!I)
1923  return false;
1924 
1925  // Load and return values may be undef.
1926  if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
1927  return false;
1928 
1929  // Optimistically handle other instructions.
1930  for (Value *Op : I->operands()) {
1931  if (!Visited.insert(Op).second)
1932  continue;
1933  if (!hasConcreteDefImpl(Op, Visited, Depth+1))
1934  return false;
1935  }
1936  return true;
1937 }
1938 
1939 /// Return true if the given value is concrete. We must prove that undef can
1940 /// never reach it.
1941 ///
1942 /// TODO: If we decide that this is a good approach to checking for undef, we
1943 /// may factor it into a common location.
1944 static bool hasConcreteDef(Value *V) {
1945  SmallPtrSet<Value*, 8> Visited;
1946  Visited.insert(V);
1947  return hasConcreteDefImpl(V, Visited, 0);
1948 }
1949 
1950 /// Return true if this IV has any uses other than the (soon to be rewritten)
1951 /// loop exit test.
1952 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
1953  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
1954  Value *IncV = Phi->getIncomingValue(LatchIdx);
1955 
1956  for (User *U : Phi->users())
1957  if (U != Cond && U != IncV) return false;
1958 
1959  for (User *U : IncV->users())
1960  if (U != Cond && U != Phi) return false;
1961  return true;
1962 }
1963 
1964 /// Find an affine IV in canonical form.
1965 ///
1966 /// BECount may be an i8* pointer type. The pointer difference is already
1967 /// valid count without scaling the address stride, so it remains a pointer
1968 /// expression as far as SCEV is concerned.
1969 ///
1970 /// Currently only valid for LFTR. See the comments on hasConcreteDef below.
1971 ///
1972 /// FIXME: Accept -1 stride and set IVLimit = IVInit - BECount
1973 ///
1974 /// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride.
1975 /// This is difficult in general for SCEV because of potential overflow. But we
1976 /// could at least handle constant BECounts.
1977 static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount,
1978  ScalarEvolution *SE, DominatorTree *DT) {
1979  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
1980 
1981  Value *Cond =
1982  cast<BranchInst>(L->getExitingBlock()->getTerminator())->getCondition();
1983 
1984  // Loop over all of the PHI nodes, looking for a simple counter.
1985  PHINode *BestPhi = nullptr;
1986  const SCEV *BestInit = nullptr;
1987  BasicBlock *LatchBlock = L->getLoopLatch();
1988  assert(LatchBlock && "needsLFTR should guarantee a loop latch");
1989  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
1990 
1991  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1992  PHINode *Phi = cast<PHINode>(I);
1993  if (!SE->isSCEVable(Phi->getType()))
1994  continue;
1995 
1996  // Avoid comparing an integer IV against a pointer Limit.
1997  if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
1998  continue;
1999 
2000  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2001  if (!AR || AR->getLoop() != L || !AR->isAffine())
2002  continue;
2003 
2004  // AR may be a pointer type, while BECount is an integer type.
2005  // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
2006  // AR may not be a narrower type, or we may never exit.
2007  uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
2008  if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
2009  continue;
2010 
2011  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
2012  if (!Step || !Step->isOne())
2013  continue;
2014 
2015  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
2016  Value *IncV = Phi->getIncomingValue(LatchIdx);
2017  if (getLoopPhiForCounter(IncV, L, DT) != Phi)
2018  continue;
2019 
2020  // Avoid reusing a potentially undef value to compute other values that may
2021  // have originally had a concrete definition.
2022  if (!hasConcreteDef(Phi)) {
2023  // We explicitly allow unknown phis as long as they are already used by
2024  // the loop test. In this case we assume that performing LFTR could not
2025  // increase the number of undef users.
2026  if (ICmpInst *Cond = getLoopTest(L)) {
2027  if (Phi != getLoopPhiForCounter(Cond->getOperand(0), L, DT) &&
2028  Phi != getLoopPhiForCounter(Cond->getOperand(1), L, DT)) {
2029  continue;
2030  }
2031  }
2032  }
2033  const SCEV *Init = AR->getStart();
2034 
2035  if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
2036  // Don't force a live loop counter if another IV can be used.
2037  if (AlmostDeadIV(Phi, LatchBlock, Cond))
2038  continue;
2039 
2040  // Prefer to count-from-zero. This is a more "canonical" counter form. It
2041  // also prefers integer to pointer IVs.
2042  if (BestInit->isZero() != Init->isZero()) {
2043  if (BestInit->isZero())
2044  continue;
2045  }
2046  // If two IVs both count from zero or both count from nonzero then the
2047  // narrower is likely a dead phi that has been widened. Use the wider phi
2048  // to allow the other to be eliminated.
2049  else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
2050  continue;
2051  }
2052  BestPhi = Phi;
2053  BestInit = Init;
2054  }
2055  return BestPhi;
2056 }
2057 
2058 /// Help linearFunctionTestReplace by generating a value that holds the RHS of
2059 /// the new loop test.
2060 static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
2061  SCEVExpander &Rewriter, ScalarEvolution *SE) {
2062  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2063  assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter");
2064  const SCEV *IVInit = AR->getStart();
2065 
2066  // IVInit may be a pointer while IVCount is an integer when FindLoopCounter
2067  // finds a valid pointer IV. Sign extend BECount in order to materialize a
2068  // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
2069  // the existing GEPs whenever possible.
2070  if (IndVar->getType()->isPointerTy() && !IVCount->getType()->isPointerTy()) {
2071  // IVOffset will be the new GEP offset that is interpreted by GEP as a
2072  // signed value. IVCount on the other hand represents the loop trip count,
2073  // which is an unsigned value. FindLoopCounter only allows induction
2074  // variables that have a positive unit stride of one. This means we don't
2075  // have to handle the case of negative offsets (yet) and just need to zero
2076  // extend IVCount.
2077  Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
2078  const SCEV *IVOffset = SE->getTruncateOrZeroExtend(IVCount, OfsTy);
2079 
2080  // Expand the code for the iteration count.
2081  assert(SE->isLoopInvariant(IVOffset, L) &&
2082  "Computed iteration count is not loop invariant!");
2083  BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
2084  Value *GEPOffset = Rewriter.expandCodeFor(IVOffset, OfsTy, BI);
2085 
2086  Value *GEPBase = IndVar->getIncomingValueForBlock(L->getLoopPreheader());
2087  assert(AR->getStart() == SE->getSCEV(GEPBase) && "bad loop counter");
2088  // We could handle pointer IVs other than i8*, but we need to compensate for
2089  // gep index scaling. See canExpandBackedgeTakenCount comments.
2090  assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
2091  cast<PointerType>(GEPBase->getType())
2092  ->getElementType())->isOne() &&
2093  "unit stride pointer IV must be i8*");
2094 
2095  IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
2096  return Builder.CreateGEP(nullptr, GEPBase, GEPOffset, "lftr.limit");
2097  } else {
2098  // In any other case, convert both IVInit and IVCount to integers before
2099  // comparing. This may result in SCEV expansion of pointers, but in practice
2100  // SCEV will fold the pointer arithmetic away as such:
2101  // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
2102  //
2103  // Valid Cases: (1) both integers is most common; (2) both may be pointers
2104  // for simple memset-style loops.
2105  //
2106  // IVInit integer and IVCount pointer would only occur if a canonical IV
2107  // were generated on top of case #2, which is not expected.
2108 
2109  const SCEV *IVLimit = nullptr;
2110  // For unit stride, IVCount = Start + BECount with 2's complement overflow.
2111  // For non-zero Start, compute IVCount here.
2112  if (AR->getStart()->isZero())
2113  IVLimit = IVCount;
2114  else {
2115  assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
2116  const SCEV *IVInit = AR->getStart();
2117 
2118  // For integer IVs, truncate the IV before computing IVInit + BECount.
2119  if (SE->getTypeSizeInBits(IVInit->getType())
2120  > SE->getTypeSizeInBits(IVCount->getType()))
2121  IVInit = SE->getTruncateExpr(IVInit, IVCount->getType());
2122 
2123  IVLimit = SE->getAddExpr(IVInit, IVCount);
2124  }
2125  // Expand the code for the iteration count.
2126  BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
2127  IRBuilder<> Builder(BI);
2128  assert(SE->isLoopInvariant(IVLimit, L) &&
2129  "Computed iteration count is not loop invariant!");
2130  // Ensure that we generate the same type as IndVar, or a smaller integer
2131  // type. In the presence of null pointer values, we have an integer type
2132  // SCEV expression (IVInit) for a pointer type IV value (IndVar).
2133  Type *LimitTy = IVCount->getType()->isPointerTy() ?
2134  IndVar->getType() : IVCount->getType();
2135  return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
2136  }
2137 }
2138 
2139 /// This method rewrites the exit condition of the loop to be a canonical !=
2140 /// comparison against the incremented loop induction variable. This pass is
2141 /// able to rewrite the exit tests of any loop where the SCEV analysis can
2142 /// determine a loop-invariant trip count of the loop, which is actually a much
2143 /// broader range than just linear tests.
2144 Value *IndVarSimplify::
2145 linearFunctionTestReplace(Loop *L,
2146  const SCEV *BackedgeTakenCount,
2147  PHINode *IndVar,
2148  SCEVExpander &Rewriter) {
2149  assert(canExpandBackedgeTakenCount(L, SE, Rewriter) && "precondition");
2150 
2151  // Initialize CmpIndVar and IVCount to their preincremented values.
2152  Value *CmpIndVar = IndVar;
2153  const SCEV *IVCount = BackedgeTakenCount;
2154 
2155  // If the exiting block is the same as the backedge block, we prefer to
2156  // compare against the post-incremented value, otherwise we must compare
2157  // against the preincremented value.
2158  if (L->getExitingBlock() == L->getLoopLatch()) {
2159  // Add one to the "backedge-taken" count to get the trip count.
2160  // This addition may overflow, which is valid as long as the comparison is
2161  // truncated to BackedgeTakenCount->getType().
2162  IVCount = SE->getAddExpr(BackedgeTakenCount,
2163  SE->getOne(BackedgeTakenCount->getType()));
2164  // The BackedgeTaken expression contains the number of times that the
2165  // backedge branches to the loop header. This is one less than the
2166  // number of times the loop executes, so use the incremented indvar.
2167  CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
2168  }
2169 
2170  Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE);
2171  assert(ExitCnt->getType()->isPointerTy() ==
2172  IndVar->getType()->isPointerTy() &&
2173  "genLoopLimit missed a cast");
2174 
2175  // Insert a new icmp_ne or icmp_eq instruction before the branch.
2176  BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
2178  if (L->contains(BI->getSuccessor(0)))
2179  P = ICmpInst::ICMP_NE;
2180  else
2181  P = ICmpInst::ICMP_EQ;
2182 
2183  DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
2184  << " LHS:" << *CmpIndVar << '\n'
2185  << " op:\t"
2186  << (P == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
2187  << " RHS:\t" << *ExitCnt << "\n"
2188  << " IVCount:\t" << *IVCount << "\n");
2189 
2190  IRBuilder<> Builder(BI);
2191 
2192  // The new loop exit condition should reuse the debug location of the
2193  // original loop exit condition.
2194  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
2195  Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
2196 
2197  // LFTR can ignore IV overflow and truncate to the width of
2198  // BECount. This avoids materializing the add(zext(add)) expression.
2199  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
2200  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
2201  if (CmpIndVarSize > ExitCntSize) {
2202  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2203  const SCEV *ARStart = AR->getStart();
2204  const SCEV *ARStep = AR->getStepRecurrence(*SE);
2205  // For constant IVCount, avoid truncation.
2206  if (isa<SCEVConstant>(ARStart) && isa<SCEVConstant>(IVCount)) {
2207  const APInt &Start = cast<SCEVConstant>(ARStart)->getAPInt();
2208  APInt Count = cast<SCEVConstant>(IVCount)->getAPInt();
2209  // Note that the post-inc value of BackedgeTakenCount may have overflowed
2210  // above such that IVCount is now zero.
2211  if (IVCount != BackedgeTakenCount && Count == 0) {
2212  Count = APInt::getMaxValue(Count.getBitWidth()).zext(CmpIndVarSize);
2213  ++Count;
2214  }
2215  else
2216  Count = Count.zext(CmpIndVarSize);
2217  APInt NewLimit;
2218  if (cast<SCEVConstant>(ARStep)->getValue()->isNegative())
2219  NewLimit = Start - Count;
2220  else
2221  NewLimit = Start + Count;
2222  ExitCnt = ConstantInt::get(CmpIndVar->getType(), NewLimit);
2223 
2224  DEBUG(dbgs() << " Widen RHS:\t" << *ExitCnt << "\n");
2225  } else {
2226  // We try to extend trip count first. If that doesn't work we truncate IV.
2227  // Zext(trunc(IV)) == IV implies equivalence of the following two:
2228  // Trunc(IV) == ExitCnt and IV == zext(ExitCnt). Similarly for sext. If
2229  // one of the two holds, extend the trip count, otherwise we truncate IV.
2230  bool Extended = false;
2231  const SCEV *IV = SE->getSCEV(CmpIndVar);
2232  const SCEV *ZExtTrunc =
2233  SE->getZeroExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2234  ExitCnt->getType()),
2235  CmpIndVar->getType());
2236 
2237  if (ZExtTrunc == IV) {
2238  Extended = true;
2239  ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
2240  "wide.trip.count");
2241  } else {
2242  const SCEV *SExtTrunc =
2243  SE->getSignExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2244  ExitCnt->getType()),
2245  CmpIndVar->getType());
2246  if (SExtTrunc == IV) {
2247  Extended = true;
2248  ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
2249  "wide.trip.count");
2250  }
2251  }
2252 
2253  if (!Extended)
2254  CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
2255  "lftr.wideiv");
2256  }
2257  }
2258  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
2259  Value *OrigCond = BI->getCondition();
2260  // It's tempting to use replaceAllUsesWith here to fully replace the old
2261  // comparison, but that's not immediately safe, since users of the old
2262  // comparison may not be dominated by the new comparison. Instead, just
2263  // update the branch to use the new comparison; in the common case this
2264  // will make old comparison dead.
2265  BI->setCondition(Cond);
2266  DeadInsts.push_back(OrigCond);
2267 
2268  ++NumLFTR;
2269  Changed = true;
2270  return Cond;
2271 }
2272 
2273 //===----------------------------------------------------------------------===//
2274 // sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
2275 //===----------------------------------------------------------------------===//
2276 
2277 /// If there's a single exit block, sink any loop-invariant values that
2278 /// were defined in the preheader but not used inside the loop into the
2279 /// exit block to reduce register pressure in the loop.
2280 void IndVarSimplify::sinkUnusedInvariants(Loop *L) {
2281  BasicBlock *ExitBlock = L->getExitBlock();
2282  if (!ExitBlock) return;
2283 
2284  BasicBlock *Preheader = L->getLoopPreheader();
2285  if (!Preheader) return;
2286 
2287  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
2288  BasicBlock::iterator I(Preheader->getTerminator());
2289  while (I != Preheader->begin()) {
2290  --I;
2291  // New instructions were inserted at the end of the preheader.
2292  if (isa<PHINode>(I))
2293  break;
2294 
2295  // Don't move instructions which might have side effects, since the side
2296  // effects need to complete before instructions inside the loop. Also don't
2297  // move instructions which might read memory, since the loop may modify
2298  // memory. Note that it's okay if the instruction might have undefined
2299  // behavior: LoopSimplify guarantees that the preheader dominates the exit
2300  // block.
2301  if (I->mayHaveSideEffects() || I->mayReadFromMemory())
2302  continue;
2303 
2304  // Skip debug info intrinsics.
2305  if (isa<DbgInfoIntrinsic>(I))
2306  continue;
2307 
2308  // Skip eh pad instructions.
2309  if (I->isEHPad())
2310  continue;
2311 
2312  // Don't sink alloca: we never want to sink static alloca's out of the
2313  // entry block, and correctly sinking dynamic alloca's requires
2314  // checks for stacksave/stackrestore intrinsics.
2315  // FIXME: Refactor this check somehow?
2316  if (isa<AllocaInst>(I))
2317  continue;
2318 
2319  // Determine if there is a use in or before the loop (direct or
2320  // otherwise).
2321  bool UsedInLoop = false;
2322  for (Use &U : I->uses()) {
2323  Instruction *User = cast<Instruction>(U.getUser());
2324  BasicBlock *UseBB = User->getParent();
2325  if (PHINode *P = dyn_cast<PHINode>(User)) {
2326  unsigned i =
2327  PHINode::getIncomingValueNumForOperand(U.getOperandNo());
2328  UseBB = P->getIncomingBlock(i);
2329  }
2330  if (UseBB == Preheader || L->contains(UseBB)) {
2331  UsedInLoop = true;
2332  break;
2333  }
2334  }
2335 
2336  // If there is, the def must remain in the preheader.
2337  if (UsedInLoop)
2338  continue;
2339 
2340  // Otherwise, sink it to the exit block.
2341  Instruction *ToMove = &*I;
2342  bool Done = false;
2343 
2344  if (I != Preheader->begin()) {
2345  // Skip debug info intrinsics.
2346  do {
2347  --I;
2348  } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
2349 
2350  if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
2351  Done = true;
2352  } else {
2353  Done = true;
2354  }
2355 
2356  ToMove->moveBefore(*ExitBlock, InsertPt);
2357  if (Done) break;
2358  InsertPt = ToMove->getIterator();
2359  }
2360 }
2361 
2362 //===----------------------------------------------------------------------===//
2363 // IndVarSimplify driver. Manage several subpasses of IV simplification.
2364 //===----------------------------------------------------------------------===//
2365 
2366 bool IndVarSimplify::run(Loop *L) {
2367  // We need (and expect!) the incoming loop to be in LCSSA.
2368  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2369  "LCSSA required to run indvars!");
2370 
2371  // If LoopSimplify form is not available, stay out of trouble. Some notes:
2372  // - LSR currently only supports LoopSimplify-form loops. Indvars'
2373  // canonicalization can be a pessimization without LSR to "clean up"
2374  // afterwards.
2375  // - We depend on having a preheader; in particular,
2376  // Loop::getCanonicalInductionVariable only supports loops with preheaders,
2377  // and we're in trouble if we can't find the induction variable even when
2378  // we've manually inserted one.
2379  if (!L->isLoopSimplifyForm())
2380  return false;
2381 
2382  // If there are any floating-point recurrences, attempt to
2383  // transform them to use integer recurrences.
2384  rewriteNonIntegerIVs(L);
2385 
2386  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
2387 
2388  // Create a rewriter object which we'll use to transform the code with.
2389  SCEVExpander Rewriter(*SE, DL, "indvars");
2390 #ifndef NDEBUG
2391  Rewriter.setDebugType(DEBUG_TYPE);
2392 #endif
2393 
2394  // Eliminate redundant IV users.
2395  //
2396  // Simplification works best when run before other consumers of SCEV. We
2397  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
2398  // other expressions involving loop IVs have been evaluated. This helps SCEV
2399  // set no-wrap flags before normalizing sign/zero extension.
2400  Rewriter.disableCanonicalMode();
2401  simplifyAndExtend(L, Rewriter, LI);
2402 
2403  // Check to see if this loop has a computable loop-invariant execution count.
2404  // If so, this means that we can compute the final value of any expressions
2405  // that are recurrent in the loop, and substitute the exit values from the
2406  // loop into any instructions outside of the loop that use the final values of
2407  // the current expressions.
2408  //
2409  if (ReplaceExitValue != NeverRepl &&
2410  !isa<SCEVCouldNotCompute>(BackedgeTakenCount))
2411  rewriteLoopExitValues(L, Rewriter);
2412 
2413  // Eliminate redundant IV cycles.
2414  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
2415 
2416  // If we have a trip count expression, rewrite the loop's exit condition
2417  // using it. We can currently only handle loops with a single exit.
2418  if (canExpandBackedgeTakenCount(L, SE, Rewriter) && needsLFTR(L, DT)) {
2419  PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT);
2420  if (IndVar) {
2421  // Check preconditions for proper SCEVExpander operation. SCEV does not
2422  // express SCEVExpander's dependencies, such as LoopSimplify. Instead any
2423  // pass that uses the SCEVExpander must do it. This does not work well for
2424  // loop passes because SCEVExpander makes assumptions about all loops,
2425  // while LoopPassManager only forces the current loop to be simplified.
2426  //
2427  // FIXME: SCEV expansion has no way to bail out, so the caller must
2428  // explicitly check any assumptions made by SCEV. Brittle.
2429  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount);
2430  if (!AR || AR->getLoop()->getLoopPreheader())
2431  (void)linearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
2432  Rewriter);
2433  }
2434  }
2435  // Clear the rewriter cache, because values that are in the rewriter's cache
2436  // can be deleted in the loop below, causing the AssertingVH in the cache to
2437  // trigger.
2438  Rewriter.clear();
2439 
2440  // Now that we're done iterating through lists, clean up any instructions
2441  // which are now dead.
2442  while (!DeadInsts.empty())
2443  if (Instruction *Inst =
2444  dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
2446 
2447  // The Rewriter may not be used from this point on.
2448 
2449  // Loop-invariant instructions in the preheader that aren't used in the
2450  // loop may be sunk below the loop to reduce register pressure.
2451  sinkUnusedInvariants(L);
2452 
2453  // rewriteFirstIterationLoopExitValues does not rely on the computation of
2454  // trip count and therefore can further simplify exit values in addition to
2455  // rewriteLoopExitValues.
2456  rewriteFirstIterationLoopExitValues(L);
2457 
2458  // Clean up dead instructions.
2459  Changed |= DeleteDeadPHIs(L->getHeader(), TLI);
2460 
2461  // Check a post-condition.
2462  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2463  "Indvars did not preserve LCSSA!");
2464 
2465  // Verify that LFTR, and any other change have not interfered with SCEV's
2466  // ability to compute trip count.
2467 #ifndef NDEBUG
2468  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
2469  SE->forgetLoop(L);
2470  const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
2471  if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
2472  SE->getTypeSizeInBits(NewBECount->getType()))
2473  NewBECount = SE->getTruncateOrNoop(NewBECount,
2474  BackedgeTakenCount->getType());
2475  else
2476  BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
2477  NewBECount->getType());
2478  assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV");
2479  }
2480 #endif
2481 
2482  return Changed;
2483 }
2484 
2485 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2487  LPMUpdater &) {
2488  Function *F = L.getHeader()->getParent();
2489  const DataLayout &DL = F->getParent()->getDataLayout();
2490 
2491  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI);
2492  if (!IVS.run(&L))
2493  return PreservedAnalyses::all();
2494 
2495  // FIXME: This should also 'preserve the CFG'.
2497 }
2498 
2499 namespace {
2500 struct IndVarSimplifyLegacyPass : public LoopPass {
2501  static char ID; // Pass identification, replacement for typeid
2502  IndVarSimplifyLegacyPass() : LoopPass(ID) {
2504  }
2505 
2506  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
2507  if (skipLoop(L))
2508  return false;
2509 
2510  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2511  auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2512  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2513  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2514  auto *TLI = TLIP ? &TLIP->getTLI() : nullptr;
2515  auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
2516  auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
2517  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2518 
2519  IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI);
2520  return IVS.run(L);
2521  }
2522 
2523  void getAnalysisUsage(AnalysisUsage &AU) const override {
2524  AU.setPreservesCFG();
2526  }
2527 };
2528 }
2529 
2531 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
2532  "Induction Variable Simplification", false, false)
2534 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
2535  "Induction Variable Simplification", false, false)
2536 
2538  return new IndVarSimplifyLegacyPass();
2539 }
MachineLoop * L
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:81
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:76
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:102
bool hoistIVInc(Instruction *IncV, Instruction *InsertPos)
Utility for hoisting an IV increment.
Induction Variable Simplification
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:840
Induction Variable false
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:64
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
iterator_range< use_iterator > uses()
Definition: Value.h:326
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Value * getExactExistingExpansion(const SCEV *S, const Instruction *At, Loop *L)
Try to find existing LLVM IR value for S available at the point At.
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT)
Perform a quick domtree based check for loop invariance assuming that V is used within the loop...
bool isOne() const
Return true if the expression is a constant one.
STATISTIC(NumFunctions,"Total number of functions")
size_t i
This header provides classes for managing a pipeline of passes over loops in LLVM IR...
This is the interface for a simple mod/ref and alias analysis over globals.
bool isZero() const
Return true if the expression is a constant zero.
unsigned getNumOperands() const
Definition: User.h:167
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property...
Definition: Operator.h:104
The main scalar evolution driver.
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE, etc.
Definition: InstrTypes.h:984
bool isHighCostExpansion(const SCEV *Expr, Loop *L, const Instruction *At=nullptr)
Return true for expressions that may incur non-trivial cost to evaluate at runtime.
bool mayHaveSideEffects() const
Return true if the instruction may have side effects.
Definition: Instruction.h:450
bool isSigned() const
Determine if this instruction is using a signed comparison.
Definition: InstrTypes.h:1027
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:886
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:896
void setDebugType(const char *s)
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LoopT * getParentLoop() const
Definition: LoopInfo.h:103
bool hasLoopInvariantOperands(const Instruction *I) const
Return true if all the operands of the specified instruction are loop invariant.
Definition: LoopInfo.cpp:61
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
static cl::opt< bool > UsePostIncrementRanges("indvars-post-increment-ranges", cl::Hidden, cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), cl::init(true))
Hexagon Common GEP
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:575
static ConstantRange makeAllowedICmpRegion(CmpInst::Predicate Pred, const ConstantRange &Other)
Produce the smallest range such that all values that may satisfy the given predicate with any value c...
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:79
static bool hasConcreteDef(Value *V)
Return true if the given value is concrete.
const std::vector< BlockT * > & getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:139
ReplaceExitVal
BlockT * getHeader() const
Definition: LoopInfo.h:102
const SCEV * getStart() const
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:191
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:157
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:345
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:228
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:891
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:41
static Value * genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L, SCEVExpander &Rewriter, ScalarEvolution *SE)
Help linearFunctionTestReplace by generating a value that holds the RHS of the new loop test...
Interface for visiting interesting IV users that are recognized but not simplified by this utility...
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:53
This is the interface for a SCEV-based alias analysis.
INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass,"indvars","Induction Variable Simplification", false, false) INITIALIZE_PASS_END(IndVarSimplifyLegacyPass
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:578
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal)
Convert APF to an integer, if possible.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:32
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:588
bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, SmallVectorImpl< WeakVH > &Dead, IVVisitor *V=nullptr)
simplifyUsersOfIV - Simplify instructions that use this induction variable by using ScalarEvolution t...
static StringRef getName(Value *V)
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:887
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:36
uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:55
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
Definition: LoopInfo.cpp:190
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:60
user_iterator_impl< User > user_iterator
Definition: Value.h:340
#define F(x, y, z)
Definition: MD5.cpp:51
int getMaxValue(MCInstrInfo const &MCII, MCInst const &MCI)
bool mayReadFromMemory() const
Return true if this instruction may read memory.
This node represents a polynomial recurrence on the trip count of the specified loop.
Value handle that is nullable, but tries to track the Value.
Definition: ValueHandle.h:134
This instruction compares its operands according to the predicate given to the constructor.
static Instruction * getInsertPointForUses(Instruction *User, Value *Def, DominatorTree *DT, LoopInfo *LI)
Determine the insertion point for this user.
static cl::opt< bool > VerifyIndvars("verify-indvars", cl::Hidden, cl::desc("Verify the ScalarEvolution result after running indvars"))
BasicBlock * getSuccessor(unsigned i) const
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Examine each PHI in the given block and delete it if it is dead.
const SCEV * getSizeOfExpr(Type *IntTy, Type *AllocTy)
Return an expression for sizeof AllocTy that is type IntTy.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:401
Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:263
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:96
static BinaryOperator * CreateAdd(Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp)
static GCRegistry::Add< CoreCLRGC > E("coreclr","CoreCLR-compatible GC")
unsigned getNumIncomingValues() const
Return the number of incoming edges.
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:143
void initializeIndVarSimplifyLegacyPassPass(PassRegistry &)
void clearInsertPoint()
Clear the current insertion point.
void clear()
Erase the contents of the InsertedExpressions map so that users trying to expand the same expression ...
ValuesClass values(OptsTy...Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:615
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:395
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values...
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:107
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt...
Definition: PatternMatch.h:180
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:256
ConstantRange intersectWith(const ConstantRange &CR) const
Return the range that results from the intersection of this range with another range.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:109
static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE, SCEVExpander &Rewriter)
Return true if this loop's backedge taken count expression can be safely and cheaply expanded into an...
LLVM Basic Block Representation.
Definition: BasicBlock.h:51
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
Type * getType() const
Return the LLVM type of this SCEV expression.
Conditional or Unconditional Branch instruction.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define H(x, y, z)
Definition: MD5.cpp:53
APInt Or(const APInt &LHS, const APInt &RHS)
Bitwise OR function for APInt.
Definition: APInt.h:1947
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:269
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE, const TargetTransformInfo *TTI)
Update information about the induction variable that is extended by this sign or zero extend operatio...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:368
APInt Xor(const APInt &LHS, const APInt &RHS)
Bitwise XOR function for APInt.
Definition: APInt.h:1952
bool isRecursivelyLCSSAForm(DominatorTree &DT, const LoopInfo &LI) const
Return true if this Loop and all inner subloops are in LCSSA form.
Definition: LoopInfo.cpp:181
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:322
Represent the analysis usage information of a pass.
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:109
bool any_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:743
This instruction compares its operands according to the predicate given to the constructor.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:880
Utility class for integer arithmetic operators which may exhibit overflow - Add, Sub, and Mul.
Definition: Operator.h:75
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,"Assign register bank of generic virtual registers", false, false) RegBankSelect
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1255
Value * expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I)
Insert code to directly compute the specified SCEV expression into the program.
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:52
Value * getOperand(unsigned i) const
Definition: User.h:145
op_range operands()
Definition: User.h:213
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B)
findNearestCommonDominator - Find nearest common dominator basic block for basic block A and B...
self_iterator getIterator()
Definition: ilist_node.h:81
Class to represent integer types.
Definition: DerivedTypes.h:39
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:960
opStatus convertToInteger(integerPart *Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:986
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:213
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1337
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr)
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:355
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:113
void getUniqueExitBlocks(SmallVectorImpl< BasicBlock * > &ExitBlocks) const
Return all unique successor blocks of this loop.
Definition: LoopInfo.cpp:358
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:654
static ICmpInst * getLoopTest(Loop *L)
Return the compare guarding the loop latch, or NULL for unrecognized tests.
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:895
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1298
signed greater than
Definition: InstrTypes.h:907
#define DEBUG_TYPE
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:218
bool isConditional() const
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:884
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
unsigned replaceCongruentIVs(Loop *L, const DominatorTree *DT, SmallVectorImpl< WeakVH > &DeadInsts, const TargetTransformInfo *TTI=nullptr)
replace congruent phis with their most canonical representative.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Iterator for intrusive lists based on ilist_node.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:425
constexpr bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:274
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
const SCEV * getTruncateExpr(const SCEV *Op, Type *Ty)
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:58
bool isLCSSAForm(DominatorTree &DT) const
Return true if the Loop is in LCSSA form.
Definition: LoopInfo.cpp:174
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:894
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:843
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:230
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:59
Provides information about what library functions are available for the current target.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:27
This class represents a range of values.
Definition: ConstantRange.h:45
signed less than
Definition: InstrTypes.h:909
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:382
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:558
static PHINode * getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT)
Return the loop header phi IFF IncV adds a loop invariant value to the phi.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:276
Value handle that asserts if the Value is deleted.
Definition: ValueHandle.h:182
static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI)
This IV user cannot be widen.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
Definition: PatternMatch.h:569
static GCRegistry::Add< ShadowStackGC > C("shadow-stack","Very portable GC for uncooperative code generators")
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:586
signed less or equal
Definition: InstrTypes.h:910
Value * CreateGEP(Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition: IRBuilder.h:1141
Class for arbitrary precision integers.
Definition: APInt.h:77
Value * getIncomingValueForBlock(const BasicBlock *BB) const
iterator_range< user_iterator > users()
Definition: Value.h:370
This class uses information about analyze scalars to rewrite expressions in canonical form...
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Get a canonical add expression, or something simpler if possible.
APInt And(const APInt &LHS, const APInt &RHS)
Bitwise AND function for APInt.
Definition: APInt.h:1942
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:590
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:453
Virtual Register Rewriter
Definition: VirtRegMap.cpp:194
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:384
Value * getCondition() const
void emplace_back(ArgTypes &&...Args)
Definition: SmallVector.h:635
This class represents an analyzed expression in the program.
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:169
virtual void visitCast(CastInst *Cast)=0
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:368
#define I(x, y, z)
Definition: MD5.cpp:54
TerminatorInst * getTerminator()
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:124
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:135
bool hasOneUse() const
Return true if there is exactly one user of this value.
Definition: Value.h:383
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:938
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:888
static bool needsLFTR(Loop *L, DominatorTree *DT)
linearFunctionTestReplace policy.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:287
const Loop * getLoop() const
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:892
const APFloat & getValueAPF() const
Definition: Constants.h:300
const SCEV * getBackedgeTakenCount(const Loop *L)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
bool use_empty() const
Definition: Value.h:299
This class represents a cast from signed integer to floating point.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
user_iterator user_begin()
Definition: Value.h:346
unsigned getSCEVType() const
static cl::opt< ReplaceExitVal > ReplaceExitValue("replexitval", cl::Hidden, cl::init(OnlyCheapRepl), cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), cl::values(clEnumValN(NeverRepl,"never","never replace exit value"), clEnumValN(OnlyCheapRepl,"cheap","only replace exit value when the cost is cheap"), clEnumValN(AlwaysRepl,"always","always replace exit value whenever possible")))
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:883
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:537
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction has no side ef...
Definition: Local.cpp:288
LLVM Value Representation.
Definition: Value.h:71
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:893
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:111
static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond)
Return true if this IV has any uses other than the (soon to be rewritten) loop exit test...
static const Function * getParent(const Value *V)
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:95
#define DEBUG(X)
Definition: Debug.h:100
void disableCanonicalMode()
Disable the behavior of expanding expressions in canonical form rather than in a more literal form...
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:980
A container for analyses that lazily runs them and caches their results.
const SCEV * getTruncateOrZeroExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
This pass exposes codegen information to IR-level passes.
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU...
Definition: DataLayout.h:242
iterator getFirstInsertionPt()
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:209
void setIncomingValue(unsigned i, Value *V)
bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE)
Return true if the given expression is safe to expand in the sense that all materialized values are s...
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:885
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl< Value * > &Visited, unsigned Depth)
Recursive helper for hasConcreteDef().
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
const BasicBlock * getParent() const
Definition: Instruction.h:62
Pass * createIndVarSimplifyPass()
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property...
Definition: Operator.h:98
int getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info=OK_AnyValue, OperandValueKind Opd2Info=OK_AnyValue, OperandValueProperties Opd1PropInfo=OP_None, OperandValueProperties Opd2PropInfo=OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >()) const
signed greater or equal
Definition: InstrTypes.h:908
IntegerType * Int32Ty
This class represents a constant integer value.
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
Definition: PatternMatch.h:726
user_iterator user_end()
Definition: Value.h:354
static PHINode * FindLoopCounter(Loop *L, const SCEV *BECount, ScalarEvolution *SE, DominatorTree *DT)
Find an affine IV in canonical form.