// IndVarSimplify.cpp — LLVM 7.0.0svn
// (Captured from the doxygen "Go to the documentation of this file" export.)
//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value. This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable. If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
//
//===----------------------------------------------------------------------===//
26 
#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>
86 
87 using namespace llvm;
88 
89 #define DEBUG_TYPE "indvars"
90 
91 STATISTIC(NumWidened , "Number of indvars widened");
92 STATISTIC(NumReplaced , "Number of exit values replaced");
93 STATISTIC(NumLFTR , "Number of loop exit tests replaced");
94 STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
95 STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
96 
97 // Trip count verification can be enabled by default under NDEBUG if we
98 // implement a strong expression equivalence checker in SCEV. Until then, we
99 // use the verify-indvars flag, which may assert in some cases.
101  "verify-indvars", cl::Hidden,
102  cl::desc("Verify the ScalarEvolution result after running indvars"));
103 
105 
107  "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
108  cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
109  cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
110  clEnumValN(OnlyCheapRepl, "cheap",
111  "only replace exit value when the cost is cheap"),
112  clEnumValN(AlwaysRepl, "always",
113  "always replace exit value whenever possible")));
114 
116  "indvars-post-increment-ranges", cl::Hidden,
117  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
118  cl::init(true));
119 
120 static cl::opt<bool>
121 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
122  cl::desc("Disable Linear Function Test Replace optimization"));
123 
124 namespace {
125 
126 struct RewritePhi;
127 
128 class IndVarSimplify {
129  LoopInfo *LI;
130  ScalarEvolution *SE;
131  DominatorTree *DT;
132  const DataLayout &DL;
133  TargetLibraryInfo *TLI;
134  const TargetTransformInfo *TTI;
135 
137  bool Changed = false;
138 
139  bool isValidRewrite(Value *FromVal, Value *ToVal);
140 
141  void handleFloatingPointIV(Loop *L, PHINode *PH);
142  void rewriteNonIntegerIVs(Loop *L);
143 
144  void simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
145 
146  bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
147  void rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
148  void rewriteFirstIterationLoopExitValues(Loop *L);
149 
150  Value *linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
151  PHINode *IndVar, SCEVExpander &Rewriter);
152 
153  void sinkUnusedInvariants(Loop *L);
154 
155  Value *expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, Loop *L,
156  Instruction *InsertPt, Type *Ty);
157 
158 public:
159  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
160  const DataLayout &DL, TargetLibraryInfo *TLI,
161  TargetTransformInfo *TTI)
162  : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {}
163 
164  bool run(Loop *L);
165 };
166 
167 } // end anonymous namespace
168 
169 /// Return true if the SCEV expansion generated by the rewriter can replace the
170 /// original value. SCEV guarantees that it produces the same value, but the way
171 /// it is produced may be illegal IR. Ideally, this function will only be
172 /// called for verification.
173 bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
174  // If an SCEV expression subsumed multiple pointers, its expansion could
175  // reassociate the GEP changing the base pointer. This is illegal because the
176  // final address produced by a GEP chain must be inbounds relative to its
177  // underlying object. Otherwise basic alias analysis, among other things,
178  // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
179  // producing an expression involving multiple pointers. Until then, we must
180  // bail out here.
181  //
182  // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
183  // because it understands lcssa phis while SCEV does not.
184  Value *FromPtr = FromVal;
185  Value *ToPtr = ToVal;
186  if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
187  FromPtr = GEP->getPointerOperand();
188  }
189  if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
190  ToPtr = GEP->getPointerOperand();
191  }
192  if (FromPtr != FromVal || ToPtr != ToVal) {
193  // Quickly check the common case
194  if (FromPtr == ToPtr)
195  return true;
196 
197  // SCEV may have rewritten an expression that produces the GEP's pointer
198  // operand. That's ok as long as the pointer operand has the same base
199  // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
200  // base of a recurrence. This handles the case in which SCEV expansion
201  // converts a pointer type recurrence into a nonrecurrent pointer base
202  // indexed by an integer recurrence.
203 
204  // If the GEP base pointer is a vector of pointers, abort.
205  if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
206  return false;
207 
208  const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
209  const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
210  if (FromBase == ToBase)
211  return true;
212 
213  DEBUG(dbgs() << "INDVARS: GEP rewrite bail out "
214  << *FromBase << " != " << *ToBase << "\n");
215 
216  return false;
217  }
218  return true;
219 }
220 
221 /// Determine the insertion point for this user. By default, insert immediately
222 /// before the user. SCEVExpander or LICM will hoist loop invariants out of the
223 /// loop. For PHI nodes, there may be multiple uses, so compute the nearest
224 /// common dominator for the incoming blocks.
226  DominatorTree *DT, LoopInfo *LI) {
227  PHINode *PHI = dyn_cast<PHINode>(User);
228  if (!PHI)
229  return User;
230 
231  Instruction *InsertPt = nullptr;
232  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
233  if (PHI->getIncomingValue(i) != Def)
234  continue;
235 
236  BasicBlock *InsertBB = PHI->getIncomingBlock(i);
237  if (!InsertPt) {
238  InsertPt = InsertBB->getTerminator();
239  continue;
240  }
241  InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
242  InsertPt = InsertBB->getTerminator();
243  }
244  assert(InsertPt && "Missing phi operand");
245 
246  auto *DefI = dyn_cast<Instruction>(Def);
247  if (!DefI)
248  return InsertPt;
249 
250  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");
251 
252  auto *L = LI->getLoopFor(DefI->getParent());
253  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));
254 
255  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
256  if (LI->getLoopFor(DTN->getBlock()) == L)
257  return DTN->getBlock()->getTerminator();
258 
259  llvm_unreachable("DefI dominates InsertPt!");
260 }
261 
262 //===----------------------------------------------------------------------===//
263 // rewriteNonIntegerIVs and helpers. Prefer integer IVs.
264 //===----------------------------------------------------------------------===//
265 
266 /// Convert APF to an integer, if possible.
267 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
268  bool isExact = false;
269  // See if we can convert this to an int64_t
270  uint64_t UIntVal;
271  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
272  APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
273  !isExact)
274  return false;
275  IntVal = UIntVal;
276  return true;
277 }
278 
279 /// If the loop has floating induction variable then insert corresponding
280 /// integer induction variable if possible.
281 /// For example,
282 /// for(double i = 0; i < 10000; ++i)
283 /// bar(i)
284 /// is converted into
285 /// for(int i = 0; i < 10000; ++i)
286 /// bar((double)i);
287 void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
288  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
289  unsigned BackEdge = IncomingEdge^1;
290 
291  // Check incoming value.
292  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
293 
294  int64_t InitValue;
295  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
296  return;
297 
298  // Check IV increment. Reject this PN if increment operation is not
299  // an add or increment value can not be represented by an integer.
300  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
301  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return;
302 
303  // If this is not an add of the PHI with a constantfp, or if the constant fp
304  // is not an integer, bail out.
305  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
306  int64_t IncValue;
307  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
308  !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
309  return;
310 
311  // Check Incr uses. One user is PN and the other user is an exit condition
312  // used by the conditional terminator.
313  Value::user_iterator IncrUse = Incr->user_begin();
314  Instruction *U1 = cast<Instruction>(*IncrUse++);
315  if (IncrUse == Incr->user_end()) return;
316  Instruction *U2 = cast<Instruction>(*IncrUse++);
317  if (IncrUse != Incr->user_end()) return;
318 
319  // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
320  // only used by a branch, we can't transform it.
322  if (!Compare)
323  Compare = dyn_cast<FCmpInst>(U2);
324  if (!Compare || !Compare->hasOneUse() ||
325  !isa<BranchInst>(Compare->user_back()))
326  return;
327 
328  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
329 
330  // We need to verify that the branch actually controls the iteration count
331  // of the loop. If not, the new IV can overflow and no one will notice.
332  // The branch block must be in the loop and one of the successors must be out
333  // of the loop.
334  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
335  if (!L->contains(TheBr->getParent()) ||
336  (L->contains(TheBr->getSuccessor(0)) &&
337  L->contains(TheBr->getSuccessor(1))))
338  return;
339 
340  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
341  // transform it.
342  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
343  int64_t ExitValue;
344  if (ExitValueVal == nullptr ||
345  !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
346  return;
347 
348  // Find new predicate for integer comparison.
350  switch (Compare->getPredicate()) {
351  default: return; // Unknown comparison.
352  case CmpInst::FCMP_OEQ:
353  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
354  case CmpInst::FCMP_ONE:
355  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
356  case CmpInst::FCMP_OGT:
357  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
358  case CmpInst::FCMP_OGE:
359  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
360  case CmpInst::FCMP_OLT:
361  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
362  case CmpInst::FCMP_OLE:
363  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
364  }
365 
366  // We convert the floating point induction variable to a signed i32 value if
367  // we can. This is only safe if the comparison will not overflow in a way
368  // that won't be trapped by the integer equivalent operations. Check for this
369  // now.
370  // TODO: We could use i64 if it is native and the range requires it.
371 
372  // The start/stride/exit values must all fit in signed i32.
373  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
374  return;
375 
376  // If not actually striding (add x, 0.0), avoid touching the code.
377  if (IncValue == 0)
378  return;
379 
380  // Positive and negative strides have different safety conditions.
381  if (IncValue > 0) {
382  // If we have a positive stride, we require the init to be less than the
383  // exit value.
384  if (InitValue >= ExitValue)
385  return;
386 
387  uint32_t Range = uint32_t(ExitValue-InitValue);
388  // Check for infinite loop, either:
389  // while (i <= Exit) or until (i > Exit)
390  if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
391  if (++Range == 0) return; // Range overflows.
392  }
393 
394  unsigned Leftover = Range % uint32_t(IncValue);
395 
396  // If this is an equality comparison, we require that the strided value
397  // exactly land on the exit value, otherwise the IV condition will wrap
398  // around and do things the fp IV wouldn't.
399  if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
400  Leftover != 0)
401  return;
402 
403  // If the stride would wrap around the i32 before exiting, we can't
404  // transform the IV.
405  if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
406  return;
407  } else {
408  // If we have a negative stride, we require the init to be greater than the
409  // exit value.
410  if (InitValue <= ExitValue)
411  return;
412 
413  uint32_t Range = uint32_t(InitValue-ExitValue);
414  // Check for infinite loop, either:
415  // while (i >= Exit) or until (i < Exit)
416  if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
417  if (++Range == 0) return; // Range overflows.
418  }
419 
420  unsigned Leftover = Range % uint32_t(-IncValue);
421 
422  // If this is an equality comparison, we require that the strided value
423  // exactly land on the exit value, otherwise the IV condition will wrap
424  // around and do things the fp IV wouldn't.
425  if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
426  Leftover != 0)
427  return;
428 
429  // If the stride would wrap around the i32 before exiting, we can't
430  // transform the IV.
431  if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
432  return;
433  }
434 
436 
437  // Insert new integer induction variable.
438  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
439  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
440  PN->getIncomingBlock(IncomingEdge));
441 
442  Value *NewAdd =
443  BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
444  Incr->getName()+".int", Incr);
445  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
446 
447  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
448  ConstantInt::get(Int32Ty, ExitValue),
449  Compare->getName());
450 
451  // In the following deletions, PN may become dead and may be deleted.
452  // Use a WeakTrackingVH to observe whether this happens.
453  WeakTrackingVH WeakPH = PN;
454 
455  // Delete the old floating point exit comparison. The branch starts using the
456  // new comparison.
457  NewCompare->takeName(Compare);
458  Compare->replaceAllUsesWith(NewCompare);
460 
461  // Delete the old floating point increment.
462  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
464 
465  // If the FP induction variable still has uses, this is because something else
466  // in the loop uses its value. In order to canonicalize the induction
467  // variable, we chose to eliminate the IV and rewrite it in terms of an
468  // int->fp cast.
469  //
470  // We give preference to sitofp over uitofp because it is faster on most
471  // platforms.
472  if (WeakPH) {
473  Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
474  &*PN->getParent()->getFirstInsertionPt());
475  PN->replaceAllUsesWith(Conv);
477  }
478  Changed = true;
479 }
480 
481 void IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
482  // First step. Check to see if there are any floating-point recurrences.
483  // If there are, change them into integer recurrences, permitting analysis by
484  // the SCEV routines.
485  BasicBlock *Header = L->getHeader();
486 
488  for (PHINode &PN : Header->phis())
489  PHIs.push_back(&PN);
490 
491  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
492  if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
493  handleFloatingPointIV(L, PN);
494 
495  // If the loop previously had floating-point IV, ScalarEvolution
496  // may not have been able to compute a trip count. Now that we've done some
497  // re-writing, the trip count may be computable.
498  if (Changed)
499  SE->forgetLoop(L);
500 }
501 
502 namespace {
503 
504 // Collect information about PHI nodes which can be transformed in
505 // rewriteLoopExitValues.
506 struct RewritePhi {
507  PHINode *PN;
508 
509  // Ith incoming value.
510  unsigned Ith;
511 
512  // Exit value after expansion.
513  Value *Val;
514 
515  // High Cost when expansion.
516  bool HighCost;
517 
518  RewritePhi(PHINode *P, unsigned I, Value *V, bool H)
519  : PN(P), Ith(I), Val(V), HighCost(H) {}
520 };
521 
522 } // end anonymous namespace
523 
524 Value *IndVarSimplify::expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S,
525  Loop *L, Instruction *InsertPt,
526  Type *ResultTy) {
527  // Before expanding S into an expensive LLVM expression, see if we can use an
528  // already existing value as the expansion for S.
529  if (Value *ExistingValue = Rewriter.getExactExistingExpansion(S, InsertPt, L))
530  if (ExistingValue->getType() == ResultTy)
531  return ExistingValue;
532 
533  // We didn't find anything, fall back to using SCEVExpander.
534  return Rewriter.expandCodeFor(S, ResultTy, InsertPt);
535 }
536 
537 //===----------------------------------------------------------------------===//
538 // rewriteLoopExitValues - Optimize IV users outside the loop.
539 // As a side effect, reduces the amount of IV processing within the loop.
540 //===----------------------------------------------------------------------===//
541 
542 /// Check to see if this loop has a computable loop-invariant execution count.
543 /// If so, this means that we can compute the final value of any expressions
544 /// that are recurrent in the loop, and substitute the exit values from the loop
545 /// into any instructions outside of the loop that use the final values of the
546 /// current expressions.
547 ///
548 /// This is mostly redundant with the regular IndVarSimplify activities that
549 /// happen later, except that it's more powerful in some cases, because it's
550 /// able to brute-force evaluate arbitrary instructions as long as they have
551 /// constant operands at the beginning of the loop.
552 void IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
553  // Check a pre-condition.
554  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
555  "Indvars did not preserve LCSSA!");
556 
557  SmallVector<BasicBlock*, 8> ExitBlocks;
558  L->getUniqueExitBlocks(ExitBlocks);
559 
560  SmallVector<RewritePhi, 8> RewritePhiSet;
561  // Find all values that are computed inside the loop, but used outside of it.
562  // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan
563  // the exit blocks of the loop to find them.
564  for (BasicBlock *ExitBB : ExitBlocks) {
565  // If there are no PHI nodes in this exit block, then no values defined
566  // inside the loop are used on this path, skip it.
567  PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
568  if (!PN) continue;
569 
570  unsigned NumPreds = PN->getNumIncomingValues();
571 
572  // Iterate over all of the PHI nodes.
573  BasicBlock::iterator BBI = ExitBB->begin();
574  while ((PN = dyn_cast<PHINode>(BBI++))) {
575  if (PN->use_empty())
576  continue; // dead use, don't replace it
577 
578  if (!SE->isSCEVable(PN->getType()))
579  continue;
580 
581  // It's necessary to tell ScalarEvolution about this explicitly so that
582  // it can walk the def-use list and forget all SCEVs, as it may not be
583  // watching the PHI itself. Once the new exit value is in place, there
584  // may not be a def-use connection between the loop and every instruction
585  // which got a SCEVAddRecExpr for that loop.
586  SE->forgetValue(PN);
587 
588  // Iterate over all of the values in all the PHI nodes.
589  for (unsigned i = 0; i != NumPreds; ++i) {
590  // If the value being merged in is not integer or is not defined
591  // in the loop, skip it.
592  Value *InVal = PN->getIncomingValue(i);
593  if (!isa<Instruction>(InVal))
594  continue;
595 
596  // If this pred is for a subloop, not L itself, skip it.
597  if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
598  continue; // The Block is in a subloop, skip it.
599 
600  // Check that InVal is defined in the loop.
601  Instruction *Inst = cast<Instruction>(InVal);
602  if (!L->contains(Inst))
603  continue;
604 
605  // Okay, this instruction has a user outside of the current loop
606  // and varies predictably *inside* the loop. Evaluate the value it
607  // contains when the loop exits, if possible.
608  const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
609  if (!SE->isLoopInvariant(ExitValue, L) ||
610  !isSafeToExpand(ExitValue, *SE))
611  continue;
612 
613  // Computing the value outside of the loop brings no benefit if :
614  // - it is definitely used inside the loop in a way which can not be
615  // optimized away.
616  // - no use outside of the loop can take advantage of hoisting the
617  // computation out of the loop
618  if (ExitValue->getSCEVType()>=scMulExpr) {
619  unsigned NumHardInternalUses = 0;
620  unsigned NumSoftExternalUses = 0;
621  unsigned NumUses = 0;
622  for (auto IB = Inst->user_begin(), IE = Inst->user_end();
623  IB != IE && NumUses <= 6; ++IB) {
624  Instruction *UseInstr = cast<Instruction>(*IB);
625  unsigned Opc = UseInstr->getOpcode();
626  NumUses++;
627  if (L->contains(UseInstr)) {
628  if (Opc == Instruction::Call || Opc == Instruction::Ret)
629  NumHardInternalUses++;
630  } else {
631  if (Opc == Instruction::PHI) {
632  // Do not count the Phi as a use. LCSSA may have inserted
633  // plenty of trivial ones.
634  NumUses--;
635  for (auto PB = UseInstr->user_begin(),
636  PE = UseInstr->user_end();
637  PB != PE && NumUses <= 6; ++PB, ++NumUses) {
638  unsigned PhiOpc = cast<Instruction>(*PB)->getOpcode();
639  if (PhiOpc != Instruction::Call && PhiOpc != Instruction::Ret)
640  NumSoftExternalUses++;
641  }
642  continue;
643  }
644  if (Opc != Instruction::Call && Opc != Instruction::Ret)
645  NumSoftExternalUses++;
646  }
647  }
648  if (NumUses <= 6 && NumHardInternalUses && !NumSoftExternalUses)
649  continue;
650  }
651 
652  bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
653  Value *ExitVal =
654  expandSCEVIfNeeded(Rewriter, ExitValue, L, Inst, PN->getType());
655 
656  DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n'
657  << " LoopVal = " << *Inst << "\n");
658 
659  if (!isValidRewrite(Inst, ExitVal)) {
660  DeadInsts.push_back(ExitVal);
661  continue;
662  }
663 
664  // Collect all the candidate PHINodes to be rewritten.
665  RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
666  }
667  }
668  }
669 
670  bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);
671 
672  // Transformation.
673  for (const RewritePhi &Phi : RewritePhiSet) {
674  PHINode *PN = Phi.PN;
675  Value *ExitVal = Phi.Val;
676 
677  // Only do the rewrite when the ExitValue can be expanded cheaply.
678  // If LoopCanBeDel is true, rewrite exit value aggressively.
679  if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
680  DeadInsts.push_back(ExitVal);
681  continue;
682  }
683 
684  Changed = true;
685  ++NumReplaced;
686  Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
687  PN->setIncomingValue(Phi.Ith, ExitVal);
688 
689  // If this instruction is dead now, delete it. Don't do it now to avoid
690  // invalidating iterators.
691  if (isInstructionTriviallyDead(Inst, TLI))
692  DeadInsts.push_back(Inst);
693 
694  // Replace PN with ExitVal if that is legal and does not break LCSSA.
695  if (PN->getNumIncomingValues() == 1 &&
696  LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
697  PN->replaceAllUsesWith(ExitVal);
698  PN->eraseFromParent();
699  }
700  }
701 
702  // The insertion point instruction may have been deleted; clear it out
703  // so that the rewriter doesn't trip over it later.
704  Rewriter.clearInsertPoint();
705 }
706 
707 //===---------------------------------------------------------------------===//
708 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
709 // they will exit at the first iteration.
710 //===---------------------------------------------------------------------===//
711 
712 /// Check to see if this loop has loop invariant conditions which lead to loop
713 /// exits. If so, we know that if the exit path is taken, it is at the first
714 /// loop iteration. This lets us predict exit values of PHI nodes that live in
715 /// loop header.
716 void IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
717  // Verify the input to the pass is already in LCSSA form.
718  assert(L->isLCSSAForm(*DT));
719 
720  SmallVector<BasicBlock *, 8> ExitBlocks;
721  L->getUniqueExitBlocks(ExitBlocks);
722  auto *LoopHeader = L->getHeader();
723  assert(LoopHeader && "Invalid loop");
724 
725  for (auto *ExitBB : ExitBlocks) {
726  // If there are no more PHI nodes in this exit block, then no more
727  // values defined inside the loop are used on this path.
728  for (PHINode &PN : ExitBB->phis()) {
729  for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
730  IncomingValIdx != E; ++IncomingValIdx) {
731  auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
732 
733  // We currently only support loop exits from loop header. If the
734  // incoming block is not loop header, we need to recursively check
735  // all conditions starting from loop header are loop invariants.
736  // Additional support might be added in the future.
737  if (IncomingBB != LoopHeader)
738  continue;
739 
740  // Get condition that leads to the exit path.
741  auto *TermInst = IncomingBB->getTerminator();
742 
743  Value *Cond = nullptr;
744  if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
745  // Must be a conditional branch, otherwise the block
746  // should not be in the loop.
747  Cond = BI->getCondition();
748  } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
749  Cond = SI->getCondition();
750  else
751  continue;
752 
753  if (!L->isLoopInvariant(Cond))
754  continue;
755 
756  auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
757 
758  // Only deal with PHIs.
759  if (!ExitVal)
760  continue;
761 
762  // If ExitVal is a PHI on the loop header, then we know its
763  // value along this exit because the exit can only be taken
764  // on the first iteration.
765  auto *LoopPreheader = L->getLoopPreheader();
766  assert(LoopPreheader && "Invalid loop");
767  int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
768  if (PreheaderIdx != -1) {
769  assert(ExitVal->getParent() == LoopHeader &&
770  "ExitVal must be in loop header");
771  PN.setIncomingValue(IncomingValIdx,
772  ExitVal->getIncomingValue(PreheaderIdx));
773  }
774  }
775  }
776  }
777 }
778 
779 /// Check whether it is possible to delete the loop after rewriting exit
780 /// value. If it is possible, ignore ReplaceExitValue and do rewriting
781 /// aggressively.
782 bool IndVarSimplify::canLoopBeDeleted(
783  Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
784  BasicBlock *Preheader = L->getLoopPreheader();
785  // If there is no preheader, the loop will not be deleted.
786  if (!Preheader)
787  return false;
788 
789  // In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1.
790  // We obviate multiple ExitingBlocks case for simplicity.
791  // TODO: If we see testcase with multiple ExitingBlocks can be deleted
792  // after exit value rewriting, we can enhance the logic here.
793  SmallVector<BasicBlock *, 4> ExitingBlocks;
794  L->getExitingBlocks(ExitingBlocks);
795  SmallVector<BasicBlock *, 8> ExitBlocks;
796  L->getUniqueExitBlocks(ExitBlocks);
797  if (ExitBlocks.size() > 1 || ExitingBlocks.size() > 1)
798  return false;
799 
800  BasicBlock *ExitBlock = ExitBlocks[0];
801  BasicBlock::iterator BI = ExitBlock->begin();
802  while (PHINode *P = dyn_cast<PHINode>(BI)) {
803  Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);
804 
805  // If the Incoming value of P is found in RewritePhiSet, we know it
806  // could be rewritten to use a loop invariant value in transformation
807  // phase later. Skip it in the loop invariant check below.
808  bool found = false;
809  for (const RewritePhi &Phi : RewritePhiSet) {
810  unsigned i = Phi.Ith;
811  if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
812  found = true;
813  break;
814  }
815  }
816 
817  Instruction *I;
818  if (!found && (I = dyn_cast<Instruction>(Incoming)))
819  if (!L->hasLoopInvariantOperands(I))
820  return false;
821 
822  ++BI;
823  }
824 
825  for (auto *BB : L->blocks())
826  if (llvm::any_of(*BB, [](Instruction &I) {
827  return I.mayHaveSideEffects();
828  }))
829  return false;
830 
831  return true;
832 }
833 
834 //===----------------------------------------------------------------------===//
835 // IV Widening - Extend the width of an IV to cover its widest uses.
836 //===----------------------------------------------------------------------===//
837 
838 namespace {
839 
840 // Collect information about induction variables that are used by sign/zero
841 // extend operations. This information is recorded by CollectExtend and provides
842 // the input to WidenIV.
struct WideIVInfo {
  // The narrow induction variable (a loop header phi) being considered for
  // widening.
  PHINode *NarrowIV = nullptr;

  // Widest integer type created by a [sz]ext of the narrow IV seen so far.
  Type *WidestNativeType = nullptr;

  // Was a sext user seen before a zext? This decides the extension kind used
  // to seed the widening (see the WidenIV constructor).
  bool IsSigned = false;
};
852 
853 } // end anonymous namespace
854 
855 /// Update information about the induction variable that is extended by this
856 /// sign or zero extend operation. This is used to determine the final width of
857 /// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  // Only sign- and zero-extensions of the IV are interesting.
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  // Only widen to a type the target supports natively.
  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later). This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetics on the wider indvar are more
  // expensive than those on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable. We
  // could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
  // NOTE(review): the TTI cost comparison condition spans source lines not
  // visible in this chunk — confirm against the full source before editing.
  if (TTI &&
      Cast->getOperand(0)->getType())) {
    return;
  }

  // First qualifying extension seen: record its type and signedness.
  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  // Same signedness: keep the widest extension type seen so far.
  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}
903 
904 namespace {
905 
906 /// Record a link in the Narrow IV def-use chain along with the WideIV that
907 /// computes the same value as the Narrow IV def. This avoids caching Use*
908 /// pointers.
909 struct NarrowIVDefUse {
910  Instruction *NarrowDef = nullptr;
911  Instruction *NarrowUse = nullptr;
912  Instruction *WideDef = nullptr;
913 
914  // True if the narrow def is never negative. Tracking this information lets
915  // us use a sign extension instead of a zero extension or vice versa, when
916  // profitable and legal.
917  bool NeverNegative = false;
918 
919  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
920  bool NeverNegative)
921  : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
922  NeverNegative(NeverNegative) {}
923 };
924 
925 /// The goal of this transform is to remove sign and zero extends without
926 /// creating any new induction variables. To do this, it creates a new phi of
927 /// the wider type and redirects all users, either removing extends or inserting
928 /// truncs whenever we stop propagating the type.
class WidenIV {
  // Parameters

  // The narrow induction variable (loop header phi) being widened.
  PHINode *OrigPhi;
  // The integer type the IV is widened to.
  Type *WideType;

  // Context
  LoopInfo *LI;
  Loop *L;
  ScalarEvolution *SE;
  DominatorTree *DT;

  // Does the module have any calls to the llvm.experimental.guard intrinsic
  // at all? If not we can avoid scanning instructions looking for guards.
  bool HasGuards;

  // Result

  // The wide IV phi produced by createWideIV.
  PHINode *WidePhi = nullptr;
  // The wide IV increment created by SCEVExpander, and its SCEV; remembered
  // so widenIVUse can reuse it when widening the narrow IV's increment.
  Instruction *WideInc = nullptr;
  const SCEV *WideIncExpr = nullptr;

  // Worklist of narrow def-use edges still to be processed by widenIVUse.
  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;

  enum ExtendKind { ZeroExtended, SignExtended, Unknown };

  // A map tracking the kind of extension used to widen each narrow IV
  // and narrow IV user.
  // Key: pointer to a narrow IV or IV user.
  // Value: the kind of extension used to widen this Instruction.
  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;

  using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;

  // A map with control-dependent ranges for post increment IV uses. The key is
  // a pair of IV def and a use of this def denoting the context. The value is
  // a ConstantRange representing possible values of the def at the given
  // context.
  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;

  // Look up the recorded control-dependent range for (Def, UseI), if any.
  // NOTE(review): the branch producing the empty Optional is on a source line
  // not visible in this chunk — confirm against the full source.
  Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
                                              Instruction *UseI) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    return It == PostIncRangeInfos.end()
                                    : Optional<ConstantRange>(It->second);
  }

  // Populate PostIncRangeInfos for OrigPhi's transitive uses (defined below).
  void calculatePostIncRanges(PHINode *OrigPhi);
  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);

  // Record range R for (Def, UseI), intersecting with any existing entry so
  // repeated updates only narrow the range.
  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    if (It == PostIncRangeInfos.end())
      PostIncRangeInfos.insert({Key, R});
    else
      It->second = R.intersectWith(It->second);
  }

public:
  // NOTE(review): DTree and DI are referenced in the initializer list but
  // their parameter declarations are on source lines not visible in this
  // chunk — confirm against the full constructor signature.
  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
          bool HasGuards)
      : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
        L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
        HasGuards(HasGuards), DeadInsts(DI) {
    assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
    // Seed the extension-kind map with the kind chosen for the original IV.
    ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
  }

  /// Create the wide IV and rewrite widenable users of the narrow IV.
  /// Returns the wide phi, or null if the phi is not a widenable AddRec.
  PHINode *createWideIV(SCEVExpander &Rewriter);

protected:
  // Create a [sz]ext of NarrowOper to WideType, usable at Use.
  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
                          Instruction *Use);

  // Clone NarrowUse as a wide operation; dispatches to the two helpers below
  // and returns null for opcodes we decline to clone.
  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
                                     const SCEVAddRecExpr *WideAR);
  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);

  // Extension kind previously recorded for I; asserts if none was recorded.
  ExtendKind getExtendKind(Instruction *I);

  // An AddRec for this loop paired with the extension kind that produced it.
  using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;

  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);

  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);

  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                              unsigned OpCode) const;

  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);

  bool widenLoopCompare(NarrowIVDefUse DU);

  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};
1029 
1030 } // end anonymous namespace
1031 
1032 /// Perform a quick domtree based check for loop invariance assuming that V is
1033 /// used within the loop. LoopInfo::isLoopInvariant() seems gratuitous for this
1034 /// purpose.
1035 static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT) {
1036  Instruction *Inst = dyn_cast<Instruction>(V);
1037  if (!Inst)
1038  return true;
1039 
1040  return DT->properlyDominates(Inst->getParent(), L->getHeader());
1041 }
1042 
Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
                                 bool IsSigned, Instruction *Use) {
  // Set the debug location and conservative insertion point.
  IRBuilder<> Builder(Use);
  // Hoist the insertion point into loop preheaders as far as possible.
  // NOTE(review): the loop body that actually moves the builder's insertion
  // point is on a source line not visible in this chunk — confirm against the
  // full source.
  for (const Loop *L = LI->getLoopFor(Use->getParent());
       L && L->getLoopPreheader() && isLoopInvariant(NarrowOper, L, DT);
       L = L->getParentLoop())

  // Emit the requested extension of NarrowOper to the wide type.
  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
                    Builder.CreateZExt(NarrowOper, WideType);
}
1056 
1057 /// Instantiate a wide operation to replace a narrow operation. This only needs
1058 /// to handle operations that can evaluation to SCEVAddRec. It can safely return
1059 /// 0 for any operation we decide not to clone.
1060 Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
1061  const SCEVAddRecExpr *WideAR) {
1062  unsigned Opcode = DU.NarrowUse->getOpcode();
1063  switch (Opcode) {
1064  default:
1065  return nullptr;
1066  case Instruction::Add:
1067  case Instruction::Mul:
1068  case Instruction::UDiv:
1069  case Instruction::Sub:
1070  return cloneArithmeticIVUser(DU, WideAR);
1071 
1072  case Instruction::And:
1073  case Instruction::Or:
1074  case Instruction::Xor:
1075  case Instruction::Shl:
1076  case Instruction::LShr:
1077  case Instruction::AShr:
1078  return cloneBitwiseIVUser(DU);
1079  }
1080 }
1081 
1082 Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
1083  Instruction *NarrowUse = DU.NarrowUse;
1084  Instruction *NarrowDef = DU.NarrowDef;
1085  Instruction *WideDef = DU.WideDef;
1086 
1087  DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");
1088 
1089  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
1090  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
1091  // invariant and will be folded or hoisted. If it actually comes from a
1092  // widened IV, it should be removed during a future call to widenIVUse.
1093  bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
1094  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1095  ? WideDef
1096  : createExtendInst(NarrowUse->getOperand(0), WideType,
1097  IsSigned, NarrowUse);
1098  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1099  ? WideDef
1100  : createExtendInst(NarrowUse->getOperand(1), WideType,
1101  IsSigned, NarrowUse);
1102 
1103  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1104  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1105  NarrowBO->getName());
1106  IRBuilder<> Builder(NarrowUse);
1107  Builder.Insert(WideBO);
1108  WideBO->copyIRFlags(NarrowBO);
1109  return WideBO;
1110 }
1111 
1112 Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
1113  const SCEVAddRecExpr *WideAR) {
1114  Instruction *NarrowUse = DU.NarrowUse;
1115  Instruction *NarrowDef = DU.NarrowDef;
1116  Instruction *WideDef = DU.WideDef;
1117 
1118  DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1119 
1120  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;
1121 
1122  // We're trying to find X such that
1123  //
1124  // Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
1125  //
1126  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
1127  // and check using SCEV if any of them are correct.
1128 
1129  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
1130  // correct solution to X.
1131  auto GuessNonIVOperand = [&](bool SignExt) {
1132  const SCEV *WideLHS;
1133  const SCEV *WideRHS;
1134 
1135  auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
1136  if (SignExt)
1137  return SE->getSignExtendExpr(S, Ty);
1138  return SE->getZeroExtendExpr(S, Ty);
1139  };
1140 
1141  if (IVOpIdx == 0) {
1142  WideLHS = SE->getSCEV(WideDef);
1143  const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
1144  WideRHS = GetExtend(NarrowRHS, WideType);
1145  } else {
1146  const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
1147  WideLHS = GetExtend(NarrowLHS, WideType);
1148  WideRHS = SE->getSCEV(WideDef);
1149  }
1150 
1151  // WideUse is "WideDef `op.wide` X" as described in the comment.
1152  const SCEV *WideUse = nullptr;
1153 
1154  switch (NarrowUse->getOpcode()) {
1155  default:
1156  llvm_unreachable("No other possibility!");
1157 
1158  case Instruction::Add:
1159  WideUse = SE->getAddExpr(WideLHS, WideRHS);
1160  break;
1161 
1162  case Instruction::Mul:
1163  WideUse = SE->getMulExpr(WideLHS, WideRHS);
1164  break;
1165 
1166  case Instruction::UDiv:
1167  WideUse = SE->getUDivExpr(WideLHS, WideRHS);
1168  break;
1169 
1170  case Instruction::Sub:
1171  WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
1172  break;
1173  }
1174 
1175  return WideUse == WideAR;
1176  };
1177 
1178  bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
1179  if (!GuessNonIVOperand(SignExtend)) {
1180  SignExtend = !SignExtend;
1181  if (!GuessNonIVOperand(SignExtend))
1182  return nullptr;
1183  }
1184 
1185  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1186  ? WideDef
1187  : createExtendInst(NarrowUse->getOperand(0), WideType,
1188  SignExtend, NarrowUse);
1189  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1190  ? WideDef
1191  : createExtendInst(NarrowUse->getOperand(1), WideType,
1192  SignExtend, NarrowUse);
1193 
1194  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1195  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1196  NarrowBO->getName());
1197 
1198  IRBuilder<> Builder(NarrowUse);
1199  Builder.Insert(WideBO);
1200  WideBO->copyIRFlags(NarrowBO);
1201  return WideBO;
1202 }
1203 
1204 WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
1205  auto It = ExtendKindMap.find(I);
1206  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
1207  return It->second;
1208 }
1209 
1210 const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
1211  unsigned OpCode) const {
1212  if (OpCode == Instruction::Add)
1213  return SE->getAddExpr(LHS, RHS);
1214  if (OpCode == Instruction::Sub)
1215  return SE->getMinusSCEV(LHS, RHS);
1216  if (OpCode == Instruction::Mul)
1217  return SE->getMulExpr(LHS, RHS);
1218 
1219  llvm_unreachable("Unsupported opcode.");
1220 }
1221 
1222 /// No-wrap operations can transfer sign extension of their result to their
1223 /// operands. Generate the SCEV value for the widened operation without
1224 /// actually modifying the IR yet. If the expression after extending the
1225 /// operands is an AddRec for this loop, return the AddRec and the kind of
1226 /// extension used.
1227 WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
1228  // Handle the common case of add<nsw/nuw>
1229  const unsigned OpCode = DU.NarrowUse->getOpcode();
1230  // Only Add/Sub/Mul instructions supported yet.
1231  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1232  OpCode != Instruction::Mul)
1233  return {nullptr, Unknown};
1234 
1235  // One operand (NarrowDef) has already been extended to WideDef. Now determine
1236  // if extending the other will lead to a recurrence.
1237  const unsigned ExtendOperIdx =
1238  DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
1239  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
1240 
1241  const SCEV *ExtendOperExpr = nullptr;
1242  const OverflowingBinaryOperator *OBO =
1243  cast<OverflowingBinaryOperator>(DU.NarrowUse);
1244  ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
1245  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
1246  ExtendOperExpr = SE->getSignExtendExpr(
1247  SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
1248  else if(ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
1249  ExtendOperExpr = SE->getZeroExtendExpr(
1250  SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
1251  else
1252  return {nullptr, Unknown};
1253 
1254  // When creating this SCEV expr, don't apply the current operations NSW or NUW
1255  // flags. This instruction may be guarded by control flow that the no-wrap
1256  // behavior depends on. Non-control-equivalent instructions can be mapped to
1257  // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
1258  // semantics to those operations.
1259  const SCEV *lhs = SE->getSCEV(DU.WideDef);
1260  const SCEV *rhs = ExtendOperExpr;
1261 
1262  // Let's swap operands to the initial order for the case of non-commutative
1263  // operations, like SUB. See PR21014.
1264  if (ExtendOperIdx == 0)
1265  std::swap(lhs, rhs);
1266  const SCEVAddRecExpr *AddRec =
1267  dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));
1268 
1269  if (!AddRec || AddRec->getLoop() != L)
1270  return {nullptr, Unknown};
1271 
1272  return {AddRec, ExtKind};
1273 }
1274 
/// Is this instruction potentially interesting for further simplification
/// after widening its type? In other words, can the extend be safely hoisted
/// out of the loop with SCEV reducing the value to a recurrence on the same
/// loop? If so, return the extended recurrence and the kind of extension used.
/// Otherwise return {nullptr, Unknown}.
1280 WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
1281  if (!SE->isSCEVable(DU.NarrowUse->getType()))
1282  return {nullptr, Unknown};
1283 
1284  const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
1285  if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
1286  SE->getTypeSizeInBits(WideType)) {
1287  // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
1288  // index. So don't follow this use.
1289  return {nullptr, Unknown};
1290  }
1291 
1292  const SCEV *WideExpr;
1293  ExtendKind ExtKind;
1294  if (DU.NeverNegative) {
1295  WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1296  if (isa<SCEVAddRecExpr>(WideExpr))
1297  ExtKind = SignExtended;
1298  else {
1299  WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1300  ExtKind = ZeroExtended;
1301  }
1302  } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
1303  WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1304  ExtKind = SignExtended;
1305  } else {
1306  WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1307  ExtKind = ZeroExtended;
1308  }
1309  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
1310  if (!AddRec || AddRec->getLoop() != L)
1311  return {nullptr, Unknown};
1312  return {AddRec, ExtKind};
1313 }
1314 
/// This IV user cannot be widened. Replace this use of the original narrow IV
/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
1317 static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
1318  DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef
1319  << " for user " << *DU.NarrowUse << "\n");
1320  IRBuilder<> Builder(
1321  getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI));
1322  Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
1323  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
1324 }
1325 
/// If the narrow use is a compare instruction, then widen the compare
/// (and possibly the other operand). The extend operation is hoisted into the
/// loop preheader as far as possible.
1329 bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
1330  ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
1331  if (!Cmp)
1332  return false;
1333 
1334  // We can legally widen the comparison in the following two cases:
1335  //
1336  // - The signedness of the IV extension and comparison match
1337  //
1338  // - The narrow IV is always positive (and thus its sign extension is equal
1339  // to its zero extension). For instance, let's say we're zero extending
1340  // %narrow for the following use
1341  //
1342  // icmp slt i32 %narrow, %val ... (A)
1343  //
1344  // and %narrow is always positive. Then
1345  //
1346  // (A) == icmp slt i32 sext(%narrow), sext(%val)
1347  // == icmp slt i32 zext(%narrow), sext(%val)
1348  bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
1349  if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
1350  return false;
1351 
1352  Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
1353  unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
1354  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1355  assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");
1356 
1357  // Widen the compare instruction.
1358  IRBuilder<> Builder(
1359  getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI));
1360  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1361 
1362  // Widen the other operand of the compare, if necessary.
1363  if (CastWidth < IVWidth) {
1364  Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
1365  DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
1366  }
1367  return true;
1368 }
1369 
1370 /// Determine whether an individual user of the narrow IV can be widened. If so,
1371 /// return the wide clone of the user.
Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
  assert(ExtendKindMap.count(DU.NarrowDef) &&
         "Should already know the kind of extension used to widen NarrowDef");

  // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
  if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
    if (LI->getLoopFor(UsePhi->getParent()) != L) {
      // For LCSSA phis, sink the truncate outside the loop.
      // After SimplifyCFG most loop exit targets have a single predecessor.
      // Otherwise fall back to a truncate within the loop.
      if (UsePhi->getNumOperands() != 1)
        truncateIVUse(DU, DT, LI);
      else {
        // Widening the PHI requires us to insert a trunc. The logical place
        // for this trunc is in the same BB as the PHI. This is not possible if
        // the BB is terminated by a catchswitch.
        if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
          return nullptr;

        // Single-operand LCSSA phi: create a wide phi and truncate its value
        // back to the narrow type for the phi's existing users.
        PHINode *WidePhi =
          PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
                          UsePhi);
        WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
        IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
        Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
        UsePhi->replaceAllUsesWith(Trunc);
        DeadInsts.emplace_back(UsePhi);
        DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi
              << " to " << *WidePhi << "\n");
      }
      return nullptr;
    }
  }

  // This narrow use can be widened by a sext if it's non-negative or its narrow
  // def was widened by a sext. Same for zext.
  auto canWidenBySExt = [&]() {
    return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
  };
  auto canWidenByZExt = [&]() {
    return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
  };

  // Our raison d'etre! Eliminate sign and zero extension.
  if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
      (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
    Value *NewDef = DU.WideDef;
    if (DU.NarrowUse->getType() != WideType) {
      unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
      unsigned IVWidth = SE->getTypeSizeInBits(WideType);
      if (CastWidth < IVWidth) {
        // The cast isn't as wide as the IV, so insert a Trunc.
        IRBuilder<> Builder(DU.NarrowUse);
        NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
      }
      else {
        // A wider extend was hidden behind a narrower one. This may induce
        // another round of IV widening in which the intermediate IV becomes
        // dead. It should be very rare.
        DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
              << " not wide enough to subsume " << *DU.NarrowUse << "\n");
        DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
        NewDef = DU.NarrowUse;
      }
    }
    if (NewDef != DU.NarrowUse) {
      DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
            << " replaced by " << *DU.WideDef << "\n");
      ++NumElimExt;
      DU.NarrowUse->replaceAllUsesWith(NewDef);
      DeadInsts.emplace_back(DU.NarrowUse);
    }
    // Now that the extend is gone, we want to expose its uses for potential
    // further simplification. We don't need to directly inform SimplifyIVUsers
    // of the new users, because their parent IV will be processed later as a
    // new loop phi. If we preserved IVUsers analysis, we would also want to
    // push the uses of WideDef here.

    // No further widening is needed. The deceased [sz]ext had done it for us.
    return nullptr;
  }

  // Does this user itself evaluate to a recurrence after widening?
  WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
  if (!WideAddRec.first)
    WideAddRec = getWideRecurrence(DU);

  assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
  if (!WideAddRec.first) {
    // If use is a loop condition, try to promote the condition instead of
    // truncating the IV first.
    if (widenLoopCompare(DU))
      return nullptr;

    // This user does not evaluate to a recurrence after widening, so don't
    // follow it. Instead insert a Trunc to kill off the original use,
    // eventually isolating the original narrow IV so it can be removed.
    truncateIVUse(DU, DT, LI);
    return nullptr;
  }
  // Assume block terminators cannot evaluate to a recurrence. We can't insert
  // a Trunc after a terminator if there happens to be a critical edge.
  assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
         "SCEV is not expected to evaluate a block terminator");

  // Reuse the IV increment that SCEVExpander created as long as it dominates
  // NarrowUse.
  Instruction *WideUse = nullptr;
  if (WideAddRec.first == WideIncExpr &&
      Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
    WideUse = WideInc;
  else {
    WideUse = cloneIVUser(DU, WideAddRec.first);
    if (!WideUse)
      return nullptr;
  }
  // Evaluation of WideAddRec ensured that the narrow expression could be
  // extended outside the loop without overflow. This suggests that the wide use
  // evaluates to the same expression as the extended narrow use, but doesn't
  // absolutely guarantee it. Hence the following failsafe check. In rare cases
  // where it fails, we simply throw away the newly created wide use.
  if (WideAddRec.first != SE->getSCEV(WideUse)) {
    DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse
          << ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first << "\n");
    DeadInsts.emplace_back(WideUse);
    return nullptr;
  }

  // Record how this use was extended so its own users can be widened later.
  ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
  // Returning WideUse pushes it on the worklist.
  return WideUse;
}
1504 
1505 /// Add eligible users of NarrowDef to NarrowIVUsers.
1506 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1507  const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1508  bool NonNegativeDef =
1509  SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1510  SE->getConstant(NarrowSCEV->getType(), 0));
1511  for (User *U : NarrowDef->users()) {
1512  Instruction *NarrowUser = cast<Instruction>(U);
1513 
1514  // Handle data flow merges and bizarre phi cycles.
1515  if (!Widened.insert(NarrowUser).second)
1516  continue;
1517 
1518  bool NonNegativeUse = false;
1519  if (!NonNegativeDef) {
1520  // We might have a control-dependent range information for this context.
1521  if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
1522  NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
1523  }
1524 
1525  NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
1526  NonNegativeDef || NonNegativeUse);
1527  }
1528 }
1529 
1530 /// Process a single induction variable. First use the SCEVExpander to create a
1531 /// wide induction variable that evaluates to the same recurrence as the
1532 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's
1533 /// def-use chain. After widenIVUse has processed all interesting IV users, the
1534 /// narrow IV will be isolated for removal by DeleteDeadPHIs.
1535 ///
1536 /// It would be simpler to delete uses as they are processed, but we must avoid
1537 /// invalidating SCEV expressions.
PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
  // Is this phi an induction variable?
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
  if (!AddRec)
    return nullptr;

  // Widen the induction variable expression using the extension kind recorded
  // for the original phi.
  const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
                               ? SE->getSignExtendExpr(AddRec, WideType)
                               : SE->getZeroExtendExpr(AddRec, WideType);

  assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
         "Expect the new IV expression to preserve its type");

  // Can the IV be extended outside the loop without overflow?
  AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return nullptr;

  // An AddRec must have loop-invariant operands. Since this AddRec is
  // materialized by a loop header phi, the expression cannot have any post-loop
  // operands, so they must dominate the loop header.
  assert(
      SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
      SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
      "Loop header phi recurrence inputs do not dominate the loop");

  // Iterate over IV uses (including transitive ones) looking for IV increments
  // of the form 'add nsw %iv, <const>'. For each increment and each use of
  // the increment calculate control-dependent range information basing on
  // dominating conditions inside of the loop (e.g. a range check inside of the
  // loop). Calculated ranges are stored in PostIncRangeInfos map.
  //
  // Control-dependent range information is later used to prove that a narrow
  // definition is not negative (see pushNarrowIVUsers). It's difficult to do
  // this on demand because when pushNarrowIVUsers needs this information some
  // of the dominating conditions might be already widened.
  if (UsePostIncrementRanges)
    calculatePostIncRanges(OrigPhi);

  // The rewriter provides a value for the desired IV expression. This may
  // either find an existing phi or materialize a new one. Either way, we
  // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
  // of the phi-SCC dominates the loop entry.
  Instruction *InsertPt = &L->getHeader()->front();
  WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));

  // Remembering the WideIV increment generated by SCEVExpander allows
  // widenIVUse to reuse it when widening the narrow IV's increment. We don't
  // employ a general reuse mechanism because the call above is the only call to
  // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
  if (BasicBlock *LatchBlock = L->getLoopLatch()) {
    WideInc =
      cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
    WideIncExpr = SE->getSCEV(WideInc);
    // Propagate the debug location associated with the original loop increment
    // to the new (widened) increment.
    auto *OrigInc =
      cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
    WideInc->setDebugLoc(OrigInc->getDebugLoc());
  }

  DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
  ++NumWidened;

  // Traverse the def-use chain using a worklist starting at the original IV.
  assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" );

  Widened.insert(OrigPhi);
  pushNarrowIVUsers(OrigPhi, WidePhi);

  while (!NarrowIVUsers.empty()) {
    NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();

    // Process a def-use edge. This may replace the use, so don't hold a
    // use_iterator across it.
    Instruction *WideUse = widenIVUse(DU, Rewriter);

    // Follow all def-use edges from the previous narrow use.
    if (WideUse)
      pushNarrowIVUsers(DU.NarrowUse, WideUse);

    // widenIVUse may have removed the def-use edge.
    if (DU.NarrowDef->use_empty())
      DeadInsts.emplace_back(DU.NarrowDef);
  }

  // Attach any debug information to the new PHI. Since OrigPhi and WidePHI
  // evaluate the same recurrence, we can just copy the debug info over.
  // NOTE(review): the declaration of DbgValues is on a source line not
  // visible in this chunk — confirm against the full source.
  llvm::findDbgValues(DbgValues, OrigPhi);
  auto *MDPhi = MetadataAsValue::get(WidePhi->getContext(),
                                     ValueAsMetadata::get(WidePhi));
  for (auto &DbgValue : DbgValues)
    DbgValue->setOperand(0, MDPhi);
  return WidePhi;
}
1635 
1636 /// Calculates control-dependent range for the given def at the given context
1637 /// by looking at dominating conditions inside of the loop
1638 void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
1639  Instruction *NarrowUser) {
1640  using namespace llvm::PatternMatch;
1641 
      // Only handle defs of the form "NarrowDefLHS +nsw C" where C >= 0: the
      // non-negative nsw constant lets us derive NarrowDef's range by shifting
      // a range proven for NarrowDefLHS.
1642  Value *NarrowDefLHS;
1643  const APInt *NarrowDefRHS;
1644  if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
1645  m_APInt(NarrowDefRHS))) ||
1646  !NarrowDefRHS->isNonNegative())
1647  return;
1648 
      // Given a condition known to be true (or false, per TrueDest) at the
      // user, constrain NarrowDefLHS and record the implied range for the
      // (NarrowDef, NarrowUser) pair.
1649  auto UpdateRangeFromCondition = [&] (Value *Condition,
1650  bool TrueDest) {
1651  CmpInst::Predicate Pred;
1652  Value *CmpRHS;
1653  if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
1654  m_Value(CmpRHS))))
1655  return;
1656 
      // NOTE(review): original line 1657 is not visible in this extraction —
      // presumably the declaration of P as the effective predicate below.
1658  TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
1659 
      // Constrain LHS by the region where the predicate can hold against
      // CmpRHS's signed range, then shift by the nsw constant to obtain
      // NarrowDef's range.
1660  auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
1661  auto CmpConstrainedLHSRange =
1662  ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
1663  auto NarrowDefRange =
1664  CmpConstrainedLHSRange.addWithNoSignedWrap(*NarrowDefRHS);
1665 
1666  updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
1667  };
1668 
      // Scan guard intrinsics preceding Ctx in its block; each guard's
      // condition is known true at Ctx.
1669  auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
1670  if (!HasGuards)
1671  return;
1672 
1673  for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
1674  Ctx->getParent()->rend())) {
1675  Value *C = nullptr;
1676  if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
1677  UpdateRangeFromCondition(C, /*TrueDest=*/true);
1678  }
1679  };
1680 
1681  UpdateRangeFromGuards(NarrowUser);
1682 
1683  BasicBlock *NarrowUserBB = NarrowUser->getParent();
1684  // If NarrowUserBB is statically unreachable asking dominator queries may
1685  // yield surprising results. (e.g. the block may not have a dom tree node)
1686  if (!DT->isReachableFromEntry(NarrowUserBB))
1687  return;
1688 
      // Walk up the idom chain while we remain inside the loop, harvesting
      // guards and conditional branches whose edge dominates the user block.
1689  for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
1690  L->contains(DTB->getBlock());
1691  DTB = DTB->getIDom()) {
1692  auto *BB = DTB->getBlock();
1693  auto *TI = BB->getTerminator();
1694  UpdateRangeFromGuards(TI);
1695 
1696  auto *BI = dyn_cast<BranchInst>(TI);
1697  if (!BI || !BI->isConditional())
1698  continue;
1699 
1700  auto *TrueSuccessor = BI->getSuccessor(0);
1701  auto *FalseSuccessor = BI->getSuccessor(1);
1702 
      // A branch condition's truth value can only be assumed at the user if
      // a single edge to that successor dominates the user's block.
1703  auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
1704  return BBE.isSingleEdge() &&
1705  DT->dominates(BBE, NarrowUser->getParent());
1706  };
1707 
1708  if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
1709  UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
1710 
1711  if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
1712  UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
1713  }
1714 }
1715 
1716 /// Calculates PostIncRangeInfos map for the given IV
1717 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
      // NOTE(review): the declarations of the Worklist and Visited containers
      // (original lines 1718-1719) are not visible in this extraction.
1720  Worklist.push_back(OrigPhi);
1721  Visited.insert(OrigPhi);
1722 
      // Worklist traversal of the in-loop def-use closure of OrigPhi; for each
      // def->user edge inside the loop, compute a control-dependent range.
1723  while (!Worklist.empty()) {
1724  Instruction *NarrowDef = Worklist.pop_back_val();
1725 
1726  for (Use &U : NarrowDef->uses()) {
1727  auto *NarrowUser = cast<Instruction>(U.getUser());
1728 
1729  // Don't go looking outside the current loop.
1730  auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
1731  if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
1732  continue;
1733 
      // Each user is processed at most once.
1734  if (!Visited.insert(NarrowUser).second)
1735  continue;
1736 
1737  Worklist.push_back(NarrowUser);
1738 
1739  calculatePostIncRange(NarrowDef, NarrowUser);
1740  }
1741  }
1742 }
1743 
1744 //===----------------------------------------------------------------------===//
1745 // Live IV Reduction - Minimize IVs live across the loop.
1746 //===----------------------------------------------------------------------===//
1747 
1748 //===----------------------------------------------------------------------===//
1749 // Simplification of IV users based on SCEV evaluation.
1750 //===----------------------------------------------------------------------===//
1751 
1752 namespace {
1753 
      // Visitor passed to simplifyUsersOfIV: records, via visitIVCast, the
      // widest sign/zero-extend type observed among casts of IVPhi's users.
1754 class IndVarSimplifyVisitor : public IVVisitor {
1755  ScalarEvolution *SE;
1756  const TargetTransformInfo *TTI;
1757  PHINode *IVPhi;
1758 
1759 public:
      // Widening information accumulated for IVPhi; consulted by the caller
      // after simplification to decide whether to widen this IV.
1760  WideIVInfo WI;
1761 
1762  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
1763  const TargetTransformInfo *TTI,
1764  const DominatorTree *DTree)
1765  : SE(SCEV), TTI(TTI), IVPhi(IV) {
      // DT is presumably a member of the IVVisitor base class — confirm in
      // the IVVisitor declaration (SimplifyIndVar.h).
1766  DT = DTree;
1767  WI.NarrowIV = IVPhi;
1768  }
1769 
1770  // Implement the interface used by simplifyUsersOfIV.
1771  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
1772 };
1773 
1774 } // end anonymous namespace
1775 
1776 /// Iteratively perform simplification on a worklist of IV users. Each
1777 /// successive simplification may push more users which may themselves be
1778 /// candidates for simplification.
1779 ///
1780 /// Sign/Zero extend elimination is interleaved with IV simplification.
1781 void IndVarSimplify::simplifyAndExtend(Loop *L,
1782  SCEVExpander &Rewriter,
1783  LoopInfo *LI) {
      // NOTE(review): the declaration of the WideIVs container (original line
      // 1784) is not visible in this extraction.
1785 
      // Widening is only attempted under guards if the guard intrinsic is
      // actually used somewhere in the module.
1786  auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
1787  Intrinsic::getName(Intrinsic::experimental_guard));
1788  bool HasGuards = GuardDecl && !GuardDecl->use_empty();
1789 
      // Seed the worklist with every phi in the loop header.
1790  SmallVector<PHINode*, 8> LoopPhis;
1791  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1792  LoopPhis.push_back(cast<PHINode>(I));
1793  }
1794  // Each round of simplification iterates through the SimplifyIVUsers worklist
1795  // for all current phis, then determines whether any IVs can be
1796  // widened. Widening adds new phis to LoopPhis, inducing another round of
1797  // simplification on the wide IVs.
1798  while (!LoopPhis.empty()) {
1799  // Evaluate as many IV expressions as possible before widening any IVs. This
1800  // forces SCEV to set no-wrap flags before evaluating sign/zero
1801  // extension. The first time SCEV attempts to normalize sign/zero extension,
1802  // the result becomes final. So for the most predictable results, we delay
1803  // evaluation of sign/zero extend evaluation until needed, and avoid running
1804  // other SCEV based analysis prior to simplifyAndExtend.
1805  do {
1806  PHINode *CurrIV = LoopPhis.pop_back_val();
1807 
1808  // Information about sign/zero extensions of CurrIV.
1809  IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
1810 
1811  Changed |=
1812  simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor);
1813 
      // A non-null WidestNativeType means the visitor saw an ext worth
      // widening for; queue the IV for the widening phase below.
1814  if (Visitor.WI.WidestNativeType) {
1815  WideIVs.push_back(Visitor.WI);
1816  }
1817  } while(!LoopPhis.empty());
1818 
      // Widen each queued IV; a successfully created wide phi re-enters the
      // simplification worklist.
1819  for (; !WideIVs.empty(); WideIVs.pop_back()) {
1820  WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
1821  if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
1822  Changed = true;
1823  LoopPhis.push_back(WidePhi);
1824  }
1825  }
1826  }
1827 }
1828 
1829 //===----------------------------------------------------------------------===//
1830 // linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
1831 //===----------------------------------------------------------------------===//
1832 
1833 /// Return true if this loop's backedge taken count expression can be safely and
1834 /// cheaply expanded into an instruction sequence that can be used by
1835 /// linearFunctionTestReplace.
1836 ///
1837 /// TODO: This fails for pointer-type loop counters with greater than one byte
1838 /// strides, consequently preventing LFTR from running. For the purpose of LFTR
1839 /// we could skip this check in the case that the LFTR loop counter (chosen by
1840 /// FindLoopCounter) is also pointer type. Instead, we could directly convert
1841 /// the loop test to an inequality test by checking the target data's alignment
1842 /// of element types (given that the initial pointer value originates from or is
1843 /// used by ABI constrained operation, as opposed to inttoptr/ptrtoint).
1844 /// However, we don't yet have a strong motivation for converting loop tests
1845 /// into inequality tests.
      // NOTE(review): the first signature line (original 1846) is not visible
      // in this extraction — callers invoke this as
      // canExpandBackedgeTakenCount(L, SE, Rewriter), so it presumably reads
      // "static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE,".
1847  SCEVExpander &Rewriter) {
      // An unknown or zero backedge-taken count cannot drive LFTR.
1848  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
1849  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
1850  BackedgeTakenCount->isZero())
1851  return false;
1852 
      // LFTR requires a single exiting block.
1853  if (!L->getExitingBlock())
1854  return false;
1855 
1856  // Can't rewrite non-branch yet.
1857  if (!isa<BranchInst>(L->getExitingBlock()->getTerminator()))
1858  return false;
1859 
      // Don't materialize a trip count the expander considers expensive.
1860  if (Rewriter.isHighCostExpansion(BackedgeTakenCount, L))
1861  return false;
1862 
1863  return true;
1864 }
1865 
1866 /// Return the loop header phi IFF IncV adds a loop invariant value to the phi.
      // NOTE(review): the signature line (original 1867) is not visible in this
      // extraction — callers invoke this as getLoopPhiForCounter(IncV, L, DT)
      // and use a PHINode* result.
1868  Instruction *IncI = dyn_cast<Instruction>(IncV);
1869  if (!IncI)
1870  return nullptr;
1871 
      // Only add/sub and (type-preserving) gep can form a counter increment.
1872  switch (IncI->getOpcode()) {
1873  case Instruction::Add:
1874  case Instruction::Sub:
1875  break;
1876  case Instruction::GetElementPtr:
1877  // An IV counter must preserve its type.
1878  if (IncI->getNumOperands() == 2)
1879  break;
      // NOTE(review): original line 1880 is not visible here — presumably a
      // fallthrough annotation into the default case for multi-index GEPs.
1881  default:
1882  return nullptr;
1883  }
1884 
      // Increment of the form "phi op invariant".
1885  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
1886  if (Phi && Phi->getParent() == L->getHeader()) {
1887  if (isLoopInvariant(IncI->getOperand(1), L, DT))
1888  return Phi;
1889  return nullptr;
1890  }
      // A GEP's pointer operand must be the phi; a phi in the index position
      // does not make a counter.
1891  if (IncI->getOpcode() == Instruction::GetElementPtr)
1892  return nullptr;
1893 
1894  // Allow add/sub to be commuted.
1895  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
1896  if (Phi && Phi->getParent() == L->getHeader()) {
1897  if (isLoopInvariant(IncI->getOperand(0), L, DT))
1898  return Phi;
1899  }
1900  return nullptr;
1901 }
1902 
1903 /// Return the compare guarding the loop latch, or NULL for unrecognized tests.
      // NOTE(review): the signature line (original 1904) is not visible in this
      // extraction — callers invoke this as getLoopTest(L) and expect an
      // ICmpInst* result.
1905  assert(L->getExitingBlock() && "expected loop exit");
1906 
1907  BasicBlock *LatchBlock = L->getLoopLatch();
1908  // Don't bother with LFTR if the loop is not properly simplified.
1909  if (!LatchBlock)
1910  return nullptr;
1911 
      // NOTE(review): the declaration of BI (original line 1912) is not visible
      // here — presumably the exiting block's terminator cast to BranchInst.
1913  assert(BI && "expected exit branch");
1914 
      // Non-icmp conditions yield null via dyn_cast.
1915  return dyn_cast<ICmpInst>(BI->getCondition());
1916 }
1917 
1918 /// linearFunctionTestReplace policy. Return true unless we can show that the
1919 /// current exit test is already sufficiently canonical.
1920 static bool needsLFTR(Loop *L, DominatorTree *DT) {
1921  // Do LFTR to simplify the exit condition to an ICMP.
1922  ICmpInst *Cond = getLoopTest(L);
1923  if (!Cond)
1924  return true;
1925 
1926  // Do LFTR to simplify the exit ICMP to EQ/NE
1927  ICmpInst::Predicate Pred = Cond->getPredicate();
1928  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
1929  return true;
1930 
1931  // Look for a loop invariant RHS
1932  Value *LHS = Cond->getOperand(0);
1933  Value *RHS = Cond->getOperand(1);
1934  if (!isLoopInvariant(RHS, L, DT)) {
1935  if (!isLoopInvariant(LHS, L, DT))
1936  return true;
1937  std::swap(LHS, RHS);
1938  }
1939  // Look for a simple IV counter LHS
1940  PHINode *Phi = dyn_cast<PHINode>(LHS);
1941  if (!Phi)
1942  Phi = getLoopPhiForCounter(LHS, L, DT);
1943 
1944  if (!Phi)
1945  return true;
1946 
1947  // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
1948  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
1949  if (Idx < 0)
1950  return true;
1951 
1952  // Do LFTR if the exit condition's IV is *not* a simple counter.
1953  Value *IncV = Phi->getIncomingValue(Idx);
1954  return Phi != getLoopPhiForCounter(IncV, L, DT);
1955 }
1956 
1957 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
1958 /// down to checking that all operands are constant and listing instructions
1959 /// that may hide undef.
      // NOTE(review): the first signature line (original 1960) is not visible
      // in this extraction — calls below pass (Value*, Visited set, Depth), so
      // it presumably reads
      // "static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,".
1961  unsigned Depth) {
      // Constants are concrete unless they are (or are) undef.
1962  if (isa<Constant>(V))
1963  return !isa<UndefValue>(V);
1964 
      // Bound the recursion depth; give up conservatively beyond it.
1965  if (Depth >= 6)
1966  return false;
1967 
1968  // Conservatively handle non-constant non-instructions. For example, Arguments
1969  // may be undef.
1970  Instruction *I = dyn_cast<Instruction>(V);
1971  if (!I)
1972  return false;
1973 
1974  // Load and return values may be undef.
1975  if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
1976  return false;
1977 
1978  // Optimistically handle other instructions.
1979  for (Value *Op : I->operands()) {
      // The Visited set breaks cycles and avoids re-walking shared operands.
1980  if (!Visited.insert(Op).second)
1981  continue;
1982  if (!hasConcreteDefImpl(Op, Visited, Depth+1))
1983  return false;
1984  }
1985  return true;
1986 }
1987 
1988 /// Return true if the given value is concrete. We must prove that undef can
1989 /// never reach it.
1990 ///
1991 /// TODO: If we decide that this is a good approach to checking for undef, we
1992 /// may factor it into a common location.
1993 static bool hasConcreteDef(Value *V) {
1994  SmallPtrSet<Value*, 8> Visited;
1995  Visited.insert(V);
1996  return hasConcreteDefImpl(V, Visited, 0);
1997 }
1998 
1999 /// Return true if this IV has any uses other than the (soon to be rewritten)
2000 /// loop exit test.
2001 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
2002  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
2003  Value *IncV = Phi->getIncomingValue(LatchIdx);
2004 
2005  for (User *U : Phi->users())
2006  if (U != Cond && U != IncV) return false;
2007 
2008  for (User *U : IncV->users())
2009  if (U != Cond && U != Phi) return false;
2010  return true;
2011 }
2012 
2013 /// Find an affine IV in canonical form.
2014 ///
2015 /// BECount may be an i8* pointer type. The pointer difference is already
2016 /// valid count without scaling the address stride, so it remains a pointer
2017 /// expression as far as SCEV is concerned.
2018 ///
2019 /// Currently only valid for LFTR. See the comments on hasConcreteDef below.
2020 ///
2021 /// FIXME: Accept -1 stride and set IVLimit = IVInit - BECount
2022 ///
2023 /// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride.
2024 /// This is difficult in general for SCEV because of potential overflow. But we
2025 /// could at least handle constant BECounts.
2026 static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount,
2027  ScalarEvolution *SE, DominatorTree *DT) {
2028  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
2029 
2030  Value *Cond =
2031  cast<BranchInst>(L->getExitingBlock()->getTerminator())->getCondition();
2032 
2033  // Loop over all of the PHI nodes, looking for a simple counter.
2034  PHINode *BestPhi = nullptr;
2035  const SCEV *BestInit = nullptr;
2036  BasicBlock *LatchBlock = L->getLoopLatch();
2037  assert(LatchBlock && "needsLFTR should guarantee a loop latch");
2038  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2039 
2040  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
2041  PHINode *Phi = cast<PHINode>(I);
2042  if (!SE->isSCEVable(Phi->getType()))
2043  continue;
2044 
2045  // Avoid comparing an integer IV against a pointer Limit.
2046  if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
2047  continue;
2048 
2049  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2050  if (!AR || AR->getLoop() != L || !AR->isAffine())
2051  continue;
2052 
2053  // AR may be a pointer type, while BECount is an integer type.
2054  // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
2055  // AR may not be a narrower type, or we may never exit.
2056  uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
2057  if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
2058  continue;
2059 
2060  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
2061  if (!Step || !Step->isOne())
2062  continue;
2063 
2064  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
2065  Value *IncV = Phi->getIncomingValue(LatchIdx);
2066  if (getLoopPhiForCounter(IncV, L, DT) != Phi)
2067  continue;
2068 
2069  // Avoid reusing a potentially undef value to compute other values that may
2070  // have originally had a concrete definition.
2071  if (!hasConcreteDef(Phi)) {
2072  // We explicitly allow unknown phis as long as they are already used by
2073  // the loop test. In this case we assume that performing LFTR could not
2074  // increase the number of undef users.
2075  if (ICmpInst *Cond = getLoopTest(L)) {
2076  if (Phi != getLoopPhiForCounter(Cond->getOperand(0), L, DT) &&
2077  Phi != getLoopPhiForCounter(Cond->getOperand(1), L, DT)) {
2078  continue;
2079  }
2080  }
2081  }
2082  const SCEV *Init = AR->getStart();
2083 
2084  if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
2085  // Don't force a live loop counter if another IV can be used.
2086  if (AlmostDeadIV(Phi, LatchBlock, Cond))
2087  continue;
2088 
2089  // Prefer to count-from-zero. This is a more "canonical" counter form. It
2090  // also prefers integer to pointer IVs.
2091  if (BestInit->isZero() != Init->isZero()) {
2092  if (BestInit->isZero())
2093  continue;
2094  }
2095  // If two IVs both count from zero or both count from nonzero then the
2096  // narrower is likely a dead phi that has been widened. Use the wider phi
2097  // to allow the other to be eliminated.
2098  else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
2099  continue;
2100  }
2101  BestPhi = Phi;
2102  BestInit = Init;
2103  }
2104  return BestPhi;
2105 }
2106 
2107 /// Help linearFunctionTestReplace by generating a value that holds the RHS of
2108 /// the new loop test.
2109 static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
2110  SCEVExpander &Rewriter, ScalarEvolution *SE) {
2111  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2112  assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter");
2113  const SCEV *IVInit = AR->getStart();
2114 
2115  // IVInit may be a pointer while IVCount is an integer when FindLoopCounter
2116  // finds a valid pointer IV. Sign extend BECount in order to materialize a
2117  // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
2118  // the existing GEPs whenever possible.
2119  if (IndVar->getType()->isPointerTy() && !IVCount->getType()->isPointerTy()) {
2120  // IVOffset will be the new GEP offset that is interpreted by GEP as a
2121  // signed value. IVCount on the other hand represents the loop trip count,
2122  // which is an unsigned value. FindLoopCounter only allows induction
2123  // variables that have a positive unit stride of one. This means we don't
2124  // have to handle the case of negative offsets (yet) and just need to zero
2125  // extend IVCount.
2126  Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
2127  const SCEV *IVOffset = SE->getTruncateOrZeroExtend(IVCount, OfsTy);
2128 
2129  // Expand the code for the iteration count.
2130  assert(SE->isLoopInvariant(IVOffset, L) &&
2131  "Computed iteration count is not loop invariant!");
2132  BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
2133  Value *GEPOffset = Rewriter.expandCodeFor(IVOffset, OfsTy, BI);
2134 
      // The GEP base is the pointer IV's preheader-incoming value; its SCEV
      // must match the recurrence start.
2135  Value *GEPBase = IndVar->getIncomingValueForBlock(L->getLoopPreheader());
2136  assert(AR->getStart() == SE->getSCEV(GEPBase) && "bad loop counter");
2137  // We could handle pointer IVs other than i8*, but we need to compensate for
2138  // gep index scaling. See canExpandBackedgeTakenCount comments.
      // NOTE(review): original line 2139 (the opening of this assert —
      // presumably a size-of-element query via ScalarEvolution) is not visible
      // in this extraction.
2140  cast<PointerType>(GEPBase->getType())
2141  ->getElementType())->isOne() &&
2142  "unit stride pointer IV must be i8*");
2143 
2144  IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
2145  return Builder.CreateGEP(nullptr, GEPBase, GEPOffset, "lftr.limit");
2146  } else {
2147  // In any other case, convert both IVInit and IVCount to integers before
2148  // comparing. This may result in SCEV expansion of pointers, but in practice
2149  // SCEV will fold the pointer arithmetic away as such:
2150  // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
2151  //
2152  // Valid Cases: (1) both integers is most common; (2) both may be pointers
2153  // for simple memset-style loops.
2154  //
2155  // IVInit integer and IVCount pointer would only occur if a canonical IV
2156  // were generated on top of case #2, which is not expected.
2157 
2158  const SCEV *IVLimit = nullptr;
2159  // For unit stride, IVCount = Start + BECount with 2's complement overflow.
2160  // For non-zero Start, compute IVCount here.
2161  if (AR->getStart()->isZero())
2162  IVLimit = IVCount;
2163  else {
2164  assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
2165  const SCEV *IVInit = AR->getStart();
2166 
2167  // For integer IVs, truncate the IV before computing IVInit + BECount.
2168  if (SE->getTypeSizeInBits(IVInit->getType())
2169  > SE->getTypeSizeInBits(IVCount->getType()))
2170  IVInit = SE->getTruncateExpr(IVInit, IVCount->getType());
2171 
2172  IVLimit = SE->getAddExpr(IVInit, IVCount);
2173  }
2174  // Expand the code for the iteration count.
2175  BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
2176  IRBuilder<> Builder(BI);
2177  assert(SE->isLoopInvariant(IVLimit, L) &&
2178  "Computed iteration count is not loop invariant!");
2179  // Ensure that we generate the same type as IndVar, or a smaller integer
2180  // type. In the presence of null pointer values, we have an integer type
2181  // SCEV expression (IVInit) for a pointer type IV value (IndVar).
2182  Type *LimitTy = IVCount->getType()->isPointerTy() ?
2183  IndVar->getType() : IVCount->getType();
2184  return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
2185  }
2186 }
2187 
2188 /// This method rewrites the exit condition of the loop to be a canonical !=
2189 /// comparison against the incremented loop induction variable. This pass is
2190 /// able to rewrite the exit tests of any loop where the SCEV analysis can
2191 /// determine a loop-invariant trip count of the loop, which is actually a much
2192 /// broader range than just linear tests.
2193 Value *IndVarSimplify::
2194 linearFunctionTestReplace(Loop *L,
2195  const SCEV *BackedgeTakenCount,
2196  PHINode *IndVar,
2197  SCEVExpander &Rewriter) {
2198  assert(canExpandBackedgeTakenCount(L, SE, Rewriter) && "precondition");
2199 
2200  // Initialize CmpIndVar and IVCount to their preincremented values.
2201  Value *CmpIndVar = IndVar;
2202  const SCEV *IVCount = BackedgeTakenCount;
2203 
2204  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
2205 
2206  // If the exiting block is the same as the backedge block, we prefer to
2207  // compare against the post-incremented value, otherwise we must compare
2208  // against the preincremented value.
2209  if (L->getExitingBlock() == L->getLoopLatch()) {
2210  // Add one to the "backedge-taken" count to get the trip count.
2211  // This addition may overflow, which is valid as long as the comparison is
2212  // truncated to BackedgeTakenCount->getType().
2213  IVCount = SE->getAddExpr(BackedgeTakenCount,
2214  SE->getOne(BackedgeTakenCount->getType()));
2215  // The BackedgeTaken expression contains the number of times that the
2216  // backedge branches to the loop header. This is one less than the
2217  // number of times the loop executes, so use the incremented indvar.
2218  CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
2219  }
2220 
      // Materialize the limit value the IV is compared against.
2221  Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE);
2222  assert(ExitCnt->getType()->isPointerTy() ==
2223  IndVar->getType()->isPointerTy() &&
2224  "genLoopLimit missed a cast");
2225 
2226  // Insert a new icmp_ne or icmp_eq instruction before the branch.
2227  BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
      // NOTE(review): original line 2228 (presumably the declaration of the
      // predicate variable P assigned below) is not visible in this extraction.
      // Choose NE when the true edge stays in the loop, EQ when it exits.
2229  if (L->contains(BI->getSuccessor(0)))
2230  P = ICmpInst::ICMP_NE;
2231  else
2232  P = ICmpInst::ICMP_EQ;
2233 
2234  DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
2235  << " LHS:" << *CmpIndVar << '\n'
2236  << " op:\t"
2237  << (P == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
2238  << " RHS:\t" << *ExitCnt << "\n"
2239  << " IVCount:\t" << *IVCount << "\n");
2240 
2241  IRBuilder<> Builder(BI);
2242 
2243  // The new loop exit condition should reuse the debug location of the
2244  // original loop exit condition.
2245  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
2246  Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
2247 
2248  // LFTR can ignore IV overflow and truncate to the width of
2249  // BECount. This avoids materializing the add(zext(add)) expression.
2250  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
2251  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
2252  if (CmpIndVarSize > ExitCntSize) {
2253  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2254  const SCEV *ARStart = AR->getStart();
2255  const SCEV *ARStep = AR->getStepRecurrence(*SE);
2256  // For constant IVCount, avoid truncation.
2257  if (isa<SCEVConstant>(ARStart) && isa<SCEVConstant>(IVCount)) {
2258  const APInt &Start = cast<SCEVConstant>(ARStart)->getAPInt();
2259  APInt Count = cast<SCEVConstant>(IVCount)->getAPInt();
2260  // Note that the post-inc value of BackedgeTakenCount may have overflowed
2261  // above such that IVCount is now zero.
2262  if (IVCount != BackedgeTakenCount && Count == 0) {
2263  Count = APInt::getMaxValue(Count.getBitWidth()).zext(CmpIndVarSize);
2264  ++Count;
2265  }
2266  else
2267  Count = Count.zext(CmpIndVarSize);
      // Walk the recurrence Count steps in the stride's direction to get the
      // limit in the IV's own (wider) type.
2268  APInt NewLimit;
2269  if (cast<SCEVConstant>(ARStep)->getValue()->isNegative())
2270  NewLimit = Start - Count;
2271  else
2272  NewLimit = Start + Count;
2273  ExitCnt = ConstantInt::get(CmpIndVar->getType(), NewLimit);
2274 
2275  DEBUG(dbgs() << " Widen RHS:\t" << *ExitCnt << "\n");
2276  } else {
2277  // We try to extend trip count first. If that doesn't work we truncate IV.
2278  // Zext(trunc(IV)) == IV implies equivalence of the following two:
2279  // Trunc(IV) == ExitCnt and IV == zext(ExitCnt). Similarly for sext. If
2280  // one of the two holds, extend the trip count, otherwise we truncate IV.
2281  bool Extended = false;
2282  const SCEV *IV = SE->getSCEV(CmpIndVar);
2283  const SCEV *ZExtTrunc =
2284  SE->getZeroExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2285  ExitCnt->getType()),
2286  CmpIndVar->getType());
2287 
2288  if (ZExtTrunc == IV) {
2289  Extended = true;
2290  ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
2291  "wide.trip.count");
2292  } else {
2293  const SCEV *SExtTrunc =
2294  SE->getSignExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2295  ExitCnt->getType()),
2296  CmpIndVar->getType());
2297  if (SExtTrunc == IV) {
2298  Extended = true;
2299  ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
2300  "wide.trip.count");
2301  }
2302  }
2303 
2304  if (!Extended)
2305  CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
2306  "lftr.wideiv");
2307  }
2308  }
2309  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
2310  Value *OrigCond = BI->getCondition();
2311  // It's tempting to use replaceAllUsesWith here to fully replace the old
2312  // comparison, but that's not immediately safe, since users of the old
2313  // comparison may not be dominated by the new comparison. Instead, just
2314  // update the branch to use the new comparison; in the common case this
2315  // will make old comparison dead.
2316  BI->setCondition(Cond);
2317  DeadInsts.push_back(OrigCond);
2318 
2319  ++NumLFTR;
2320  Changed = true;
2321  return Cond;
2322 }
2323 
2324 //===----------------------------------------------------------------------===//
2325 // sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
2326 //===----------------------------------------------------------------------===//
2327 
2328 /// If there's a single exit block, sink any loop-invariant values that
2329 /// were defined in the preheader but not used inside the loop into the
2330 /// exit block to reduce register pressure in the loop.
2331 void IndVarSimplify::sinkUnusedInvariants(Loop *L) {
2332  BasicBlock *ExitBlock = L->getExitBlock();
2333  if (!ExitBlock) return;
2334 
2335  BasicBlock *Preheader = L->getLoopPreheader();
2336  if (!Preheader) return;
2337 
2338  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
2339  BasicBlock::iterator I(Preheader->getTerminator());
2340  while (I != Preheader->begin()) {
2341  --I;
2342  // New instructions were inserted at the end of the preheader.
2343  if (isa<PHINode>(I))
2344  break;
2345 
2346  // Don't move instructions which might have side effects, since the side
2347  // effects need to complete before instructions inside the loop. Also don't
2348  // move instructions which might read memory, since the loop may modify
2349  // memory. Note that it's okay if the instruction might have undefined
2350  // behavior: LoopSimplify guarantees that the preheader dominates the exit
2351  // block.
2352  if (I->mayHaveSideEffects() || I->mayReadFromMemory())
2353  continue;
2354 
2355  // Skip debug info intrinsics.
2356  if (isa<DbgInfoIntrinsic>(I))
2357  continue;
2358 
2359  // Skip eh pad instructions.
2360  if (I->isEHPad())
2361  continue;
2362 
2363  // Don't sink alloca: we never want to sink static alloca's out of the
2364  // entry block, and correctly sinking dynamic alloca's requires
2365  // checks for stacksave/stackrestore intrinsics.
2366  // FIXME: Refactor this check somehow?
2367  if (isa<AllocaInst>(I))
2368  continue;
2369 
2370  // Determine if there is a use in or before the loop (direct or
2371  // otherwise).
2372  bool UsedInLoop = false;
2373  for (Use &U : I->uses()) {
2374  Instruction *User = cast<Instruction>(U.getUser());
2375  BasicBlock *UseBB = User->getParent();
2376  if (PHINode *P = dyn_cast<PHINode>(User)) {
2377  unsigned i =
2378  PHINode::getIncomingValueNumForOperand(U.getOperandNo());
2379  UseBB = P->getIncomingBlock(i);
2380  }
2381  if (UseBB == Preheader || L->contains(UseBB)) {
2382  UsedInLoop = true;
2383  break;
2384  }
2385  }
2386 
2387  // If there is, the def must remain in the preheader.
2388  if (UsedInLoop)
2389  continue;
2390 
2391  // Otherwise, sink it to the exit block.
2392  Instruction *ToMove = &*I;
2393  bool Done = false;
2394 
2395  if (I != Preheader->begin()) {
2396  // Skip debug info intrinsics.
2397  do {
2398  --I;
2399  } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
2400 
2401  if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
2402  Done = true;
2403  } else {
2404  Done = true;
2405  }
2406 
2407  ToMove->moveBefore(*ExitBlock, InsertPt);
2408  if (Done) break;
2409  InsertPt = ToMove->getIterator();
2410  }
2411 }
2412 
2413 //===----------------------------------------------------------------------===//
2414 // IndVarSimplify driver. Manage several subpasses of IV simplification.
2415 //===----------------------------------------------------------------------===//
2416 
2417 bool IndVarSimplify::run(Loop *L) {
2418  // We need (and expect!) the incoming loop to be in LCSSA.
2419  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2420  "LCSSA required to run indvars!");
2421 
2422  // If LoopSimplify form is not available, stay out of trouble. Some notes:
2423  // - LSR currently only supports LoopSimplify-form loops. Indvars'
2424  // canonicalization can be a pessimization without LSR to "clean up"
2425  // afterwards.
2426  // - We depend on having a preheader; in particular,
2427  // Loop::getCanonicalInductionVariable only supports loops with preheaders,
2428  // and we're in trouble if we can't find the induction variable even when
2429  // we've manually inserted one.
2430  // - LFTR relies on having a single backedge.
2431  if (!L->isLoopSimplifyForm())
2432  return false;
2433 
2434  // If there are any floating-point recurrences, attempt to
2435  // transform them to use integer recurrences.
2436  rewriteNonIntegerIVs(L);
2437 
2438  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
2439 
2440  // Create a rewriter object which we'll use to transform the code with.
2441  SCEVExpander Rewriter(*SE, DL, "indvars");
2442 #ifndef NDEBUG
2443  Rewriter.setDebugType(DEBUG_TYPE);
2444 #endif
2445 
2446  // Eliminate redundant IV users.
2447  //
2448  // Simplification works best when run before other consumers of SCEV. We
2449  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
2450  // other expressions involving loop IVs have been evaluated. This helps SCEV
2451  // set no-wrap flags before normalizing sign/zero extension.
2452  Rewriter.disableCanonicalMode();
2453  simplifyAndExtend(L, Rewriter, LI);
2454 
2455  // Check to see if this loop has a computable loop-invariant execution count.
2456  // If so, this means that we can compute the final value of any expressions
2457  // that are recurrent in the loop, and substitute the exit values from the
2458  // loop into any instructions outside of the loop that use the final values of
2459  // the current expressions.
2460  //
2461  if (ReplaceExitValue != NeverRepl &&
2462  !isa<SCEVCouldNotCompute>(BackedgeTakenCount))
2463  rewriteLoopExitValues(L, Rewriter);
2464 
2465  // Eliminate redundant IV cycles.
2466  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
2467 
2468  // If we have a trip count expression, rewrite the loop's exit condition
2469  // using it. We can currently only handle loops with a single exit.
2470  if (!DisableLFTR && canExpandBackedgeTakenCount(L, SE, Rewriter) &&
2471  needsLFTR(L, DT)) {
2472  PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT);
2473  if (IndVar) {
2474  // Check preconditions for proper SCEVExpander operation. SCEV does not
2475  // express SCEVExpander's dependencies, such as LoopSimplify. Instead any
2476  // pass that uses the SCEVExpander must do it. This does not work well for
2477  // loop passes because SCEVExpander makes assumptions about all loops,
2478  // while LoopPassManager only forces the current loop to be simplified.
2479  //
2480  // FIXME: SCEV expansion has no way to bail out, so the caller must
2481  // explicitly check any assumptions made by SCEV. Brittle.
2482  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount);
2483  if (!AR || AR->getLoop()->getLoopPreheader())
2484  (void)linearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
2485  Rewriter);
2486  }
2487  }
2488  // Clear the rewriter cache, because values that are in the rewriter's cache
2489  // can be deleted in the loop below, causing the AssertingVH in the cache to
2490  // trigger.
2491  Rewriter.clear();
2492 
2493  // Now that we're done iterating through lists, clean up any instructions
2494  // which are now dead.
2495  while (!DeadInsts.empty())
2496  if (Instruction *Inst =
2497  dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
2499 
2500  // The Rewriter may not be used from this point on.
2501 
2502  // Loop-invariant instructions in the preheader that aren't used in the
2503  // loop may be sunk below the loop to reduce register pressure.
2504  sinkUnusedInvariants(L);
2505 
2506  // rewriteFirstIterationLoopExitValues does not rely on the computation of
2507  // trip count and therefore can further simplify exit values in addition to
2508  // rewriteLoopExitValues.
2509  rewriteFirstIterationLoopExitValues(L);
2510 
2511  // Clean up dead instructions.
2512  Changed |= DeleteDeadPHIs(L->getHeader(), TLI);
2513 
2514  // Check a post-condition.
2515  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2516  "Indvars did not preserve LCSSA!");
2517 
2518  // Verify that LFTR, and any other change have not interfered with SCEV's
2519  // ability to compute trip count.
2520 #ifndef NDEBUG
2521  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
2522  SE->forgetLoop(L);
2523  const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
2524  if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
2525  SE->getTypeSizeInBits(NewBECount->getType()))
2526  NewBECount = SE->getTruncateOrNoop(NewBECount,
2527  BackedgeTakenCount->getType());
2528  else
2529  BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
2530  NewBECount->getType());
2531  assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV");
2532  }
2533 #endif
2534 
2535  return Changed;
2536 }
2537 
2540  LPMUpdater &) {
2541  Function *F = L.getHeader()->getParent();
2542  const DataLayout &DL = F->getParent()->getDataLayout();
2543 
2544  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI);
2545  if (!IVS.run(&L))
2546  return PreservedAnalyses::all();
2547 
2548  auto PA = getLoopPassPreservedAnalyses();
2549  PA.preserveSet<CFGAnalyses>();
2550  return PA;
2551 }
2552 
2553 namespace {
2554 
2555 struct IndVarSimplifyLegacyPass : public LoopPass {
2556  static char ID; // Pass identification, replacement for typeid
2557 
2558  IndVarSimplifyLegacyPass() : LoopPass(ID) {
2560  }
2561 
2562  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
2563  if (skipLoop(L))
2564  return false;
2565 
2566  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2567  auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2568  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2569  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2570  auto *TLI = TLIP ? &TLIP->getTLI() : nullptr;
2571  auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
2572  auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
2573  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2574 
2575  IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI);
2576  return IVS.run(L);
2577  }
2578 
2579  void getAnalysisUsage(AnalysisUsage &AU) const override {
2580  AU.setPreservesCFG();
2582  }
2583 };
2584 
2585 } // end anonymous namespace
2586 
2588 
2589 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
2590  "Induction Variable Simplification", false, false)
2592 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
2593  "Induction Variable Simplification", false, false)
2594 
2596  return new IndVarSimplifyLegacyPass();
2597 }
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:81
uint64_t CallInst * C
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:67
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
bool hoistIVInc(Instruction *IncV, Instruction *InsertPos)
Utility for hoisting an IV increment.
Induction Variable Simplification
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:72
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1677
iterator_range< use_iterator > uses()
Definition: Value.h:360
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:157
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Value * getExactExistingExpansion(const SCEV *S, const Instruction *At, Loop *L)
Try to find existing LLVM IR value for S available at the point At.
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT)
Perform a quick domtree based check for loop invariance assuming that V is used within the loop...
MutableArrayRef< T > makeMutableArrayRef(T &OneElt)
Construct a MutableArrayRef from a single element.
Definition: ArrayRef.h:503
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
This header provides classes for managing a pipeline of passes over loops in LLVM IR...
Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:136
bool isRecursivelyLCSSAForm(DominatorTree &DT, const LoopInfo &LI) const
Return true if this Loop and all inner subloops are in LCSSA form.
Definition: LoopInfo.cpp:182
bool isLCSSAForm(DominatorTree &DT) const
Return true if the Loop is in LCSSA form.
Definition: LoopInfo.cpp:175
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
The main scalar evolution driver.
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:866
bool isZero() const
Return true if the expression is a constant zero.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:106
bool isHighCostExpansion(const SCEV *Expr, Loop *L, const Instruction *At=nullptr)
Return true for expressions that may incur non-trivial cost to evaluate at runtime.
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:859
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:738
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:869
uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
void setDebugType(const char *s)
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
findNearestCommonDominator - Find nearest common dominator basic block for basic block A and B...
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
BasicBlock * getSuccessor(unsigned i) const
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1429
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
STATISTIC(NumFunctions, "Total number of functions")
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
F(f)
static cl::opt< bool > UsePostIncrementRanges("indvars-post-increment-ranges", cl::Hidden, cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), cl::init(true))
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:177
Hexagon Common GEP
Value * getCondition() const
static ConstantRange makeAllowedICmpRegion(CmpInst::Predicate Pred, const ConstantRange &Other)
Produce the smallest range such that all values that may satisfy the given predicate with any value c...
This defines the Use class.
static bool hasConcreteDef(Value *V)
Return true if the given value is concrete.
ReplaceExitVal
bool hasLoopInvariantOperands(const Instruction *I) const
Return true if all the operands of the specified instruction are loop invariant.
Definition: LoopInfo.cpp:62
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1488
INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars", "Induction Variable Simplification", false, false) INITIALIZE_PASS_END(IndVarSimplifyLegacyPass
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:252
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1069
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:191
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:864
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
static Value * genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L, SCEVExpander &Rewriter, ScalarEvolution *SE)
Help linearFunctionTestReplace by generating a value that holds the RHS of the new loop test...
Interface for visiting interesting IV users that are recognized but not simplified by this utility...
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr it the function does no...
Definition: BasicBlock.cpp:116
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
bool isSigned() const
Determine if this instruction is using a signed comparison.
Definition: InstrTypes.h:1022
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:591
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE, etc.
Definition: InstrTypes.h:951
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:361
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:678
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:362
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:560
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal)
Convert APF to an integer, if possible.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:707
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:860
This file implements a class to represent arbitrary precision integral constant values and operations...
BlockT * getHeader() const
Definition: LoopInfo.h:100
Key
PAL metadata keys.
void findDbgValues(SmallVectorImpl< DbgValueInst *> &DbgValues, Value *V)
Finds the llvm.dbg.value intrinsics describing a value.
Definition: Local.cpp:1405
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:813
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
Value handle that is nullable, but tries to track the Value.
Definition: ValueHandle.h:182
This node represents a polynomial recurrence on the trip count of the specified loop.
This instruction compares its operands according to the predicate given to the constructor.
static Instruction * getInsertPointForUses(Instruction *User, Value *Def, DominatorTree *DT, LoopInfo *LI)
Determine the insertion point for this user.
static cl::opt< bool > VerifyIndvars("verify-indvars", cl::Hidden, cl::desc("Verify the ScalarEvolution result after running indvars"))
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Examine each PHI in the given block and delete it if it is dead.
const SCEV * getSizeOfExpr(Type *IntTy, Type *AllocTy)
Return an expression for sizeof AllocTy that is type IntTy.
ConstantRange intersectWith(const ConstantRange &CR) const
Return the range that results from the intersection of this range with another range.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:126
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:152
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:439
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1426
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:301
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:142
static BinaryOperator * CreateAdd(Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block...
Definition: IRBuilder.h:128
Value * getOperand(unsigned i) const
Definition: User.h:154
void getUniqueExitBlocks(SmallVectorImpl< BasicBlock *> &ExitBlocks) const
Return all unique successor blocks of this loop.
Definition: LoopInfo.cpp:393
void initializeIndVarSimplifyLegacyPassPass(PassRegistry &)
void clearInsertPoint()
Clear the current insertion point.
static MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition: Metadata.cpp:106
void clear()
Erase the contents of the InsertedExpressions map so that users trying to expand the same expression ...
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:406
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt...
Definition: PatternMatch.h:249
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:200
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:282
static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE, SCEVExpander &Rewriter)
Return true if this loop&#39;s backedge taken count expression can be safely and cheaply expanded into an...
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
Conditional or Unconditional Branch instruction.
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl< Value *> &Visited, unsigned Depth)
Recursive helper for hasConcreteDef().
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:626
Value * getIncomingValueForBlock(const BasicBlock *BB) const
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:221
const Instruction & front() const
Definition: BasicBlock.h:264
#define H(x, y, z)
Definition: MD5.cpp:57
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:264
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE, const TargetTransformInfo *TTI)
Update information about the induction variable that is extended by this sign or zero extend operatio...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
bool mayHaveSideEffects() const
Return true if the instruction may have side effects.
Definition: Instruction.h:536
const SCEV * getAddExpr(SmallVectorImpl< const SCEV *> &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:419
Represent the analysis usage information of a pass.
This file declares a class to represent arbitrary precision floating point values and provide a varie...
bool any_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:821
This instruction compares its operands according to the predicate given to the constructor.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:853
Utility class for integer operators which may exhibit overflow - Add, Sub, Mul, and Shl...
Definition: Operator.h:67
Value * expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I)
Insert code to directly compute the specified SCEV expression into the program.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values...
op_range operands()
Definition: User.h:222
self_iterator getIterator()
Definition: ilist_node.h:82
Class to represent integer types.
Definition: DerivedTypes.h:40
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:76
void getExitingBlocks(SmallVectorImpl< BlockT *> &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:35
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1356
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr)
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:423
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
static unsigned getIncomingValueNumForOperand(unsigned i)
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
static ICmpInst * getLoopTest(Loop *L)
Return the compare guarding the loop latch, or NULL for unrecognized tests.
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:868
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1423
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:56
signed greater than
Definition: InstrTypes.h:880
#define DEBUG_TYPE
const APFloat & getValueAPF() const
Definition: Constants.h:299
Value * CreateGEP(Value *Ptr, ArrayRef< Value *> IdxList, const Twine &Name="")
Definition: IRBuilder.h:1266
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:857
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:110
unsigned getSCEVType() const
Iterator for intrusive lists based on ilist_node.
unsigned getNumOperands() const
Definition: User.h:176
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
Type * getType() const
Return the LLVM type of this SCEV expression.
constexpr bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:301
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
const SCEV * getTruncateExpr(const SCEV *Op, Type *Ty)
static ValueAsMetadata * get(Value *V)
Definition: Metadata.cpp:349
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:867
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:862
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:243
Module.h This file contains the declarations for the Module class.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:64
Provides information about what library functions are available for the current target.
This class represents a range of values.
Definition: ConstantRange.h:47
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU...
Definition: DataLayout.h:241
signed less than
Definition: InstrTypes.h:882
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:383
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:585
static PHINode * getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT)
Return the loop header phi IFF IncV adds a loop invariant value to the phi.
bool isConditional() const
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:285
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Value handle that asserts if the Value is deleted.
Definition: ValueHandle.h:238
static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI)
This IV user cannot be widen.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
Definition: PatternMatch.h:674
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:923
signed less or equal
Definition: InstrTypes.h:883
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:55
Class for arbitrary precision integers.
Definition: APInt.h:69
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), Instruction *InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
iterator_range< user_iterator > users()
Definition: Value.h:405
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property...
Definition: Operator.h:96
static cl::opt< ReplaceExitVal > ReplaceExitValue("replexitval", cl::Hidden, cl::init(OnlyCheapRepl), cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), clEnumValN(OnlyCheapRepl, "cheap", "only replace exit value when the cost is cheap"), clEnumValN(AlwaysRepl, "always", "always replace exit value whenever possible")))
This class uses information about analyze scalars to rewrite expressions in canonical form...
Represents analyses that only rely on functions&#39; control flow.
Definition: PassManager.h:114
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:523
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:601
LoopT * getParentLoop() const
Definition: LoopInfo.h:101
Virtual Register Rewriter
Definition: VirtRegMap.cpp:221
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:927
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
Definition: LoopInfo.cpp:191
void emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:654
This class represents an analyzed expression in the program.
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:176
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:61
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:439
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:149
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:224
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:108
#define I(x, y, z)
Definition: MD5.cpp:58
user_iterator_impl< User > user_iterator
Definition: Value.h:374
bool mayReadFromMemory() const
Return true if this instruction may read memory.
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass&#39;s AnalysisUsage.
Definition: LoopUtils.cpp:1221
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:861
static bool needsLFTR(Loop *L, DominatorTree *DT)
linearFunctionTestReplace policy.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:308
bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, SmallVectorImpl< WeakTrackingVH > &Dead, SCEVExpander &Rewriter, IVVisitor *V=nullptr)
simplifyUsersOfIV - Simplify instructions that use this induction variable by using ScalarEvolution t...
int getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info=OK_AnyValue, OperandValueKind Opd2Info=OK_AnyValue, OperandValueProperties Opd1PropInfo=OP_None, OperandValueProperties Opd2PropInfo=OP_None, ArrayRef< const Value *> Args=ArrayRef< const Value *>()) const
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:865
unsigned replaceCongruentIVs(Loop *L, const DominatorTree *DT, SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetTransformInfo *TTI=nullptr)
replace congruent phis with their most canonical representative.
const SCEV * getBackedgeTakenCount(const Loop *L)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
This class represents a cast from signed integer to floating point.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
user_iterator user_begin()
Definition: Value.h:381
bool isOne() const
Return true if the expression is a constant one.
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:856
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:565
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction has no side ef...
Definition: Local.cpp:346
LLVM Value Representation.
Definition: Value.h:73
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:866
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:235
static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond)
Return true if this IV has any uses other than the (soon to be rewritten) loop exit test.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos lives in, right before MovePos.
Definition: Instruction.cpp:86
#define DEBUG(X)
Definition: Debug.h:118
void disableCanonicalMode()
Disable the behavior of expanding expressions in canonical form rather than in a more literal form.
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:547
bool hasOneUse() const
Return true if there is exactly one user of this value.
Definition: Value.h:418
A container for analyses that lazily runs them and caches their results.
const SCEV * getTruncateOrZeroExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
This pass exposes codegen information to IR-level passes.
const TerminatorInst * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well formed.
Definition: BasicBlock.cpp:120
This header defines various interfaces for pass management in LLVM.
void setIncomingValue(unsigned i, Value *V)
bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE)
Return true if the given expression is safe to expand in the sense that all materialized values are safe to speculate anywhere their operands are defined.
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:156
static cl::opt< bool > DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), cl::desc("Disable Linear Function Test Replace optimization"))
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:858
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:50
bool use_empty() const
Definition: Value.h:328
Pass * createIndVarSimplifyPass()
signed greater or equal
Definition: InstrTypes.h:881
IntegerType * Int32Ty
const BasicBlock * getParent() const
Definition: Instruction.h:67
This class represents a constant integer value.
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
Definition: PatternMatch.h:874
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property...
Definition: Operator.h:90
user_iterator user_end()
Definition: Value.h:389
static PHINode * FindLoopCounter(Loop *L, const SCEV *BECount, ScalarEvolution *SE, DominatorTree *DT)
Find an affine IV in canonical form.