LLVM  6.0.0svn
IndVarSimplify.cpp
Go to the documentation of this file.
1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This transformation analyzes and transforms the induction variables (and
11 // computations derived from them) into simpler forms suitable for subsequent
12 // analysis and transformation.
13 //
14 // If the trip count of a loop is computable, this pass also makes the following
15 // changes:
16 // 1. The exit condition for the loop is canonicalized to compare the
17 // induction value against the exit value. This turns loops like:
18 // 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
19 // 2. Any use outside of the loop of an expression derived from the indvar
20 // is changed to compute the derived value outside of the loop, eliminating
21 // the dependence on the exit value of the induction variable. If the only
22 // purpose of the loop is to compute the exit value of some derived
23 // expression, this transformation will make the loop dead.
24 //
25 //===----------------------------------------------------------------------===//
26 
28 #include "llvm/ADT/APFloat.h"
29 #include "llvm/ADT/APInt.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/DenseMap.h"
32 #include "llvm/ADT/None.h"
33 #include "llvm/ADT/Optional.h"
34 #include "llvm/ADT/STLExtras.h"
35 #include "llvm/ADT/SmallPtrSet.h"
36 #include "llvm/ADT/SmallVector.h"
37 #include "llvm/ADT/Statistic.h"
39 #include "llvm/Analysis/LoopInfo.h"
40 #include "llvm/Analysis/LoopPass.h"
46 #include "llvm/IR/BasicBlock.h"
47 #include "llvm/IR/Constant.h"
48 #include "llvm/IR/ConstantRange.h"
49 #include "llvm/IR/Constants.h"
50 #include "llvm/IR/DataLayout.h"
52 #include "llvm/IR/DerivedTypes.h"
53 #include "llvm/IR/Dominators.h"
54 #include "llvm/IR/Function.h"
55 #include "llvm/IR/IRBuilder.h"
56 #include "llvm/IR/InstrTypes.h"
57 #include "llvm/IR/Instruction.h"
58 #include "llvm/IR/Instructions.h"
59 #include "llvm/IR/IntrinsicInst.h"
60 #include "llvm/IR/Intrinsics.h"
61 #include "llvm/IR/Module.h"
62 #include "llvm/IR/Operator.h"
63 #include "llvm/IR/PassManager.h"
64 #include "llvm/IR/PatternMatch.h"
65 #include "llvm/IR/Type.h"
66 #include "llvm/IR/Use.h"
67 #include "llvm/IR/User.h"
68 #include "llvm/IR/Value.h"
69 #include "llvm/IR/ValueHandle.h"
70 #include "llvm/Pass.h"
71 #include "llvm/Support/Casting.h"
73 #include "llvm/Support/Compiler.h"
74 #include "llvm/Support/Debug.h"
78 #include "llvm/Transforms/Scalar.h"
84 #include <cassert>
85 #include <cstdint>
86 #include <utility>
87 
88 using namespace llvm;
89 
90 #define DEBUG_TYPE "indvars"
91 
92 STATISTIC(NumWidened , "Number of indvars widened");
93 STATISTIC(NumReplaced , "Number of exit values replaced");
94 STATISTIC(NumLFTR , "Number of loop exit tests replaced");
95 STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
96 STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
97 
98 // Trip count verification can be enabled by default under NDEBUG if we
99 // implement a strong expression equivalence checker in SCEV. Until then, we
100 // use the verify-indvars flag, which may assert in some cases.
102  "verify-indvars", cl::Hidden,
103  cl::desc("Verify the ScalarEvolution result after running indvars"));
104 
106 
108  "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
109  cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
110  cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
111  clEnumValN(OnlyCheapRepl, "cheap",
112  "only replace exit value when the cost is cheap"),
113  clEnumValN(AlwaysRepl, "always",
114  "always replace exit value whenever possible")));
115 
117  "indvars-post-increment-ranges", cl::Hidden,
118  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
119  cl::init(true));
120 
121 static cl::opt<bool>
122 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
123  cl::desc("Disable Linear Function Test Replace optimization"));
124 
125 namespace {
126 
127 struct RewritePhi;
128 
129 class IndVarSimplify {
130  LoopInfo *LI;
131  ScalarEvolution *SE;
132  DominatorTree *DT;
133  const DataLayout &DL;
134  TargetLibraryInfo *TLI;
135  const TargetTransformInfo *TTI;
136 
138  bool Changed = false;
139 
140  bool isValidRewrite(Value *FromVal, Value *ToVal);
141 
142  void handleFloatingPointIV(Loop *L, PHINode *PH);
143  void rewriteNonIntegerIVs(Loop *L);
144 
145  void simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
146 
147  bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
148  void rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
149  void rewriteFirstIterationLoopExitValues(Loop *L);
150 
151  Value *linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
152  PHINode *IndVar, SCEVExpander &Rewriter);
153 
154  void sinkUnusedInvariants(Loop *L);
155 
156  Value *expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, Loop *L,
157  Instruction *InsertPt, Type *Ty);
158 
159 public:
160  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
161  const DataLayout &DL, TargetLibraryInfo *TLI,
162  TargetTransformInfo *TTI)
163  : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {}
164 
165  bool run(Loop *L);
166 };
167 
168 } // end anonymous namespace
169 
170 /// Return true if the SCEV expansion generated by the rewriter can replace the
171 /// original value. SCEV guarantees that it produces the same value, but the way
172 /// it is produced may be illegal IR. Ideally, this function will only be
173 /// called for verification.
174 bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
175  // If an SCEV expression subsumed multiple pointers, its expansion could
176  // reassociate the GEP changing the base pointer. This is illegal because the
177  // final address produced by a GEP chain must be inbounds relative to its
178  // underlying object. Otherwise basic alias analysis, among other things,
179  // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
180  // producing an expression involving multiple pointers. Until then, we must
181  // bail out here.
182  //
183  // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
184  // because it understands lcssa phis while SCEV does not.
185  Value *FromPtr = FromVal;
186  Value *ToPtr = ToVal;
187  if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
188  FromPtr = GEP->getPointerOperand();
189  }
190  if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
191  ToPtr = GEP->getPointerOperand();
192  }
193  if (FromPtr != FromVal || ToPtr != ToVal) {
194  // Quickly check the common case
195  if (FromPtr == ToPtr)
196  return true;
197 
198  // SCEV may have rewritten an expression that produces the GEP's pointer
199  // operand. That's ok as long as the pointer operand has the same base
200  // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
201  // base of a recurrence. This handles the case in which SCEV expansion
202  // converts a pointer type recurrence into a nonrecurrent pointer base
203  // indexed by an integer recurrence.
204 
205  // If the GEP base pointer is a vector of pointers, abort.
206  if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
207  return false;
208 
209  const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
210  const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
211  if (FromBase == ToBase)
212  return true;
213 
214  DEBUG(dbgs() << "INDVARS: GEP rewrite bail out "
215  << *FromBase << " != " << *ToBase << "\n");
216 
217  return false;
218  }
219  return true;
220 }
221 
222 /// Determine the insertion point for this user. By default, insert immediately
223 /// before the user. SCEVExpander or LICM will hoist loop invariants out of the
224 /// loop. For PHI nodes, there may be multiple uses, so compute the nearest
225 /// common dominator for the incoming blocks.
227  DominatorTree *DT, LoopInfo *LI) {
228  PHINode *PHI = dyn_cast<PHINode>(User);
229  if (!PHI)
230  return User;
231 
232  Instruction *InsertPt = nullptr;
233  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
234  if (PHI->getIncomingValue(i) != Def)
235  continue;
236 
237  BasicBlock *InsertBB = PHI->getIncomingBlock(i);
238  if (!InsertPt) {
239  InsertPt = InsertBB->getTerminator();
240  continue;
241  }
242  InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
243  InsertPt = InsertBB->getTerminator();
244  }
245  assert(InsertPt && "Missing phi operand");
246 
247  auto *DefI = dyn_cast<Instruction>(Def);
248  if (!DefI)
249  return InsertPt;
250 
251  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");
252 
253  auto *L = LI->getLoopFor(DefI->getParent());
254  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));
255 
256  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
257  if (LI->getLoopFor(DTN->getBlock()) == L)
258  return DTN->getBlock()->getTerminator();
259 
260  llvm_unreachable("DefI dominates InsertPt!");
261 }
262 
263 //===----------------------------------------------------------------------===//
264 // rewriteNonIntegerIVs and helpers. Prefer integer IVs.
265 //===----------------------------------------------------------------------===//
266 
267 /// Convert APF to an integer, if possible.
268 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
269  bool isExact = false;
270  // See if we can convert this to an int64_t
271  uint64_t UIntVal;
272  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
273  APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
274  !isExact)
275  return false;
276  IntVal = UIntVal;
277  return true;
278 }
279 
280 /// If the loop has floating induction variable then insert corresponding
281 /// integer induction variable if possible.
282 /// For example,
283 /// for(double i = 0; i < 10000; ++i)
284 /// bar(i)
285 /// is converted into
286 /// for(int i = 0; i < 10000; ++i)
287 /// bar((double)i);
288 void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
289  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
290  unsigned BackEdge = IncomingEdge^1;
291 
292  // Check incoming value.
293  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
294 
295  int64_t InitValue;
296  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
297  return;
298 
299  // Check IV increment. Reject this PN if increment operation is not
300  // an add or increment value can not be represented by an integer.
301  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
302  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return;
303 
304  // If this is not an add of the PHI with a constantfp, or if the constant fp
305  // is not an integer, bail out.
306  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
307  int64_t IncValue;
308  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
309  !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
310  return;
311 
312  // Check Incr uses. One user is PN and the other user is an exit condition
313  // used by the conditional terminator.
314  Value::user_iterator IncrUse = Incr->user_begin();
315  Instruction *U1 = cast<Instruction>(*IncrUse++);
316  if (IncrUse == Incr->user_end()) return;
317  Instruction *U2 = cast<Instruction>(*IncrUse++);
318  if (IncrUse != Incr->user_end()) return;
319 
320  // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
321  // only used by a branch, we can't transform it.
323  if (!Compare)
324  Compare = dyn_cast<FCmpInst>(U2);
325  if (!Compare || !Compare->hasOneUse() ||
326  !isa<BranchInst>(Compare->user_back()))
327  return;
328 
329  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
330 
331  // We need to verify that the branch actually controls the iteration count
332  // of the loop. If not, the new IV can overflow and no one will notice.
333  // The branch block must be in the loop and one of the successors must be out
334  // of the loop.
335  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
336  if (!L->contains(TheBr->getParent()) ||
337  (L->contains(TheBr->getSuccessor(0)) &&
338  L->contains(TheBr->getSuccessor(1))))
339  return;
340 
341  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
342  // transform it.
343  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
344  int64_t ExitValue;
345  if (ExitValueVal == nullptr ||
346  !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
347  return;
348 
349  // Find new predicate for integer comparison.
351  switch (Compare->getPredicate()) {
352  default: return; // Unknown comparison.
353  case CmpInst::FCMP_OEQ:
354  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
355  case CmpInst::FCMP_ONE:
356  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
357  case CmpInst::FCMP_OGT:
358  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
359  case CmpInst::FCMP_OGE:
360  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
361  case CmpInst::FCMP_OLT:
362  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
363  case CmpInst::FCMP_OLE:
364  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
365  }
366 
367  // We convert the floating point induction variable to a signed i32 value if
368  // we can. This is only safe if the comparison will not overflow in a way
369  // that won't be trapped by the integer equivalent operations. Check for this
370  // now.
371  // TODO: We could use i64 if it is native and the range requires it.
372 
373  // The start/stride/exit values must all fit in signed i32.
374  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
375  return;
376 
377  // If not actually striding (add x, 0.0), avoid touching the code.
378  if (IncValue == 0)
379  return;
380 
381  // Positive and negative strides have different safety conditions.
382  if (IncValue > 0) {
383  // If we have a positive stride, we require the init to be less than the
384  // exit value.
385  if (InitValue >= ExitValue)
386  return;
387 
388  uint32_t Range = uint32_t(ExitValue-InitValue);
389  // Check for infinite loop, either:
390  // while (i <= Exit) or until (i > Exit)
391  if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
392  if (++Range == 0) return; // Range overflows.
393  }
394 
395  unsigned Leftover = Range % uint32_t(IncValue);
396 
397  // If this is an equality comparison, we require that the strided value
398  // exactly land on the exit value, otherwise the IV condition will wrap
399  // around and do things the fp IV wouldn't.
400  if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
401  Leftover != 0)
402  return;
403 
404  // If the stride would wrap around the i32 before exiting, we can't
405  // transform the IV.
406  if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
407  return;
408  } else {
409  // If we have a negative stride, we require the init to be greater than the
410  // exit value.
411  if (InitValue <= ExitValue)
412  return;
413 
414  uint32_t Range = uint32_t(InitValue-ExitValue);
415  // Check for infinite loop, either:
416  // while (i >= Exit) or until (i < Exit)
417  if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
418  if (++Range == 0) return; // Range overflows.
419  }
420 
421  unsigned Leftover = Range % uint32_t(-IncValue);
422 
423  // If this is an equality comparison, we require that the strided value
424  // exactly land on the exit value, otherwise the IV condition will wrap
425  // around and do things the fp IV wouldn't.
426  if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
427  Leftover != 0)
428  return;
429 
430  // If the stride would wrap around the i32 before exiting, we can't
431  // transform the IV.
432  if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
433  return;
434  }
435 
437 
438  // Insert new integer induction variable.
439  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
440  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
441  PN->getIncomingBlock(IncomingEdge));
442 
443  Value *NewAdd =
444  BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
445  Incr->getName()+".int", Incr);
446  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
447 
448  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
449  ConstantInt::get(Int32Ty, ExitValue),
450  Compare->getName());
451 
452  // In the following deletions, PN may become dead and may be deleted.
453  // Use a WeakTrackingVH to observe whether this happens.
454  WeakTrackingVH WeakPH = PN;
455 
456  // Delete the old floating point exit comparison. The branch starts using the
457  // new comparison.
458  NewCompare->takeName(Compare);
459  Compare->replaceAllUsesWith(NewCompare);
461 
462  // Delete the old floating point increment.
463  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
465 
466  // If the FP induction variable still has uses, this is because something else
467  // in the loop uses its value. In order to canonicalize the induction
468  // variable, we chose to eliminate the IV and rewrite it in terms of an
469  // int->fp cast.
470  //
471  // We give preference to sitofp over uitofp because it is faster on most
472  // platforms.
473  if (WeakPH) {
474  Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
475  &*PN->getParent()->getFirstInsertionPt());
476  PN->replaceAllUsesWith(Conv);
478  }
479  Changed = true;
480 }
481 
482 void IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
483  // First step. Check to see if there are any floating-point recurrences.
484  // If there are, change them into integer recurrences, permitting analysis by
485  // the SCEV routines.
486  BasicBlock *Header = L->getHeader();
487 
489  for (BasicBlock::iterator I = Header->begin();
490  PHINode *PN = dyn_cast<PHINode>(I); ++I)
491  PHIs.push_back(PN);
492 
493  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
494  if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
495  handleFloatingPointIV(L, PN);
496 
497  // If the loop previously had floating-point IV, ScalarEvolution
498  // may not have been able to compute a trip count. Now that we've done some
499  // re-writing, the trip count may be computable.
500  if (Changed)
501  SE->forgetLoop(L);
502 }
503 
504 namespace {
505 
506 // Collect information about PHI nodes which can be transformed in
507 // rewriteLoopExitValues.
508 struct RewritePhi {
509  PHINode *PN;
510 
511  // Ith incoming value.
512  unsigned Ith;
513 
514  // Exit value after expansion.
515  Value *Val;
516 
517  // High Cost when expansion.
518  bool HighCost;
519 
520  RewritePhi(PHINode *P, unsigned I, Value *V, bool H)
521  : PN(P), Ith(I), Val(V), HighCost(H) {}
522 };
523 
524 } // end anonymous namespace
525 
526 Value *IndVarSimplify::expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S,
527  Loop *L, Instruction *InsertPt,
528  Type *ResultTy) {
529  // Before expanding S into an expensive LLVM expression, see if we can use an
530  // already existing value as the expansion for S.
531  if (Value *ExistingValue = Rewriter.getExactExistingExpansion(S, InsertPt, L))
532  if (ExistingValue->getType() == ResultTy)
533  return ExistingValue;
534 
535  // We didn't find anything, fall back to using SCEVExpander.
536  return Rewriter.expandCodeFor(S, ResultTy, InsertPt);
537 }
538 
539 //===----------------------------------------------------------------------===//
540 // rewriteLoopExitValues - Optimize IV users outside the loop.
541 // As a side effect, reduces the amount of IV processing within the loop.
542 //===----------------------------------------------------------------------===//
543 
544 /// Check to see if this loop has a computable loop-invariant execution count.
545 /// If so, this means that we can compute the final value of any expressions
546 /// that are recurrent in the loop, and substitute the exit values from the loop
547 /// into any instructions outside of the loop that use the final values of the
548 /// current expressions.
549 ///
550 /// This is mostly redundant with the regular IndVarSimplify activities that
551 /// happen later, except that it's more powerful in some cases, because it's
552 /// able to brute-force evaluate arbitrary instructions as long as they have
553 /// constant operands at the beginning of the loop.
554 void IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
555  // Check a pre-condition.
556  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
557  "Indvars did not preserve LCSSA!");
558 
559  SmallVector<BasicBlock*, 8> ExitBlocks;
560  L->getUniqueExitBlocks(ExitBlocks);
561 
562  SmallVector<RewritePhi, 8> RewritePhiSet;
563  // Find all values that are computed inside the loop, but used outside of it.
564  // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan
565  // the exit blocks of the loop to find them.
566  for (BasicBlock *ExitBB : ExitBlocks) {
567  // If there are no PHI nodes in this exit block, then no values defined
568  // inside the loop are used on this path, skip it.
569  PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
570  if (!PN) continue;
571 
572  unsigned NumPreds = PN->getNumIncomingValues();
573 
574  // Iterate over all of the PHI nodes.
575  BasicBlock::iterator BBI = ExitBB->begin();
576  while ((PN = dyn_cast<PHINode>(BBI++))) {
577  if (PN->use_empty())
578  continue; // dead use, don't replace it
579 
580  if (!SE->isSCEVable(PN->getType()))
581  continue;
582 
583  // It's necessary to tell ScalarEvolution about this explicitly so that
584  // it can walk the def-use list and forget all SCEVs, as it may not be
585  // watching the PHI itself. Once the new exit value is in place, there
586  // may not be a def-use connection between the loop and every instruction
587  // which got a SCEVAddRecExpr for that loop.
588  SE->forgetValue(PN);
589 
590  // Iterate over all of the values in all the PHI nodes.
591  for (unsigned i = 0; i != NumPreds; ++i) {
592  // If the value being merged in is not integer or is not defined
593  // in the loop, skip it.
594  Value *InVal = PN->getIncomingValue(i);
595  if (!isa<Instruction>(InVal))
596  continue;
597 
598  // If this pred is for a subloop, not L itself, skip it.
599  if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
600  continue; // The Block is in a subloop, skip it.
601 
602  // Check that InVal is defined in the loop.
603  Instruction *Inst = cast<Instruction>(InVal);
604  if (!L->contains(Inst))
605  continue;
606 
607  // Okay, this instruction has a user outside of the current loop
608  // and varies predictably *inside* the loop. Evaluate the value it
609  // contains when the loop exits, if possible.
610  const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
611  if (!SE->isLoopInvariant(ExitValue, L) ||
612  !isSafeToExpand(ExitValue, *SE))
613  continue;
614 
615  // Computing the value outside of the loop brings no benefit if :
616  // - it is definitely used inside the loop in a way which can not be
617  // optimized away.
618  // - no use outside of the loop can take advantage of hoisting the
619  // computation out of the loop
620  if (ExitValue->getSCEVType()>=scMulExpr) {
621  unsigned NumHardInternalUses = 0;
622  unsigned NumSoftExternalUses = 0;
623  unsigned NumUses = 0;
624  for (auto IB = Inst->user_begin(), IE = Inst->user_end();
625  IB != IE && NumUses <= 6; ++IB) {
626  Instruction *UseInstr = cast<Instruction>(*IB);
627  unsigned Opc = UseInstr->getOpcode();
628  NumUses++;
629  if (L->contains(UseInstr)) {
630  if (Opc == Instruction::Call || Opc == Instruction::Ret)
631  NumHardInternalUses++;
632  } else {
633  if (Opc == Instruction::PHI) {
634  // Do not count the Phi as a use. LCSSA may have inserted
635  // plenty of trivial ones.
636  NumUses--;
637  for (auto PB = UseInstr->user_begin(),
638  PE = UseInstr->user_end();
639  PB != PE && NumUses <= 6; ++PB, ++NumUses) {
640  unsigned PhiOpc = cast<Instruction>(*PB)->getOpcode();
641  if (PhiOpc != Instruction::Call && PhiOpc != Instruction::Ret)
642  NumSoftExternalUses++;
643  }
644  continue;
645  }
646  if (Opc != Instruction::Call && Opc != Instruction::Ret)
647  NumSoftExternalUses++;
648  }
649  }
650  if (NumUses <= 6 && NumHardInternalUses && !NumSoftExternalUses)
651  continue;
652  }
653 
654  bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
655  Value *ExitVal =
656  expandSCEVIfNeeded(Rewriter, ExitValue, L, Inst, PN->getType());
657 
658  DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n'
659  << " LoopVal = " << *Inst << "\n");
660 
661  if (!isValidRewrite(Inst, ExitVal)) {
662  DeadInsts.push_back(ExitVal);
663  continue;
664  }
665 
666  // Collect all the candidate PHINodes to be rewritten.
667  RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
668  }
669  }
670  }
671 
672  bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);
673 
674  // Transformation.
675  for (const RewritePhi &Phi : RewritePhiSet) {
676  PHINode *PN = Phi.PN;
677  Value *ExitVal = Phi.Val;
678 
679  // Only do the rewrite when the ExitValue can be expanded cheaply.
680  // If LoopCanBeDel is true, rewrite exit value aggressively.
681  if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
682  DeadInsts.push_back(ExitVal);
683  continue;
684  }
685 
686  Changed = true;
687  ++NumReplaced;
688  Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
689  PN->setIncomingValue(Phi.Ith, ExitVal);
690 
691  // If this instruction is dead now, delete it. Don't do it now to avoid
692  // invalidating iterators.
693  if (isInstructionTriviallyDead(Inst, TLI))
694  DeadInsts.push_back(Inst);
695 
696  // Replace PN with ExitVal if that is legal and does not break LCSSA.
697  if (PN->getNumIncomingValues() == 1 &&
698  LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
699  PN->replaceAllUsesWith(ExitVal);
700  PN->eraseFromParent();
701  }
702  }
703 
704  // The insertion point instruction may have been deleted; clear it out
705  // so that the rewriter doesn't trip over it later.
706  Rewriter.clearInsertPoint();
707 }
708 
709 //===---------------------------------------------------------------------===//
710 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
711 // they will exit at the first iteration.
712 //===---------------------------------------------------------------------===//
713 
714 /// Check to see if this loop has loop invariant conditions which lead to loop
715 /// exits. If so, we know that if the exit path is taken, it is at the first
716 /// loop iteration. This lets us predict exit values of PHI nodes that live in
717 /// loop header.
718 void IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
719  // Verify the input to the pass is already in LCSSA form.
720  assert(L->isLCSSAForm(*DT));
721 
722  SmallVector<BasicBlock *, 8> ExitBlocks;
723  L->getUniqueExitBlocks(ExitBlocks);
724  auto *LoopHeader = L->getHeader();
725  assert(LoopHeader && "Invalid loop");
726 
727  for (auto *ExitBB : ExitBlocks) {
728  BasicBlock::iterator BBI = ExitBB->begin();
729  // If there are no more PHI nodes in this exit block, then no more
730  // values defined inside the loop are used on this path.
731  while (auto *PN = dyn_cast<PHINode>(BBI++)) {
732  for (unsigned IncomingValIdx = 0, E = PN->getNumIncomingValues();
733  IncomingValIdx != E; ++IncomingValIdx) {
734  auto *IncomingBB = PN->getIncomingBlock(IncomingValIdx);
735 
736  // We currently only support loop exits from loop header. If the
737  // incoming block is not loop header, we need to recursively check
738  // all conditions starting from loop header are loop invariants.
739  // Additional support might be added in the future.
740  if (IncomingBB != LoopHeader)
741  continue;
742 
743  // Get condition that leads to the exit path.
744  auto *TermInst = IncomingBB->getTerminator();
745 
746  Value *Cond = nullptr;
747  if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
748  // Must be a conditional branch, otherwise the block
749  // should not be in the loop.
750  Cond = BI->getCondition();
751  } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
752  Cond = SI->getCondition();
753  else
754  continue;
755 
756  if (!L->isLoopInvariant(Cond))
757  continue;
758 
759  auto *ExitVal =
760  dyn_cast<PHINode>(PN->getIncomingValue(IncomingValIdx));
761 
762  // Only deal with PHIs.
763  if (!ExitVal)
764  continue;
765 
766  // If ExitVal is a PHI on the loop header, then we know its
767  // value along this exit because the exit can only be taken
768  // on the first iteration.
769  auto *LoopPreheader = L->getLoopPreheader();
770  assert(LoopPreheader && "Invalid loop");
771  int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
772  if (PreheaderIdx != -1) {
773  assert(ExitVal->getParent() == LoopHeader &&
774  "ExitVal must be in loop header");
775  PN->setIncomingValue(IncomingValIdx,
776  ExitVal->getIncomingValue(PreheaderIdx));
777  }
778  }
779  }
780  }
781 }
782 
783 /// Check whether it is possible to delete the loop after rewriting exit
784 /// value. If it is possible, ignore ReplaceExitValue and do rewriting
785 /// aggressively.
786 bool IndVarSimplify::canLoopBeDeleted(
787  Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
788  BasicBlock *Preheader = L->getLoopPreheader();
789  // If there is no preheader, the loop will not be deleted.
790  if (!Preheader)
791  return false;
792 
793  // In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1.
794  // We obviate multiple ExitingBlocks case for simplicity.
795  // TODO: If we see testcase with multiple ExitingBlocks can be deleted
796  // after exit value rewriting, we can enhance the logic here.
797  SmallVector<BasicBlock *, 4> ExitingBlocks;
798  L->getExitingBlocks(ExitingBlocks);
799  SmallVector<BasicBlock *, 8> ExitBlocks;
800  L->getUniqueExitBlocks(ExitBlocks);
801  if (ExitBlocks.size() > 1 || ExitingBlocks.size() > 1)
802  return false;
803 
804  BasicBlock *ExitBlock = ExitBlocks[0];
805  BasicBlock::iterator BI = ExitBlock->begin();
806  while (PHINode *P = dyn_cast<PHINode>(BI)) {
807  Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);
808 
809  // If the Incoming value of P is found in RewritePhiSet, we know it
810  // could be rewritten to use a loop invariant value in transformation
811  // phase later. Skip it in the loop invariant check below.
812  bool found = false;
813  for (const RewritePhi &Phi : RewritePhiSet) {
814  unsigned i = Phi.Ith;
815  if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
816  found = true;
817  break;
818  }
819  }
820 
821  Instruction *I;
822  if (!found && (I = dyn_cast<Instruction>(Incoming)))
823  if (!L->hasLoopInvariantOperands(I))
824  return false;
825 
826  ++BI;
827  }
828 
829  for (auto *BB : L->blocks())
830  if (llvm::any_of(*BB, [](Instruction &I) {
831  return I.mayHaveSideEffects();
832  }))
833  return false;
834 
835  return true;
836 }
837 
838 //===----------------------------------------------------------------------===//
839 // IV Widening - Extend the width of an IV to cover its widest uses.
840 //===----------------------------------------------------------------------===//
841 
842 namespace {
843 
844 // Collect information about induction variables that are used by sign/zero
845 // extend operations. This information is recorded by CollectExtend and provides
846 // the input to WidenIV.
847 struct WideIVInfo {
848  PHINode *NarrowIV = nullptr;
849 
850  // Widest integer type created [sz]ext
851  Type *WidestNativeType = nullptr;
852 
853  // Was a sext user seen before a zext?
854  bool IsSigned = false;
855 };
856 
857 } // end anonymous namespace
858 
859 /// Update information about the induction variable that is extended by this
860 /// sign or zero extend operation. This is used to determine the final width of
861 /// the IV before actually widening it.
862 static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
863  const TargetTransformInfo *TTI) {
864  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
865  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
866  return;
867 
868  Type *Ty = Cast->getType();
869  uint64_t Width = SE->getTypeSizeInBits(Ty);
870  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
871  return;
872 
873  // Check that `Cast` actually extends the induction variable (we rely on this
874  // later). This takes care of cases where `Cast` is extending a truncation of
875  // the narrow induction variable, and thus can end up being narrower than the
876  // "narrow" induction variable.
877  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
878  if (NarrowIVWidth >= Width)
879  return;
880 
881  // Cast is either an sext or zext up to this point.
882  // We should not widen an indvar if arithmetics on the wider indvar are more
883  // expensive than those on the narrower indvar. We check only the cost of ADD
884  // because at least an ADD is required to increment the induction variable. We
885  // could compute more comprehensively the cost of all instructions on the
886  // induction variable when necessary.
887  if (TTI &&
890  Cast->getOperand(0)->getType())) {
891  return;
892  }
893 
894  if (!WI.WidestNativeType) {
895  WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
896  WI.IsSigned = IsSigned;
897  return;
898  }
899 
900  // We extend the IV to satisfy the sign of its first user, arbitrarily.
901  if (WI.IsSigned != IsSigned)
902  return;
903 
904  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
905  WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
906 }
907 
908 namespace {
909 
910 /// Record a link in the Narrow IV def-use chain along with the WideIV that
911 /// computes the same value as the Narrow IV def. This avoids caching Use*
912 /// pointers.
913 struct NarrowIVDefUse {
914  Instruction *NarrowDef = nullptr;
915  Instruction *NarrowUse = nullptr;
916  Instruction *WideDef = nullptr;
917 
918  // True if the narrow def is never negative. Tracking this information lets
919  // us use a sign extension instead of a zero extension or vice versa, when
920  // profitable and legal.
921  bool NeverNegative = false;
922 
923  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
924  bool NeverNegative)
925  : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
926  NeverNegative(NeverNegative) {}
927 };
928 
929 /// The goal of this transform is to remove sign and zero extends without
930 /// creating any new induction variables. To do this, it creates a new phi of
931 /// the wider type and redirects all users, either removing extends or inserting
932 /// truncs whenever we stop propagating the type.
933 class WidenIV {
934  // Parameters
935  PHINode *OrigPhi;
936  Type *WideType;
937 
938  // Context
939  LoopInfo *LI;
940  Loop *L;
941  ScalarEvolution *SE;
942  DominatorTree *DT;
943 
944  // Does the module have any calls to the llvm.experimental.guard intrinsic
945  // at all? If not we can avoid scanning instructions looking for guards.
946  bool HasGuards;
947 
948  // Result
949  PHINode *WidePhi = nullptr;
950  Instruction *WideInc = nullptr;
951  const SCEV *WideIncExpr = nullptr;
953 
955  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
956 
957  enum ExtendKind { ZeroExtended, SignExtended, Unknown };
958 
959  // A map tracking the kind of extension used to widen each narrow IV
960  // and narrow IV user.
961  // Key: pointer to a narrow IV or IV user.
962  // Value: the kind of extension used to widen this Instruction.
963  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;
964 
965  using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;
966 
967  // A map with control-dependent ranges for post increment IV uses. The key is
968  // a pair of IV def and a use of this def denoting the context. The value is
969  // a ConstantRange representing possible values of the def at the given
970  // context.
971  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;
972 
973  Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
974  Instruction *UseI) {
975  DefUserPair Key(Def, UseI);
976  auto It = PostIncRangeInfos.find(Key);
977  return It == PostIncRangeInfos.end()
979  : Optional<ConstantRange>(It->second);
980  }
981 
982  void calculatePostIncRanges(PHINode *OrigPhi);
983  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);
984 
985  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
986  DefUserPair Key(Def, UseI);
987  auto It = PostIncRangeInfos.find(Key);
988  if (It == PostIncRangeInfos.end())
989  PostIncRangeInfos.insert({Key, R});
990  else
991  It->second = R.intersectWith(It->second);
992  }
993 
994 public:
995  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
997  bool HasGuards)
998  : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
999  L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
1000  HasGuards(HasGuards), DeadInsts(DI) {
1001  assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
1002  ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
1003  }
1004 
1005  PHINode *createWideIV(SCEVExpander &Rewriter);
1006 
1007 protected:
1008  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
1009  Instruction *Use);
1010 
1011  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
1012  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
1013  const SCEVAddRecExpr *WideAR);
1014  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);
1015 
1016  ExtendKind getExtendKind(Instruction *I);
1017 
1018  using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;
1019 
1020  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);
1021 
1022  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);
1023 
1024  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
1025  unsigned OpCode) const;
1026 
1027  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
1028 
1029  bool widenLoopCompare(NarrowIVDefUse DU);
1030 
1031  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
1032 };
1033 
1034 } // end anonymous namespace
1035 
1036 /// Perform a quick domtree based check for loop invariance assuming that V is
1037 /// used within the loop. LoopInfo::isLoopInvariant() seems gratuitous for this
1038 /// purpose.
1039 static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT) {
1040  Instruction *Inst = dyn_cast<Instruction>(V);
1041  if (!Inst)
1042  return true;
1043 
1044  return DT->properlyDominates(Inst->getParent(), L->getHeader());
1045 }
1046 
1047 Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
1048  bool IsSigned, Instruction *Use) {
1049  // Set the debug location and conservative insertion point.
1050  IRBuilder<> Builder(Use);
1051  // Hoist the insertion point into loop preheaders as far as possible.
1052  for (const Loop *L = LI->getLoopFor(Use->getParent());
1053  L && L->getLoopPreheader() && isLoopInvariant(NarrowOper, L, DT);
1054  L = L->getParentLoop())
1056 
1057  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
1058  Builder.CreateZExt(NarrowOper, WideType);
1059 }
1060 
1061 /// Instantiate a wide operation to replace a narrow operation. This only needs
1062 /// to handle operations that can evaluation to SCEVAddRec. It can safely return
1063 /// 0 for any operation we decide not to clone.
1064 Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
1065  const SCEVAddRecExpr *WideAR) {
1066  unsigned Opcode = DU.NarrowUse->getOpcode();
1067  switch (Opcode) {
1068  default:
1069  return nullptr;
1070  case Instruction::Add:
1071  case Instruction::Mul:
1072  case Instruction::UDiv:
1073  case Instruction::Sub:
1074  return cloneArithmeticIVUser(DU, WideAR);
1075 
1076  case Instruction::And:
1077  case Instruction::Or:
1078  case Instruction::Xor:
1079  case Instruction::Shl:
1080  case Instruction::LShr:
1081  case Instruction::AShr:
1082  return cloneBitwiseIVUser(DU);
1083  }
1084 }
1085 
1086 Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
1087  Instruction *NarrowUse = DU.NarrowUse;
1088  Instruction *NarrowDef = DU.NarrowDef;
1089  Instruction *WideDef = DU.WideDef;
1090 
1091  DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");
1092 
1093  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
1094  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
1095  // invariant and will be folded or hoisted. If it actually comes from a
1096  // widened IV, it should be removed during a future call to widenIVUse.
1097  bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
1098  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1099  ? WideDef
1100  : createExtendInst(NarrowUse->getOperand(0), WideType,
1101  IsSigned, NarrowUse);
1102  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1103  ? WideDef
1104  : createExtendInst(NarrowUse->getOperand(1), WideType,
1105  IsSigned, NarrowUse);
1106 
1107  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1108  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1109  NarrowBO->getName());
1110  IRBuilder<> Builder(NarrowUse);
1111  Builder.Insert(WideBO);
1112  WideBO->copyIRFlags(NarrowBO);
1113  return WideBO;
1114 }
1115 
1116 Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
1117  const SCEVAddRecExpr *WideAR) {
1118  Instruction *NarrowUse = DU.NarrowUse;
1119  Instruction *NarrowDef = DU.NarrowDef;
1120  Instruction *WideDef = DU.WideDef;
1121 
1122  DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1123 
1124  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;
1125 
1126  // We're trying to find X such that
1127  //
1128  // Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
1129  //
1130  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
1131  // and check using SCEV if any of them are correct.
1132 
1133  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
1134  // correct solution to X.
1135  auto GuessNonIVOperand = [&](bool SignExt) {
1136  const SCEV *WideLHS;
1137  const SCEV *WideRHS;
1138 
1139  auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
1140  if (SignExt)
1141  return SE->getSignExtendExpr(S, Ty);
1142  return SE->getZeroExtendExpr(S, Ty);
1143  };
1144 
1145  if (IVOpIdx == 0) {
1146  WideLHS = SE->getSCEV(WideDef);
1147  const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
1148  WideRHS = GetExtend(NarrowRHS, WideType);
1149  } else {
1150  const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
1151  WideLHS = GetExtend(NarrowLHS, WideType);
1152  WideRHS = SE->getSCEV(WideDef);
1153  }
1154 
1155  // WideUse is "WideDef `op.wide` X" as described in the comment.
1156  const SCEV *WideUse = nullptr;
1157 
1158  switch (NarrowUse->getOpcode()) {
1159  default:
1160  llvm_unreachable("No other possibility!");
1161 
1162  case Instruction::Add:
1163  WideUse = SE->getAddExpr(WideLHS, WideRHS);
1164  break;
1165 
1166  case Instruction::Mul:
1167  WideUse = SE->getMulExpr(WideLHS, WideRHS);
1168  break;
1169 
1170  case Instruction::UDiv:
1171  WideUse = SE->getUDivExpr(WideLHS, WideRHS);
1172  break;
1173 
1174  case Instruction::Sub:
1175  WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
1176  break;
1177  }
1178 
1179  return WideUse == WideAR;
1180  };
1181 
1182  bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
1183  if (!GuessNonIVOperand(SignExtend)) {
1184  SignExtend = !SignExtend;
1185  if (!GuessNonIVOperand(SignExtend))
1186  return nullptr;
1187  }
1188 
1189  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1190  ? WideDef
1191  : createExtendInst(NarrowUse->getOperand(0), WideType,
1192  SignExtend, NarrowUse);
1193  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1194  ? WideDef
1195  : createExtendInst(NarrowUse->getOperand(1), WideType,
1196  SignExtend, NarrowUse);
1197 
1198  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1199  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1200  NarrowBO->getName());
1201 
1202  IRBuilder<> Builder(NarrowUse);
1203  Builder.Insert(WideBO);
1204  WideBO->copyIRFlags(NarrowBO);
1205  return WideBO;
1206 }
1207 
1208 WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
1209  auto It = ExtendKindMap.find(I);
1210  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
1211  return It->second;
1212 }
1213 
1214 const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
1215  unsigned OpCode) const {
1216  if (OpCode == Instruction::Add)
1217  return SE->getAddExpr(LHS, RHS);
1218  if (OpCode == Instruction::Sub)
1219  return SE->getMinusSCEV(LHS, RHS);
1220  if (OpCode == Instruction::Mul)
1221  return SE->getMulExpr(LHS, RHS);
1222 
1223  llvm_unreachable("Unsupported opcode.");
1224 }
1225 
1226 /// No-wrap operations can transfer sign extension of their result to their
1227 /// operands. Generate the SCEV value for the widened operation without
1228 /// actually modifying the IR yet. If the expression after extending the
1229 /// operands is an AddRec for this loop, return the AddRec and the kind of
1230 /// extension used.
1231 WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
1232  // Handle the common case of add<nsw/nuw>
1233  const unsigned OpCode = DU.NarrowUse->getOpcode();
1234  // Only Add/Sub/Mul instructions supported yet.
1235  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1236  OpCode != Instruction::Mul)
1237  return {nullptr, Unknown};
1238 
1239  // One operand (NarrowDef) has already been extended to WideDef. Now determine
1240  // if extending the other will lead to a recurrence.
1241  const unsigned ExtendOperIdx =
1242  DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
1243  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
1244 
1245  const SCEV *ExtendOperExpr = nullptr;
1246  const OverflowingBinaryOperator *OBO =
1247  cast<OverflowingBinaryOperator>(DU.NarrowUse);
1248  ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
1249  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
1250  ExtendOperExpr = SE->getSignExtendExpr(
1251  SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
1252  else if(ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
1253  ExtendOperExpr = SE->getZeroExtendExpr(
1254  SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
1255  else
1256  return {nullptr, Unknown};
1257 
1258  // When creating this SCEV expr, don't apply the current operations NSW or NUW
1259  // flags. This instruction may be guarded by control flow that the no-wrap
1260  // behavior depends on. Non-control-equivalent instructions can be mapped to
1261  // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
1262  // semantics to those operations.
1263  const SCEV *lhs = SE->getSCEV(DU.WideDef);
1264  const SCEV *rhs = ExtendOperExpr;
1265 
1266  // Let's swap operands to the initial order for the case of non-commutative
1267  // operations, like SUB. See PR21014.
1268  if (ExtendOperIdx == 0)
1269  std::swap(lhs, rhs);
1270  const SCEVAddRecExpr *AddRec =
1271  dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));
1272 
1273  if (!AddRec || AddRec->getLoop() != L)
1274  return {nullptr, Unknown};
1275 
1276  return {AddRec, ExtKind};
1277 }
1278 
1279 /// Is this instruction potentially interesting for further simplification after
1280 /// widening it's type? In other words, can the extend be safely hoisted out of
1281 /// the loop with SCEV reducing the value to a recurrence on the same loop. If
1282 /// so, return the extended recurrence and the kind of extension used. Otherwise
1283 /// return {nullptr, Unknown}.
1284 WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
1285  if (!SE->isSCEVable(DU.NarrowUse->getType()))
1286  return {nullptr, Unknown};
1287 
1288  const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
1289  if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
1290  SE->getTypeSizeInBits(WideType)) {
1291  // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
1292  // index. So don't follow this use.
1293  return {nullptr, Unknown};
1294  }
1295 
1296  const SCEV *WideExpr;
1297  ExtendKind ExtKind;
1298  if (DU.NeverNegative) {
1299  WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1300  if (isa<SCEVAddRecExpr>(WideExpr))
1301  ExtKind = SignExtended;
1302  else {
1303  WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1304  ExtKind = ZeroExtended;
1305  }
1306  } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
1307  WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1308  ExtKind = SignExtended;
1309  } else {
1310  WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1311  ExtKind = ZeroExtended;
1312  }
1313  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
1314  if (!AddRec || AddRec->getLoop() != L)
1315  return {nullptr, Unknown};
1316  return {AddRec, ExtKind};
1317 }
1318 
1319 /// This IV user cannot be widen. Replace this use of the original narrow IV
1320 /// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
1321 static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
1322  DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef
1323  << " for user " << *DU.NarrowUse << "\n");
1324  IRBuilder<> Builder(
1325  getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI));
1326  Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
1327  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
1328 }
1329 
1330 /// If the narrow use is a compare instruction, then widen the compare
1331 // (and possibly the other operand). The extend operation is hoisted into the
1332 // loop preheader as far as possible.
1333 bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
1334  ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
1335  if (!Cmp)
1336  return false;
1337 
1338  // We can legally widen the comparison in the following two cases:
1339  //
1340  // - The signedness of the IV extension and comparison match
1341  //
1342  // - The narrow IV is always positive (and thus its sign extension is equal
1343  // to its zero extension). For instance, let's say we're zero extending
1344  // %narrow for the following use
1345  //
1346  // icmp slt i32 %narrow, %val ... (A)
1347  //
1348  // and %narrow is always positive. Then
1349  //
1350  // (A) == icmp slt i32 sext(%narrow), sext(%val)
1351  // == icmp slt i32 zext(%narrow), sext(%val)
1352  bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
1353  if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
1354  return false;
1355 
1356  Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
1357  unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
1358  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1359  assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");
1360 
1361  // Widen the compare instruction.
1362  IRBuilder<> Builder(
1363  getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI));
1364  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1365 
1366  // Widen the other operand of the compare, if necessary.
1367  if (CastWidth < IVWidth) {
1368  Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
1369  DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
1370  }
1371  return true;
1372 }
1373 
1374 /// Determine whether an individual user of the narrow IV can be widened. If so,
1375 /// return the wide clone of the user.
1376 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
1377  assert(ExtendKindMap.count(DU.NarrowDef) &&
1378  "Should already know the kind of extension used to widen NarrowDef");
1379 
1380  // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1381  if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
1382  if (LI->getLoopFor(UsePhi->getParent()) != L) {
1383  // For LCSSA phis, sink the truncate outside the loop.
1384  // After SimplifyCFG most loop exit targets have a single predecessor.
1385  // Otherwise fall back to a truncate within the loop.
1386  if (UsePhi->getNumOperands() != 1)
1387  truncateIVUse(DU, DT, LI);
1388  else {
1389  // Widening the PHI requires us to insert a trunc. The logical place
1390  // for this trunc is in the same BB as the PHI. This is not possible if
1391  // the BB is terminated by a catchswitch.
1392  if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
1393  return nullptr;
1394 
1395  PHINode *WidePhi =
1396  PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
1397  UsePhi);
1398  WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
1399  IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
1400  Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
1401  UsePhi->replaceAllUsesWith(Trunc);
1402  DeadInsts.emplace_back(UsePhi);
1403  DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi
1404  << " to " << *WidePhi << "\n");
1405  }
1406  return nullptr;
1407  }
1408  }
1409 
1410  // This narrow use can be widened by a sext if it's non-negative or its narrow
1411  // def was widended by a sext. Same for zext.
1412  auto canWidenBySExt = [&]() {
1413  return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
1414  };
1415  auto canWidenByZExt = [&]() {
1416  return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
1417  };
1418 
1419  // Our raison d'etre! Eliminate sign and zero extension.
1420  if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
1421  (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
1422  Value *NewDef = DU.WideDef;
1423  if (DU.NarrowUse->getType() != WideType) {
1424  unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
1425  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1426  if (CastWidth < IVWidth) {
1427  // The cast isn't as wide as the IV, so insert a Trunc.
1428  IRBuilder<> Builder(DU.NarrowUse);
1429  NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
1430  }
1431  else {
1432  // A wider extend was hidden behind a narrower one. This may induce
1433  // another round of IV widening in which the intermediate IV becomes
1434  // dead. It should be very rare.
1435  DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1436  << " not wide enough to subsume " << *DU.NarrowUse << "\n");
1437  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1438  NewDef = DU.NarrowUse;
1439  }
1440  }
1441  if (NewDef != DU.NarrowUse) {
1442  DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1443  << " replaced by " << *DU.WideDef << "\n");
1444  ++NumElimExt;
1445  DU.NarrowUse->replaceAllUsesWith(NewDef);
1446  DeadInsts.emplace_back(DU.NarrowUse);
1447  }
1448  // Now that the extend is gone, we want to expose it's uses for potential
1449  // further simplification. We don't need to directly inform SimplifyIVUsers
1450  // of the new users, because their parent IV will be processed later as a
1451  // new loop phi. If we preserved IVUsers analysis, we would also want to
1452  // push the uses of WideDef here.
1453 
1454  // No further widening is needed. The deceased [sz]ext had done it for us.
1455  return nullptr;
1456  }
1457 
1458  // Does this user itself evaluate to a recurrence after widening?
1459  WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
1460  if (!WideAddRec.first)
1461  WideAddRec = getWideRecurrence(DU);
1462 
1463  assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
1464  if (!WideAddRec.first) {
1465  // If use is a loop condition, try to promote the condition instead of
1466  // truncating the IV first.
1467  if (widenLoopCompare(DU))
1468  return nullptr;
1469 
1470  // This user does not evaluate to a recurrence after widening, so don't
1471  // follow it. Instead insert a Trunc to kill off the original use,
1472  // eventually isolating the original narrow IV so it can be removed.
1473  truncateIVUse(DU, DT, LI);
1474  return nullptr;
1475  }
1476  // Assume block terminators cannot evaluate to a recurrence. We can't to
1477  // insert a Trunc after a terminator if there happens to be a critical edge.
1478  assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
1479  "SCEV is not expected to evaluate a block terminator");
1480 
1481  // Reuse the IV increment that SCEVExpander created as long as it dominates
1482  // NarrowUse.
1483  Instruction *WideUse = nullptr;
1484  if (WideAddRec.first == WideIncExpr &&
1485  Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
1486  WideUse = WideInc;
1487  else {
1488  WideUse = cloneIVUser(DU, WideAddRec.first);
1489  if (!WideUse)
1490  return nullptr;
1491  }
1492  // Evaluation of WideAddRec ensured that the narrow expression could be
1493  // extended outside the loop without overflow. This suggests that the wide use
1494  // evaluates to the same expression as the extended narrow use, but doesn't
1495  // absolutely guarantee it. Hence the following failsafe check. In rare cases
1496  // where it fails, we simply throw away the newly created wide use.
1497  if (WideAddRec.first != SE->getSCEV(WideUse)) {
1498  DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse
1499  << ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first << "\n");
1500  DeadInsts.emplace_back(WideUse);
1501  return nullptr;
1502  }
1503 
1504  ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
1505  // Returning WideUse pushes it on the worklist.
1506  return WideUse;
1507 }
1508 
1509 /// Add eligible users of NarrowDef to NarrowIVUsers.
1510 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1511  const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1512  bool NonNegativeDef =
1513  SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1514  SE->getConstant(NarrowSCEV->getType(), 0));
1515  for (User *U : NarrowDef->users()) {
1516  Instruction *NarrowUser = cast<Instruction>(U);
1517 
1518  // Handle data flow merges and bizarre phi cycles.
1519  if (!Widened.insert(NarrowUser).second)
1520  continue;
1521 
1522  bool NonNegativeUse = false;
1523  if (!NonNegativeDef) {
1524  // We might have a control-dependent range information for this context.
1525  if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
1526  NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
1527  }
1528 
1529  NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
1530  NonNegativeDef || NonNegativeUse);
1531  }
1532 }
1533 
1534 /// Process a single induction variable. First use the SCEVExpander to create a
1535 /// wide induction variable that evaluates to the same recurrence as the
1536 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's
1537 /// def-use chain. After widenIVUse has processed all interesting IV users, the
1538 /// narrow IV will be isolated for removal by DeleteDeadPHIs.
1539 ///
1540 /// It would be simpler to delete uses as they are processed, but we must avoid
1541 /// invalidating SCEV expressions.
1542 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
1543  // Is this phi an induction variable?
1544  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
1545  if (!AddRec)
1546  return nullptr;
1547 
1548  // Widen the induction variable expression.
1549  const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
1550  ? SE->getSignExtendExpr(AddRec, WideType)
1551  : SE->getZeroExtendExpr(AddRec, WideType);
1552 
1553  assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
1554  "Expect the new IV expression to preserve its type");
1555 
1556  // Can the IV be extended outside the loop without overflow?
1557  AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
1558  if (!AddRec || AddRec->getLoop() != L)
1559  return nullptr;
1560 
1561  // An AddRec must have loop-invariant operands. Since this AddRec is
1562  // materialized by a loop header phi, the expression cannot have any post-loop
1563  // operands, so they must dominate the loop header.
1564  assert(
1565  SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
1566  SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
1567  "Loop header phi recurrence inputs do not dominate the loop");
1568 
1569  // Iterate over IV uses (including transitive ones) looking for IV increments
1570  // of the form 'add nsw %iv, <const>'. For each increment and each use of
1571  // the increment calculate control-dependent range information basing on
1572  // dominating conditions inside of the loop (e.g. a range check inside of the
1573  // loop). Calculated ranges are stored in PostIncRangeInfos map.
1574  //
1575  // Control-dependent range information is later used to prove that a narrow
1576  // definition is not negative (see pushNarrowIVUsers). It's difficult to do
1577  // this on demand because when pushNarrowIVUsers needs this information some
1578  // of the dominating conditions might be already widened.
1579  if (UsePostIncrementRanges)
1580  calculatePostIncRanges(OrigPhi);
1581 
1582  // The rewriter provides a value for the desired IV expression. This may
1583  // either find an existing phi or materialize a new one. Either way, we
1584  // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
1585  // of the phi-SCC dominates the loop entry.
1586  Instruction *InsertPt = &L->getHeader()->front();
1587  WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
1588 
1589  // Remembering the WideIV increment generated by SCEVExpander allows
1590  // widenIVUse to reuse it when widening the narrow IV's increment. We don't
1591  // employ a general reuse mechanism because the call above is the only call to
1592  // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
1593  if (BasicBlock *LatchBlock = L->getLoopLatch()) {
1594  WideInc =
1595  cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
1596  WideIncExpr = SE->getSCEV(WideInc);
1597  // Propagate the debug location associated with the original loop increment
1598  // to the new (widened) increment.
1599  auto *OrigInc =
1600  cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
1601  WideInc->setDebugLoc(OrigInc->getDebugLoc());
1602  }
1603 
1604  DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
1605  ++NumWidened;
1606 
1607  // Traverse the def-use chain using a worklist starting at the original IV.
1608  assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" );
1609 
1610  Widened.insert(OrigPhi);
1611  pushNarrowIVUsers(OrigPhi, WidePhi);
1612 
1613  while (!NarrowIVUsers.empty()) {
1614  NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
1615 
1616  // Process a def-use edge. This may replace the use, so don't hold a
1617  // use_iterator across it.
1618  Instruction *WideUse = widenIVUse(DU, Rewriter);
1619 
1620  // Follow all def-use edges from the previous narrow use.
1621  if (WideUse)
1622  pushNarrowIVUsers(DU.NarrowUse, WideUse);
1623 
1624  // widenIVUse may have removed the def-use edge.
1625  if (DU.NarrowDef->use_empty())
1626  DeadInsts.emplace_back(DU.NarrowDef);
1627  }
1628 
1629  // Attach any debug information to the new PHI. Since OrigPhi and WidePHI
1630  // evaluate the same recurrence, we can just copy the debug info over.
1632  llvm::findDbgValues(DbgValues, OrigPhi);
1633  auto *MDPhi = MetadataAsValue::get(WidePhi->getContext(),
1634  ValueAsMetadata::get(WidePhi));
1635  for (auto &DbgValue : DbgValues)
1636  DbgValue->setOperand(0, MDPhi);
1637  return WidePhi;
1638 }
1639 
1640 /// Calculates control-dependent range for the given def at the given context
1641 /// by looking at dominating conditions inside of the loop
1642 void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
1643  Instruction *NarrowUser) {
1644  using namespace llvm::PatternMatch;
1645 
1646  Value *NarrowDefLHS;
1647  const APInt *NarrowDefRHS;
1648  if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
1649  m_APInt(NarrowDefRHS))) ||
1650  !NarrowDefRHS->isNonNegative())
1651  return;
1652 
1653  auto UpdateRangeFromCondition = [&] (Value *Condition,
1654  bool TrueDest) {
1655  CmpInst::Predicate Pred;
1656  Value *CmpRHS;
1657  if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
1658  m_Value(CmpRHS))))
1659  return;
1660 
1662  TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
1663 
1664  auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
1665  auto CmpConstrainedLHSRange =
1666  ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
1667  auto NarrowDefRange =
1668  CmpConstrainedLHSRange.addWithNoSignedWrap(*NarrowDefRHS);
1669 
1670  updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
1671  };
1672 
1673  auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
1674  if (!HasGuards)
1675  return;
1676 
1677  for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
1678  Ctx->getParent()->rend())) {
1679  Value *C = nullptr;
1680  if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
1681  UpdateRangeFromCondition(C, /*TrueDest=*/true);
1682  }
1683  };
1684 
1685  UpdateRangeFromGuards(NarrowUser);
1686 
1687  BasicBlock *NarrowUserBB = NarrowUser->getParent();
1688  // If NarrowUserBB is statically unreachable asking dominator queries may
1689  // yield surprising results. (e.g. the block may not have a dom tree node)
1690  if (!DT->isReachableFromEntry(NarrowUserBB))
1691  return;
1692 
1693  for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
1694  L->contains(DTB->getBlock());
1695  DTB = DTB->getIDom()) {
1696  auto *BB = DTB->getBlock();
1697  auto *TI = BB->getTerminator();
1698  UpdateRangeFromGuards(TI);
1699 
1700  auto *BI = dyn_cast<BranchInst>(TI);
1701  if (!BI || !BI->isConditional())
1702  continue;
1703 
1704  auto *TrueSuccessor = BI->getSuccessor(0);
1705  auto *FalseSuccessor = BI->getSuccessor(1);
1706 
1707  auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
1708  return BBE.isSingleEdge() &&
1709  DT->dominates(BBE, NarrowUser->getParent());
1710  };
1711 
1712  if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
1713  UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
1714 
1715  if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
1716  UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
1717  }
1718 }
1719 
1720 /// Calculates PostIncRangeInfos map for the given IV
1721 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
1724  Worklist.push_back(OrigPhi);
1725  Visited.insert(OrigPhi);
1726 
1727  while (!Worklist.empty()) {
1728  Instruction *NarrowDef = Worklist.pop_back_val();
1729 
1730  for (Use &U : NarrowDef->uses()) {
1731  auto *NarrowUser = cast<Instruction>(U.getUser());
1732 
1733  // Don't go looking outside the current loop.
1734  auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
1735  if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
1736  continue;
1737 
1738  if (!Visited.insert(NarrowUser).second)
1739  continue;
1740 
1741  Worklist.push_back(NarrowUser);
1742 
1743  calculatePostIncRange(NarrowDef, NarrowUser);
1744  }
1745  }
1746 }
1747 
1748 //===----------------------------------------------------------------------===//
1749 // Live IV Reduction - Minimize IVs live across the loop.
1750 //===----------------------------------------------------------------------===//
1751 
1752 //===----------------------------------------------------------------------===//
1753 // Simplification of IV users based on SCEV evaluation.
1754 //===----------------------------------------------------------------------===//
1755 
1756 namespace {
1757 
1758 class IndVarSimplifyVisitor : public IVVisitor {
1759  ScalarEvolution *SE;
1760  const TargetTransformInfo *TTI;
1761  PHINode *IVPhi;
1762 
1763 public:
1764  WideIVInfo WI;
1765 
1766  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
1767  const TargetTransformInfo *TTI,
1768  const DominatorTree *DTree)
1769  : SE(SCEV), TTI(TTI), IVPhi(IV) {
1770  DT = DTree;
1771  WI.NarrowIV = IVPhi;
1772  }
1773 
1774  // Implement the interface used by simplifyUsersOfIV.
1775  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
1776 };
1777 
1778 } // end anonymous namespace
1779 
1780 /// Iteratively perform simplification on a worklist of IV users. Each
1781 /// successive simplification may push more users which may themselves be
1782 /// candidates for simplification.
1783 ///
1784 /// Sign/Zero extend elimination is interleaved with IV simplification.
1785 void IndVarSimplify::simplifyAndExtend(Loop *L,
1786  SCEVExpander &Rewriter,
1787  LoopInfo *LI) {
1789 
1790  auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
1791  Intrinsic::getName(Intrinsic::experimental_guard));
1792  bool HasGuards = GuardDecl && !GuardDecl->use_empty();
1793 
1794  SmallVector<PHINode*, 8> LoopPhis;
1795  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1796  LoopPhis.push_back(cast<PHINode>(I));
1797  }
1798  // Each round of simplification iterates through the SimplifyIVUsers worklist
1799  // for all current phis, then determines whether any IVs can be
1800  // widened. Widening adds new phis to LoopPhis, inducing another round of
1801  // simplification on the wide IVs.
1802  while (!LoopPhis.empty()) {
1803  // Evaluate as many IV expressions as possible before widening any IVs. This
1804  // forces SCEV to set no-wrap flags before evaluating sign/zero
1805  // extension. The first time SCEV attempts to normalize sign/zero extension,
1806  // the result becomes final. So for the most predictable results, we delay
1807  // evaluation of sign/zero extend evaluation until needed, and avoid running
1808  // other SCEV based analysis prior to simplifyAndExtend.
1809  do {
1810  PHINode *CurrIV = LoopPhis.pop_back_val();
1811 
1812  // Information about sign/zero extensions of CurrIV.
1813  IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
1814 
1815  Changed |=
1816  simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor);
1817 
1818  if (Visitor.WI.WidestNativeType) {
1819  WideIVs.push_back(Visitor.WI);
1820  }
1821  } while(!LoopPhis.empty());
1822 
1823  for (; !WideIVs.empty(); WideIVs.pop_back()) {
1824  WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
1825  if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
1826  Changed = true;
1827  LoopPhis.push_back(WidePhi);
1828  }
1829  }
1830  }
1831 }
1832 
1833 //===----------------------------------------------------------------------===//
1834 // linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
1835 //===----------------------------------------------------------------------===//
1836 
1837 /// Return true if this loop's backedge taken count expression can be safely and
1838 /// cheaply expanded into an instruction sequence that can be used by
1839 /// linearFunctionTestReplace.
1840 ///
1841 /// TODO: This fails for pointer-type loop counters with greater than one byte
1842 /// strides, consequently preventing LFTR from running. For the purpose of LFTR
1843 /// we could skip this check in the case that the LFTR loop counter (chosen by
1844 /// FindLoopCounter) is also pointer type. Instead, we could directly convert
1845 /// the loop test to an inequality test by checking the target data's alignment
1846 /// of element types (given that the initial pointer value originates from or is
1847 /// used by ABI constrained operation, as opposed to inttoptr/ptrtoint).
1848 /// However, we don't yet have a strong motivation for converting loop tests
1849 /// into inequality tests.
1851  SCEVExpander &Rewriter) {
1852  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
1853  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
1854  BackedgeTakenCount->isZero())
1855  return false;
1856 
1857  if (!L->getExitingBlock())
1858  return false;
1859 
1860  // Can't rewrite non-branch yet.
1861  if (!isa<BranchInst>(L->getExitingBlock()->getTerminator()))
1862  return false;
1863 
1864  if (Rewriter.isHighCostExpansion(BackedgeTakenCount, L))
1865  return false;
1866 
1867  return true;
1868 }
1869 
1870 /// Return the loop header phi IFF IncV adds a loop invariant value to the phi.
1872  Instruction *IncI = dyn_cast<Instruction>(IncV);
1873  if (!IncI)
1874  return nullptr;
1875 
1876  switch (IncI->getOpcode()) {
1877  case Instruction::Add:
1878  case Instruction::Sub:
1879  break;
1880  case Instruction::GetElementPtr:
1881  // An IV counter must preserve its type.
1882  if (IncI->getNumOperands() == 2)
1883  break;
1885  default:
1886  return nullptr;
1887  }
1888 
1889  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
1890  if (Phi && Phi->getParent() == L->getHeader()) {
1891  if (isLoopInvariant(IncI->getOperand(1), L, DT))
1892  return Phi;
1893  return nullptr;
1894  }
1895  if (IncI->getOpcode() == Instruction::GetElementPtr)
1896  return nullptr;
1897 
1898  // Allow add/sub to be commuted.
1899  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
1900  if (Phi && Phi->getParent() == L->getHeader()) {
1901  if (isLoopInvariant(IncI->getOperand(0), L, DT))
1902  return Phi;
1903  }
1904  return nullptr;
1905 }
1906 
1907 /// Return the compare guarding the loop latch, or NULL for unrecognized tests.
1909  assert(L->getExitingBlock() && "expected loop exit");
1910 
1911  BasicBlock *LatchBlock = L->getLoopLatch();
1912  // Don't bother with LFTR if the loop is not properly simplified.
1913  if (!LatchBlock)
1914  return nullptr;
1915 
1917  assert(BI && "expected exit branch");
1918 
1919  return dyn_cast<ICmpInst>(BI->getCondition());
1920 }
1921 
1922 /// linearFunctionTestReplace policy. Return true unless we can show that the
1923 /// current exit test is already sufficiently canonical.
1924 static bool needsLFTR(Loop *L, DominatorTree *DT) {
1925  // Do LFTR to simplify the exit condition to an ICMP.
1926  ICmpInst *Cond = getLoopTest(L);
1927  if (!Cond)
1928  return true;
1929 
1930  // Do LFTR to simplify the exit ICMP to EQ/NE
1931  ICmpInst::Predicate Pred = Cond->getPredicate();
1932  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
1933  return true;
1934 
1935  // Look for a loop invariant RHS
1936  Value *LHS = Cond->getOperand(0);
1937  Value *RHS = Cond->getOperand(1);
1938  if (!isLoopInvariant(RHS, L, DT)) {
1939  if (!isLoopInvariant(LHS, L, DT))
1940  return true;
1941  std::swap(LHS, RHS);
1942  }
1943  // Look for a simple IV counter LHS
1944  PHINode *Phi = dyn_cast<PHINode>(LHS);
1945  if (!Phi)
1946  Phi = getLoopPhiForCounter(LHS, L, DT);
1947 
1948  if (!Phi)
1949  return true;
1950 
1951  // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
1952  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
1953  if (Idx < 0)
1954  return true;
1955 
1956  // Do LFTR if the exit condition's IV is *not* a simple counter.
1957  Value *IncV = Phi->getIncomingValue(Idx);
1958  return Phi != getLoopPhiForCounter(IncV, L, DT);
1959 }
1960 
1961 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
1962 /// down to checking that all operands are constant and listing instructions
1963 /// that may hide undef.
1965  unsigned Depth) {
1966  if (isa<Constant>(V))
1967  return !isa<UndefValue>(V);
1968 
1969  if (Depth >= 6)
1970  return false;
1971 
1972  // Conservatively handle non-constant non-instructions. For example, Arguments
1973  // may be undef.
1974  Instruction *I = dyn_cast<Instruction>(V);
1975  if (!I)
1976  return false;
1977 
1978  // Load and return values may be undef.
1979  if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
1980  return false;
1981 
1982  // Optimistically handle other instructions.
1983  for (Value *Op : I->operands()) {
1984  if (!Visited.insert(Op).second)
1985  continue;
1986  if (!hasConcreteDefImpl(Op, Visited, Depth+1))
1987  return false;
1988  }
1989  return true;
1990 }
1991 
1992 /// Return true if the given value is concrete. We must prove that undef can
1993 /// never reach it.
1994 ///
1995 /// TODO: If we decide that this is a good approach to checking for undef, we
1996 /// may factor it into a common location.
1997 static bool hasConcreteDef(Value *V) {
1998  SmallPtrSet<Value*, 8> Visited;
1999  Visited.insert(V);
2000  return hasConcreteDefImpl(V, Visited, 0);
2001 }
2002 
2003 /// Return true if this IV has any uses other than the (soon to be rewritten)
2004 /// loop exit test.
2005 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
2006  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
2007  Value *IncV = Phi->getIncomingValue(LatchIdx);
2008 
2009  for (User *U : Phi->users())
2010  if (U != Cond && U != IncV) return false;
2011 
2012  for (User *U : IncV->users())
2013  if (U != Cond && U != Phi) return false;
2014  return true;
2015 }
2016 
2017 /// Find an affine IV in canonical form.
2018 ///
2019 /// BECount may be an i8* pointer type. The pointer difference is already
2020 /// valid count without scaling the address stride, so it remains a pointer
2021 /// expression as far as SCEV is concerned.
2022 ///
2023 /// Currently only valid for LFTR. See the comments on hasConcreteDef below.
2024 ///
2025 /// FIXME: Accept -1 stride and set IVLimit = IVInit - BECount
2026 ///
2027 /// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride.
2028 /// This is difficult in general for SCEV because of potential overflow. But we
2029 /// could at least handle constant BECounts.
2030 static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount,
2031  ScalarEvolution *SE, DominatorTree *DT) {
2032  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
2033 
2034  Value *Cond =
2035  cast<BranchInst>(L->getExitingBlock()->getTerminator())->getCondition();
2036 
2037  // Loop over all of the PHI nodes, looking for a simple counter.
2038  PHINode *BestPhi = nullptr;
2039  const SCEV *BestInit = nullptr;
2040  BasicBlock *LatchBlock = L->getLoopLatch();
2041  assert(LatchBlock && "needsLFTR should guarantee a loop latch");
2042  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2043 
2044  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
2045  PHINode *Phi = cast<PHINode>(I);
2046  if (!SE->isSCEVable(Phi->getType()))
2047  continue;
2048 
2049  // Avoid comparing an integer IV against a pointer Limit.
2050  if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
2051  continue;
2052 
2053  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2054  if (!AR || AR->getLoop() != L || !AR->isAffine())
2055  continue;
2056 
2057  // AR may be a pointer type, while BECount is an integer type.
2058  // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
2059  // AR may not be a narrower type, or we may never exit.
2060  uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
2061  if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
2062  continue;
2063 
2064  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
2065  if (!Step || !Step->isOne())
2066  continue;
2067 
2068  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
2069  Value *IncV = Phi->getIncomingValue(LatchIdx);
2070  if (getLoopPhiForCounter(IncV, L, DT) != Phi)
2071  continue;
2072 
2073  // Avoid reusing a potentially undef value to compute other values that may
2074  // have originally had a concrete definition.
2075  if (!hasConcreteDef(Phi)) {
2076  // We explicitly allow unknown phis as long as they are already used by
2077  // the loop test. In this case we assume that performing LFTR could not
2078  // increase the number of undef users.
2079  if (ICmpInst *Cond = getLoopTest(L)) {
2080  if (Phi != getLoopPhiForCounter(Cond->getOperand(0), L, DT) &&
2081  Phi != getLoopPhiForCounter(Cond->getOperand(1), L, DT)) {
2082  continue;
2083  }
2084  }
2085  }
2086  const SCEV *Init = AR->getStart();
2087 
2088  if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
2089  // Don't force a live loop counter if another IV can be used.
2090  if (AlmostDeadIV(Phi, LatchBlock, Cond))
2091  continue;
2092 
2093  // Prefer to count-from-zero. This is a more "canonical" counter form. It
2094  // also prefers integer to pointer IVs.
2095  if (BestInit->isZero() != Init->isZero()) {
2096  if (BestInit->isZero())
2097  continue;
2098  }
2099  // If two IVs both count from zero or both count from nonzero then the
2100  // narrower is likely a dead phi that has been widened. Use the wider phi
2101  // to allow the other to be eliminated.
2102  else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
2103  continue;
2104  }
2105  BestPhi = Phi;
2106  BestInit = Init;
2107  }
2108  return BestPhi;
2109 }
2110 
2111 /// Help linearFunctionTestReplace by generating a value that holds the RHS of
2112 /// the new loop test.
2113 static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
2114  SCEVExpander &Rewriter, ScalarEvolution *SE) {
2115  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2116  assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter");
2117  const SCEV *IVInit = AR->getStart();
2118 
2119  // IVInit may be a pointer while IVCount is an integer when FindLoopCounter
2120  // finds a valid pointer IV. Sign extend BECount in order to materialize a
2121  // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
2122  // the existing GEPs whenever possible.
2123  if (IndVar->getType()->isPointerTy() && !IVCount->getType()->isPointerTy()) {
2124  // IVOffset will be the new GEP offset that is interpreted by GEP as a
2125  // signed value. IVCount on the other hand represents the loop trip count,
2126  // which is an unsigned value. FindLoopCounter only allows induction
2127  // variables that have a positive unit stride of one. This means we don't
2128  // have to handle the case of negative offsets (yet) and just need to zero
2129  // extend IVCount.
2130  Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
2131  const SCEV *IVOffset = SE->getTruncateOrZeroExtend(IVCount, OfsTy);
2132 
2133  // Expand the code for the iteration count.
2134  assert(SE->isLoopInvariant(IVOffset, L) &&
2135  "Computed iteration count is not loop invariant!");
2136  BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
2137  Value *GEPOffset = Rewriter.expandCodeFor(IVOffset, OfsTy, BI);
2138 
2139  Value *GEPBase = IndVar->getIncomingValueForBlock(L->getLoopPreheader());
2140  assert(AR->getStart() == SE->getSCEV(GEPBase) && "bad loop counter");
2141  // We could handle pointer IVs other than i8*, but we need to compensate for
2142  // gep index scaling. See canExpandBackedgeTakenCount comments.
2144  cast<PointerType>(GEPBase->getType())
2145  ->getElementType())->isOne() &&
2146  "unit stride pointer IV must be i8*");
2147 
2148  IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
2149  return Builder.CreateGEP(nullptr, GEPBase, GEPOffset, "lftr.limit");
2150  } else {
2151  // In any other case, convert both IVInit and IVCount to integers before
2152  // comparing. This may result in SCEV expansion of pointers, but in practice
2153  // SCEV will fold the pointer arithmetic away as such:
2154  // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
2155  //
2156  // Valid Cases: (1) both integers is most common; (2) both may be pointers
2157  // for simple memset-style loops.
2158  //
2159  // IVInit integer and IVCount pointer would only occur if a canonical IV
2160  // were generated on top of case #2, which is not expected.
2161 
2162  const SCEV *IVLimit = nullptr;
2163  // For unit stride, IVCount = Start + BECount with 2's complement overflow.
2164  // For non-zero Start, compute IVCount here.
2165  if (AR->getStart()->isZero())
2166  IVLimit = IVCount;
2167  else {
2168  assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
2169  const SCEV *IVInit = AR->getStart();
2170 
2171  // For integer IVs, truncate the IV before computing IVInit + BECount.
2172  if (SE->getTypeSizeInBits(IVInit->getType())
2173  > SE->getTypeSizeInBits(IVCount->getType()))
2174  IVInit = SE->getTruncateExpr(IVInit, IVCount->getType());
2175 
2176  IVLimit = SE->getAddExpr(IVInit, IVCount);
2177  }
2178  // Expand the code for the iteration count.
2179  BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
2180  IRBuilder<> Builder(BI);
2181  assert(SE->isLoopInvariant(IVLimit, L) &&
2182  "Computed iteration count is not loop invariant!");
2183  // Ensure that we generate the same type as IndVar, or a smaller integer
2184  // type. In the presence of null pointer values, we have an integer type
2185  // SCEV expression (IVInit) for a pointer type IV value (IndVar).
2186  Type *LimitTy = IVCount->getType()->isPointerTy() ?
2187  IndVar->getType() : IVCount->getType();
2188  return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
2189  }
2190 }
2191 
2192 /// This method rewrites the exit condition of the loop to be a canonical !=
2193 /// comparison against the incremented loop induction variable. This pass is
2194 /// able to rewrite the exit tests of any loop where the SCEV analysis can
2195 /// determine a loop-invariant trip count of the loop, which is actually a much
2196 /// broader range than just linear tests.
2197 Value *IndVarSimplify::
2198 linearFunctionTestReplace(Loop *L,
2199  const SCEV *BackedgeTakenCount,
2200  PHINode *IndVar,
2201  SCEVExpander &Rewriter) {
2202  assert(canExpandBackedgeTakenCount(L, SE, Rewriter) && "precondition");
2203 
2204  // Initialize CmpIndVar and IVCount to their preincremented values.
2205  Value *CmpIndVar = IndVar;
2206  const SCEV *IVCount = BackedgeTakenCount;
2207 
2208  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
2209 
2210  // If the exiting block is the same as the backedge block, we prefer to
2211  // compare against the post-incremented value, otherwise we must compare
2212  // against the preincremented value.
2213  if (L->getExitingBlock() == L->getLoopLatch()) {
2214  // Add one to the "backedge-taken" count to get the trip count.
2215  // This addition may overflow, which is valid as long as the comparison is
2216  // truncated to BackedgeTakenCount->getType().
2217  IVCount = SE->getAddExpr(BackedgeTakenCount,
2218  SE->getOne(BackedgeTakenCount->getType()));
2219  // The BackedgeTaken expression contains the number of times that the
2220  // backedge branches to the loop header. This is one less than the
2221  // number of times the loop executes, so use the incremented indvar.
2222  CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
2223  }
2224 
2225  Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE);
2226  assert(ExitCnt->getType()->isPointerTy() ==
2227  IndVar->getType()->isPointerTy() &&
2228  "genLoopLimit missed a cast");
2229 
2230  // Insert a new icmp_ne or icmp_eq instruction before the branch.
2231  BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
2233  if (L->contains(BI->getSuccessor(0)))
2234  P = ICmpInst::ICMP_NE;
2235  else
2236  P = ICmpInst::ICMP_EQ;
2237 
2238  DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
2239  << " LHS:" << *CmpIndVar << '\n'
2240  << " op:\t"
2241  << (P == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
2242  << " RHS:\t" << *ExitCnt << "\n"
2243  << " IVCount:\t" << *IVCount << "\n");
2244 
2245  IRBuilder<> Builder(BI);
2246 
2247  // The new loop exit condition should reuse the debug location of the
2248  // original loop exit condition.
2249  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
2250  Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
2251 
2252  // LFTR can ignore IV overflow and truncate to the width of
2253  // BECount. This avoids materializing the add(zext(add)) expression.
2254  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
2255  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
2256  if (CmpIndVarSize > ExitCntSize) {
2257  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2258  const SCEV *ARStart = AR->getStart();
2259  const SCEV *ARStep = AR->getStepRecurrence(*SE);
2260  // For constant IVCount, avoid truncation.
2261  if (isa<SCEVConstant>(ARStart) && isa<SCEVConstant>(IVCount)) {
2262  const APInt &Start = cast<SCEVConstant>(ARStart)->getAPInt();
2263  APInt Count = cast<SCEVConstant>(IVCount)->getAPInt();
2264  // Note that the post-inc value of BackedgeTakenCount may have overflowed
2265  // above such that IVCount is now zero.
2266  if (IVCount != BackedgeTakenCount && Count == 0) {
2267  Count = APInt::getMaxValue(Count.getBitWidth()).zext(CmpIndVarSize);
2268  ++Count;
2269  }
2270  else
2271  Count = Count.zext(CmpIndVarSize);
2272  APInt NewLimit;
2273  if (cast<SCEVConstant>(ARStep)->getValue()->isNegative())
2274  NewLimit = Start - Count;
2275  else
2276  NewLimit = Start + Count;
2277  ExitCnt = ConstantInt::get(CmpIndVar->getType(), NewLimit);
2278 
2279  DEBUG(dbgs() << " Widen RHS:\t" << *ExitCnt << "\n");
2280  } else {
2281  // We try to extend trip count first. If that doesn't work we truncate IV.
2282  // Zext(trunc(IV)) == IV implies equivalence of the following two:
2283  // Trunc(IV) == ExitCnt and IV == zext(ExitCnt). Similarly for sext. If
2284  // one of the two holds, extend the trip count, otherwise we truncate IV.
2285  bool Extended = false;
2286  const SCEV *IV = SE->getSCEV(CmpIndVar);
2287  const SCEV *ZExtTrunc =
2288  SE->getZeroExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2289  ExitCnt->getType()),
2290  CmpIndVar->getType());
2291 
2292  if (ZExtTrunc == IV) {
2293  Extended = true;
2294  ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
2295  "wide.trip.count");
2296  } else {
2297  const SCEV *SExtTrunc =
2298  SE->getSignExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2299  ExitCnt->getType()),
2300  CmpIndVar->getType());
2301  if (SExtTrunc == IV) {
2302  Extended = true;
2303  ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
2304  "wide.trip.count");
2305  }
2306  }
2307 
2308  if (!Extended)
2309  CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
2310  "lftr.wideiv");
2311  }
2312  }
2313  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
2314  Value *OrigCond = BI->getCondition();
2315  // It's tempting to use replaceAllUsesWith here to fully replace the old
2316  // comparison, but that's not immediately safe, since users of the old
2317  // comparison may not be dominated by the new comparison. Instead, just
2318  // update the branch to use the new comparison; in the common case this
2319  // will make old comparison dead.
2320  BI->setCondition(Cond);
2321  DeadInsts.push_back(OrigCond);
2322 
2323  ++NumLFTR;
2324  Changed = true;
2325  return Cond;
2326 }
2327 
2328 //===----------------------------------------------------------------------===//
2329 // sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
2330 //===----------------------------------------------------------------------===//
2331 
2332 /// If there's a single exit block, sink any loop-invariant values that
2333 /// were defined in the preheader but not used inside the loop into the
2334 /// exit block to reduce register pressure in the loop.
2335 void IndVarSimplify::sinkUnusedInvariants(Loop *L) {
2336  BasicBlock *ExitBlock = L->getExitBlock();
2337  if (!ExitBlock) return;
2338 
2339  BasicBlock *Preheader = L->getLoopPreheader();
2340  if (!Preheader) return;
2341 
2342  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
2343  BasicBlock::iterator I(Preheader->getTerminator());
2344  while (I != Preheader->begin()) {
2345  --I;
2346  // New instructions were inserted at the end of the preheader.
2347  if (isa<PHINode>(I))
2348  break;
2349 
2350  // Don't move instructions which might have side effects, since the side
2351  // effects need to complete before instructions inside the loop. Also don't
2352  // move instructions which might read memory, since the loop may modify
2353  // memory. Note that it's okay if the instruction might have undefined
2354  // behavior: LoopSimplify guarantees that the preheader dominates the exit
2355  // block.
2356  if (I->mayHaveSideEffects() || I->mayReadFromMemory())
2357  continue;
2358 
2359  // Skip debug info intrinsics.
2360  if (isa<DbgInfoIntrinsic>(I))
2361  continue;
2362 
2363  // Skip eh pad instructions.
2364  if (I->isEHPad())
2365  continue;
2366 
2367  // Don't sink alloca: we never want to sink static alloca's out of the
2368  // entry block, and correctly sinking dynamic alloca's requires
2369  // checks for stacksave/stackrestore intrinsics.
2370  // FIXME: Refactor this check somehow?
2371  if (isa<AllocaInst>(I))
2372  continue;
2373 
2374  // Determine if there is a use in or before the loop (direct or
2375  // otherwise).
2376  bool UsedInLoop = false;
2377  for (Use &U : I->uses()) {
2378  Instruction *User = cast<Instruction>(U.getUser());
2379  BasicBlock *UseBB = User->getParent();
2380  if (PHINode *P = dyn_cast<PHINode>(User)) {
2381  unsigned i =
2382  PHINode::getIncomingValueNumForOperand(U.getOperandNo());
2383  UseBB = P->getIncomingBlock(i);
2384  }
2385  if (UseBB == Preheader || L->contains(UseBB)) {
2386  UsedInLoop = true;
2387  break;
2388  }
2389  }
2390 
2391  // If there is, the def must remain in the preheader.
2392  if (UsedInLoop)
2393  continue;
2394 
2395  // Otherwise, sink it to the exit block.
2396  Instruction *ToMove = &*I;
2397  bool Done = false;
2398 
2399  if (I != Preheader->begin()) {
2400  // Skip debug info intrinsics.
2401  do {
2402  --I;
2403  } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
2404 
2405  if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
2406  Done = true;
2407  } else {
2408  Done = true;
2409  }
2410 
2411  ToMove->moveBefore(*ExitBlock, InsertPt);
2412  if (Done) break;
2413  InsertPt = ToMove->getIterator();
2414  }
2415 }
2416 
2417 //===----------------------------------------------------------------------===//
2418 // IndVarSimplify driver. Manage several subpasses of IV simplification.
2419 //===----------------------------------------------------------------------===//
2420 
2421 bool IndVarSimplify::run(Loop *L) {
2422  // We need (and expect!) the incoming loop to be in LCSSA.
2423  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2424  "LCSSA required to run indvars!");
2425 
2426  // If LoopSimplify form is not available, stay out of trouble. Some notes:
2427  // - LSR currently only supports LoopSimplify-form loops. Indvars'
2428  // canonicalization can be a pessimization without LSR to "clean up"
2429  // afterwards.
2430  // - We depend on having a preheader; in particular,
2431  // Loop::getCanonicalInductionVariable only supports loops with preheaders,
2432  // and we're in trouble if we can't find the induction variable even when
2433  // we've manually inserted one.
2434  // - LFTR relies on having a single backedge.
2435  if (!L->isLoopSimplifyForm())
2436  return false;
2437 
2438  // If there are any floating-point recurrences, attempt to
2439  // transform them to use integer recurrences.
2440  rewriteNonIntegerIVs(L);
2441 
2442  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
2443 
2444  // Create a rewriter object which we'll use to transform the code with.
2445  SCEVExpander Rewriter(*SE, DL, "indvars");
2446 #ifndef NDEBUG
2447  Rewriter.setDebugType(DEBUG_TYPE);
2448 #endif
2449 
2450  // Eliminate redundant IV users.
2451  //
2452  // Simplification works best when run before other consumers of SCEV. We
2453  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
2454  // other expressions involving loop IVs have been evaluated. This helps SCEV
2455  // set no-wrap flags before normalizing sign/zero extension.
2456  Rewriter.disableCanonicalMode();
2457  simplifyAndExtend(L, Rewriter, LI);
2458 
2459  // Check to see if this loop has a computable loop-invariant execution count.
2460  // If so, this means that we can compute the final value of any expressions
2461  // that are recurrent in the loop, and substitute the exit values from the
2462  // loop into any instructions outside of the loop that use the final values of
2463  // the current expressions.
2464  //
2465  if (ReplaceExitValue != NeverRepl &&
2466  !isa<SCEVCouldNotCompute>(BackedgeTakenCount))
2467  rewriteLoopExitValues(L, Rewriter);
2468 
2469  // Eliminate redundant IV cycles.
2470  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
2471 
2472  // If we have a trip count expression, rewrite the loop's exit condition
2473  // using it. We can currently only handle loops with a single exit.
2474  if (!DisableLFTR && canExpandBackedgeTakenCount(L, SE, Rewriter) &&
2475  needsLFTR(L, DT)) {
2476  PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT);
2477  if (IndVar) {
2478  // Check preconditions for proper SCEVExpander operation. SCEV does not
2479  // express SCEVExpander's dependencies, such as LoopSimplify. Instead any
2480  // pass that uses the SCEVExpander must do it. This does not work well for
2481  // loop passes because SCEVExpander makes assumptions about all loops,
2482  // while LoopPassManager only forces the current loop to be simplified.
2483  //
2484  // FIXME: SCEV expansion has no way to bail out, so the caller must
2485  // explicitly check any assumptions made by SCEV. Brittle.
2486  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount);
2487  if (!AR || AR->getLoop()->getLoopPreheader())
2488  (void)linearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
2489  Rewriter);
2490  }
2491  }
2492  // Clear the rewriter cache, because values that are in the rewriter's cache
2493  // can be deleted in the loop below, causing the AssertingVH in the cache to
2494  // trigger.
2495  Rewriter.clear();
2496 
2497  // Now that we're done iterating through lists, clean up any instructions
2498  // which are now dead.
2499  while (!DeadInsts.empty())
2500  if (Instruction *Inst =
2501  dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
2503 
2504  // The Rewriter may not be used from this point on.
2505 
2506  // Loop-invariant instructions in the preheader that aren't used in the
2507  // loop may be sunk below the loop to reduce register pressure.
2508  sinkUnusedInvariants(L);
2509 
2510  // rewriteFirstIterationLoopExitValues does not rely on the computation of
2511  // trip count and therefore can further simplify exit values in addition to
2512  // rewriteLoopExitValues.
2513  rewriteFirstIterationLoopExitValues(L);
2514 
2515  // Clean up dead instructions.
2516  Changed |= DeleteDeadPHIs(L->getHeader(), TLI);
2517 
2518  // Check a post-condition.
2519  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2520  "Indvars did not preserve LCSSA!");
2521 
2522  // Verify that LFTR, and any other change have not interfered with SCEV's
2523  // ability to compute trip count.
2524 #ifndef NDEBUG
2525  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
2526  SE->forgetLoop(L);
2527  const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
2528  if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
2529  SE->getTypeSizeInBits(NewBECount->getType()))
2530  NewBECount = SE->getTruncateOrNoop(NewBECount,
2531  BackedgeTakenCount->getType());
2532  else
2533  BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
2534  NewBECount->getType());
2535  assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV");
2536  }
2537 #endif
2538 
2539  return Changed;
2540 }
2541 
2544  LPMUpdater &) {
2545  Function *F = L.getHeader()->getParent();
2546  const DataLayout &DL = F->getParent()->getDataLayout();
2547 
2548  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI);
2549  if (!IVS.run(&L))
2550  return PreservedAnalyses::all();
2551 
2552  auto PA = getLoopPassPreservedAnalyses();
2553  PA.preserveSet<CFGAnalyses>();
2554  return PA;
2555 }
2556 
2557 namespace {
2558 
2559 struct IndVarSimplifyLegacyPass : public LoopPass {
2560  static char ID; // Pass identification, replacement for typeid
2561 
2562  IndVarSimplifyLegacyPass() : LoopPass(ID) {
2564  }
2565 
2566  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
2567  if (skipLoop(L))
2568  return false;
2569 
2570  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2571  auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2572  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2573  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2574  auto *TLI = TLIP ? &TLIP->getTLI() : nullptr;
2575  auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
2576  auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
2577  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2578 
2579  IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI);
2580  return IVS.run(L);
2581  }
2582 
2583  void getAnalysisUsage(AnalysisUsage &AU) const override {
2584  AU.setPreservesCFG();
2586  }
2587 };
2588 
2589 } // end anonymous namespace
2590 
2592 
2593 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
2594  "Induction Variable Simplification", false, false)
2596 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
2597  "Induction Variable Simplification", false, false)
2598 
2600  return new IndVarSimplifyLegacyPass();
2601 }
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:81
uint64_t CallInst * C
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:69
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:109
bool hoistIVInc(Instruction *IncV, Instruction *InsertPos)
Utility for hoisting an IV increment.
Induction Variable Simplification
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:72
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1638
iterator_range< use_iterator > uses()
Definition: Value.h:356
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:157
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Value * getExactExistingExpansion(const SCEV *S, const Instruction *At, Loop *L)
Try to find existing LLVM IR value for S available at the point At.
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT)
Perform a quick domtree based check for loop invariance assuming that V is used within the loop...
MutableArrayRef< T > makeMutableArrayRef(T &OneElt)
Construct a MutableArrayRef from a single element.
Definition: ArrayRef.h:503
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
This header provides classes for managing a pipeline of passes over loops in LLVM IR...
Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:136
bool isRecursivelyLCSSAForm(DominatorTree &DT, const LoopInfo &LI) const
Return true if this Loop and all inner subloops are in LCSSA form.
Definition: LoopInfo.cpp:182
bool isLCSSAForm(DominatorTree &DT) const
Return true if the Loop is in LCSSA form.
Definition: LoopInfo.cpp:175
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
The main scalar evolution driver.
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:865
bool isZero() const
Return true if the expression is a constant zero.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:106
bool isHighCostExpansion(const SCEV *Expr, Loop *L, const Instruction *At=nullptr)
Return true for expressions that may incur non-trivial cost to evaluate at runtime.
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:859
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:728
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:869
uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
void setDebugType(const char *s)
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
findNearestCommonDominator - Find nearest common dominator basic block for basic block A and B...
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
BasicBlock * getSuccessor(unsigned i) const
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1390
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
STATISTIC(NumFunctions, "Total number of functions")
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
F(f)
static cl::opt< bool > UsePostIncrementRanges("indvars-post-increment-ranges", cl::Hidden, cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), cl::init(true))
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:177
Hexagon Common GEP
Value * getCondition() const
static ConstantRange makeAllowedICmpRegion(CmpInst::Predicate Pred, const ConstantRange &Other)
Produce the smallest range such that all values that may satisfy the given predicate with any value c...
This defines the Use class.
static bool hasConcreteDef(Value *V)
Return true if the given value is concrete.
ReplaceExitVal
bool hasLoopInvariantOperands(const Instruction *I) const
Return true if all the operands of the specified instruction are loop invariant.
Definition: LoopInfo.cpp:62
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1488
INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars", "Induction Variable Simplification", false, false) INITIALIZE_PASS_END(IndVarSimplifyLegacyPass
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:252
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1069
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:191
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:864
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
static Value * genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L, SCEVExpander &Rewriter, ScalarEvolution *SE)
Help linearFunctionTestReplace by generating a value that holds the RHS of the new loop test...
Interface for visiting interesting IV users that are recognized but not simplified by this utility...
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr it the function does no...
Definition: BasicBlock.cpp:116
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
bool isSigned() const
Determine if this instruction is using a signed comparison.
Definition: InstrTypes.h:994
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:591
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE, etc.
Definition: InstrTypes.h:951
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:361
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:678
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:362
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:560
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal)
Convert APF to an integer, if possible.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:668
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:860
This file implements a class to represent arbitrary precision integral constant values and operations...
BlockT * getHeader() const
Definition: LoopInfo.h:100
Key
PAL metadata keys.
void findDbgValues(SmallVectorImpl< DbgValueInst *> &DbgValues, Value *V)
Finds the llvm.dbg.value intrinsics describing a value.
Definition: Local.cpp:1288
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:813
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
Value handle that is nullable, but tries to track the Value.
Definition: ValueHandle.h:182
This node represents a polynomial recurrence on the trip count of the specified loop.
This instruction compares its operands according to the predicate given to the constructor.
static Instruction * getInsertPointForUses(Instruction *User, Value *Def, DominatorTree *DT, LoopInfo *LI)
Determine the insertion point for this user.
static cl::opt< bool > VerifyIndvars("verify-indvars", cl::Hidden, cl::desc("Verify the ScalarEvolution result after running indvars"))
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Examine each PHI in the given block and delete it if it is dead.
const SCEV * getSizeOfExpr(Type *IntTy, Type *AllocTy)
Return an expression for sizeof AllocTy that is type IntTy.
ConstantRange intersectWith(const ConstantRange &CR) const
Return the range that results from the intersection of this range with another range.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:125
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:152
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:430
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1387
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:292
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:140
static BinaryOperator * CreateAdd(Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block...
Definition: IRBuilder.h:128
Value * getOperand(unsigned i) const
Definition: User.h:154
void getUniqueExitBlocks(SmallVectorImpl< BasicBlock *> &ExitBlocks) const
Return all unique successor blocks of this loop.
Definition: LoopInfo.cpp:393
void initializeIndVarSimplifyLegacyPassPass(PassRegistry &)
void clearInsertPoint()
Clear the current insertion point.
static MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition: Metadata.cpp:106
void clear()
Erase the contents of the InsertedExpressions map so that users trying to expand the same expression ...
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:406
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt...
Definition: PatternMatch.h:260
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:200
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:281
static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE, SCEVExpander &Rewriter)
Return true if this loop&#39;s backedge taken count expression can be safely and cheaply expanded into an...
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
Conditional or Unconditional Branch instruction.
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl< Value *> &Visited, unsigned Depth)
Recursive helper for hasConcreteDef().
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:626
Value * getIncomingValueForBlock(const BasicBlock *BB) const
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:221
const Instruction & front() const
Definition: BasicBlock.h:264
#define H(x, y, z)
Definition: MD5.cpp:57
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:264
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE, const TargetTransformInfo *TTI)
Update information about the induction variable that is extended by this sign or zero extend operatio...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
bool mayHaveSideEffects() const
Return true if the instruction may have side effects.
Definition: Instruction.h:535
const SCEV * getAddExpr(SmallVectorImpl< const SCEV *> &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:382
Represent the analysis usage information of a pass.
This file declares a class to represent arbitrary precision floating point values and provide a varie...
bool any_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:820
This instruction compares its operands according to the predicate given to the constructor.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:853
Utility class for integer operators which may exhibit overflow - Add, Sub, Mul, and Shl...
Definition: Operator.h:67
Value * expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I)
Insert code to directly compute the specified SCEV expression into the program.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values...
op_range operands()
Definition: User.h:222
self_iterator getIterator()
Definition: ilist_node.h:82
Class to represent integer types.
Definition: DerivedTypes.h:40
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:76
void getExitingBlocks(SmallVectorImpl< BlockT *> &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:35
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1320
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr)
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:401
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
static unsigned getIncomingValueNumForOperand(unsigned i)
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
static ICmpInst * getLoopTest(Loop *L)
Return the compare guarding the loop latch, or NULL for unrecognized tests.
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:868
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1384
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:56
signed greater than
Definition: InstrTypes.h:880
#define DEBUG_TYPE
const APFloat & getValueAPF() const
Definition: Constants.h:294
Value * CreateGEP(Value *Ptr, ArrayRef< Value *> IdxList, const Twine &Name="")
Definition: IRBuilder.h:1227
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:857
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:110
unsigned getSCEVType() const
Iterator for intrusive lists based on ilist_node.
unsigned getNumOperands() const
Definition: User.h:176
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
Type * getType() const
Return the LLVM type of this SCEV expression.
constexpr bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:301
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
const SCEV * getTruncateExpr(const SCEV *Op, Type *Ty)
static ValueAsMetadata * get(Value *V)
Definition: Metadata.cpp:341
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:867
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:864
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:239
Module.h This file contains the declarations for the Module class.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:63
Provides information about what library functions are available for the current target.
This class represents a range of values.
Definition: ConstantRange.h:47
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU...
Definition: DataLayout.h:238
signed less than
Definition: InstrTypes.h:882
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:385
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:560
static PHINode * getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT)
Return the loop header phi IFF IncV adds a loop invariant value to the phi.
bool isConditional() const
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:285
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Value handle that asserts if the Value is deleted.
Definition: ValueHandle.h:238
static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI)
This IV user cannot be widen.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
Definition: PatternMatch.h:637
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:923
signed less or equal
Definition: InstrTypes.h:883
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:57
Class for arbitrary precision integers.
Definition: APInt.h:69
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), Instruction *InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
iterator_range< user_iterator > users()
Definition: Value.h:401
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property...
Definition: Operator.h:96
static cl::opt< ReplaceExitVal > ReplaceExitValue("replexitval", cl::Hidden, cl::init(OnlyCheapRepl), cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), clEnumValN(OnlyCheapRepl, "cheap", "only replace exit value when the cost is cheap"), clEnumValN(AlwaysRepl, "always", "always replace exit value whenever possible")))
This class uses information about analyze scalars to rewrite expressions in canonical form...
Represents analyses that only rely on functions&#39; control flow.
Definition: PassManager.h:114
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:523
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:601
LoopT * getParentLoop() const
Definition: LoopInfo.h:101
Virtual Register Rewriter
Definition: VirtRegMap.cpp:221
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:927
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
Definition: LoopInfo.cpp:191
void emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:656
This class represents an analyzed expression in the program.
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:176
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:61
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:439
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:149
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:220
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:108
#define I(x, y, z)
Definition: MD5.cpp:58
user_iterator_impl< User > user_iterator
Definition: Value.h:370
bool mayReadFromMemory() const
Return true if this instruction may read memory.
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass&#39;s AnalysisUsage.
Definition: LoopUtils.cpp:1023
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:861
static bool needsLFTR(Loop *L, DominatorTree *DT)
linearFunctionTestReplace policy.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, SmallVectorImpl< WeakTrackingVH > &Dead, SCEVExpander &Rewriter, IVVisitor *V=nullptr)
simplifyUsersOfIV - Simplify instructions that use this induction variable by using ScalarEvolution t...
int getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info=OK_AnyValue, OperandValueKind Opd2Info=OK_AnyValue, OperandValueProperties Opd1PropInfo=OP_None, OperandValueProperties Opd2PropInfo=OP_None, ArrayRef< const Value *> Args=ArrayRef< const Value *>()) const
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:865
unsigned replaceCongruentIVs(Loop *L, const DominatorTree *DT, SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetTransformInfo *TTI=nullptr)
replace congruent phis with their most canonical representative.
const SCEV * getBackedgeTakenCount(const Loop *L)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
This class represents a cast from signed integer to floating point.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
user_iterator user_begin()
Definition: Value.h:377
bool isOne() const
Return true if the expression is a constant one.
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:856
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:556
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction has no side ef...
Definition: Local.cpp:324
LLVM Value Representation.
Definition: Value.h:73
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:866
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:235
static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond)
Return true if this IV has any uses other than the (soon to be rewritten) loop exit test...
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:88
#define DEBUG(X)
Definition: Debug.h:118
void disableCanonicalMode()
Disable the behavior of expanding expressions in canonical form rather than in a more literal form...
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:538
bool hasOneUse() const
Return true if there is exactly one user of this value.
Definition: Value.h:414
A container for analyses that lazily runs them and caches their results.
const SCEV * getTruncateOrZeroExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
This pass exposes codegen information to IR-level passes.
const TerminatorInst * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:120
This header defines various interfaces for pass management in LLVM.
void setIncomingValue(unsigned i, Value *V)
bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE)
Return true if the given expression is safe to expand in the sense that all materialized values are s...
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:156
static cl::opt< bool > DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), cl::desc("Disable Linear Function Test Replace optimization"))
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:858
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:50
bool use_empty() const
Definition: Value.h:328
Pass * createIndVarSimplifyPass()
signed greater or equal
Definition: InstrTypes.h:881
IntegerType * Int32Ty
const BasicBlock * getParent() const
Definition: Instruction.h:66
This class represents a constant integer value.
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
Definition: PatternMatch.h:837
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property...
Definition: Operator.h:90
user_iterator user_end()
Definition: Value.h:385
static PHINode * FindLoopCounter(Loop *L, const SCEV *BECount, ScalarEvolution *SE, DominatorTree *DT)
Find an affine IV in canonical form.