LLVM  9.0.0svn
IndVarSimplify.cpp
Go to the documentation of this file.
1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This transformation analyzes and transforms the induction variables (and
10 // computations derived from them) into simpler forms suitable for subsequent
11 // analysis and transformation.
12 //
13 // If the trip count of a loop is computable, this pass also makes the following
14 // changes:
15 // 1. The exit condition for the loop is canonicalized to compare the
16 // induction value against the exit value. This turns loops like:
17 // 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18 // 2. Any use outside of the loop of an expression derived from the indvar
19 // is changed to compute the derived value outside of the loop, eliminating
20 // the dependence on the exit value of the induction variable. If the only
21 // purpose of the loop is to compute the exit value of some derived
22 // expression, this transformation will make the loop dead.
23 //
24 //===----------------------------------------------------------------------===//
25 
27 #include "llvm/ADT/APFloat.h"
28 #include "llvm/ADT/APInt.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/DenseMap.h"
31 #include "llvm/ADT/None.h"
32 #include "llvm/ADT/Optional.h"
33 #include "llvm/ADT/STLExtras.h"
34 #include "llvm/ADT/SmallSet.h"
35 #include "llvm/ADT/SmallPtrSet.h"
36 #include "llvm/ADT/SmallVector.h"
37 #include "llvm/ADT/Statistic.h"
39 #include "llvm/Analysis/LoopInfo.h"
40 #include "llvm/Analysis/LoopPass.h"
48 #include "llvm/IR/BasicBlock.h"
49 #include "llvm/IR/Constant.h"
50 #include "llvm/IR/ConstantRange.h"
51 #include "llvm/IR/Constants.h"
52 #include "llvm/IR/DataLayout.h"
53 #include "llvm/IR/DerivedTypes.h"
54 #include "llvm/IR/Dominators.h"
55 #include "llvm/IR/Function.h"
56 #include "llvm/IR/IRBuilder.h"
57 #include "llvm/IR/InstrTypes.h"
58 #include "llvm/IR/Instruction.h"
59 #include "llvm/IR/Instructions.h"
60 #include "llvm/IR/IntrinsicInst.h"
61 #include "llvm/IR/Intrinsics.h"
62 #include "llvm/IR/Module.h"
63 #include "llvm/IR/Operator.h"
64 #include "llvm/IR/PassManager.h"
65 #include "llvm/IR/PatternMatch.h"
66 #include "llvm/IR/Type.h"
67 #include "llvm/IR/Use.h"
68 #include "llvm/IR/User.h"
69 #include "llvm/IR/Value.h"
70 #include "llvm/IR/ValueHandle.h"
71 #include "llvm/Pass.h"
72 #include "llvm/Support/Casting.h"
74 #include "llvm/Support/Compiler.h"
75 #include "llvm/Support/Debug.h"
79 #include "llvm/Transforms/Scalar.h"
84 #include <cassert>
85 #include <cstdint>
86 #include <utility>
87 
88 using namespace llvm;
89 
90 #define DEBUG_TYPE "indvars"
91 
92 STATISTIC(NumWidened , "Number of indvars widened");
93 STATISTIC(NumReplaced , "Number of exit values replaced");
94 STATISTIC(NumLFTR , "Number of loop exit tests replaced");
95 STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
96 STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
97 
98 // Trip count verification can be enabled by default under NDEBUG if we
99 // implement a strong expression equivalence checker in SCEV. Until then, we
100 // use the verify-indvars flag, which may assert in some cases.
102  "verify-indvars", cl::Hidden,
103  cl::desc("Verify the ScalarEvolution result after running indvars"));
104 
106 
108  "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
109  cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
110  cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
111  clEnumValN(OnlyCheapRepl, "cheap",
112  "only replace exit value when the cost is cheap"),
113  clEnumValN(NoHardUse, "noharduse",
114  "only replace exit values when loop def likely dead"),
115  clEnumValN(AlwaysRepl, "always",
116  "always replace exit value whenever possible")));
117 
119  "indvars-post-increment-ranges", cl::Hidden,
120  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
121  cl::init(true));
122 
123 static cl::opt<bool>
124 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
125  cl::desc("Disable Linear Function Test Replace optimization"));
126 
127 namespace {
128 
129 struct RewritePhi;
130 
131 class IndVarSimplify {
132  LoopInfo *LI;
133  ScalarEvolution *SE;
134  DominatorTree *DT;
135  const DataLayout &DL;
136  TargetLibraryInfo *TLI;
137  const TargetTransformInfo *TTI;
138 
140 
141  bool isValidRewrite(Value *FromVal, Value *ToVal);
142 
143  bool handleFloatingPointIV(Loop *L, PHINode *PH);
144  bool rewriteNonIntegerIVs(Loop *L);
145 
146  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
147  bool optimizeLoopExits(Loop *L);
148 
149  bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
150  bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
151  bool rewriteFirstIterationLoopExitValues(Loop *L);
152  bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const;
153 
154  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
155  const SCEV *ExitCount,
156  PHINode *IndVar, SCEVExpander &Rewriter);
157 
158  bool sinkUnusedInvariants(Loop *L);
159 
160 public:
161  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
162  const DataLayout &DL, TargetLibraryInfo *TLI,
163  TargetTransformInfo *TTI)
164  : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {}
165 
166  bool run(Loop *L);
167 };
168 
169 } // end anonymous namespace
170 
171 /// Return true if the SCEV expansion generated by the rewriter can replace the
172 /// original value. SCEV guarantees that it produces the same value, but the way
173 /// it is produced may be illegal IR. Ideally, this function will only be
174 /// called for verification.
175 bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
176  // If an SCEV expression subsumed multiple pointers, its expansion could
177  // reassociate the GEP changing the base pointer. This is illegal because the
178  // final address produced by a GEP chain must be inbounds relative to its
179  // underlying object. Otherwise basic alias analysis, among other things,
180  // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
181  // producing an expression involving multiple pointers. Until then, we must
182  // bail out here.
183  //
184  // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
185  // because it understands lcssa phis while SCEV does not.
186  Value *FromPtr = FromVal;
187  Value *ToPtr = ToVal;
188  if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
189  FromPtr = GEP->getPointerOperand();
190  }
191  if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
192  ToPtr = GEP->getPointerOperand();
193  }
194  if (FromPtr != FromVal || ToPtr != ToVal) {
195  // Quickly check the common case
196  if (FromPtr == ToPtr)
197  return true;
198 
199  // SCEV may have rewritten an expression that produces the GEP's pointer
200  // operand. That's ok as long as the pointer operand has the same base
201  // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
202  // base of a recurrence. This handles the case in which SCEV expansion
203  // converts a pointer type recurrence into a nonrecurrent pointer base
204  // indexed by an integer recurrence.
205 
206  // If the GEP base pointer is a vector of pointers, abort.
207  if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
208  return false;
209 
210  const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
211  const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
212  if (FromBase == ToBase)
213  return true;
214 
215  LLVM_DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " << *FromBase
216  << " != " << *ToBase << "\n");
217 
218  return false;
219  }
220  return true;
221 }
222 
223 /// Determine the insertion point for this user. By default, insert immediately
224 /// before the user. SCEVExpander or LICM will hoist loop invariants out of the
225 /// loop. For PHI nodes, there may be multiple uses, so compute the nearest
226 /// common dominator for the incoming blocks. A nullptr can be returned if no
227 /// viable location is found: it may happen if User is a PHI and Def only comes
228 /// to this PHI from unreachable blocks.
230  DominatorTree *DT, LoopInfo *LI) {
231  PHINode *PHI = dyn_cast<PHINode>(User);
232  if (!PHI)
233  return User;
234 
235  Instruction *InsertPt = nullptr;
236  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
237  if (PHI->getIncomingValue(i) != Def)
238  continue;
239 
240  BasicBlock *InsertBB = PHI->getIncomingBlock(i);
241 
242  if (!DT->isReachableFromEntry(InsertBB))
243  continue;
244 
245  if (!InsertPt) {
246  InsertPt = InsertBB->getTerminator();
247  continue;
248  }
249  InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
250  InsertPt = InsertBB->getTerminator();
251  }
252 
253  // If we have skipped all inputs, it means that Def only comes to Phi from
254  // unreachable blocks.
255  if (!InsertPt)
256  return nullptr;
257 
258  auto *DefI = dyn_cast<Instruction>(Def);
259  if (!DefI)
260  return InsertPt;
261 
262  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");
263 
264  auto *L = LI->getLoopFor(DefI->getParent());
265  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));
266 
267  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
268  if (LI->getLoopFor(DTN->getBlock()) == L)
269  return DTN->getBlock()->getTerminator();
270 
271  llvm_unreachable("DefI dominates InsertPt!");
272 }
273 
274 //===----------------------------------------------------------------------===//
275 // rewriteNonIntegerIVs and helpers. Prefer integer IVs.
276 //===----------------------------------------------------------------------===//
277 
278 /// Convert APF to an integer, if possible.
279 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
280  bool isExact = false;
281  // See if we can convert this to an int64_t
282  uint64_t UIntVal;
283  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
284  APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
285  !isExact)
286  return false;
287  IntVal = UIntVal;
288  return true;
289 }
290 
291 /// If the loop has floating induction variable then insert corresponding
292 /// integer induction variable if possible.
293 /// For example,
294 /// for(double i = 0; i < 10000; ++i)
295 /// bar(i)
296 /// is converted into
297 /// for(int i = 0; i < 10000; ++i)
298 /// bar((double)i);
299 bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
300  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
301  unsigned BackEdge = IncomingEdge^1;
302 
303  // Check incoming value.
304  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
305 
306  int64_t InitValue;
307  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
308  return false;
309 
310  // Check IV increment. Reject this PN if increment operation is not
311  // an add or increment value can not be represented by an integer.
312  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
313  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;
314 
315  // If this is not an add of the PHI with a constantfp, or if the constant fp
316  // is not an integer, bail out.
317  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
318  int64_t IncValue;
319  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
320  !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
321  return false;
322 
323  // Check Incr uses. One user is PN and the other user is an exit condition
324  // used by the conditional terminator.
325  Value::user_iterator IncrUse = Incr->user_begin();
326  Instruction *U1 = cast<Instruction>(*IncrUse++);
327  if (IncrUse == Incr->user_end()) return false;
328  Instruction *U2 = cast<Instruction>(*IncrUse++);
329  if (IncrUse != Incr->user_end()) return false;
330 
331  // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
332  // only used by a branch, we can't transform it.
334  if (!Compare)
335  Compare = dyn_cast<FCmpInst>(U2);
336  if (!Compare || !Compare->hasOneUse() ||
337  !isa<BranchInst>(Compare->user_back()))
338  return false;
339 
340  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
341 
342  // We need to verify that the branch actually controls the iteration count
343  // of the loop. If not, the new IV can overflow and no one will notice.
344  // The branch block must be in the loop and one of the successors must be out
345  // of the loop.
346  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
347  if (!L->contains(TheBr->getParent()) ||
348  (L->contains(TheBr->getSuccessor(0)) &&
349  L->contains(TheBr->getSuccessor(1))))
350  return false;
351 
352  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
353  // transform it.
354  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
355  int64_t ExitValue;
356  if (ExitValueVal == nullptr ||
357  !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
358  return false;
359 
360  // Find new predicate for integer comparison.
362  switch (Compare->getPredicate()) {
363  default: return false; // Unknown comparison.
364  case CmpInst::FCMP_OEQ:
365  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
366  case CmpInst::FCMP_ONE:
367  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
368  case CmpInst::FCMP_OGT:
369  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
370  case CmpInst::FCMP_OGE:
371  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
372  case CmpInst::FCMP_OLT:
373  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
374  case CmpInst::FCMP_OLE:
375  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
376  }
377 
378  // We convert the floating point induction variable to a signed i32 value if
379  // we can. This is only safe if the comparison will not overflow in a way
380  // that won't be trapped by the integer equivalent operations. Check for this
381  // now.
382  // TODO: We could use i64 if it is native and the range requires it.
383 
384  // The start/stride/exit values must all fit in signed i32.
385  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
386  return false;
387 
388  // If not actually striding (add x, 0.0), avoid touching the code.
389  if (IncValue == 0)
390  return false;
391 
392  // Positive and negative strides have different safety conditions.
393  if (IncValue > 0) {
394  // If we have a positive stride, we require the init to be less than the
395  // exit value.
396  if (InitValue >= ExitValue)
397  return false;
398 
399  uint32_t Range = uint32_t(ExitValue-InitValue);
400  // Check for infinite loop, either:
401  // while (i <= Exit) or until (i > Exit)
402  if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
403  if (++Range == 0) return false; // Range overflows.
404  }
405 
406  unsigned Leftover = Range % uint32_t(IncValue);
407 
408  // If this is an equality comparison, we require that the strided value
409  // exactly land on the exit value, otherwise the IV condition will wrap
410  // around and do things the fp IV wouldn't.
411  if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
412  Leftover != 0)
413  return false;
414 
415  // If the stride would wrap around the i32 before exiting, we can't
416  // transform the IV.
417  if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
418  return false;
419  } else {
420  // If we have a negative stride, we require the init to be greater than the
421  // exit value.
422  if (InitValue <= ExitValue)
423  return false;
424 
425  uint32_t Range = uint32_t(InitValue-ExitValue);
426  // Check for infinite loop, either:
427  // while (i >= Exit) or until (i < Exit)
428  if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
429  if (++Range == 0) return false; // Range overflows.
430  }
431 
432  unsigned Leftover = Range % uint32_t(-IncValue);
433 
434  // If this is an equality comparison, we require that the strided value
435  // exactly land on the exit value, otherwise the IV condition will wrap
436  // around and do things the fp IV wouldn't.
437  if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
438  Leftover != 0)
439  return false;
440 
441  // If the stride would wrap around the i32 before exiting, we can't
442  // transform the IV.
443  if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
444  return false;
445  }
446 
448 
449  // Insert new integer induction variable.
450  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
451  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
452  PN->getIncomingBlock(IncomingEdge));
453 
454  Value *NewAdd =
455  BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
456  Incr->getName()+".int", Incr);
457  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
458 
459  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
460  ConstantInt::get(Int32Ty, ExitValue),
461  Compare->getName());
462 
463  // In the following deletions, PN may become dead and may be deleted.
464  // Use a WeakTrackingVH to observe whether this happens.
465  WeakTrackingVH WeakPH = PN;
466 
467  // Delete the old floating point exit comparison. The branch starts using the
468  // new comparison.
469  NewCompare->takeName(Compare);
470  Compare->replaceAllUsesWith(NewCompare);
472 
473  // Delete the old floating point increment.
474  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
476 
477  // If the FP induction variable still has uses, this is because something else
478  // in the loop uses its value. In order to canonicalize the induction
479  // variable, we chose to eliminate the IV and rewrite it in terms of an
480  // int->fp cast.
481  //
482  // We give preference to sitofp over uitofp because it is faster on most
483  // platforms.
484  if (WeakPH) {
485  Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
486  &*PN->getParent()->getFirstInsertionPt());
487  PN->replaceAllUsesWith(Conv);
489  }
490  return true;
491 }
492 
493 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
494  // First step. Check to see if there are any floating-point recurrences.
495  // If there are, change them into integer recurrences, permitting analysis by
496  // the SCEV routines.
497  BasicBlock *Header = L->getHeader();
498 
500  for (PHINode &PN : Header->phis())
501  PHIs.push_back(&PN);
502 
503  bool Changed = false;
504  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
505  if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
506  Changed |= handleFloatingPointIV(L, PN);
507 
508  // If the loop previously had floating-point IV, ScalarEvolution
509  // may not have been able to compute a trip count. Now that we've done some
510  // re-writing, the trip count may be computable.
511  if (Changed)
512  SE->forgetLoop(L);
513  return Changed;
514 }
515 
516 namespace {
517 
518 // Collect information about PHI nodes which can be transformed in
519 // rewriteLoopExitValues.
520 struct RewritePhi {
521  PHINode *PN;
522 
523  // Ith incoming value.
524  unsigned Ith;
525 
526  // Exit value after expansion.
527  Value *Val;
528 
529  // High Cost when expansion.
530  bool HighCost;
531 
532  RewritePhi(PHINode *P, unsigned I, Value *V, bool H)
533  : PN(P), Ith(I), Val(V), HighCost(H) {}
534 };
535 
536 } // end anonymous namespace
537 
538 //===----------------------------------------------------------------------===//
539 // rewriteLoopExitValues - Optimize IV users outside the loop.
540 // As a side effect, reduces the amount of IV processing within the loop.
541 //===----------------------------------------------------------------------===//
542 
543 bool IndVarSimplify::hasHardUserWithinLoop(const Loop *L, const Instruction *I) const {
546  Visited.insert(I);
547  WorkList.push_back(I);
548  while (!WorkList.empty()) {
549  const Instruction *Curr = WorkList.pop_back_val();
550  // This use is outside the loop, nothing to do.
551  if (!L->contains(Curr))
552  continue;
553  // Do we assume it is a "hard" use which will not be eliminated easily?
554  if (Curr->mayHaveSideEffects())
555  return true;
556  // Otherwise, add all its users to worklist.
557  for (auto U : Curr->users()) {
558  auto *UI = cast<Instruction>(U);
559  if (Visited.insert(UI).second)
560  WorkList.push_back(UI);
561  }
562  }
563  return false;
564 }
565 
566 /// Check to see if this loop has a computable loop-invariant execution count.
567 /// If so, this means that we can compute the final value of any expressions
568 /// that are recurrent in the loop, and substitute the exit values from the loop
569 /// into any instructions outside of the loop that use the final values of the
570 /// current expressions.
571 ///
572 /// This is mostly redundant with the regular IndVarSimplify activities that
573 /// happen later, except that it's more powerful in some cases, because it's
574 /// able to brute-force evaluate arbitrary instructions as long as they have
575 /// constant operands at the beginning of the loop.
576 bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
577  // Check a pre-condition.
578  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
579  "Indvars did not preserve LCSSA!");
580 
581  SmallVector<BasicBlock*, 8> ExitBlocks;
582  L->getUniqueExitBlocks(ExitBlocks);
583 
584  SmallVector<RewritePhi, 8> RewritePhiSet;
585  // Find all values that are computed inside the loop, but used outside of it.
586  // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan
587  // the exit blocks of the loop to find them.
588  for (BasicBlock *ExitBB : ExitBlocks) {
589  // If there are no PHI nodes in this exit block, then no values defined
590  // inside the loop are used on this path, skip it.
591  PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
592  if (!PN) continue;
593 
594  unsigned NumPreds = PN->getNumIncomingValues();
595 
596  // Iterate over all of the PHI nodes.
597  BasicBlock::iterator BBI = ExitBB->begin();
598  while ((PN = dyn_cast<PHINode>(BBI++))) {
599  if (PN->use_empty())
600  continue; // dead use, don't replace it
601 
602  if (!SE->isSCEVable(PN->getType()))
603  continue;
604 
605  // It's necessary to tell ScalarEvolution about this explicitly so that
606  // it can walk the def-use list and forget all SCEVs, as it may not be
607  // watching the PHI itself. Once the new exit value is in place, there
608  // may not be a def-use connection between the loop and every instruction
609  // which got a SCEVAddRecExpr for that loop.
610  SE->forgetValue(PN);
611 
612  // Iterate over all of the values in all the PHI nodes.
613  for (unsigned i = 0; i != NumPreds; ++i) {
614  // If the value being merged in is not integer or is not defined
615  // in the loop, skip it.
616  Value *InVal = PN->getIncomingValue(i);
617  if (!isa<Instruction>(InVal))
618  continue;
619 
620  // If this pred is for a subloop, not L itself, skip it.
621  if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
622  continue; // The Block is in a subloop, skip it.
623 
624  // Check that InVal is defined in the loop.
625  Instruction *Inst = cast<Instruction>(InVal);
626  if (!L->contains(Inst))
627  continue;
628 
629  // Okay, this instruction has a user outside of the current loop
630  // and varies predictably *inside* the loop. Evaluate the value it
631  // contains when the loop exits, if possible.
632  const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
633  if (!SE->isLoopInvariant(ExitValue, L) ||
634  !isSafeToExpand(ExitValue, *SE))
635  continue;
636 
637  // Computing the value outside of the loop brings no benefit if it is
638  // definitely used inside the loop in a way which can not be optimized
639  // away. Avoid doing so unless we know we have a value which computes
640  // the ExitValue already. TODO: This should be merged into SCEV
641  // expander to leverage its knowledge of existing expressions.
642  if (ReplaceExitValue != AlwaysRepl &&
643  !isa<SCEVConstant>(ExitValue) && !isa<SCEVUnknown>(ExitValue) &&
644  hasHardUserWithinLoop(L, Inst))
645  continue;
646 
647  bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
648  Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
649 
650  LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
651  << '\n'
652  << " LoopVal = " << *Inst << "\n");
653 
654  if (!isValidRewrite(Inst, ExitVal)) {
655  DeadInsts.push_back(ExitVal);
656  continue;
657  }
658 
659 #ifndef NDEBUG
660  // If we reuse an instruction from a loop which is neither L nor one of
661  // its containing loops, we end up breaking LCSSA form for this loop by
662  // creating a new use of its instruction.
663  if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
664  if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
665  if (EVL != L)
666  assert(EVL->contains(L) && "LCSSA breach detected!");
667 #endif
668 
669  // Collect all the candidate PHINodes to be rewritten.
670  RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
671  }
672  }
673  }
674 
675  bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);
676 
677  bool Changed = false;
678  // Transformation.
679  for (const RewritePhi &Phi : RewritePhiSet) {
680  PHINode *PN = Phi.PN;
681  Value *ExitVal = Phi.Val;
682 
683  // Only do the rewrite when the ExitValue can be expanded cheaply.
684  // If LoopCanBeDel is true, rewrite exit value aggressively.
685  if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
686  DeadInsts.push_back(ExitVal);
687  continue;
688  }
689 
690  Changed = true;
691  ++NumReplaced;
692  Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
693  PN->setIncomingValue(Phi.Ith, ExitVal);
694 
695  // If this instruction is dead now, delete it. Don't do it now to avoid
696  // invalidating iterators.
697  if (isInstructionTriviallyDead(Inst, TLI))
698  DeadInsts.push_back(Inst);
699 
700  // Replace PN with ExitVal if that is legal and does not break LCSSA.
701  if (PN->getNumIncomingValues() == 1 &&
702  LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
703  PN->replaceAllUsesWith(ExitVal);
704  PN->eraseFromParent();
705  }
706  }
707 
708  // The insertion point instruction may have been deleted; clear it out
709  // so that the rewriter doesn't trip over it later.
710  Rewriter.clearInsertPoint();
711  return Changed;
712 }
713 
714 //===---------------------------------------------------------------------===//
715 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
716 // they will exit at the first iteration.
717 //===---------------------------------------------------------------------===//
718 
719 /// Check to see if this loop has loop invariant conditions which lead to loop
720 /// exits. If so, we know that if the exit path is taken, it is at the first
721 /// loop iteration. This lets us predict exit values of PHI nodes that live in
722 /// loop header.
723 bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
724  // Verify the input to the pass is already in LCSSA form.
725  assert(L->isLCSSAForm(*DT));
726 
727  SmallVector<BasicBlock *, 8> ExitBlocks;
728  L->getUniqueExitBlocks(ExitBlocks);
729 
730  bool MadeAnyChanges = false;
731  for (auto *ExitBB : ExitBlocks) {
732  // If there are no more PHI nodes in this exit block, then no more
733  // values defined inside the loop are used on this path.
734  for (PHINode &PN : ExitBB->phis()) {
735  for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
736  IncomingValIdx != E; ++IncomingValIdx) {
737  auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
738 
739  // Can we prove that the exit must run on the first iteration if it
740  // runs at all? (i.e. early exits are fine for our purposes, but
741  // traces which lead to this exit being taken on the 2nd iteration
742  // aren't.) Note that this is about whether the exit branch is
743  // executed, not about whether it is taken.
744  if (!L->getLoopLatch() ||
745  !DT->dominates(IncomingBB, L->getLoopLatch()))
746  continue;
747 
748  // Get condition that leads to the exit path.
749  auto *TermInst = IncomingBB->getTerminator();
750 
751  Value *Cond = nullptr;
752  if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
753  // Must be a conditional branch, otherwise the block
754  // should not be in the loop.
755  Cond = BI->getCondition();
756  } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
757  Cond = SI->getCondition();
758  else
759  continue;
760 
761  if (!L->isLoopInvariant(Cond))
762  continue;
763 
764  auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
765 
766  // Only deal with PHIs in the loop header.
767  if (!ExitVal || ExitVal->getParent() != L->getHeader())
768  continue;
769 
770  // If ExitVal is a PHI on the loop header, then we know its
771  // value along this exit because the exit can only be taken
772  // on the first iteration.
773  auto *LoopPreheader = L->getLoopPreheader();
774  assert(LoopPreheader && "Invalid loop");
775  int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
776  if (PreheaderIdx != -1) {
777  assert(ExitVal->getParent() == L->getHeader() &&
778  "ExitVal must be in loop header");
779  MadeAnyChanges = true;
780  PN.setIncomingValue(IncomingValIdx,
781  ExitVal->getIncomingValue(PreheaderIdx));
782  }
783  }
784  }
785  }
786  return MadeAnyChanges;
787 }
788 
789 /// Check whether it is possible to delete the loop after rewriting exit
790 /// value. If it is possible, ignore ReplaceExitValue and do rewriting
791 /// aggressively.
792 bool IndVarSimplify::canLoopBeDeleted(
793  Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
794  BasicBlock *Preheader = L->getLoopPreheader();
795  // If there is no preheader, the loop will not be deleted.
796  if (!Preheader)
797  return false;
798 
799  // In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1.
800  // We obviate multiple ExitingBlocks case for simplicity.
801  // TODO: If we see testcase with multiple ExitingBlocks can be deleted
802  // after exit value rewriting, we can enhance the logic here.
803  SmallVector<BasicBlock *, 4> ExitingBlocks;
804  L->getExitingBlocks(ExitingBlocks);
805  SmallVector<BasicBlock *, 8> ExitBlocks;
806  L->getUniqueExitBlocks(ExitBlocks);
807  if (ExitBlocks.size() > 1 || ExitingBlocks.size() > 1)
808  return false;
809 
810  BasicBlock *ExitBlock = ExitBlocks[0];
811  BasicBlock::iterator BI = ExitBlock->begin();
812  while (PHINode *P = dyn_cast<PHINode>(BI)) {
813  Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);
814 
815  // If the Incoming value of P is found in RewritePhiSet, we know it
816  // could be rewritten to use a loop invariant value in transformation
817  // phase later. Skip it in the loop invariant check below.
818  bool found = false;
819  for (const RewritePhi &Phi : RewritePhiSet) {
820  unsigned i = Phi.Ith;
821  if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
822  found = true;
823  break;
824  }
825  }
826 
827  Instruction *I;
828  if (!found && (I = dyn_cast<Instruction>(Incoming)))
829  if (!L->hasLoopInvariantOperands(I))
830  return false;
831 
832  ++BI;
833  }
834 
835  for (auto *BB : L->blocks())
836  if (llvm::any_of(*BB, [](Instruction &I) {
837  return I.mayHaveSideEffects();
838  }))
839  return false;
840 
841  return true;
842 }
843 
844 //===----------------------------------------------------------------------===//
845 // IV Widening - Extend the width of an IV to cover its widest uses.
846 //===----------------------------------------------------------------------===//
847 
848 namespace {
849 
850 // Collect information about induction variables that are used by sign/zero
851 // extend operations. This information is recorded by CollectExtend and provides
852 // the input to WidenIV.
853 struct WideIVInfo {
854  PHINode *NarrowIV = nullptr;
855 
856  // Widest integer type created [sz]ext
857  Type *WidestNativeType = nullptr;
858 
859  // Was a sext user seen before a zext?
860  bool IsSigned = false;
861 };
862 
863 } // end anonymous namespace
864 
865 /// Update information about the induction variable that is extended by this
866 /// sign or zero extend operation. This is used to determine the final width of
867 /// the IV before actually widening it.
868 static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
869  const TargetTransformInfo *TTI) {
870  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
871  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
872  return;
873 
874  Type *Ty = Cast->getType();
875  uint64_t Width = SE->getTypeSizeInBits(Ty);
876  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
877  return;
878 
879  // Check that `Cast` actually extends the induction variable (we rely on this
880  // later). This takes care of cases where `Cast` is extending a truncation of
881  // the narrow induction variable, and thus can end up being narrower than the
882  // "narrow" induction variable.
883  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
884  if (NarrowIVWidth >= Width)
885  return;
886 
887  // Cast is either an sext or zext up to this point.
888  // We should not widen an indvar if arithmetics on the wider indvar are more
889  // expensive than those on the narrower indvar. We check only the cost of ADD
890  // because at least an ADD is required to increment the induction variable. We
891  // could compute more comprehensively the cost of all instructions on the
892  // induction variable when necessary.
893  if (TTI &&
896  Cast->getOperand(0)->getType())) {
897  return;
898  }
899 
900  if (!WI.WidestNativeType) {
901  WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
902  WI.IsSigned = IsSigned;
903  return;
904  }
905 
906  // We extend the IV to satisfy the sign of its first user, arbitrarily.
907  if (WI.IsSigned != IsSigned)
908  return;
909 
910  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
911  WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
912 }
913 
914 namespace {
915 
916 /// Record a link in the Narrow IV def-use chain along with the WideIV that
917 /// computes the same value as the Narrow IV def. This avoids caching Use*
918 /// pointers.
919 struct NarrowIVDefUse {
920  Instruction *NarrowDef = nullptr;
921  Instruction *NarrowUse = nullptr;
922  Instruction *WideDef = nullptr;
923 
924  // True if the narrow def is never negative. Tracking this information lets
925  // us use a sign extension instead of a zero extension or vice versa, when
926  // profitable and legal.
927  bool NeverNegative = false;
928 
929  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
930  bool NeverNegative)
931  : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
932  NeverNegative(NeverNegative) {}
933 };
934 
935 /// The goal of this transform is to remove sign and zero extends without
936 /// creating any new induction variables. To do this, it creates a new phi of
937 /// the wider type and redirects all users, either removing extends or inserting
938 /// truncs whenever we stop propagating the type.
939 class WidenIV {
940  // Parameters
941  PHINode *OrigPhi;
942  Type *WideType;
943 
944  // Context
945  LoopInfo *LI;
946  Loop *L;
947  ScalarEvolution *SE;
948  DominatorTree *DT;
949 
950  // Does the module have any calls to the llvm.experimental.guard intrinsic
951  // at all? If not we can avoid scanning instructions looking for guards.
952  bool HasGuards;
953 
954  // Result
955  PHINode *WidePhi = nullptr;
956  Instruction *WideInc = nullptr;
957  const SCEV *WideIncExpr = nullptr;
959 
961  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
962 
963  enum ExtendKind { ZeroExtended, SignExtended, Unknown };
964 
965  // A map tracking the kind of extension used to widen each narrow IV
966  // and narrow IV user.
967  // Key: pointer to a narrow IV or IV user.
968  // Value: the kind of extension used to widen this Instruction.
969  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;
970 
971  using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;
972 
973  // A map with control-dependent ranges for post increment IV uses. The key is
974  // a pair of IV def and a use of this def denoting the context. The value is
975  // a ConstantRange representing possible values of the def at the given
976  // context.
977  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;
978 
979  Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
980  Instruction *UseI) {
981  DefUserPair Key(Def, UseI);
982  auto It = PostIncRangeInfos.find(Key);
983  return It == PostIncRangeInfos.end()
985  : Optional<ConstantRange>(It->second);
986  }
987 
988  void calculatePostIncRanges(PHINode *OrigPhi);
989  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);
990 
991  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
992  DefUserPair Key(Def, UseI);
993  auto It = PostIncRangeInfos.find(Key);
994  if (It == PostIncRangeInfos.end())
995  PostIncRangeInfos.insert({Key, R});
996  else
997  It->second = R.intersectWith(It->second);
998  }
999 
1000 public:
1001  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
1003  bool HasGuards)
1004  : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
1005  L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
1006  HasGuards(HasGuards), DeadInsts(DI) {
1007  assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
1008  ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
1009  }
1010 
1011  PHINode *createWideIV(SCEVExpander &Rewriter);
1012 
1013 protected:
1014  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
1015  Instruction *Use);
1016 
1017  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
1018  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
1019  const SCEVAddRecExpr *WideAR);
1020  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);
1021 
1022  ExtendKind getExtendKind(Instruction *I);
1023 
1024  using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;
1025 
1026  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);
1027 
1028  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);
1029 
1030  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
1031  unsigned OpCode) const;
1032 
1033  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
1034 
1035  bool widenLoopCompare(NarrowIVDefUse DU);
1036  bool widenWithVariantLoadUse(NarrowIVDefUse DU);
1037  void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU);
1038 
1039  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
1040 };
1041 
1042 } // end anonymous namespace
1043 
1044 Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
1045  bool IsSigned, Instruction *Use) {
1046  // Set the debug location and conservative insertion point.
1047  IRBuilder<> Builder(Use);
1048  // Hoist the insertion point into loop preheaders as far as possible.
1049  for (const Loop *L = LI->getLoopFor(Use->getParent());
1050  L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
1051  L = L->getParentLoop())
1053 
1054  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
1055  Builder.CreateZExt(NarrowOper, WideType);
1056 }
1057 
1058 /// Instantiate a wide operation to replace a narrow operation. This only needs
1059 /// to handle operations that can evaluation to SCEVAddRec. It can safely return
1060 /// 0 for any operation we decide not to clone.
1061 Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
1062  const SCEVAddRecExpr *WideAR) {
1063  unsigned Opcode = DU.NarrowUse->getOpcode();
1064  switch (Opcode) {
1065  default:
1066  return nullptr;
1067  case Instruction::Add:
1068  case Instruction::Mul:
1069  case Instruction::UDiv:
1070  case Instruction::Sub:
1071  return cloneArithmeticIVUser(DU, WideAR);
1072 
1073  case Instruction::And:
1074  case Instruction::Or:
1075  case Instruction::Xor:
1076  case Instruction::Shl:
1077  case Instruction::LShr:
1078  case Instruction::AShr:
1079  return cloneBitwiseIVUser(DU);
1080  }
1081 }
1082 
1083 Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
1084  Instruction *NarrowUse = DU.NarrowUse;
1085  Instruction *NarrowDef = DU.NarrowDef;
1086  Instruction *WideDef = DU.WideDef;
1087 
1088  LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");
1089 
1090  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
1091  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
1092  // invariant and will be folded or hoisted. If it actually comes from a
1093  // widened IV, it should be removed during a future call to widenIVUse.
1094  bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
1095  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1096  ? WideDef
1097  : createExtendInst(NarrowUse->getOperand(0), WideType,
1098  IsSigned, NarrowUse);
1099  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1100  ? WideDef
1101  : createExtendInst(NarrowUse->getOperand(1), WideType,
1102  IsSigned, NarrowUse);
1103 
1104  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1105  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1106  NarrowBO->getName());
1107  IRBuilder<> Builder(NarrowUse);
1108  Builder.Insert(WideBO);
1109  WideBO->copyIRFlags(NarrowBO);
1110  return WideBO;
1111 }
1112 
1113 Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
1114  const SCEVAddRecExpr *WideAR) {
1115  Instruction *NarrowUse = DU.NarrowUse;
1116  Instruction *NarrowDef = DU.NarrowDef;
1117  Instruction *WideDef = DU.WideDef;
1118 
1119  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1120 
1121  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;
1122 
1123  // We're trying to find X such that
1124  //
1125  // Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
1126  //
1127  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
1128  // and check using SCEV if any of them are correct.
1129 
1130  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
1131  // correct solution to X.
1132  auto GuessNonIVOperand = [&](bool SignExt) {
1133  const SCEV *WideLHS;
1134  const SCEV *WideRHS;
1135 
1136  auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
1137  if (SignExt)
1138  return SE->getSignExtendExpr(S, Ty);
1139  return SE->getZeroExtendExpr(S, Ty);
1140  };
1141 
1142  if (IVOpIdx == 0) {
1143  WideLHS = SE->getSCEV(WideDef);
1144  const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
1145  WideRHS = GetExtend(NarrowRHS, WideType);
1146  } else {
1147  const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
1148  WideLHS = GetExtend(NarrowLHS, WideType);
1149  WideRHS = SE->getSCEV(WideDef);
1150  }
1151 
1152  // WideUse is "WideDef `op.wide` X" as described in the comment.
1153  const SCEV *WideUse = nullptr;
1154 
1155  switch (NarrowUse->getOpcode()) {
1156  default:
1157  llvm_unreachable("No other possibility!");
1158 
1159  case Instruction::Add:
1160  WideUse = SE->getAddExpr(WideLHS, WideRHS);
1161  break;
1162 
1163  case Instruction::Mul:
1164  WideUse = SE->getMulExpr(WideLHS, WideRHS);
1165  break;
1166 
1167  case Instruction::UDiv:
1168  WideUse = SE->getUDivExpr(WideLHS, WideRHS);
1169  break;
1170 
1171  case Instruction::Sub:
1172  WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
1173  break;
1174  }
1175 
1176  return WideUse == WideAR;
1177  };
1178 
1179  bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
1180  if (!GuessNonIVOperand(SignExtend)) {
1181  SignExtend = !SignExtend;
1182  if (!GuessNonIVOperand(SignExtend))
1183  return nullptr;
1184  }
1185 
1186  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1187  ? WideDef
1188  : createExtendInst(NarrowUse->getOperand(0), WideType,
1189  SignExtend, NarrowUse);
1190  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1191  ? WideDef
1192  : createExtendInst(NarrowUse->getOperand(1), WideType,
1193  SignExtend, NarrowUse);
1194 
1195  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1196  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1197  NarrowBO->getName());
1198 
1199  IRBuilder<> Builder(NarrowUse);
1200  Builder.Insert(WideBO);
1201  WideBO->copyIRFlags(NarrowBO);
1202  return WideBO;
1203 }
1204 
1205 WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
1206  auto It = ExtendKindMap.find(I);
1207  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
1208  return It->second;
1209 }
1210 
1211 const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
1212  unsigned OpCode) const {
1213  if (OpCode == Instruction::Add)
1214  return SE->getAddExpr(LHS, RHS);
1215  if (OpCode == Instruction::Sub)
1216  return SE->getMinusSCEV(LHS, RHS);
1217  if (OpCode == Instruction::Mul)
1218  return SE->getMulExpr(LHS, RHS);
1219 
1220  llvm_unreachable("Unsupported opcode.");
1221 }
1222 
1223 /// No-wrap operations can transfer sign extension of their result to their
1224 /// operands. Generate the SCEV value for the widened operation without
1225 /// actually modifying the IR yet. If the expression after extending the
1226 /// operands is an AddRec for this loop, return the AddRec and the kind of
1227 /// extension used.
1228 WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
1229  // Handle the common case of add<nsw/nuw>
1230  const unsigned OpCode = DU.NarrowUse->getOpcode();
1231  // Only Add/Sub/Mul instructions supported yet.
1232  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1233  OpCode != Instruction::Mul)
1234  return {nullptr, Unknown};
1235 
1236  // One operand (NarrowDef) has already been extended to WideDef. Now determine
1237  // if extending the other will lead to a recurrence.
1238  const unsigned ExtendOperIdx =
1239  DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
1240  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
1241 
1242  const SCEV *ExtendOperExpr = nullptr;
1243  const OverflowingBinaryOperator *OBO =
1244  cast<OverflowingBinaryOperator>(DU.NarrowUse);
1245  ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
1246  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
1247  ExtendOperExpr = SE->getSignExtendExpr(
1248  SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
1249  else if(ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
1250  ExtendOperExpr = SE->getZeroExtendExpr(
1251  SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
1252  else
1253  return {nullptr, Unknown};
1254 
1255  // When creating this SCEV expr, don't apply the current operations NSW or NUW
1256  // flags. This instruction may be guarded by control flow that the no-wrap
1257  // behavior depends on. Non-control-equivalent instructions can be mapped to
1258  // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
1259  // semantics to those operations.
1260  const SCEV *lhs = SE->getSCEV(DU.WideDef);
1261  const SCEV *rhs = ExtendOperExpr;
1262 
1263  // Let's swap operands to the initial order for the case of non-commutative
1264  // operations, like SUB. See PR21014.
1265  if (ExtendOperIdx == 0)
1266  std::swap(lhs, rhs);
1267  const SCEVAddRecExpr *AddRec =
1268  dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));
1269 
1270  if (!AddRec || AddRec->getLoop() != L)
1271  return {nullptr, Unknown};
1272 
1273  return {AddRec, ExtKind};
1274 }
1275 
1276 /// Is this instruction potentially interesting for further simplification after
1277 /// widening it's type? In other words, can the extend be safely hoisted out of
1278 /// the loop with SCEV reducing the value to a recurrence on the same loop. If
1279 /// so, return the extended recurrence and the kind of extension used. Otherwise
1280 /// return {nullptr, Unknown}.
1281 WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
1282  if (!SE->isSCEVable(DU.NarrowUse->getType()))
1283  return {nullptr, Unknown};
1284 
1285  const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
1286  if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
1287  SE->getTypeSizeInBits(WideType)) {
1288  // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
1289  // index. So don't follow this use.
1290  return {nullptr, Unknown};
1291  }
1292 
1293  const SCEV *WideExpr;
1294  ExtendKind ExtKind;
1295  if (DU.NeverNegative) {
1296  WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1297  if (isa<SCEVAddRecExpr>(WideExpr))
1298  ExtKind = SignExtended;
1299  else {
1300  WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1301  ExtKind = ZeroExtended;
1302  }
1303  } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
1304  WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1305  ExtKind = SignExtended;
1306  } else {
1307  WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1308  ExtKind = ZeroExtended;
1309  }
1310  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
1311  if (!AddRec || AddRec->getLoop() != L)
1312  return {nullptr, Unknown};
1313  return {AddRec, ExtKind};
1314 }
1315 
1316 /// This IV user cannot be widened. Replace this use of the original narrow IV
1317 /// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
1318 static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
1319  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1320  if (!InsertPt)
1321  return;
1322  LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
1323  << *DU.NarrowUse << "\n");
1324  IRBuilder<> Builder(InsertPt);
1325  Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
1326  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
1327 }
1328 
1329 /// If the narrow use is a compare instruction, then widen the compare
1330 // (and possibly the other operand). The extend operation is hoisted into the
1331 // loop preheader as far as possible.
1332 bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
1333  ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
1334  if (!Cmp)
1335  return false;
1336 
1337  // We can legally widen the comparison in the following two cases:
1338  //
1339  // - The signedness of the IV extension and comparison match
1340  //
1341  // - The narrow IV is always positive (and thus its sign extension is equal
1342  // to its zero extension). For instance, let's say we're zero extending
1343  // %narrow for the following use
1344  //
1345  // icmp slt i32 %narrow, %val ... (A)
1346  //
1347  // and %narrow is always positive. Then
1348  //
1349  // (A) == icmp slt i32 sext(%narrow), sext(%val)
1350  // == icmp slt i32 zext(%narrow), sext(%val)
1351  bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
1352  if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
1353  return false;
1354 
1355  Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
1356  unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
1357  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1358  assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");
1359 
1360  // Widen the compare instruction.
1361  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1362  if (!InsertPt)
1363  return false;
1364  IRBuilder<> Builder(InsertPt);
1365  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1366 
1367  // Widen the other operand of the compare, if necessary.
1368  if (CastWidth < IVWidth) {
1369  Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
1370  DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
1371  }
1372  return true;
1373 }
1374 
1375 /// If the narrow use is an instruction whose two operands are the defining
1376 /// instruction of DU and a load instruction, then we have the following:
1377 /// if the load is hoisted outside the loop, then we do not reach this function
1378 /// as scalar evolution analysis works fine in widenIVUse with variables
1379 /// hoisted outside the loop and efficient code is subsequently generated by
1380 /// not emitting truncate instructions. But when the load is not hoisted
1381 /// (whether due to limitation in alias analysis or due to a true legality),
1382 /// then scalar evolution can not proceed with loop variant values and
1383 /// inefficient code is generated. This function handles the non-hoisted load
1384 /// special case by making the optimization generate the same type of code for
1385 /// hoisted and non-hoisted load (widen use and eliminate sign extend
1386 /// instruction). This special case is important especially when the induction
1387 /// variables are affecting addressing mode in code generation.
1388 bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) {
1389  Instruction *NarrowUse = DU.NarrowUse;
1390  Instruction *NarrowDef = DU.NarrowDef;
1391  Instruction *WideDef = DU.WideDef;
1392 
1393  // Handle the common case of add<nsw/nuw>
1394  const unsigned OpCode = NarrowUse->getOpcode();
1395  // Only Add/Sub/Mul instructions are supported.
1396  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1397  OpCode != Instruction::Mul)
1398  return false;
1399 
1400  // The operand that is not defined by NarrowDef of DU. Let's call it the
1401  // other operand.
1402  unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
1403  assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
1404  "bad DU");
1405 
1406  const SCEV *ExtendOperExpr = nullptr;
1407  const OverflowingBinaryOperator *OBO =
1408  cast<OverflowingBinaryOperator>(NarrowUse);
1409  ExtendKind ExtKind = getExtendKind(NarrowDef);
1410  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
1411  ExtendOperExpr = SE->getSignExtendExpr(
1412  SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1413  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
1414  ExtendOperExpr = SE->getZeroExtendExpr(
1415  SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1416  else
1417  return false;
1418 
1419  // We are interested in the other operand being a load instruction.
1420  // But, we should look into relaxing this restriction later on.
1421  auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx));
1422  if (I && I->getOpcode() != Instruction::Load)
1423  return false;
1424 
1425  // Verifying that Defining operand is an AddRec
1426  const SCEV *Op1 = SE->getSCEV(WideDef);
1427  const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
1428  if (!AddRecOp1 || AddRecOp1->getLoop() != L)
1429  return false;
1430  // Verifying that other operand is an Extend.
1431  if (ExtKind == SignExtended) {
1432  if (!isa<SCEVSignExtendExpr>(ExtendOperExpr))
1433  return false;
1434  } else {
1435  if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr))
1436  return false;
1437  }
1438 
1439  if (ExtKind == SignExtended) {
1440  for (Use &U : NarrowUse->uses()) {
1441  SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1442  if (!User || User->getType() != WideType)
1443  return false;
1444  }
1445  } else { // ExtKind == ZeroExtended
1446  for (Use &U : NarrowUse->uses()) {
1447  ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1448  if (!User || User->getType() != WideType)
1449  return false;
1450  }
1451  }
1452 
1453  return true;
1454 }
1455 
1456 /// Special Case for widening with variant Loads (see
1457 /// WidenIV::widenWithVariantLoadUse). This is the code generation part.
1458 void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) {
1459  Instruction *NarrowUse = DU.NarrowUse;
1460  Instruction *NarrowDef = DU.NarrowDef;
1461  Instruction *WideDef = DU.WideDef;
1462 
1463  ExtendKind ExtKind = getExtendKind(NarrowDef);
1464 
1465  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1466 
1467  // Generating a widening use instruction.
1468  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1469  ? WideDef
1470  : createExtendInst(NarrowUse->getOperand(0), WideType,
1471  ExtKind, NarrowUse);
1472  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1473  ? WideDef
1474  : createExtendInst(NarrowUse->getOperand(1), WideType,
1475  ExtKind, NarrowUse);
1476 
1477  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1478  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1479  NarrowBO->getName());
1480  IRBuilder<> Builder(NarrowUse);
1481  Builder.Insert(WideBO);
1482  WideBO->copyIRFlags(NarrowBO);
1483 
1484  if (ExtKind == SignExtended)
1485  ExtendKindMap[NarrowUse] = SignExtended;
1486  else
1487  ExtendKindMap[NarrowUse] = ZeroExtended;
1488 
1489  // Update the Use.
1490  if (ExtKind == SignExtended) {
1491  for (Use &U : NarrowUse->uses()) {
1492  SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1493  if (User && User->getType() == WideType) {
1494  LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1495  << *WideBO << "\n");
1496  ++NumElimExt;
1497  User->replaceAllUsesWith(WideBO);
1498  DeadInsts.emplace_back(User);
1499  }
1500  }
1501  } else { // ExtKind == ZeroExtended
1502  for (Use &U : NarrowUse->uses()) {
1503  ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1504  if (User && User->getType() == WideType) {
1505  LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1506  << *WideBO << "\n");
1507  ++NumElimExt;
1508  User->replaceAllUsesWith(WideBO);
1509  DeadInsts.emplace_back(User);
1510  }
1511  }
1512  }
1513 }
1514 
1515 /// Determine whether an individual user of the narrow IV can be widened. If so,
1516 /// return the wide clone of the user.
1517 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
1518  assert(ExtendKindMap.count(DU.NarrowDef) &&
1519  "Should already know the kind of extension used to widen NarrowDef");
1520 
1521  // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1522  if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
1523  if (LI->getLoopFor(UsePhi->getParent()) != L) {
1524  // For LCSSA phis, sink the truncate outside the loop.
1525  // After SimplifyCFG most loop exit targets have a single predecessor.
1526  // Otherwise fall back to a truncate within the loop.
1527  if (UsePhi->getNumOperands() != 1)
1528  truncateIVUse(DU, DT, LI);
1529  else {
1530  // Widening the PHI requires us to insert a trunc. The logical place
1531  // for this trunc is in the same BB as the PHI. This is not possible if
1532  // the BB is terminated by a catchswitch.
1533  if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
1534  return nullptr;
1535 
1536  PHINode *WidePhi =
1537  PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
1538  UsePhi);
1539  WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
1540  IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
1541  Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
1542  UsePhi->replaceAllUsesWith(Trunc);
1543  DeadInsts.emplace_back(UsePhi);
1544  LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
1545  << *WidePhi << "\n");
1546  }
1547  return nullptr;
1548  }
1549  }
1550 
1551  // This narrow use can be widened by a sext if it's non-negative or its narrow
1552  // def was widended by a sext. Same for zext.
1553  auto canWidenBySExt = [&]() {
1554  return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
1555  };
1556  auto canWidenByZExt = [&]() {
1557  return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
1558  };
1559 
1560  // Our raison d'etre! Eliminate sign and zero extension.
1561  if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
1562  (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
1563  Value *NewDef = DU.WideDef;
1564  if (DU.NarrowUse->getType() != WideType) {
1565  unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
1566  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1567  if (CastWidth < IVWidth) {
1568  // The cast isn't as wide as the IV, so insert a Trunc.
1569  IRBuilder<> Builder(DU.NarrowUse);
1570  NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
1571  }
1572  else {
1573  // A wider extend was hidden behind a narrower one. This may induce
1574  // another round of IV widening in which the intermediate IV becomes
1575  // dead. It should be very rare.
1576  LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1577  << " not wide enough to subsume " << *DU.NarrowUse
1578  << "\n");
1579  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1580  NewDef = DU.NarrowUse;
1581  }
1582  }
1583  if (NewDef != DU.NarrowUse) {
1584  LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1585  << " replaced by " << *DU.WideDef << "\n");
1586  ++NumElimExt;
1587  DU.NarrowUse->replaceAllUsesWith(NewDef);
1588  DeadInsts.emplace_back(DU.NarrowUse);
1589  }
1590  // Now that the extend is gone, we want to expose it's uses for potential
1591  // further simplification. We don't need to directly inform SimplifyIVUsers
1592  // of the new users, because their parent IV will be processed later as a
1593  // new loop phi. If we preserved IVUsers analysis, we would also want to
1594  // push the uses of WideDef here.
1595 
1596  // No further widening is needed. The deceased [sz]ext had done it for us.
1597  return nullptr;
1598  }
1599 
1600  // Does this user itself evaluate to a recurrence after widening?
1601  WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
1602  if (!WideAddRec.first)
1603  WideAddRec = getWideRecurrence(DU);
1604 
1605  assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
1606  if (!WideAddRec.first) {
1607  // If use is a loop condition, try to promote the condition instead of
1608  // truncating the IV first.
1609  if (widenLoopCompare(DU))
1610  return nullptr;
1611 
1612  // We are here about to generate a truncate instruction that may hurt
1613  // performance because the scalar evolution expression computed earlier
1614  // in WideAddRec.first does not indicate a polynomial induction expression.
1615  // In that case, look at the operands of the use instruction to determine
1616  // if we can still widen the use instead of truncating its operand.
1617  if (widenWithVariantLoadUse(DU)) {
1618  widenWithVariantLoadUseCodegen(DU);
1619  return nullptr;
1620  }
1621 
1622  // This user does not evaluate to a recurrence after widening, so don't
1623  // follow it. Instead insert a Trunc to kill off the original use,
1624  // eventually isolating the original narrow IV so it can be removed.
1625  truncateIVUse(DU, DT, LI);
1626  return nullptr;
1627  }
1628  // Assume block terminators cannot evaluate to a recurrence. We can't to
1629  // insert a Trunc after a terminator if there happens to be a critical edge.
1630  assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
1631  "SCEV is not expected to evaluate a block terminator");
1632 
1633  // Reuse the IV increment that SCEVExpander created as long as it dominates
1634  // NarrowUse.
1635  Instruction *WideUse = nullptr;
1636  if (WideAddRec.first == WideIncExpr &&
1637  Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
1638  WideUse = WideInc;
1639  else {
1640  WideUse = cloneIVUser(DU, WideAddRec.first);
1641  if (!WideUse)
1642  return nullptr;
1643  }
1644  // Evaluation of WideAddRec ensured that the narrow expression could be
1645  // extended outside the loop without overflow. This suggests that the wide use
1646  // evaluates to the same expression as the extended narrow use, but doesn't
1647  // absolutely guarantee it. Hence the following failsafe check. In rare cases
1648  // where it fails, we simply throw away the newly created wide use.
1649  if (WideAddRec.first != SE->getSCEV(WideUse)) {
1650  LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
1651  << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
1652  << "\n");
1653  DeadInsts.emplace_back(WideUse);
1654  return nullptr;
1655  }
1656 
1657  ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
1658  // Returning WideUse pushes it on the worklist.
1659  return WideUse;
1660 }
1661 
1662 /// Add eligible users of NarrowDef to NarrowIVUsers.
1663 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1664  const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1665  bool NonNegativeDef =
1666  SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1667  SE->getConstant(NarrowSCEV->getType(), 0));
1668  for (User *U : NarrowDef->users()) {
1669  Instruction *NarrowUser = cast<Instruction>(U);
1670 
1671  // Handle data flow merges and bizarre phi cycles.
1672  if (!Widened.insert(NarrowUser).second)
1673  continue;
1674 
1675  bool NonNegativeUse = false;
1676  if (!NonNegativeDef) {
1677  // We might have a control-dependent range information for this context.
1678  if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
1679  NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
1680  }
1681 
1682  NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
1683  NonNegativeDef || NonNegativeUse);
1684  }
1685 }
1686 
1687 /// Process a single induction variable. First use the SCEVExpander to create a
1688 /// wide induction variable that evaluates to the same recurrence as the
1689 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's
1690 /// def-use chain. After widenIVUse has processed all interesting IV users, the
1691 /// narrow IV will be isolated for removal by DeleteDeadPHIs.
1692 ///
1693 /// It would be simpler to delete uses as they are processed, but we must avoid
1694 /// invalidating SCEV expressions.
1695 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
1696  // Is this phi an induction variable?
1697  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
1698  if (!AddRec)
1699  return nullptr;
1700 
1701  // Widen the induction variable expression.
1702  const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
1703  ? SE->getSignExtendExpr(AddRec, WideType)
1704  : SE->getZeroExtendExpr(AddRec, WideType);
1705 
1706  assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
1707  "Expect the new IV expression to preserve its type");
1708 
1709  // Can the IV be extended outside the loop without overflow?
1710  AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
1711  if (!AddRec || AddRec->getLoop() != L)
1712  return nullptr;
1713 
1714  // An AddRec must have loop-invariant operands. Since this AddRec is
1715  // materialized by a loop header phi, the expression cannot have any post-loop
1716  // operands, so they must dominate the loop header.
1717  assert(
1718  SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
1719  SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
1720  "Loop header phi recurrence inputs do not dominate the loop");
1721 
1722  // Iterate over IV uses (including transitive ones) looking for IV increments
1723  // of the form 'add nsw %iv, <const>'. For each increment and each use of
1724  // the increment calculate control-dependent range information basing on
1725  // dominating conditions inside of the loop (e.g. a range check inside of the
1726  // loop). Calculated ranges are stored in PostIncRangeInfos map.
1727  //
1728  // Control-dependent range information is later used to prove that a narrow
1729  // definition is not negative (see pushNarrowIVUsers). It's difficult to do
1730  // this on demand because when pushNarrowIVUsers needs this information some
1731  // of the dominating conditions might be already widened.
1732  if (UsePostIncrementRanges)
1733  calculatePostIncRanges(OrigPhi);
1734 
1735  // The rewriter provides a value for the desired IV expression. This may
1736  // either find an existing phi or materialize a new one. Either way, we
1737  // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
1738  // of the phi-SCC dominates the loop entry.
1739  Instruction *InsertPt = &L->getHeader()->front();
1740  WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
1741 
1742  // Remembering the WideIV increment generated by SCEVExpander allows
1743  // widenIVUse to reuse it when widening the narrow IV's increment. We don't
1744  // employ a general reuse mechanism because the call above is the only call to
1745  // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
1746  if (BasicBlock *LatchBlock = L->getLoopLatch()) {
1747  WideInc =
1748  cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
1749  WideIncExpr = SE->getSCEV(WideInc);
1750  // Propagate the debug location associated with the original loop increment
1751  // to the new (widened) increment.
1752  auto *OrigInc =
1753  cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
1754  WideInc->setDebugLoc(OrigInc->getDebugLoc());
1755  }
1756 
1757  LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
1758  ++NumWidened;
1759 
1760  // Traverse the def-use chain using a worklist starting at the original IV.
1761  assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" );
1762 
1763  Widened.insert(OrigPhi);
1764  pushNarrowIVUsers(OrigPhi, WidePhi);
1765 
1766  while (!NarrowIVUsers.empty()) {
1767  NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
1768 
1769  // Process a def-use edge. This may replace the use, so don't hold a
1770  // use_iterator across it.
1771  Instruction *WideUse = widenIVUse(DU, Rewriter);
1772 
1773  // Follow all def-use edges from the previous narrow use.
1774  if (WideUse)
1775  pushNarrowIVUsers(DU.NarrowUse, WideUse);
1776 
1777  // widenIVUse may have removed the def-use edge.
1778  if (DU.NarrowDef->use_empty())
1779  DeadInsts.emplace_back(DU.NarrowDef);
1780  }
1781 
1782  // Attach any debug information to the new PHI. Since OrigPhi and WidePHI
1783  // evaluate the same recurrence, we can just copy the debug info over.
1785  llvm::findDbgValues(DbgValues, OrigPhi);
1786  auto *MDPhi = MetadataAsValue::get(WidePhi->getContext(),
1787  ValueAsMetadata::get(WidePhi));
1788  for (auto &DbgValue : DbgValues)
1789  DbgValue->setOperand(0, MDPhi);
1790  return WidePhi;
1791 }
1792 
1793 /// Calculates control-dependent range for the given def at the given context
1794 /// by looking at dominating conditions inside of the loop
1795 void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
1796  Instruction *NarrowUser) {
1797  using namespace llvm::PatternMatch;
1798 
1799  Value *NarrowDefLHS;
1800  const APInt *NarrowDefRHS;
1801  if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
1802  m_APInt(NarrowDefRHS))) ||
1803  !NarrowDefRHS->isNonNegative())
1804  return;
1805 
1806  auto UpdateRangeFromCondition = [&] (Value *Condition,
1807  bool TrueDest) {
1808  CmpInst::Predicate Pred;
1809  Value *CmpRHS;
1810  if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
1811  m_Value(CmpRHS))))
1812  return;
1813 
1815  TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
1816 
1817  auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
1818  auto CmpConstrainedLHSRange =
1819  ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
1820  auto NarrowDefRange =
1821  CmpConstrainedLHSRange.addWithNoSignedWrap(*NarrowDefRHS);
1822 
1823  updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
1824  };
1825 
1826  auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
1827  if (!HasGuards)
1828  return;
1829 
1830  for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
1831  Ctx->getParent()->rend())) {
1832  Value *C = nullptr;
1833  if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
1834  UpdateRangeFromCondition(C, /*TrueDest=*/true);
1835  }
1836  };
1837 
1838  UpdateRangeFromGuards(NarrowUser);
1839 
1840  BasicBlock *NarrowUserBB = NarrowUser->getParent();
1841  // If NarrowUserBB is statically unreachable asking dominator queries may
1842  // yield surprising results. (e.g. the block may not have a dom tree node)
1843  if (!DT->isReachableFromEntry(NarrowUserBB))
1844  return;
1845 
1846  for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
1847  L->contains(DTB->getBlock());
1848  DTB = DTB->getIDom()) {
1849  auto *BB = DTB->getBlock();
1850  auto *TI = BB->getTerminator();
1851  UpdateRangeFromGuards(TI);
1852 
1853  auto *BI = dyn_cast<BranchInst>(TI);
1854  if (!BI || !BI->isConditional())
1855  continue;
1856 
1857  auto *TrueSuccessor = BI->getSuccessor(0);
1858  auto *FalseSuccessor = BI->getSuccessor(1);
1859 
1860  auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
1861  return BBE.isSingleEdge() &&
1862  DT->dominates(BBE, NarrowUser->getParent());
1863  };
1864 
1865  if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
1866  UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
1867 
1868  if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
1869  UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
1870  }
1871 }
1872 
1873 /// Calculates PostIncRangeInfos map for the given IV
1874 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
1877  Worklist.push_back(OrigPhi);
1878  Visited.insert(OrigPhi);
1879 
1880  while (!Worklist.empty()) {
1881  Instruction *NarrowDef = Worklist.pop_back_val();
1882 
1883  for (Use &U : NarrowDef->uses()) {
1884  auto *NarrowUser = cast<Instruction>(U.getUser());
1885 
1886  // Don't go looking outside the current loop.
1887  auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
1888  if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
1889  continue;
1890 
1891  if (!Visited.insert(NarrowUser).second)
1892  continue;
1893 
1894  Worklist.push_back(NarrowUser);
1895 
1896  calculatePostIncRange(NarrowDef, NarrowUser);
1897  }
1898  }
1899 }
1900 
1901 //===----------------------------------------------------------------------===//
1902 // Live IV Reduction - Minimize IVs live across the loop.
1903 //===----------------------------------------------------------------------===//
1904 
1905 //===----------------------------------------------------------------------===//
1906 // Simplification of IV users based on SCEV evaluation.
1907 //===----------------------------------------------------------------------===//
1908 
1909 namespace {
1910 
1911 class IndVarSimplifyVisitor : public IVVisitor {
1912  ScalarEvolution *SE;
1913  const TargetTransformInfo *TTI;
1914  PHINode *IVPhi;
1915 
1916 public:
1917  WideIVInfo WI;
1918 
1919  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
1920  const TargetTransformInfo *TTI,
1921  const DominatorTree *DTree)
1922  : SE(SCEV), TTI(TTI), IVPhi(IV) {
1923  DT = DTree;
1924  WI.NarrowIV = IVPhi;
1925  }
1926 
1927  // Implement the interface used by simplifyUsersOfIV.
1928  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
1929 };
1930 
1931 } // end anonymous namespace
1932 
1933 /// Iteratively perform simplification on a worklist of IV users. Each
1934 /// successive simplification may push more users which may themselves be
1935 /// candidates for simplification.
1936 ///
1937 /// Sign/Zero extend elimination is interleaved with IV simplification.
1938 bool IndVarSimplify::simplifyAndExtend(Loop *L,
1939  SCEVExpander &Rewriter,
1940  LoopInfo *LI) {
1942 
1943  auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
1944  Intrinsic::getName(Intrinsic::experimental_guard));
1945  bool HasGuards = GuardDecl && !GuardDecl->use_empty();
1946 
1947  SmallVector<PHINode*, 8> LoopPhis;
1948  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1949  LoopPhis.push_back(cast<PHINode>(I));
1950  }
1951  // Each round of simplification iterates through the SimplifyIVUsers worklist
1952  // for all current phis, then determines whether any IVs can be
1953  // widened. Widening adds new phis to LoopPhis, inducing another round of
1954  // simplification on the wide IVs.
1955  bool Changed = false;
1956  while (!LoopPhis.empty()) {
1957  // Evaluate as many IV expressions as possible before widening any IVs. This
1958  // forces SCEV to set no-wrap flags before evaluating sign/zero
1959  // extension. The first time SCEV attempts to normalize sign/zero extension,
1960  // the result becomes final. So for the most predictable results, we delay
1961  // evaluation of sign/zero extend evaluation until needed, and avoid running
1962  // other SCEV based analysis prior to simplifyAndExtend.
1963  do {
1964  PHINode *CurrIV = LoopPhis.pop_back_val();
1965 
1966  // Information about sign/zero extensions of CurrIV.
1967  IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
1968 
1969  Changed |=
1970  simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor);
1971 
1972  if (Visitor.WI.WidestNativeType) {
1973  WideIVs.push_back(Visitor.WI);
1974  }
1975  } while(!LoopPhis.empty());
1976 
1977  for (; !WideIVs.empty(); WideIVs.pop_back()) {
1978  WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
1979  if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
1980  Changed = true;
1981  LoopPhis.push_back(WidePhi);
1982  }
1983  }
1984  }
1985  return Changed;
1986 }
1987 
1988 //===----------------------------------------------------------------------===//
1989 // linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
1990 //===----------------------------------------------------------------------===//
1991 
1992 /// Given an Value which is hoped to be part of an add recurance in the given
1993 /// loop, return the associated Phi node if so. Otherwise, return null. Note
1994 /// that this is less general than SCEVs AddRec checking.
1996  Instruction *IncI = dyn_cast<Instruction>(IncV);
1997  if (!IncI)
1998  return nullptr;
1999 
2000  switch (IncI->getOpcode()) {
2001  case Instruction::Add:
2002  case Instruction::Sub:
2003  break;
2004  case Instruction::GetElementPtr:
2005  // An IV counter must preserve its type.
2006  if (IncI->getNumOperands() == 2)
2007  break;
2009  default:
2010  return nullptr;
2011  }
2012 
2013  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
2014  if (Phi && Phi->getParent() == L->getHeader()) {
2015  if (L->isLoopInvariant(IncI->getOperand(1)))
2016  return Phi;
2017  return nullptr;
2018  }
2019  if (IncI->getOpcode() == Instruction::GetElementPtr)
2020  return nullptr;
2021 
2022  // Allow add/sub to be commuted.
2023  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
2024  if (Phi && Phi->getParent() == L->getHeader()) {
2025  if (L->isLoopInvariant(IncI->getOperand(0)))
2026  return Phi;
2027  }
2028  return nullptr;
2029 }
2030 
2031 /// Whether the current loop exit test is based on this value. Currently this
2032 /// is limited to a direct use in the loop condition.
2033 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
2034  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2035  ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
2036  // TODO: Allow non-icmp loop test.
2037  if (!ICmp)
2038  return false;
2039 
2040  // TODO: Allow indirect use.
2041  return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
2042 }
2043 
2044 /// linearFunctionTestReplace policy. Return true unless we can show that the
2045 /// current exit test is already sufficiently canonical.
2046 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
2047  assert(L->getLoopLatch() && "Must be in simplified form");
2048 
2049  // Avoid converting a constant or loop invariant test back to a runtime
2050  // test. This is critical for when SCEV's cached ExitCount is less precise
2051  // than the current IR (such as after we've proven a particular exit is
2052  // actually dead and thus the BE count never reaches our ExitCount.)
2053  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2054  if (L->isLoopInvariant(BI->getCondition()))
2055  return false;
2056 
2057  // Do LFTR to simplify the exit condition to an ICMP.
2058  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
2059  if (!Cond)
2060  return true;
2061 
2062  // Do LFTR to simplify the exit ICMP to EQ/NE
2063  ICmpInst::Predicate Pred = Cond->getPredicate();
2064  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
2065  return true;
2066 
2067  // Look for a loop invariant RHS
2068  Value *LHS = Cond->getOperand(0);
2069  Value *RHS = Cond->getOperand(1);
2070  if (!L->isLoopInvariant(RHS)) {
2071  if (!L->isLoopInvariant(LHS))
2072  return true;
2073  std::swap(LHS, RHS);
2074  }
2075  // Look for a simple IV counter LHS
2076  PHINode *Phi = dyn_cast<PHINode>(LHS);
2077  if (!Phi)
2078  Phi = getLoopPhiForCounter(LHS, L);
2079 
2080  if (!Phi)
2081  return true;
2082 
2083  // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
2084  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
2085  if (Idx < 0)
2086  return true;
2087 
2088  // Do LFTR if the exit condition's IV is *not* a simple counter.
2089  Value *IncV = Phi->getIncomingValue(Idx);
2090  return Phi != getLoopPhiForCounter(IncV, L);
2091 }
2092 
2093 /// Return true if undefined behavior would provable be executed on the path to
2094 /// OnPathTo if Root produced a posion result. Note that this doesn't say
2095 /// anything about whether OnPathTo is actually executed or whether Root is
2096 /// actually poison. This can be used to assess whether a new use of Root can
2097 /// be added at a location which is control equivalent with OnPathTo (such as
2098 /// immediately before it) without introducing UB which didn't previously
2099 /// exist. Note that a false result conveys no information.
2101  Instruction *OnPathTo,
2102  DominatorTree *DT) {
2103  // Basic approach is to assume Root is poison, propagate poison forward
2104  // through all users we can easily track, and then check whether any of those
2105  // users are provable UB and must execute before out exiting block might
2106  // exit.
2107 
2108  // The set of all recursive users we've visited (which are assumed to all be
2109  // poison because of said visit)
2110  SmallSet<const Value *, 16> KnownPoison;
2112  Worklist.push_back(Root);
2113  while (!Worklist.empty()) {
2114  const Instruction *I = Worklist.pop_back_val();
2115 
2116  // If we know this must trigger UB on a path leading our target.
2117  if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
2118  return true;
2119 
2120  // If we can't analyze propagation through this instruction, just skip it
2121  // and transitive users. Safe as false is a conservative result.
2122  if (!propagatesFullPoison(I) && I != Root)
2123  continue;
2124 
2125  if (KnownPoison.insert(I).second)
2126  for (const User *User : I->users())
2127  Worklist.push_back(cast<Instruction>(User));
2128  }
2129 
2130  // Might be non-UB, or might have a path we couldn't prove must execute on
2131  // way to exiting bb.
2132  return false;
2133 }
2134 
2135 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
2136 /// down to checking that all operands are constant and listing instructions
2137 /// that may hide undef.
2139  unsigned Depth) {
2140  if (isa<Constant>(V))
2141  return !isa<UndefValue>(V);
2142 
2143  if (Depth >= 6)
2144  return false;
2145 
2146  // Conservatively handle non-constant non-instructions. For example, Arguments
2147  // may be undef.
2148  Instruction *I = dyn_cast<Instruction>(V);
2149  if (!I)
2150  return false;
2151 
2152  // Load and return values may be undef.
2153  if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
2154  return false;
2155 
2156  // Optimistically handle other instructions.
2157  for (Value *Op : I->operands()) {
2158  if (!Visited.insert(Op).second)
2159  continue;
2160  if (!hasConcreteDefImpl(Op, Visited, Depth+1))
2161  return false;
2162  }
2163  return true;
2164 }
2165 
2166 /// Return true if the given value is concrete. We must prove that undef can
2167 /// never reach it.
2168 ///
2169 /// TODO: If we decide that this is a good approach to checking for undef, we
2170 /// may factor it into a common location.
2171 static bool hasConcreteDef(Value *V) {
2172  SmallPtrSet<Value*, 8> Visited;
2173  Visited.insert(V);
2174  return hasConcreteDefImpl(V, Visited, 0);
2175 }
2176 
2177 /// Return true if this IV has any uses other than the (soon to be rewritten)
2178 /// loop exit test.
2179 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
2180  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
2181  Value *IncV = Phi->getIncomingValue(LatchIdx);
2182 
2183  for (User *U : Phi->users())
2184  if (U != Cond && U != IncV) return false;
2185 
2186  for (User *U : IncV->users())
2187  if (U != Cond && U != Phi) return false;
2188  return true;
2189 }
2190 
2191 /// Return true if the given phi is a "counter" in L. A counter is an
2192 /// add recurance (of integer or pointer type) with an arbitrary start, and a
2193 /// step of 1. Note that L must have exactly one latch.
2194 static bool isLoopCounter(PHINode* Phi, Loop *L,
2195  ScalarEvolution *SE) {
2196  assert(Phi->getParent() == L->getHeader());
2197  assert(L->getLoopLatch());
2198 
2199  if (!SE->isSCEVable(Phi->getType()))
2200  return false;
2201 
2202  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2203  if (!AR || AR->getLoop() != L || !AR->isAffine())
2204  return false;
2205 
2206  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
2207  if (!Step || !Step->isOne())
2208  return false;
2209 
2210  int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
2211  Value *IncV = Phi->getIncomingValue(LatchIdx);
2212  return (getLoopPhiForCounter(IncV, L) == Phi);
2213 }
2214 
2215 /// Search the loop header for a loop counter (anadd rec w/step of one)
2216 /// suitable for use by LFTR. If multiple counters are available, select the
2217 /// "best" one based profitable heuristics.
2218 ///
2219 /// BECount may be an i8* pointer type. The pointer difference is already
2220 /// valid count without scaling the address stride, so it remains a pointer
2221 /// expression as far as SCEV is concerned.
2222 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
2223  const SCEV *BECount,
2224  ScalarEvolution *SE, DominatorTree *DT) {
2225  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
2226 
2227  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
2228 
2229  // Loop over all of the PHI nodes, looking for a simple counter.
2230  PHINode *BestPhi = nullptr;
2231  const SCEV *BestInit = nullptr;
2232  BasicBlock *LatchBlock = L->getLoopLatch();
2233  assert(LatchBlock && "Must be in simplified form");
2234  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2235 
2236  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
2237  PHINode *Phi = cast<PHINode>(I);
2238  if (!isLoopCounter(Phi, L, SE))
2239  continue;
2240 
2241  // Avoid comparing an integer IV against a pointer Limit.
2242  if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
2243  continue;
2244 
2245  const auto *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2246 
2247  // AR may be a pointer type, while BECount is an integer type.
2248  // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
2249  // AR may not be a narrower type, or we may never exit.
2250  uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
2251  if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
2252  continue;
2253 
2254  // Avoid reusing a potentially undef value to compute other values that may
2255  // have originally had a concrete definition.
2256  if (!hasConcreteDef(Phi)) {
2257  // We explicitly allow unknown phis as long as they are already used by
2258  // the loop exit test. This is legal since performing LFTR could not
2259  // increase the number of undef users.
2260  Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
2261  if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
2262  !isLoopExitTestBasedOn(IncPhi, ExitingBB))
2263  continue;
2264  }
2265 
2266  // Avoid introducing undefined behavior due to poison which didn't exist in
2267  // the original program. (Annoyingly, the rules for poison and undef
2268  // propagation are distinct, so this does NOT cover the undef case above.)
2269  // We have to ensure that we don't introduce UB by introducing a use on an
2270  // iteration where said IV produces poison. Our strategy here differs for
2271  // pointers and integer IVs. For integers, we strip and reinfer as needed,
2272  // see code in linearFunctionTestReplace. For pointers, we restrict
2273  // transforms as there is no good way to reinfer inbounds once lost.
2274  if (!Phi->getType()->isIntegerTy() &&
2275  !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
2276  continue;
2277 
2278  const SCEV *Init = AR->getStart();
2279 
2280  if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
2281  // Don't force a live loop counter if another IV can be used.
2282  if (AlmostDeadIV(Phi, LatchBlock, Cond))
2283  continue;
2284 
2285  // Prefer to count-from-zero. This is a more "canonical" counter form. It
2286  // also prefers integer to pointer IVs.
2287  if (BestInit->isZero() != Init->isZero()) {
2288  if (BestInit->isZero())
2289  continue;
2290  }
2291  // If two IVs both count from zero or both count from nonzero then the
2292  // narrower is likely a dead phi that has been widened. Use the wider phi
2293  // to allow the other to be eliminated.
2294  else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
2295  continue;
2296  }
2297  BestPhi = Phi;
2298  BestInit = Init;
2299  }
2300  return BestPhi;
2301 }
2302 
2303 /// Insert an IR expression which computes the value held by the IV IndVar
2304 /// (which must be an loop counter w/unit stride) after the backedge of loop L
2305 /// is taken ExitCount times.
2306 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
2307  const SCEV *ExitCount, bool UsePostInc, Loop *L,
2308  SCEVExpander &Rewriter, ScalarEvolution *SE) {
2309  assert(isLoopCounter(IndVar, L, SE));
2310  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2311  const SCEV *IVInit = AR->getStart();
2312 
2313  // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
2314  // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
2315  // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
2316  // the existing GEPs whenever possible.
2317  if (IndVar->getType()->isPointerTy() &&
2318  !ExitCount->getType()->isPointerTy()) {
2319  // IVOffset will be the new GEP offset that is interpreted by GEP as a
2320  // signed value. ExitCount on the other hand represents the loop trip count,
2321  // which is an unsigned value. FindLoopCounter only allows induction
2322  // variables that have a positive unit stride of one. This means we don't
2323  // have to handle the case of negative offsets (yet) and just need to zero
2324  // extend ExitCount.
2325  Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
2326  const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
2327  if (UsePostInc)
2328  IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));
2329 
2330  // Expand the code for the iteration count.
2331  assert(SE->isLoopInvariant(IVOffset, L) &&
2332  "Computed iteration count is not loop invariant!");
2333 
2334  // We could handle pointer IVs other than i8*, but we need to compensate for
2335  // gep index scaling.
2337  cast<PointerType>(IndVar->getType())
2338  ->getElementType())->isOne() &&
2339  "unit stride pointer IV must be i8*");
2340 
2341  const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
2342  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2343  return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
2344  } else {
2345  // In any other case, convert both IVInit and ExitCount to integers before
2346  // comparing. This may result in SCEV expansion of pointers, but in practice
2347  // SCEV will fold the pointer arithmetic away as such:
2348  // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
2349  //
2350  // Valid Cases: (1) both integers is most common; (2) both may be pointers
2351  // for simple memset-style loops.
2352  //
2353  // IVInit integer and ExitCount pointer would only occur if a canonical IV
2354  // were generated on top of case #2, which is not expected.
2355 
2356  assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
2357  // For unit stride, IVCount = Start + ExitCount with 2's complement
2358  // overflow.
2359 
2360  // For integer IVs, truncate the IV before computing IVInit + BECount,
2361  // unless we know apriori that the limit must be a constant when evaluated
2362  // in the bitwidth of the IV. We prefer (potentially) keeping a truncate
2363  // of the IV in the loop over a (potentially) expensive expansion of the
2364  // widened exit count add(zext(add)) expression.
2365  if (SE->getTypeSizeInBits(IVInit->getType())
2366  > SE->getTypeSizeInBits(ExitCount->getType())) {
2367  if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
2368  ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
2369  else
2370  IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
2371  }
2372 
2373  const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);
2374 
2375  if (UsePostInc)
2376  IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));
2377 
2378  // Expand the code for the iteration count.
2379  assert(SE->isLoopInvariant(IVLimit, L) &&
2380  "Computed iteration count is not loop invariant!");
2381  // Ensure that we generate the same type as IndVar, or a smaller integer
2382  // type. In the presence of null pointer values, we have an integer type
2383  // SCEV expression (IVInit) for a pointer type IV value (IndVar).
2384  Type *LimitTy = ExitCount->getType()->isPointerTy() ?
2385  IndVar->getType() : ExitCount->getType();
2386  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2387  return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
2388  }
2389 }
2390 
2391 /// This method rewrites the exit condition of the loop to be a canonical !=
2392 /// comparison against the incremented loop induction variable. This pass is
2393 /// able to rewrite the exit tests of any loop where the SCEV analysis can
2394 /// determine a loop-invariant trip count of the loop, which is actually a much
2395 /// broader range than just linear tests.
2396 bool IndVarSimplify::
2397 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
2398  const SCEV *ExitCount,
2399  PHINode *IndVar, SCEVExpander &Rewriter) {
2400  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
2401  assert(isLoopCounter(IndVar, L, SE));
2402  Instruction * const IncVar =
2403  cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
2404 
2405  // Initialize CmpIndVar to the preincremented IV.
2406  Value *CmpIndVar = IndVar;
2407  bool UsePostInc = false;
2408 
2409  // If the exiting block is the same as the backedge block, we prefer to
2410  // compare against the post-incremented value, otherwise we must compare
2411  // against the preincremented value.
2412  if (ExitingBB == L->getLoopLatch()) {
2413  // For pointer IVs, we chose to not strip inbounds which requires us not
2414  // to add a potentially UB introducing use. We need to either a) show
2415  // the loop test we're modifying is already in post-inc form, or b) show
2416  // that adding a use must not introduce UB.
2417  bool SafeToPostInc =
2418  IndVar->getType()->isIntegerTy() ||
2419  isLoopExitTestBasedOn(IncVar, ExitingBB) ||
2420  mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
2421  if (SafeToPostInc) {
2422  UsePostInc = true;
2423  CmpIndVar = IncVar;
2424  }
2425  }
2426 
2427  // It may be necessary to drop nowrap flags on the incrementing instruction
2428  // if either LFTR moves from a pre-inc check to a post-inc check (in which
2429  // case the increment might have previously been poison on the last iteration
2430  // only) or if LFTR switches to a different IV that was previously dynamically
2431  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
2432  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
2433  // check), because the pre-inc addrec flags may be adopted from the original
2434  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
2435  // TODO: This handling is inaccurate for one case: If we switch to a
2436  // dynamically dead IV that wraps on the first loop iteration only, which is
2437  // not covered by the post-inc addrec. (If the new IV was not dynamically
2438  // dead, it could not be poison on the first iteration in the first place.)
2439  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
2440  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
2441  if (BO->hasNoUnsignedWrap())
2442  BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
2443  if (BO->hasNoSignedWrap())
2444  BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
2445  }
2446 
2447  Value *ExitCnt = genLoopLimit(
2448  IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
2449  assert(ExitCnt->getType()->isPointerTy() ==
2450  IndVar->getType()->isPointerTy() &&
2451  "genLoopLimit missed a cast");
2452 
2453  // Insert a new icmp_ne or icmp_eq instruction before the branch.
2454  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2456  if (L->contains(BI->getSuccessor(0)))
2457  P = ICmpInst::ICMP_NE;
2458  else
2459  P = ICmpInst::ICMP_EQ;
2460 
2461  IRBuilder<> Builder(BI);
2462 
2463  // The new loop exit condition should reuse the debug location of the
2464  // original loop exit condition.
2465  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
2466  Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
2467 
2468  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
2469  // avoid the expensive expansion of the limit expression in the wider type,
2470  // emit a truncate to narrow the IV to the ExitCount type. This is safe
2471  // since we know (from the exit count bitwidth), that we can't self-wrap in
2472  // the narrower type.
2473  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
2474  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
2475  if (CmpIndVarSize > ExitCntSize) {
2476  assert(!CmpIndVar->getType()->isPointerTy() &&
2477  !ExitCnt->getType()->isPointerTy());
2478 
2479  // Before resorting to actually inserting the truncate, use the same
2480  // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
2481  // the other side of the comparison instead. We still evaluate the limit
2482  // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
2483  // a truncate within in.
2484  bool Extended = false;
2485  const SCEV *IV = SE->getSCEV(CmpIndVar);
2486  const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2487  ExitCnt->getType());
2488  const SCEV *ZExtTrunc =
2489  SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
2490 
2491  if (ZExtTrunc == IV) {
2492  Extended = true;
2493  ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
2494  "wide.trip.count");
2495  } else {
2496  const SCEV *SExtTrunc =
2497  SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
2498  if (SExtTrunc == IV) {
2499  Extended = true;
2500  ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
2501  "wide.trip.count");
2502  }
2503  }
2504 
2505  if (Extended) {
2506  bool Discard;
2507  L->makeLoopInvariant(ExitCnt, Discard);
2508  } else
2509  CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
2510  "lftr.wideiv");
2511  }
2512  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
2513  << " LHS:" << *CmpIndVar << '\n'
2514  << " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
2515  << "\n"
2516  << " RHS:\t" << *ExitCnt << "\n"
2517  << "ExitCount:\t" << *ExitCount << "\n"
2518  << " was: " << *BI->getCondition() << "\n");
2519 
2520  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
2521  Value *OrigCond = BI->getCondition();
2522  // It's tempting to use replaceAllUsesWith here to fully replace the old
2523  // comparison, but that's not immediately safe, since users of the old
2524  // comparison may not be dominated by the new comparison. Instead, just
2525  // update the branch to use the new comparison; in the common case this
2526  // will make old comparison dead.
2527  BI->setCondition(Cond);
2528  DeadInsts.push_back(OrigCond);
2529 
2530  ++NumLFTR;
2531  return true;
2532 }
2533 
2534 //===----------------------------------------------------------------------===//
2535 // sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
2536 //===----------------------------------------------------------------------===//
2537 
2538 /// If there's a single exit block, sink any loop-invariant values that
2539 /// were defined in the preheader but not used inside the loop into the
2540 /// exit block to reduce register pressure in the loop.
2541 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
2542  BasicBlock *ExitBlock = L->getExitBlock();
2543  if (!ExitBlock) return false;
2544 
2545  BasicBlock *Preheader = L->getLoopPreheader();
2546  if (!Preheader) return false;
2547 
2548  bool MadeAnyChanges = false;
2549  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
2550  BasicBlock::iterator I(Preheader->getTerminator());
2551  while (I != Preheader->begin()) {
2552  --I;
2553  // New instructions were inserted at the end of the preheader.
2554  if (isa<PHINode>(I))
2555  break;
2556 
2557  // Don't move instructions which might have side effects, since the side
2558  // effects need to complete before instructions inside the loop. Also don't
2559  // move instructions which might read memory, since the loop may modify
2560  // memory. Note that it's okay if the instruction might have undefined
2561  // behavior: LoopSimplify guarantees that the preheader dominates the exit
2562  // block.
2563  if (I->mayHaveSideEffects() || I->mayReadFromMemory())
2564  continue;
2565 
2566  // Skip debug info intrinsics.
2567  if (isa<DbgInfoIntrinsic>(I))
2568  continue;
2569 
2570  // Skip eh pad instructions.
2571  if (I->isEHPad())
2572  continue;
2573 
2574  // Don't sink alloca: we never want to sink static alloca's out of the
2575  // entry block, and correctly sinking dynamic alloca's requires
2576  // checks for stacksave/stackrestore intrinsics.
2577  // FIXME: Refactor this check somehow?
2578  if (isa<AllocaInst>(I))
2579  continue;
2580 
2581  // Determine if there is a use in or before the loop (direct or
2582  // otherwise).
2583  bool UsedInLoop = false;
2584  for (Use &U : I->uses()) {
2585  Instruction *User = cast<Instruction>(U.getUser());
2586  BasicBlock *UseBB = User->getParent();
2587  if (PHINode *P = dyn_cast<PHINode>(User)) {
2588  unsigned i =
2589  PHINode::getIncomingValueNumForOperand(U.getOperandNo());
2590  UseBB = P->getIncomingBlock(i);
2591  }
2592  if (UseBB == Preheader || L->contains(UseBB)) {
2593  UsedInLoop = true;
2594  break;
2595  }
2596  }
2597 
2598  // If there is, the def must remain in the preheader.
2599  if (UsedInLoop)
2600  continue;
2601 
2602  // Otherwise, sink it to the exit block.
2603  Instruction *ToMove = &*I;
2604  bool Done = false;
2605 
2606  if (I != Preheader->begin()) {
2607  // Skip debug info intrinsics.
2608  do {
2609  --I;
2610  } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
2611 
2612  if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
2613  Done = true;
2614  } else {
2615  Done = true;
2616  }
2617 
2618  MadeAnyChanges = true;
2619  ToMove->moveBefore(*ExitBlock, InsertPt);
2620  if (Done) break;
2621  InsertPt = ToMove->getIterator();
2622  }
2623 
2624  return MadeAnyChanges;
2625 }
2626 
2627 bool IndVarSimplify::optimizeLoopExits(Loop *L) {
2628  SmallVector<BasicBlock*, 16> ExitingBlocks;
2629  L->getExitingBlocks(ExitingBlocks);
2630 
2631  // Form an expression for the maximum exit count possible for this loop. We
2632  // merge the max and exact information to approximate a version of
2633  // getMaxBackedgeTakenInfo which isn't restricted to just constants.
2634  // TODO: factor this out as a version of getMaxBackedgeTakenCount which
2635  // isn't guaranteed to return a constant.
2636  SmallVector<const SCEV*, 4> ExitCounts;
2637  const SCEV *MaxConstEC = SE->getMaxBackedgeTakenCount(L);
2638  if (!isa<SCEVCouldNotCompute>(MaxConstEC))
2639  ExitCounts.push_back(MaxConstEC);
2640  for (BasicBlock *ExitingBB : ExitingBlocks) {
2641  const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2642  if (!isa<SCEVCouldNotCompute>(ExitCount)) {
2643  assert(DT->dominates(ExitingBB, L->getLoopLatch()) &&
2644  "We should only have known counts for exiting blocks that "
2645  "dominate latch!");
2646  ExitCounts.push_back(ExitCount);
2647  }
2648  }
2649  if (ExitCounts.empty())
2650  return false;
2651  const SCEV *MaxExitCount = SE->getUMinFromMismatchedTypes(ExitCounts);
2652 
2653  bool Changed = false;
2654  for (BasicBlock *ExitingBB : ExitingBlocks) {
2655  // If our exitting block exits multiple loops, we can only rewrite the
2656  // innermost one. Otherwise, we're changing how many times the innermost
2657  // loop runs before it exits.
2658  if (LI->getLoopFor(ExitingBB) != L)
2659  continue;
2660 
2661  // Can't rewrite non-branch yet.
2662  BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2663  if (!BI)
2664  continue;
2665 
2666  // If already constant, nothing to do.
2667  if (isa<Constant>(BI->getCondition()))
2668  continue;
2669 
2670  const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2671  if (isa<SCEVCouldNotCompute>(ExitCount))
2672  continue;
2673 
2674  // If we know we'd exit on the first iteration, rewrite the exit to
2675  // reflect this. This does not imply the loop must exit through this
2676  // exit; there may be an earlier one taken on the first iteration.
2677  // TODO: Given we know the backedge can't be taken, we should go ahead
2678  // and break it. Or at least, kill all the header phis and simplify.
2679  if (ExitCount->isZero()) {
2680  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
2681  auto *OldCond = BI->getCondition();
2682  auto *NewCond = ExitIfTrue ? ConstantInt::getTrue(OldCond->getType()) :
2683  ConstantInt::getFalse(OldCond->getType());
2684  BI->setCondition(NewCond);
2685  if (OldCond->use_empty())
2686  DeadInsts.push_back(OldCond);
2687  Changed = true;
2688  continue;
2689  }
2690 
2691  // If we end up with a pointer exit count, bail.
2692  if (!ExitCount->getType()->isIntegerTy() ||
2693  !MaxExitCount->getType()->isIntegerTy())
2694  return false;
2695 
2696  Type *WiderType =
2697  SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
2698  ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
2699  MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
2700  assert(MaxExitCount->getType() == ExitCount->getType());
2701 
2702  // Can we prove that some other exit must be taken strictly before this
2703  // one? TODO: handle cases where ule is known, and equality is covered
2704  // by a dominating exit
2705  if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
2706  MaxExitCount, ExitCount)) {
2707  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
2708  auto *OldCond = BI->getCondition();
2709  auto *NewCond = ExitIfTrue ? ConstantInt::getFalse(OldCond->getType()) :
2710  ConstantInt::getTrue(OldCond->getType());
2711  BI->setCondition(NewCond);
2712  if (OldCond->use_empty())
2713  DeadInsts.push_back(OldCond);
2714  Changed = true;
2715  continue;
2716  }
2717 
2718  // TODO: If we can prove that the exiting iteration is equal to the exit
2719  // count for this exit and that no previous exit oppurtunities exist within
2720  // the loop, then we can discharge all other exits. (May fall out of
2721  // previous TODO.)
2722 
2723  // TODO: If we can't prove any relation between our exit count and the
2724  // loops exit count, but taking this exit doesn't require actually running
2725  // the loop (i.e. no side effects, no computed values used in exit), then
2726  // we can replace the exit test with a loop invariant test which exits on
2727  // the first iteration.
2728  }
2729  return Changed;
2730 }
2731 
2732 //===----------------------------------------------------------------------===//
2733 // IndVarSimplify driver. Manage several subpasses of IV simplification.
2734 //===----------------------------------------------------------------------===//
2735 
2736 bool IndVarSimplify::run(Loop *L) {
2737  // We need (and expect!) the incoming loop to be in LCSSA.
2738  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2739  "LCSSA required to run indvars!");
2740  bool Changed = false;
2741 
2742  // If LoopSimplify form is not available, stay out of trouble. Some notes:
2743  // - LSR currently only supports LoopSimplify-form loops. Indvars'
2744  // canonicalization can be a pessimization without LSR to "clean up"
2745  // afterwards.
2746  // - We depend on having a preheader; in particular,
2747  // Loop::getCanonicalInductionVariable only supports loops with preheaders,
2748  // and we're in trouble if we can't find the induction variable even when
2749  // we've manually inserted one.
2750  // - LFTR relies on having a single backedge.
2751  if (!L->isLoopSimplifyForm())
2752  return false;
2753 
2754  // If there are any floating-point recurrences, attempt to
2755  // transform them to use integer recurrences.
2756  Changed |= rewriteNonIntegerIVs(L);
2757 
2758  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
2759 
2760  // Create a rewriter object which we'll use to transform the code with.
2761  SCEVExpander Rewriter(*SE, DL, "indvars");
2762 #ifndef NDEBUG
2763  Rewriter.setDebugType(DEBUG_TYPE);
2764 #endif
2765 
2766  // Eliminate redundant IV users.
2767  //
2768  // Simplification works best when run before other consumers of SCEV. We
2769  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
2770  // other expressions involving loop IVs have been evaluated. This helps SCEV
2771  // set no-wrap flags before normalizing sign/zero extension.
2772  Rewriter.disableCanonicalMode();
2773  Changed |= simplifyAndExtend(L, Rewriter, LI);
2774 
2775  // Check to see if this loop has a computable loop-invariant execution count.
2776  // If so, this means that we can compute the final value of any expressions
2777  // that are recurrent in the loop, and substitute the exit values from the
2778  // loop into any instructions outside of the loop that use the final values of
2779  // the current expressions.
2780  //
2781  if (ReplaceExitValue != NeverRepl &&
2782  !isa<SCEVCouldNotCompute>(BackedgeTakenCount))
2783  Changed |= rewriteLoopExitValues(L, Rewriter);
2784 
2785  // Eliminate redundant IV cycles.
2786  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
2787 
2788  Changed |= optimizeLoopExits(L);
2789 
2790  // If we have a trip count expression, rewrite the loop's exit condition
2791  // using it.
2792  if (!DisableLFTR) {
2793  SmallVector<BasicBlock*, 16> ExitingBlocks;
2794  L->getExitingBlocks(ExitingBlocks);
2795  for (BasicBlock *ExitingBB : ExitingBlocks) {
2796  // Can't rewrite non-branch yet.
2797  if (!isa<BranchInst>(ExitingBB->getTerminator()))
2798  continue;
2799 
2800  // If our exitting block exits multiple loops, we can only rewrite the
2801  // innermost one. Otherwise, we're changing how many times the innermost
2802  // loop runs before it exits.
2803  if (LI->getLoopFor(ExitingBB) != L)
2804  continue;
2805 
2806  if (!needsLFTR(L, ExitingBB))
2807  continue;
2808 
2809  const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2810  if (isa<SCEVCouldNotCompute>(ExitCount))
2811  continue;
2812 
2813  // This was handled above, but as we form SCEVs, we can sometimes refine
2814  // existing ones; this allows exit counts to be folded to zero which
2815  // weren't when optimizeLoopExits saw them. Arguably, we should iterate
2816  // until stable to handle cases like this better.
2817  if (ExitCount->isZero())
2818  continue;
2819 
2820  PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
2821  if (!IndVar)
2822  continue;
2823 
2824  // Avoid high cost expansions. Note: This heuristic is questionable in
2825  // that our definition of "high cost" is not exactly principled.
2826  if (Rewriter.isHighCostExpansion(ExitCount, L))
2827  continue;
2828 
2829  // Check preconditions for proper SCEVExpander operation. SCEV does not
2830  // express SCEVExpander's dependencies, such as LoopSimplify. Instead
2831  // any pass that uses the SCEVExpander must do it. This does not work
2832  // well for loop passes because SCEVExpander makes assumptions about
2833  // all loops, while LoopPassManager only forces the current loop to be
2834  // simplified.
2835  //
2836  // FIXME: SCEV expansion has no way to bail out, so the caller must
2837  // explicitly check any assumptions made by SCEV. Brittle.
2838  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
2839  if (!AR || AR->getLoop()->getLoopPreheader())
2840  Changed |= linearFunctionTestReplace(L, ExitingBB,
2841  ExitCount, IndVar,
2842  Rewriter);
2843  }
2844  }
2845  // Clear the rewriter cache, because values that are in the rewriter's cache
2846  // can be deleted in the loop below, causing the AssertingVH in the cache to
2847  // trigger.
2848  Rewriter.clear();
2849 
2850  // Now that we're done iterating through lists, clean up any instructions
2851  // which are now dead.
2852  while (!DeadInsts.empty())
2853  if (Instruction *Inst =
2854  dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
2855  Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
2856 
2857  // The Rewriter may not be used from this point on.
2858 
2859  // Loop-invariant instructions in the preheader that aren't used in the
2860  // loop may be sunk below the loop to reduce register pressure.
2861  Changed |= sinkUnusedInvariants(L);
2862 
2863  // rewriteFirstIterationLoopExitValues does not rely on the computation of
2864  // trip count and therefore can further simplify exit values in addition to
2865  // rewriteLoopExitValues.
2866  Changed |= rewriteFirstIterationLoopExitValues(L);
2867 
2868  // Clean up dead instructions.
2869  Changed |= DeleteDeadPHIs(L->getHeader(), TLI);
2870 
2871  // Check a post-condition.
2872  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2873  "Indvars did not preserve LCSSA!");
2874 
2875  // Verify that LFTR, and any other change have not interfered with SCEV's
2876  // ability to compute trip count.
2877 #ifndef NDEBUG
2878  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
2879  SE->forgetLoop(L);
2880  const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
2881  if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
2882  SE->getTypeSizeInBits(NewBECount->getType()))
2883  NewBECount = SE->getTruncateOrNoop(NewBECount,
2884  BackedgeTakenCount->getType());
2885  else
2886  BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
2887  NewBECount->getType());
2888  assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV");
2889  }
2890 #endif
2891 
2892  return Changed;
2893 }
2894 
2897  LPMUpdater &) {
2898  Function *F = L.getHeader()->getParent();
2899  const DataLayout &DL = F->getParent()->getDataLayout();
2900 
2901  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI);
2902  if (!IVS.run(&L))
2903  return PreservedAnalyses::all();
2904 
2905  auto PA = getLoopPassPreservedAnalyses();
2906  PA.preserveSet<CFGAnalyses>();
2907  return PA;
2908 }
2909 
2910 namespace {
2911 
2912 struct IndVarSimplifyLegacyPass : public LoopPass {
2913  static char ID; // Pass identification, replacement for typeid
2914 
2915  IndVarSimplifyLegacyPass() : LoopPass(ID) {
2917  }
2918 
2919  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
2920  if (skipLoop(L))
2921  return false;
2922 
2923  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2924  auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2925  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2926  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2927  auto *TLI = TLIP ? &TLIP->getTLI() : nullptr;
2928  auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
2929  auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
2930  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2931 
2932  IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI);
2933  return IVS.run(L);
2934  }
2935 
2936  void getAnalysisUsage(AnalysisUsage &AU) const override {
2937  AU.setPreservesCFG();
2939  }
2940 };
2941 
2942 } // end anonymous namespace
2943 
2945 
2946 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
2947  "Induction Variable Simplification", false, false)
2949 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
2950  "Induction Variable Simplification", false, false)
2951 
2953  return new IndVarSimplifyLegacyPass();
2954 }
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:80
static cl::opt< ReplaceExitVal > ReplaceExitValue("replexitval", cl::Hidden, cl::init(OnlyCheapRepl), cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), clEnumValN(OnlyCheapRepl, "cheap", "only replace exit value when the cost is cheap"), clEnumValN(NoHardUse, "noharduse", "only replace exit values when loop def likely dead"), clEnumValN(AlwaysRepl, "always", "always replace exit value whenever possible")))
uint64_t CallInst * C
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:67
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:110
bool hoistIVInc(Instruction *IncV, Instruction *InsertPos)
Utility for hoisting an IV increment.
Induction Variable Simplification
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:594
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:641
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:70
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2168
static PHINode * getLoopPhiForCounter(Value *IncV, Loop *L)
Given an Value which is hoped to be part of an add recurance in the given loop, return the associated...
iterator_range< use_iterator > uses()
Definition: Value.h:354
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:211
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
MutableArrayRef< T > makeMutableArrayRef(T &OneElt)
Construct a MutableArrayRef from a single element.
Definition: ArrayRef.h:502
This class represents lattice values for constants.
Definition: AllocatorList.h:23
This header provides classes for managing a pipeline of passes over loops in LLVM IR...
Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
bool isRecursivelyLCSSAForm(DominatorTree &DT, const LoopInfo &LI) const
Return true if this Loop and all inner subloops are in LCSSA form.
Definition: LoopInfo.cpp:416
bool isLCSSAForm(DominatorTree &DT) const
Return true if the Loop is in LCSSA form.
Definition: LoopInfo.cpp:409
This class represents zero extension of integer types.
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
The main scalar evolution driver.
bool isZero() const
Return true if the expression is a constant zero.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:160
bool isHighCostExpansion(const SCEV *Expr, Loop *L, const Instruction *At=nullptr)
Return true for expressions that may incur non-trivial cost to evaluate at runtime.
unsigned less than
Definition: InstrTypes.h:757
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:738
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:732
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:748
uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
void setDebugType(const char *s)
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
findNearestCommonDominator - Find nearest common dominator basic block for basic block A and B...
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
BasicBlock * getSuccessor(unsigned i) const
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1882
STATISTIC(NumFunctions, "Total number of functions")
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
F(f)
static cl::opt< bool > UsePostIncrementRanges("indvars-post-increment-ranges", cl::Hidden, cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), cl::init(true))
This class represents a sign extension of integer types.
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:176
Hexagon Common GEP
Value * getCondition() const
static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root, Instruction *OnPathTo, DominatorTree *DT)
Return true if undefined behavior would provable be executed on the path to OnPathTo if Root produced...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:137
static ConstantRange makeAllowedICmpRegion(CmpInst::Predicate Pred, const ConstantRange &Other)
Produce the smallest range such that all values that may satisfy the given predicate with any value c...
This defines the Use class.
static bool hasConcreteDef(Value *V)
Return true if the given value is concrete.
ReplaceExitVal
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:299
bool hasLoopInvariantOperands(const Instruction *I) const
Return true if all the operands of the specified instruction are loop invariant.
Definition: LoopInfo.cpp:67
bool propagatesFullPoison(const Instruction *I)
Return true if this function can prove that I is guaranteed to yield full-poison (all bits poison) if...
static Value * genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, const SCEV *ExitCount, bool UsePostInc, Loop *L, SCEVExpander &Rewriter, ScalarEvolution *SE)
Insert an IR expression which computes the value held by the IV IndVar (which must be an loop counter...
INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars", "Induction Variable Simplification", false, false) INITIALIZE_PASS_END(IndVarSimplifyLegacyPass
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:268
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1079
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:221
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:743
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:47
Interface for visiting interesting IV users that are recognized but not simplified by this utility...
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:133
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
bool isSigned() const
Definition: InstrTypes.h:902
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:637
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE, etc.
Definition: InstrTypes.h:831
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:369
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:897
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:368
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:439
A Use represents the edge between a Value definition and its users.
Definition: Use.h:55
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal)
Convert APF to an integer, if possible.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:41
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:196
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:779
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:739
This file implements a class to represent arbitrary precision integral constant values and operations...
BlockT * getHeader() const
Definition: LoopInfo.h:102
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:102
Key
PAL metadata keys.
void findDbgValues(SmallVectorImpl< DbgValueInst *> &DbgValues, Value *V)
Finds the llvm.dbg.value intrinsics describing a value.
Definition: Local.cpp:1528
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:692
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:244
Value handle that is nullable, but tries to track the Value.
Definition: ValueHandle.h:181
This node represents a polynomial recurrence on the trip count of the specified loop.
ConstantRange intersectWith(const ConstantRange &CR, PreferredRangeType Type=Smallest) const
Return the range that results from the intersection of this range with another range.
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB)
Whether the current loop exit test is based on this value.
This instruction compares its operands according to the predicate given to the constructor.
static Instruction * getInsertPointForUses(Instruction *User, Value *Def, DominatorTree *DT, LoopInfo *LI)
Determine the insertion point for this user.
static cl::opt< bool > VerifyIndvars("verify-indvars", cl::Hidden, cl::desc("Verify the ScalarEvolution result after running indvars"))
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Examine each PHI in the given block and delete it if it is dead.
const SCEV * getSizeOfExpr(Type *IntTy, Type *AllocTy)
Return an expression for sizeof AllocTy that is type IntTy.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:125
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:156
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:429
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1878
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:291
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
static BinaryOperator * CreateAdd(Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block...
Definition: IRBuilder.h:132
Value * getOperand(unsigned i) const
Definition: User.h:169
void initializeIndVarSimplifyLegacyPassPass(PassRegistry &)
void clearInsertPoint()
Clear the current insertion point.
static MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition: Metadata.cpp:105
void clear()
Erase the contents of the InsertedExpressions map so that users trying to expand the same expression ...
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt...
Definition: PatternMatch.h:175
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:216
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:318
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
Conditional or Unconditional Branch instruction.
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl< Value *> &Visited, unsigned Depth)
Recursive helper for hasConcreteDef().
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:652
Value * getIncomingValueForBlock(const BasicBlock *BB) const
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:223
const Instruction & front() const
Definition: BasicBlock.h:280
#define H(x, y, z)
Definition: MD5.cpp:57
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:263
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE, const TargetTransformInfo *TTI)
Update information about the induction variable that is extended by this sign or zero extend operatio...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
bool mayHaveSideEffects() const
Return true if the instruction may have side effects.
Definition: Instruction.h:572
const SCEV * getAddExpr(SmallVectorImpl< const SCEV *> &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
bool mustTriggerUB(const Instruction *I, const SmallSet< const Value *, 16 > &KnownPoison)
Return true if the given instruction must trigger undefined behavior.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:541
Represent the analysis usage information of a pass.
This file declares a class to represent arbitrary precision floating point values and provide a varie...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1199
This instruction compares its operands according to the predicate given to the constructor.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:732
Utility class for integer operators which may exhibit overflow - Add, Sub, Mul, and Shl...
Definition: Operator.h:66
Value * expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I)
Insert code to directly compute the specified SCEV expression into the program.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values...
op_range operands()
Definition: User.h:237
self_iterator getIterator()
Definition: ilist_node.h:81
Class to represent integer types.
Definition: DerivedTypes.h:40
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn&#39;t already there.
Definition: SmallSet.h:180
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:434
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:75
void getExitingBlocks(SmallVectorImpl< BlockT *> &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:34
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1424
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
static unsigned getIncomingValueNumForOperand(unsigned i)
size_t size() const
Definition: SmallVector.h:52
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:747
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1874
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:61
signed greater than
Definition: InstrTypes.h:759
#define DEBUG_TYPE
const APFloat & getValueAPF() const
Definition: Constants.h:302
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:736
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:112
Iterator for intrusive lists based on ilist_node.
unsigned getNumOperands() const
Definition: User.h:191
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
Type * getType() const
Return the LLVM type of this SCEV expression.
constexpr bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:308
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
static ValueAsMetadata * get(Value *V)
Definition: Metadata.cpp:348
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:746
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:248
Module.h This file contains the declarations for the Module class.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:63
Provides information about what library functions are available for the current target.
This class represents a range of values.
Definition: ConstantRange.h:47
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU...
Definition: DataLayout.h:254
signed less than
Definition: InstrTypes.h:761
static PHINode * FindLoopCounter(Loop *L, BasicBlock *ExitingBB, const SCEV *BECount, ScalarEvolution *SE, DominatorTree *DT)
Search the loop header for a loop counter (anadd rec w/step of one) suitable for use by LFTR...
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:374
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:631
bool isConditional() const
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:301
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:587
Value handle that asserts if the Value is deleted.
Definition: ValueHandle.h:237
static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI)
This IV user cannot be widened.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
Definition: PatternMatch.h:859
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
signed less or equal
Definition: InstrTypes.h:762
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:55
Class for arbitrary precision integers.
Definition: APInt.h:69
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), Instruction *InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
iterator_range< user_iterator > users()
Definition: Value.h:399
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property...
Definition: Operator.h:95
This class uses information about analyze scalars to rewrite expressions in canonical form...
Represents analyses that only rely on functions&#39; control flow.
Definition: PassManager.h:114
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:627
LoopT * getParentLoop() const
Definition: LoopInfo.h:103
Virtual Register Rewriter
Definition: VirtRegMap.cpp:221
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:807
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
Definition: LoopInfo.cpp:425
This class represents an analyzed expression in the program.
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:175
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:506
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:151
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
#define I(x, y, z)
Definition: MD5.cpp:58
user_iterator_impl< User > user_iterator
Definition: Value.h:368
bool mayReadFromMemory() const
Return true if this instruction may read memory.
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass&#39;s AnalysisUsage.
Definition: LoopUtils.cpp:136
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:740
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
static bool isLoopCounter(PHINode *Phi, Loop *L, ScalarEvolution *SE)
Return true if the given phi is a "counter" in L.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:324
bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, SmallVectorImpl< WeakTrackingVH > &Dead, SCEVExpander &Rewriter, IVVisitor *V=nullptr)
simplifyUsersOfIV - Simplify instructions that use this induction variable by using ScalarEvolution t...
const SCEV * getTruncateOrZeroExtend(const SCEV *V, Type *Ty, unsigned Depth=0)
Return a SCEV corresponding to a conversion of the input value to the specified type.
int getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info=OK_AnyValue, OperandValueKind Opd2Info=OK_AnyValue, OperandValueProperties Opd1PropInfo=OP_None, OperandValueProperties Opd2PropInfo=OP_None, ArrayRef< const Value *> Args=ArrayRef< const Value *>()) const
This is an approximation of reciprocal throughput of a math/logic op.
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:744
void setCondition(Value *V)
unsigned replaceCongruentIVs(Loop *L, const DominatorTree *DT, SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetTransformInfo *TTI=nullptr)
replace congruent phis with their most canonical representative.
This class represents a cast from signed integer to floating point.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
user_iterator user_begin()
Definition: Value.h:375
bool isOne() const
Return true if the expression is a constant one.
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:735
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:575
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction has no side ef...
Definition: Local.cpp:353
LLVM Value Representation.
Definition: Value.h:72
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:745
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:250
static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond)
Return true if this IV has any uses other than the (soon to be rewritten) loop exit test...
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:86
void disableCanonicalMode()
Disable the behavior of expanding expressions in canonical form rather than in a more literal form...
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:583
void getUniqueExitBlocks(SmallVectorImpl< BlockT *> &ExitBlocks) const
Return all unique successor blocks of this loop.
Definition: LoopInfoImpl.h:115
bool hasOneUse() const
Return true if there is exactly one user of this value.
Definition: Value.h:412
A container for analyses that lazily runs them and caches their results.
This pass exposes codegen information to IR-level passes.
This header defines various interfaces for pass management in LLVM.
void setIncomingValue(unsigned i, Value *V)
#define LLVM_DEBUG(X)
Definition: Debug.h:122
bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE)
Return true if the given expression is safe to expand in the sense that all materialized values are s...
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:158
static cl::opt< bool > DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), cl::desc("Disable Linear Function Test Replace optimization"))
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:737
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB)
linearFunctionTestReplace policy.
const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
bool use_empty() const
Definition: Value.h:322
Pass * createIndVarSimplifyPass()
signed greater or equal
Definition: InstrTypes.h:760
IntegerType * Int32Ty
const BasicBlock * getParent() const
Definition: Instruction.h:66
This class represents a constant integer value.
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property...
Definition: Operator.h:89
bool makeLoopInvariant(Value *V, bool &Changed, Instruction *InsertPt=nullptr, MemorySSAUpdater *MSSAU=nullptr) const
If the given value is an instruction inside of the loop and it can be hoisted, do so to make it trivi...
Definition: LoopInfo.cpp:71