//===- ScalarEvolution.cpp - Scalar Evolution Analysis --------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of the scalar evolution analysis
// engine, which is used primarily to analyze expressions involving induction
// variables in loops.
//
// There are several aspects to this library. First is the representation of
// scalar expressions, which are represented as subclasses of the SCEV class.
// These classes are used to represent certain types of subexpressions that we
// can handle. We only create one SCEV of a particular shape, so
// pointer-comparisons for equality are legal.
//
// One important aspect of the SCEV objects is that they are never cyclic, even
// if there is a cycle in the dataflow for an expression (i.e., a PHI node). If
// the PHI node is one of the idioms that we can represent (e.g., a polynomial
// recurrence) then we represent it directly as a recurrence node, otherwise we
// represent it as a SCEVUnknown node.
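//
// For example (an illustrative sketch, not part of the original comment):
// the PHI
//
//   %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
//   %iv.next = add i32 %iv, 4
//
// has cyclic dataflow, but is represented by the acyclic polynomial
// recurrence {0,+,4}<%loop>.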
//
// In addition to being able to represent expressions of various types, we also
// have folders that are used to build the *canonical* representation for a
// particular expression. These folders are capable of using a variety of
// rewrite rules to simplify the expressions.
//
// Once the folders are defined, we can implement the more interesting
// higher-level code, such as the code that recognizes PHI nodes of various
// types, computes the execution count of a loop, etc.
//
// TODO: We should use these routines and value representations to implement
// dependence analysis!
//
//===----------------------------------------------------------------------===//
//
// There are several good references for the techniques used in this analysis.
//
//  Chains of recurrences -- a method to expedite the evaluation
//  of closed-form functions
//  Olaf Bachmann, Paul S. Wang, Eugene V. Zima
//
//  On computational properties of chains of recurrences
//  Eugene V. Zima
//
//  Symbolic Evaluation of Chains of Recurrences for Loop Optimization
//  Robert A. van Engelen
//
//  Efficient Symbolic Analysis for Optimizing Compilers
//  Robert A. van Engelen
//
//  Using the chains of recurrences algebra for data dependence testing and
//  induction variable substitution
//  MS Thesis, Johnie Birch
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
using namespace llvm;

#define DEBUG_TYPE "scalar-evolution"

STATISTIC(NumArrayLenItCounts,
          "Number of trip counts computed with array length");
STATISTIC(NumTripCountsComputed,
          "Number of loops with predictable loop counts");
STATISTIC(NumTripCountsNotComputed,
          "Number of loops without predictable loop counts");
STATISTIC(NumBruteForceTripCountsComputed,
          "Number of loops with trip counts computed by force");

static cl::opt<unsigned>
MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden,
                        cl::desc("Maximum number of iterations SCEV will "
                                 "symbolically execute a constant "
                                 "derived loop"),
                        cl::init(100));

// FIXME: Enable this with EXPENSIVE_CHECKS when the test suite is clean.
static cl::opt<bool>
VerifySCEV("verify-scev",
           cl::desc("Verify ScalarEvolution's backedge taken counts (slow)"));
static cl::opt<bool>
    VerifySCEVMap("verify-scev-maps",
                  cl::desc("Verify no dangling value in ScalarEvolution's "
                           "ExprValueMap (slow)"));

static cl::opt<unsigned> MulOpsInlineThreshold(
    "scev-mulops-inline-threshold", cl::Hidden,
    cl::desc("Threshold for inlining multiplication operands into a SCEV"),
    cl::init(1000));

static cl::opt<unsigned> MaxSCEVCompareDepth(
    "scalar-evolution-max-scev-compare-depth", cl::Hidden,
    cl::desc("Maximum depth of recursive SCEV complexity comparisons"),
    cl::init(32));

static cl::opt<unsigned> MaxValueCompareDepth(
    "scalar-evolution-max-value-compare-depth", cl::Hidden,
    cl::desc("Maximum depth of recursive value complexity comparisons"),
    cl::init(2));

//===----------------------------------------------------------------------===//
//                           SCEV class definitions
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Implementation of the SCEV class.
//

LLVM_DUMP_METHOD
void SCEV::dump() const {
  print(dbgs());
  dbgs() << '\n';
}

void SCEV::print(raw_ostream &OS) const {
  switch (static_cast<SCEVTypes>(getSCEVType())) {
  case scConstant:
    cast<SCEVConstant>(this)->getValue()->printAsOperand(OS, false);
    return;
  case scTruncate: {
    const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(this);
    const SCEV *Op = Trunc->getOperand();
    OS << "(trunc " << *Op->getType() << " " << *Op << " to "
       << *Trunc->getType() << ")";
    return;
  }
  case scZeroExtend: {
    const SCEVZeroExtendExpr *ZExt = cast<SCEVZeroExtendExpr>(this);
    const SCEV *Op = ZExt->getOperand();
    OS << "(zext " << *Op->getType() << " " << *Op << " to "
       << *ZExt->getType() << ")";
    return;
  }
  case scSignExtend: {
    const SCEVSignExtendExpr *SExt = cast<SCEVSignExtendExpr>(this);
    const SCEV *Op = SExt->getOperand();
    OS << "(sext " << *Op->getType() << " " << *Op << " to "
       << *SExt->getType() << ")";
    return;
  }
  case scAddRecExpr: {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(this);
    OS << "{" << *AR->getOperand(0);
    for (unsigned i = 1, e = AR->getNumOperands(); i != e; ++i)
      OS << ",+," << *AR->getOperand(i);
    OS << "}<";
    if (AR->hasNoUnsignedWrap())
      OS << "nuw><";
    if (AR->hasNoSignedWrap())
      OS << "nsw><";
    if (AR->hasNoSelfWrap() &&
        !AR->getNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW)))
      OS << "nw><";
    AR->getLoop()->getHeader()->printAsOperand(OS, /*PrintType=*/false);
    OS << ">";
    return;
  }
  case scAddExpr:
  case scMulExpr:
  case scUMaxExpr:
  case scSMaxExpr: {
    const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(this);
    const char *OpStr = nullptr;
    switch (NAry->getSCEVType()) {
    case scAddExpr: OpStr = " + "; break;
    case scMulExpr: OpStr = " * "; break;
    case scUMaxExpr: OpStr = " umax "; break;
    case scSMaxExpr: OpStr = " smax "; break;
    }
    OS << "(";
    for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
         I != E; ++I) {
      OS << **I;
      if (std::next(I) != E)
        OS << OpStr;
    }
    OS << ")";
    switch (NAry->getSCEVType()) {
    case scAddExpr:
    case scMulExpr:
      if (NAry->hasNoUnsignedWrap())
        OS << "<nuw>";
      if (NAry->hasNoSignedWrap())
        OS << "<nsw>";
    }
    return;
  }
  case scUDivExpr: {
    const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(this);
    OS << "(" << *UDiv->getLHS() << " /u " << *UDiv->getRHS() << ")";
    return;
  }
  case scUnknown: {
    const SCEVUnknown *U = cast<SCEVUnknown>(this);
    Type *AllocTy;
    if (U->isSizeOf(AllocTy)) {
      OS << "sizeof(" << *AllocTy << ")";
      return;
    }
    if (U->isAlignOf(AllocTy)) {
      OS << "alignof(" << *AllocTy << ")";
      return;
    }

    Type *CTy;
    Constant *FieldNo;
    if (U->isOffsetOf(CTy, FieldNo)) {
      OS << "offsetof(" << *CTy << ", ";
      FieldNo->printAsOperand(OS, false);
      OS << ")";
      return;
    }

    // Otherwise just print it normally.
    U->getValue()->printAsOperand(OS, false);
    return;
  }
  case scCouldNotCompute:
    OS << "***COULDNOTCOMPUTE***";
    return;
  }
  llvm_unreachable("Unknown SCEV kind!");
}
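
// Illustrative examples of the output format above (not from the original
// source): an unsigned-wrap-free induction variable in loop %loop prints
// as {0,+,4}<nuw><%loop>, and a widening cast prints as
// (zext i8 %x to i32).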

Type *SCEV::getType() const {
  switch (static_cast<SCEVTypes>(getSCEVType())) {
  case scConstant:
    return cast<SCEVConstant>(this)->getType();
  case scTruncate:
  case scZeroExtend:
  case scSignExtend:
    return cast<SCEVCastExpr>(this)->getType();
  case scAddRecExpr:
  case scMulExpr:
  case scUMaxExpr:
  case scSMaxExpr:
    return cast<SCEVNAryExpr>(this)->getType();
  case scAddExpr:
    return cast<SCEVAddExpr>(this)->getType();
  case scUDivExpr:
    return cast<SCEVUDivExpr>(this)->getType();
  case scUnknown:
    return cast<SCEVUnknown>(this)->getType();
  case scCouldNotCompute:
    llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
  }
  llvm_unreachable("Unknown SCEV kind!");
}

bool SCEV::isZero() const {
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
    return SC->getValue()->isZero();
  return false;
}

bool SCEV::isOne() const {
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
    return SC->getValue()->isOne();
  return false;
}

bool SCEV::isAllOnesValue() const {
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
    return SC->getValue()->isAllOnesValue();
  return false;
}

bool SCEV::isNonConstantNegative() const {
  const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(this);
  if (!Mul) return false;

  // If there is a constant factor, it will be first.
  const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
  if (!SC) return false;

  // Return true if the value is negative, this matches things like (-42 * V).
  return SC->getAPInt().isNegative();
}

SCEVCouldNotCompute::SCEVCouldNotCompute() :
  SCEV(FoldingSetNodeIDRef(), scCouldNotCompute) {}

bool SCEVCouldNotCompute::classof(const SCEV *S) {
  return S->getSCEVType() == scCouldNotCompute;
}

const SCEV *ScalarEvolution::getConstant(ConstantInt *V) {
  FoldingSetNodeID ID;
  ID.AddInteger(scConstant);
  ID.AddPointer(V);
  void *IP = nullptr;
  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  SCEV *S = new (SCEVAllocator) SCEVConstant(ID.Intern(SCEVAllocator), V);
  UniqueSCEVs.InsertNode(S, IP);
  return S;
}

const SCEV *ScalarEvolution::getConstant(const APInt &Val) {
  return getConstant(ConstantInt::get(getContext(), Val));
}

const SCEV *
ScalarEvolution::getConstant(Type *Ty, uint64_t V, bool isSigned) {
  IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty));
  return getConstant(ConstantInt::get(ITy, V, isSigned));
}

SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID,
                           unsigned SCEVTy, const SCEV *op, Type *ty)
  : SCEV(ID, SCEVTy), Op(op), Ty(ty) {}

SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID,
                                   const SCEV *op, Type *ty)
  : SCEVCastExpr(ID, scTruncate, op, ty) {
  assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
         (Ty->isIntegerTy() || Ty->isPointerTy()) &&
         "Cannot truncate non-integer value!");
}

SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID,
                                       const SCEV *op, Type *ty)
  : SCEVCastExpr(ID, scZeroExtend, op, ty) {
  assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
         (Ty->isIntegerTy() || Ty->isPointerTy()) &&
         "Cannot zero extend non-integer value!");
}

SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID,
                                       const SCEV *op, Type *ty)
  : SCEVCastExpr(ID, scSignExtend, op, ty) {
  assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
         (Ty->isIntegerTy() || Ty->isPointerTy()) &&
         "Cannot sign extend non-integer value!");
}

void SCEVUnknown::deleted() {
  // Clear this SCEVUnknown from various maps.
  SE->forgetMemoizedResults(this);

  // Remove this SCEVUnknown from the uniquing map.
  SE->UniqueSCEVs.RemoveNode(this);

  // Release the value.
  setValPtr(nullptr);
}

void SCEVUnknown::allUsesReplacedWith(Value *New) {
  // Clear this SCEVUnknown from various maps.
  SE->forgetMemoizedResults(this);

  // Remove this SCEVUnknown from the uniquing map.
  SE->UniqueSCEVs.RemoveNode(this);

  // Update this SCEVUnknown to point to the new value. This is needed
  // because there may still be outstanding SCEVs which still point to
  // this SCEVUnknown.
  setValPtr(New);
}

bool SCEVUnknown::isSizeOf(Type *&AllocTy) const {
  if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
    if (VCE->getOpcode() == Instruction::PtrToInt)
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
        if (CE->getOpcode() == Instruction::GetElementPtr &&
            CE->getOperand(0)->isNullValue() &&
            CE->getNumOperands() == 2)
          if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(1)))
            if (CI->isOne()) {
              AllocTy = cast<PointerType>(CE->getOperand(0)->getType())
                            ->getElementType();
              return true;
            }

  return false;
}

bool SCEVUnknown::isAlignOf(Type *&AllocTy) const {
  if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
    if (VCE->getOpcode() == Instruction::PtrToInt)
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
        if (CE->getOpcode() == Instruction::GetElementPtr &&
            CE->getOperand(0)->isNullValue()) {
          Type *Ty =
            cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
          if (StructType *STy = dyn_cast<StructType>(Ty))
            if (!STy->isPacked() &&
                CE->getNumOperands() == 3 &&
                CE->getOperand(1)->isNullValue()) {
              if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(2)))
                if (CI->isOne() &&
                    STy->getNumElements() == 2 &&
                    STy->getElementType(0)->isIntegerTy(1)) {
                  AllocTy = STy->getElementType(1);
                  return true;
                }
            }
        }

  return false;
}

bool SCEVUnknown::isOffsetOf(Type *&CTy, Constant *&FieldNo) const {
  if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
    if (VCE->getOpcode() == Instruction::PtrToInt)
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
        if (CE->getOpcode() == Instruction::GetElementPtr &&
            CE->getNumOperands() == 3 &&
            CE->getOperand(0)->isNullValue() &&
            CE->getOperand(1)->isNullValue()) {
          Type *Ty =
            cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
          // Ignore vector types here so that ScalarEvolutionExpander doesn't
          // emit getelementptrs that index into vectors.
          if (Ty->isStructTy() || Ty->isArrayTy()) {
            CTy = Ty;
            FieldNo = CE->getOperand(2);
            return true;
          }
        }

  return false;
}

//===----------------------------------------------------------------------===//
//                               SCEV Utilities
//===----------------------------------------------------------------------===//

/// Compare the two values \p LV and \p RV in terms of their "complexity" where
/// "complexity" is a partial (and somewhat ad-hoc) relation used to order
/// operands in SCEV expressions. \p EqCache is a set of pairs of values that
/// have been previously deemed to be "equally complex" by this routine. It is
/// intended to avoid exponential time complexity in cases like:
///
///   %a = f(%x, %y)
///   %b = f(%a, %a)
///   %c = f(%b, %b)
///
///   %d = f(%x, %y)
///   %e = f(%d, %d)
///   %f = f(%e, %e)
///
///   CompareValueComplexity(%f, %c)
///
/// Since we do not continue running this routine on expression trees once we
/// have seen unequal values, there is no need to track them in the cache.
static int
CompareValueComplexity(SmallSet<std::pair<Value *, Value *>, 8> &EqCache,
                       const LoopInfo *const LI, Value *LV, Value *RV,
                       unsigned Depth) {
  if (Depth > MaxValueCompareDepth || EqCache.count({LV, RV}))
    return 0;

  // Order pointer values after integer values. This helps SCEVExpander form
  // GEPs.
  bool LIsPointer = LV->getType()->isPointerTy(),
       RIsPointer = RV->getType()->isPointerTy();
  if (LIsPointer != RIsPointer)
    return (int)LIsPointer - (int)RIsPointer;

  // Compare getValueID values.
  unsigned LID = LV->getValueID(), RID = RV->getValueID();
  if (LID != RID)
    return (int)LID - (int)RID;

  // Sort arguments by their position.
  if (const auto *LA = dyn_cast<Argument>(LV)) {
    const auto *RA = cast<Argument>(RV);
    unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo();
    return (int)LArgNo - (int)RArgNo;
  }

  if (const auto *LGV = dyn_cast<GlobalValue>(LV)) {
    const auto *RGV = cast<GlobalValue>(RV);

    const auto IsGVNameSemantic = [&](const GlobalValue *GV) {
      auto LT = GV->getLinkage();
      return !(GlobalValue::isPrivateLinkage(LT) ||
               GlobalValue::isInternalLinkage(LT));
    };

    // Use the names to distinguish the two values, but only if the
    // names are semantically important.
    if (IsGVNameSemantic(LGV) && IsGVNameSemantic(RGV))
      return LGV->getName().compare(RGV->getName());
  }

  // For instructions, compare their loop depth, and their operand count. This
  // is pretty loose.
  if (const auto *LInst = dyn_cast<Instruction>(LV)) {
    const auto *RInst = cast<Instruction>(RV);

    // Compare loop depths.
    const BasicBlock *LParent = LInst->getParent(),
                     *RParent = RInst->getParent();
    if (LParent != RParent) {
      unsigned LDepth = LI->getLoopDepth(LParent),
               RDepth = LI->getLoopDepth(RParent);
      if (LDepth != RDepth)
        return (int)LDepth - (int)RDepth;
    }

    // Compare the number of operands.
    unsigned LNumOps = LInst->getNumOperands(),
             RNumOps = RInst->getNumOperands();
    if (LNumOps != RNumOps)
      return (int)LNumOps - (int)RNumOps;

    for (unsigned Idx : seq(0u, LNumOps)) {
      int Result =
          CompareValueComplexity(EqCache, LI, LInst->getOperand(Idx),
                                 RInst->getOperand(Idx), Depth + 1);
      if (Result != 0)
        return Result;
    }
  }

  EqCache.insert({LV, RV});
  return 0;
}

// Return negative, zero, or positive, if LHS is less than, equal to, or
// greater than RHS, respectively. A three-way result allows recursive
// comparisons to be more efficient.
static int CompareSCEVComplexity(
    SmallSet<std::pair<const SCEV *, const SCEV *>, 8> &EqCacheSCEV,
    const LoopInfo *const LI, const SCEV *LHS, const SCEV *RHS,
    unsigned Depth = 0) {
  // Fast-path: SCEVs are uniqued so we can do a quick equality check.
  if (LHS == RHS)
    return 0;

  // Primarily, sort the SCEVs by their getSCEVType().
  unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType();
  if (LType != RType)
    return (int)LType - (int)RType;

  if (Depth > MaxSCEVCompareDepth || EqCacheSCEV.count({LHS, RHS}))
    return 0;
  // Aside from the getSCEVType() ordering, the particular ordering
  // isn't very important except that it's beneficial to be consistent,
  // so that (a + b) and (b + a) don't end up as different expressions.
  switch (static_cast<SCEVTypes>(LType)) {
  case scUnknown: {
    const SCEVUnknown *LU = cast<SCEVUnknown>(LHS);
    const SCEVUnknown *RU = cast<SCEVUnknown>(RHS);

    SmallSet<std::pair<Value *, Value *>, 8> EqCache;
    int X = CompareValueComplexity(EqCache, LI, LU->getValue(), RU->getValue(),
                                   Depth + 1);
    if (X == 0)
      EqCacheSCEV.insert({LHS, RHS});
    return X;
  }

  case scConstant: {
    const SCEVConstant *LC = cast<SCEVConstant>(LHS);
    const SCEVConstant *RC = cast<SCEVConstant>(RHS);

    // Compare constant values.
    const APInt &LA = LC->getAPInt();
    const APInt &RA = RC->getAPInt();
    unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth();
    if (LBitWidth != RBitWidth)
      return (int)LBitWidth - (int)RBitWidth;
    return LA.ult(RA) ? -1 : 1;
  }

  case scAddRecExpr: {
    const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS);
    const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS);

    // Compare addrec loop depths.
    const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop();
    if (LLoop != RLoop) {
      unsigned LDepth = LLoop->getLoopDepth(), RDepth = RLoop->getLoopDepth();
      if (LDepth != RDepth)
        return (int)LDepth - (int)RDepth;
    }

    // Addrec complexity grows with operand count.
    unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands();
    if (LNumOps != RNumOps)
      return (int)LNumOps - (int)RNumOps;

    // Lexicographically compare.
    for (unsigned i = 0; i != LNumOps; ++i) {
      int X = CompareSCEVComplexity(EqCacheSCEV, LI, LA->getOperand(i),
                                    RA->getOperand(i), Depth + 1);
      if (X != 0)
        return X;
    }
    EqCacheSCEV.insert({LHS, RHS});
    return 0;
  }

  case scAddExpr:
  case scMulExpr:
  case scSMaxExpr:
  case scUMaxExpr: {
    const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS);
    const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS);

    // Lexicographically compare n-ary expressions.
    unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands();
    if (LNumOps != RNumOps)
      return (int)LNumOps - (int)RNumOps;

    for (unsigned i = 0; i != LNumOps; ++i) {
      if (i >= RNumOps)
        return 1;
      int X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getOperand(i),
                                    RC->getOperand(i), Depth + 1);
      if (X != 0)
        return X;
    }
    EqCacheSCEV.insert({LHS, RHS});
    return 0;
  }

  case scUDivExpr: {
    const SCEVUDivExpr *LC = cast<SCEVUDivExpr>(LHS);
    const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS);

    // Lexicographically compare udiv expressions.
    int X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getLHS(), RC->getLHS(),
                                  Depth + 1);
    if (X != 0)
      return X;
    X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getRHS(), RC->getRHS(),
                              Depth + 1);
    if (X == 0)
      EqCacheSCEV.insert({LHS, RHS});
    return X;
  }

  case scTruncate:
  case scZeroExtend:
  case scSignExtend: {
    const SCEVCastExpr *LC = cast<SCEVCastExpr>(LHS);
    const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS);

    // Compare cast expressions by operand.
    int X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getOperand(),
                                  RC->getOperand(), Depth + 1);
    if (X == 0)
      EqCacheSCEV.insert({LHS, RHS});
    return X;
  }

  case scCouldNotCompute:
    llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
  }
  llvm_unreachable("Unknown SCEV kind!");
}

/// Given a list of SCEV objects, order them by their complexity, and group
/// objects of the same complexity together by value. When this routine is
/// finished, we know that any duplicates in the vector are consecutive and that
/// complexity is monotonically increasing.
///
/// Note that we take special precautions to ensure that we get deterministic
/// results from this routine. In other words, we don't want the results of
/// this to depend on where the addresses of various SCEV objects happened to
/// land in memory.
///
static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
                              LoopInfo *LI) {
  if (Ops.size() < 2) return;  // Noop

  SmallSet<std::pair<const SCEV *, const SCEV *>, 8> EqCache;
  if (Ops.size() == 2) {
    // This is the common case, which also happens to be trivially simple.
    // Special case it.
    const SCEV *&LHS = Ops[0], *&RHS = Ops[1];
    if (CompareSCEVComplexity(EqCache, LI, RHS, LHS) < 0)
      std::swap(LHS, RHS);
    return;
  }

  // Do the rough sort by complexity.
  std::stable_sort(Ops.begin(), Ops.end(),
                   [&EqCache, LI](const SCEV *LHS, const SCEV *RHS) {
                     return CompareSCEVComplexity(EqCache, LI, LHS, RHS) < 0;
                   });

  // Now that we are sorted by complexity, group elements of the same
  // complexity. Note that this is, at worst, N^2, but the vector is likely to
  // be extremely short in practice. Note that we take this approach because we
  // do not want to depend on the addresses of the objects we are grouping.
  for (unsigned i = 0, e = Ops.size(); i != e-2; ++i) {
    const SCEV *S = Ops[i];
    unsigned Complexity = S->getSCEVType();

    // If there are any objects of the same complexity and same value as this
    // one, group them.
    for (unsigned j = i+1; j != e && Ops[j]->getSCEVType() == Complexity; ++j) {
      if (Ops[j] == S) { // Found a duplicate.
        // Move it to immediately after i'th element.
        std::swap(Ops[i+1], Ops[j]);
        ++i;   // no need to rescan it.
        if (i == e-2) return;  // Done!
      }
    }
  }
}
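
// Illustrative example (not from the original source): after
// GroupByComplexity, an operand list (%a, 42, %b, %a) becomes
// (42, %a, %a, %b): the constant sorts first, and the duplicate %a
// operands are adjacent, so the n-ary folders can merge them (e.g. into
// 2 * %a) with a single linear scan.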

// Returns the size of the SCEV S.
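// For example (illustrative): sizeOfSCEV((%a + (2 * %b))) == 5, counting
// the add, the mul, the constant 2 and the two SCEVUnknowns %a and %b.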
static inline int sizeOfSCEV(const SCEV *S) {
  struct FindSCEVSize {
    int Size;
    FindSCEVSize() : Size(0) {}

    bool follow(const SCEV *S) {
      ++Size;
      // Keep looking at all operands of S.
      return true;
    }
    bool isDone() const {
      return false;
    }
  };

  FindSCEVSize F;
  SCEVTraversal<FindSCEVSize> ST(F);
  ST.visitAll(S);
  return F.Size;
}

namespace {

struct SCEVDivision : public SCEVVisitor<SCEVDivision, void> {
public:
  // Computes the Quotient and Remainder of the division of Numerator by
  // Denominator.
  static void divide(ScalarEvolution &SE, const SCEV *Numerator,
                     const SCEV *Denominator, const SCEV **Quotient,
                     const SCEV **Remainder) {
    assert(Numerator && Denominator && "Uninitialized SCEV");

    SCEVDivision D(SE, Numerator, Denominator);

    // Check for the trivial case here to avoid having to check for it in the
    // rest of the code.
    if (Numerator == Denominator) {
      *Quotient = D.One;
      *Remainder = D.Zero;
      return;
    }

    if (Numerator->isZero()) {
      *Quotient = D.Zero;
      *Remainder = D.Zero;
      return;
    }

    // A simple case when N/1. The quotient is N.
    if (Denominator->isOne()) {
      *Quotient = Numerator;
      *Remainder = D.Zero;
      return;
    }

    // Split the Denominator when it is a product.
    if (const SCEVMulExpr *T = dyn_cast<SCEVMulExpr>(Denominator)) {
      const SCEV *Q, *R;
      *Quotient = Numerator;
      for (const SCEV *Op : T->operands()) {
        divide(SE, *Quotient, Op, &Q, &R);
        *Quotient = Q;

        // Bail out when the Numerator is not divisible by one of the terms of
        // the Denominator.
        if (!R->isZero()) {
          *Quotient = D.Zero;
          *Remainder = Numerator;
          return;
        }
      }
      *Remainder = D.Zero;
      return;
    }

    D.visit(Numerator);
    *Quotient = D.Quotient;
    *Remainder = D.Remainder;
  }

  // Except in the trivial case described above, we do not know how to divide
  // Expr by Denominator for the following functions with empty implementation.
  void visitTruncateExpr(const SCEVTruncateExpr *Numerator) {}
  void visitZeroExtendExpr(const SCEVZeroExtendExpr *Numerator) {}
  void visitSignExtendExpr(const SCEVSignExtendExpr *Numerator) {}
  void visitUDivExpr(const SCEVUDivExpr *Numerator) {}
  void visitSMaxExpr(const SCEVSMaxExpr *Numerator) {}
  void visitUMaxExpr(const SCEVUMaxExpr *Numerator) {}
  void visitUnknown(const SCEVUnknown *Numerator) {}
  void visitCouldNotCompute(const SCEVCouldNotCompute *Numerator) {}

  void visitConstant(const SCEVConstant *Numerator) {
    if (const SCEVConstant *D = dyn_cast<SCEVConstant>(Denominator)) {
      APInt NumeratorVal = Numerator->getAPInt();
      APInt DenominatorVal = D->getAPInt();
      uint32_t NumeratorBW = NumeratorVal.getBitWidth();
      uint32_t DenominatorBW = DenominatorVal.getBitWidth();

      if (NumeratorBW > DenominatorBW)
        DenominatorVal = DenominatorVal.sext(NumeratorBW);
      else if (NumeratorBW < DenominatorBW)
        NumeratorVal = NumeratorVal.sext(DenominatorBW);

      APInt QuotientVal(NumeratorVal.getBitWidth(), 0);
      APInt RemainderVal(NumeratorVal.getBitWidth(), 0);
      APInt::sdivrem(NumeratorVal, DenominatorVal, QuotientVal, RemainderVal);
      Quotient = SE.getConstant(QuotientVal);
      Remainder = SE.getConstant(RemainderVal);
      return;
    }
  }

  void visitAddRecExpr(const SCEVAddRecExpr *Numerator) {
    const SCEV *StartQ, *StartR, *StepQ, *StepR;
    if (!Numerator->isAffine())
      return cannotDivide(Numerator);
    divide(SE, Numerator->getStart(), Denominator, &StartQ, &StartR);
    divide(SE, Numerator->getStepRecurrence(SE), Denominator, &StepQ, &StepR);
    // Bail out if the types do not match.
    Type *Ty = Denominator->getType();
    if (Ty != StartQ->getType() || Ty != StartR->getType() ||
        Ty != StepQ->getType() || Ty != StepR->getType())
      return cannotDivide(Numerator);
    Quotient = SE.getAddRecExpr(StartQ, StepQ, Numerator->getLoop(),
                                Numerator->getNoWrapFlags());
    Remainder = SE.getAddRecExpr(StartR, StepR, Numerator->getLoop(),
                                 Numerator->getNoWrapFlags());
  }

  void visitAddExpr(const SCEVAddExpr *Numerator) {
    SmallVector<const SCEV *, 2> Qs, Rs;
    Type *Ty = Denominator->getType();

    for (const SCEV *Op : Numerator->operands()) {
      const SCEV *Q, *R;
      divide(SE, Op, Denominator, &Q, &R);

      // Bail out if types do not match.
      if (Ty != Q->getType() || Ty != R->getType())
        return cannotDivide(Numerator);

      Qs.push_back(Q);
      Rs.push_back(R);
    }

    if (Qs.size() == 1) {
      Quotient = Qs[0];
      Remainder = Rs[0];
      return;
    }

    Quotient = SE.getAddExpr(Qs);
    Remainder = SE.getAddExpr(Rs);
  }

  void visitMulExpr(const SCEVMulExpr *Numerator) {
    SmallVector<const SCEV *, 2> Qs;
    Type *Ty = Denominator->getType();

    bool FoundDenominatorTerm = false;
    for (const SCEV *Op : Numerator->operands()) {
      // Bail out if types do not match.
      if (Ty != Op->getType())
        return cannotDivide(Numerator);

      if (FoundDenominatorTerm) {
        Qs.push_back(Op);
        continue;
      }

      // Check whether Denominator divides one of the product operands.
      const SCEV *Q, *R;
      divide(SE, Op, Denominator, &Q, &R);
      if (!R->isZero()) {
        Qs.push_back(Op);
        continue;
      }

      // Bail out if types do not match.
      if (Ty != Q->getType())
        return cannotDivide(Numerator);

      FoundDenominatorTerm = true;
      Qs.push_back(Q);
    }

    if (FoundDenominatorTerm) {
      Remainder = Zero;
      if (Qs.size() == 1)
        Quotient = Qs[0];
      else
        Quotient = SE.getMulExpr(Qs);
      return;
    }

    if (!isa<SCEVUnknown>(Denominator))
      return cannotDivide(Numerator);

    // The Remainder is obtained by replacing Denominator by 0 in Numerator.
    ValueToValueMap RewriteMap;
    RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] =
        cast<SCEVConstant>(Zero)->getValue();
    Remainder = SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true);

    if (Remainder->isZero()) {
      // The Quotient is obtained by replacing Denominator by 1 in Numerator.
      RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] =
          cast<SCEVConstant>(One)->getValue();
      Quotient =
          SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true);
      return;
    }

    // Quotient is (Numerator - Remainder) divided by Denominator.
    const SCEV *Q, *R;
    const SCEV *Diff = SE.getMinusSCEV(Numerator, Remainder);
    // This SCEV does not seem to simplify: fail the division here.
    if (sizeOfSCEV(Diff) > sizeOfSCEV(Numerator))
      return cannotDivide(Numerator);
    divide(SE, Diff, Denominator, &Q, &R);
    if (R != Zero)
      return cannotDivide(Numerator);
    Quotient = Q;
  }

private:
  SCEVDivision(ScalarEvolution &S, const SCEV *Numerator,
               const SCEV *Denominator)
      : SE(S), Denominator(Denominator) {
    Zero = SE.getZero(Denominator->getType());
    One = SE.getOne(Denominator->getType());

    // We generally do not know how to divide Expr by Denominator. We
    // initialize the division to a "cannot divide" state to simplify the rest
    // of the code.
    cannotDivide(Numerator);
  }

  // Convenience function for giving up on the division. We set the quotient to
  // be equal to zero and the remainder to be equal to the numerator.
  void cannotDivide(const SCEV *Numerator) {
    Quotient = Zero;
    Remainder = Numerator;
  }

  ScalarEvolution &SE;
  const SCEV *Denominator, *Quotient, *Remainder, *Zero, *One;
};

}
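
// Usage sketch (illustrative, not from the original source): dividing the
// SCEV ((4 * %n) + 8) by 4 via SCEVDivision::divide yields quotient
// (%n + 2) and remainder 0, while dividing (%n + 1) by 2 gives up and
// yields quotient 0 with remainder (%n + 1).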

//===----------------------------------------------------------------------===//
//                      Simple SCEV method implementations
//===----------------------------------------------------------------------===//

/// Compute BC(It, K). The result has width W. Assume K > 0.
static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K,
                                       ScalarEvolution &SE,
                                       Type *ResultTy) {
  // Handle the simplest case efficiently.
  if (K == 1)
    return SE.getTruncateOrZeroExtend(It, ResultTy);

  // We are using the following formula for BC(It, K):
  //
  //   BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / K!
  //
  // Suppose W is the bitwidth of the return value. We must be prepared for
  // overflow. Hence, we must assure that the result of our computation is
  // equal to the accurate one modulo 2^W. Unfortunately, division isn't
  // safe in modular arithmetic.
  //
  // However, this code doesn't use exactly that formula; the formula it uses
  // is something like the following, where T is the number of factors of 2 in
  // K! (i.e. trailing zeros in the binary representation of K!), and ^ is
  // exponentiation:
  //
  //   BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / 2^T / (K! / 2^T)
  //
  // This formula is trivially equivalent to the previous formula. However,
  // this formula can be implemented much more efficiently. The trick is that
  // K! / 2^T is odd, and exact division by an odd number *is* safe in modular
  // arithmetic. To do exact division in modular arithmetic, all we have
  // to do is multiply by the inverse. Therefore, this step can be done at
  // width W.
  //
  // The next issue is how to safely do the division by 2^T. The way this
  // is done is by doing the multiplication step at a width of at least W + T
  // bits. This way, the bottom W+T bits of the product are accurate. Then,
  // when we perform the division by 2^T (which is equivalent to a right shift
  // by T), the bottom W bits are accurate. Extra bits are okay; they'll get
  // truncated out after the division by 2^T.
  //
  // In comparison to just directly using the first formula, this technique
  // is much more efficient; using the first formula requires W * K bits,
  // but this formula requires less than W + K bits. Also, the first formula
  // requires a division step, whereas this formula only requires multiplies
  // and shifts.
  //
  // It doesn't matter whether the subtraction step is done in the calculation
  // width or the input iteration count's width; if the subtraction overflows,
  // the result must be zero anyway. We prefer here to do it in the width of
  // the induction variable because it helps a lot for certain cases; CodeGen
  // isn't smart enough to ignore the overflow, which leads to much less
  // efficient code if the width of the subtraction is wider than the native
  // register width.
  //
  // (It's possible to not widen at all by pulling out factors of 2 before
  // the multiplication; for example, K=2 can be calculated as
  // It/2*(It+(It*INT_MIN/INT_MIN)+-1). However, it requires
  // extra arithmetic, so it's not an obvious win, and it gets
  // much more complicated for K > 3.)
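  //
  // Worked example (illustrative, not part of the original comment): for
  // K = 4, K! = 24 = 2^3 * 3, so T = 3 and K!/2^T = 3, which is odd. With
  // W = 32, the product It*(It-1)*(It-2)*(It-3) is formed at W + T = 35
  // bits, shifted right by T = 3, truncated back to 32 bits, and finally
  // multiplied by the 32-bit modular multiplicative inverse of 3.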

  // Protection from insane SCEVs; this bound is conservative,
  // but it probably doesn't matter.
  if (K > 1000)
    return SE.getCouldNotCompute();

  unsigned W = SE.getTypeSizeInBits(ResultTy);

  // Calculate K! / 2^T and T; we divide out the factors of two before
  // multiplying for calculating K! / 2^T to avoid overflow.
  // Other overflow doesn't matter because we only care about the bottom
  // W bits of the result.
  APInt OddFactorial(W, 1);
  unsigned T = 1;
  for (unsigned i = 3; i <= K; ++i) {
    APInt Mult(W, i);
    unsigned TwoFactors = Mult.countTrailingZeros();
    T += TwoFactors;
    Mult = Mult.lshr(TwoFactors);
    OddFactorial *= Mult;
  }

  // We need at least W + T bits for the multiplication step
  unsigned CalculationBits = W + T;

  // Calculate 2^T, at width T+W.
  APInt DivFactor = APInt::getOneBitSet(CalculationBits, T);

  // Calculate the multiplicative inverse of K! / 2^T;
  // this multiplication factor will perform the exact division by
  // K! / 2^T.
  APInt Mod = APInt::getSignedMinValue(W+1);
  APInt MultiplyFactor = OddFactorial.zext(W+1);
  MultiplyFactor = MultiplyFactor.multiplicativeInverse(Mod);
  MultiplyFactor = MultiplyFactor.trunc(W);

  // Calculate the product, at width T+W
  IntegerType *CalculationTy = IntegerType::get(SE.getContext(),
                                                CalculationBits);
  const SCEV *Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy);
  for (unsigned i = 1; i != K; ++i) {
    const SCEV *S = SE.getMinusSCEV(It, SE.getConstant(It->getType(), i));
    Dividend = SE.getMulExpr(Dividend,
                             SE.getTruncateOrZeroExtend(S, CalculationTy));
  }

  // Divide by 2^T
  const SCEV *DivResult = SE.getUDivExpr(Dividend, SE.getConstant(DivFactor));

  // Truncate the result, and divide by K! / 2^T.

  return SE.getMulExpr(SE.getConstant(MultiplyFactor),
                       SE.getTruncateOrZeroExtend(DivResult, ResultTy));
}

/// Return the value of this chain of recurrences at the specified iteration
/// number. We can evaluate this recurrence by multiplying each element in the
/// chain by the binomial coefficient corresponding to it. In other words, we
/// can evaluate {A,+,B,+,C,+,D} as:
///
///   A*BC(It, 0) + B*BC(It, 1) + C*BC(It, 2) + D*BC(It, 3)
///
/// where BC(It, k) stands for binomial coefficient.
///
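/// For example (illustrative): {0,+,1,+,1} evaluates at iteration It to
///
///   0*BC(It, 0) + 1*BC(It, 1) + 1*BC(It, 2) = It + It*(It-1)/2
///                                           = It*(It+1)/2,
///
/// i.e. the triangular numbers 0, 1, 3, 6, ...
///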
const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It,
                                                ScalarEvolution &SE) const {
  const SCEV *Result = getStart();
  for (unsigned i = 1, e = getNumOperands(); i != e; ++i) {
    // The computation is correct in the face of overflow provided that the
    // multiplication is performed _after_ the evaluation of the binomial
    // coefficient.
    const SCEV *Coeff = BinomialCoefficient(It, i, SE, getType());
    if (isa<SCEVCouldNotCompute>(Coeff))
      return Coeff;

    Result = SE.getAddExpr(Result, SE.getMulExpr(getOperand(i), Coeff));
  }
  return Result;
}

//===----------------------------------------------------------------------===//
//                    SCEV Expression folder implementations
//===----------------------------------------------------------------------===//

const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
                                             Type *Ty) {
  assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) &&
         "This is not a truncating conversion!");
  assert(isSCEVable(Ty) &&
         "This is not a conversion to a SCEVable type!");
  Ty = getEffectiveSCEVType(Ty);

  FoldingSetNodeID ID;
  ID.AddInteger(scTruncate);
  ID.AddPointer(Op);
  ID.AddPointer(Ty);
  void *IP = nullptr;
  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;

  // Fold if the operand is constant.
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
    return getConstant(
        cast<ConstantInt>(ConstantExpr::getTrunc(SC->getValue(), Ty)));

  // trunc(trunc(x)) --> trunc(x)
  if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op))
    return getTruncateExpr(ST->getOperand(), Ty);

  // trunc(sext(x)) --> sext(x) if widening or trunc(x) if narrowing
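  // (e.g., illustrative: truncating (sext i32 %x to i64) to i16 yields
  // (trunc i32 %x to i16), since the target type is narrower than the
  // original i32)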
  if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
    return getTruncateOrSignExtend(SS->getOperand(), Ty);

  // trunc(zext(x)) --> zext(x) if widening or trunc(x) if narrowing
  if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
    return getTruncateOrZeroExtend(SZ->getOperand(), Ty);

  // trunc(x1+x2+...+xN) --> trunc(x1)+trunc(x2)+...+trunc(xN) if we can
  // eliminate all the truncates, or we replace other casts with truncates.
  if (const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Op)) {
    SmallVector<const SCEV *, 4> Operands;
    bool hasTrunc = false;
    for (unsigned i = 0, e = SA->getNumOperands(); i != e && !hasTrunc; ++i) {
      const SCEV *S = getTruncateExpr(SA->getOperand(i), Ty);
      if (!isa<SCEVCastExpr>(SA->getOperand(i)))
        hasTrunc = isa<SCEVTruncateExpr>(S);
      Operands.push_back(S);
    }
    if (!hasTrunc)
      return getAddExpr(Operands);
    UniqueSCEVs.FindNodeOrInsertPos(ID, IP);  // Mutates IP, returns NULL.
  }

  // trunc(x1*x2*...*xN) --> trunc(x1)*trunc(x2)*...*trunc(xN) if we can
  // eliminate all the truncates, or we replace other casts with truncates.
  if (const SCEVMulExpr *SM = dyn_cast<SCEVMulExpr>(Op)) {
    SmallVector<const SCEV *, 4> Operands;
    bool hasTrunc = false;
    for (unsigned i = 0, e = SM->getNumOperands(); i != e && !hasTrunc; ++i) {
      const SCEV *S = getTruncateExpr(SM->getOperand(i), Ty);
      if (!isa<SCEVCastExpr>(SM->getOperand(i)))
        hasTrunc = isa<SCEVTruncateExpr>(S);
      Operands.push_back(S);
    }
    if (!hasTrunc)
      return getMulExpr(Operands);
    UniqueSCEVs.FindNodeOrInsertPos(ID, IP);  // Mutates IP, returns NULL.
  }

  // If the input value is a chrec scev, truncate the chrec's operands.
  if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
    SmallVector<const SCEV *, 4> Operands;
    for (const SCEV *Op : AddRec->operands())
      Operands.push_back(getTruncateExpr(Op, Ty));
    return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap);
  }

  // The cast wasn't folded; create an explicit cast node. We can reuse
  // the existing insert position since if we get here, we won't have
  // made any changes which would invalidate it.
  SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator),
                                                 Op, Ty);
  UniqueSCEVs.InsertNode(S, IP);
  return S;
}

// Get the limit of a recurrence such that incrementing by Step cannot cause
// signed overflow as long as the value of the recurrence within the
// loop does not exceed this limit before incrementing.
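//
// For example (an illustrative instance, not in the original comment): for
// an 8-bit recurrence with Step == 1, the limit is SignedMin - 1 == 127
// and the predicate is SLT: while the value is slt 127, adding 1 cannot
// sign-overflow.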
static const SCEV *getSignedOverflowLimitForStep(const SCEV *Step,
                                                 ICmpInst::Predicate *Pred,
                                                 ScalarEvolution *SE) {
  unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
  if (SE->isKnownPositive(Step)) {
    *Pred = ICmpInst::ICMP_SLT;
    return SE->getConstant(APInt::getSignedMinValue(BitWidth) -
                           SE->getSignedRange(Step).getSignedMax());
  }
  if (SE->isKnownNegative(Step)) {
    *Pred = ICmpInst::ICMP_SGT;
    return SE->getConstant(APInt::getSignedMaxValue(BitWidth) -
                           SE->getSignedRange(Step).getSignedMin());
  }
  return nullptr;
}

// Get the limit of a recurrence such that incrementing by Step cannot cause
// unsigned overflow as long as the value of the recurrence within the loop does
// not exceed this limit before incrementing.
static const SCEV *getUnsignedOverflowLimitForStep(const SCEV *Step,
                                                   ICmpInst::Predicate *Pred,
                                                   ScalarEvolution *SE) {
  unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
  *Pred = ICmpInst::ICMP_ULT;

  return SE->getConstant(APInt::getMinValue(BitWidth) -
                         SE->getUnsignedRange(Step).getUnsignedMax());
}

namespace {

struct ExtendOpTraitsBase {
  typedef const SCEV *(ScalarEvolution::*GetExtendExprTy)(const SCEV *, Type *);
};

// Used to make code generic over signed and unsigned overflow.
template <typename ExtendOp> struct ExtendOpTraits {
  // Members present:
  //
  // static const SCEV::NoWrapFlags WrapType;
  //
  // static const ExtendOpTraitsBase::GetExtendExprTy GetExtendExpr;
  //
  // static const SCEV *getOverflowLimitForStep(const SCEV *Step,
  //                                            ICmpInst::Predicate *Pred,
  //                                            ScalarEvolution *SE);
};

template <>
struct ExtendOpTraits<SCEVSignExtendExpr> : public ExtendOpTraitsBase {
  static const SCEV::NoWrapFlags WrapType = SCEV::FlagNSW;

  static const GetExtendExprTy GetExtendExpr;

  static const SCEV *getOverflowLimitForStep(const SCEV *Step,
                                             ICmpInst::Predicate *Pred,
                                             ScalarEvolution *SE) {
    return getSignedOverflowLimitForStep(Step, Pred, SE);
  }
};

const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
    SCEVSignExtendExpr>::GetExtendExpr = &ScalarEvolution::getSignExtendExpr;

template <>
struct ExtendOpTraits<SCEVZeroExtendExpr> : public ExtendOpTraitsBase {
  static const SCEV::NoWrapFlags WrapType = SCEV::FlagNUW;

  static const GetExtendExprTy GetExtendExpr;

  static const SCEV *getOverflowLimitForStep(const SCEV *Step,
                                             ICmpInst::Predicate *Pred,
                                             ScalarEvolution *SE) {
    return getUnsignedOverflowLimitForStep(Step, Pred, SE);
  }
};

const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
    SCEVZeroExtendExpr>::GetExtendExpr = &ScalarEvolution::getZeroExtendExpr;
}

// The recurrence AR has been shown to have no signed/unsigned wrap or something
// close to it. Typically, if we can prove NSW/NUW for AR, then we can just as
// easily prove NSW/NUW for its preincrement or postincrement sibling. This
// allows normalizing a sign/zero extended AddRec as such:
// {sext/zext(Step + Start),+,Step} => {(Step + sext/zext(Start)),+,Step}.
// As a result, the expression "Step + sext/zext(PreIncAR)" is congruent with
// "sext/zext(PostIncAR)".
template <typename ExtendOpTy>
static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty,
                                        ScalarEvolution *SE) {
  auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType;
  auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;

  const Loop *L = AR->getLoop();
  const SCEV *Start = AR->getStart();
  const SCEV *Step = AR->getStepRecurrence(*SE);

  // Check for a simple looking step prior to loop entry.
  const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start);
  if (!SA)
    return nullptr;

  // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV
  // subtraction is expensive. For this purpose, perform a quick and dirty
  // difference, by checking for Step in the operand list.
  SmallVector<const SCEV *, 4> DiffOps;
  for (const SCEV *Op : SA->operands())
    if (Op != Step)
      DiffOps.push_back(Op);

  if (DiffOps.size() == SA->getNumOperands())
    return nullptr;

  // Try to prove `WrapType` (SCEV::FlagNSW or SCEV::FlagNUW) on `PreStart` +
  // `Step`:

  // 1. NSW/NUW flags on the step increment.
  auto PreStartFlags =
    ScalarEvolution::maskFlags(SA->getNoWrapFlags(), SCEV::FlagNUW);
  const SCEV *PreStart = SE->getAddExpr(DiffOps, PreStartFlags);
  const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>(
      SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap));

  // "{S,+,X} is <nsw>/<nuw>" and "the backedge is taken at least once" implies
  // "S+X does not sign/unsign-overflow".
  //

  const SCEV *BECount = SE->getBackedgeTakenCount(L);
  if (PreAR && PreAR->getNoWrapFlags(WrapType) &&
      !isa<SCEVCouldNotCompute>(BECount) && SE->isKnownPositive(BECount))
    return PreStart;

  // 2. Direct overflow check on the step operation's expression.
  unsigned BitWidth = SE->getTypeSizeInBits(AR->getType());
  Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2);
  const SCEV *OperandExtendedStart =
      SE->getAddExpr((SE->*GetExtendExpr)(PreStart, WideTy),
                     (SE->*GetExtendExpr)(Step, WideTy));
  if ((SE->*GetExtendExpr)(Start, WideTy) == OperandExtendedStart) {
    if (PreAR && AR->getNoWrapFlags(WrapType)) {
      // If we know `AR` == {`PreStart`+`Step`,+,`Step`} is `WrapType` (FlagNSW
      // or FlagNUW) and that `PreStart` + `Step` is `WrapType` too, then
      // `PreAR` == {`PreStart`,+,`Step`} is also `WrapType`. Cache this fact.
      const_cast<SCEVAddRecExpr *>(PreAR)->setNoWrapFlags(WrapType);
    }
    return PreStart;
  }

  // 3. Loop precondition.
  ICmpInst::Predicate Pred;
  const SCEV *OverflowLimit =
      ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(Step, &Pred, SE);

  if (OverflowLimit &&
      SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit))
    return PreStart;

  return nullptr;
}

// Get the normalized zero or sign extended expression for this AddRec's Start.
template <typename ExtendOpTy>
static const SCEV *getExtendAddRecStart(const SCEVAddRecExpr *AR, Type *Ty,
                                        ScalarEvolution *SE) {
  auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;

  const SCEV *PreStart = getPreStartForExtend<ExtendOpTy>(AR, Ty, SE);
  if (!PreStart)
    return (SE->*GetExtendExpr)(AR->getStart(), Ty);

  return SE->getAddExpr((SE->*GetExtendExpr)(AR->getStepRecurrence(*SE), Ty),
                        (SE->*GetExtendExpr)(PreStart, Ty));
}

// Try to prove away overflow by looking at "nearby" add recurrences. A
// motivating example for this rule: if we know `{0,+,4}` is `ult` `-1` and it
// does not itself wrap then we can conclude that `{1,+,4}` is `nuw`.
//
// Formally:
//
//     {S,+,X} == {S-T,+,X} + T
//  => Ext({S,+,X}) == Ext({S-T,+,X} + T)
//
//  If ({S-T,+,X} + T) does not overflow  ... (1)
//
//  RHS == Ext({S-T,+,X} + T) == Ext({S-T,+,X}) + Ext(T)
//
//  If {S-T,+,X} does not overflow  ... (2)
//
//  RHS == Ext({S-T,+,X}) + Ext(T) == {Ext(S-T),+,Ext(X)} + Ext(T)
//      == {Ext(S-T)+Ext(T),+,Ext(X)}
//
//  If (S-T)+T does not overflow  ... (3)
//
//  RHS == {Ext(S-T)+Ext(T),+,Ext(X)} == {Ext(S-T+T),+,Ext(X)}
//      == {Ext(S),+,Ext(X)} == LHS
//
// Thus, if (1), (2) and (3) are true for some T, then
//   Ext({S,+,X}) == {Ext(S),+,Ext(X)}
//
// (3) is implied by (1) -- "(S-T)+T does not overflow" is simply "({S-T,+,X}+T)
// does not overflow" restricted to the 0th iteration. Therefore we only need
// to check for (1) and (2).
//
// In the current context, S is `Start`, X is `Step`, Ext is `ExtendOpTy` and T
// is `Delta` (defined below).
//
template <typename ExtendOpTy>
bool ScalarEvolution::proveNoWrapByVaryingStart(const SCEV *Start,
                                                const SCEV *Step,
                                                const Loop *L) {
  auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType;

  // We restrict `Start` to a constant to prevent SCEV from spending too much
  // time here. It is correct (but more expensive) to continue with a
  // non-constant `Start` and do a general SCEV subtraction to compute
  // `PreStart` below.
  //
  const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start);
  if (!StartC)
    return false;

  APInt StartAI = StartC->getAPInt();

  for (unsigned Delta : {-2, -1, 1, 2}) {
    const SCEV *PreStart = getConstant(StartAI - Delta);

    FoldingSetNodeID ID;
    ID.AddInteger(scAddRecExpr);
    ID.AddPointer(PreStart);
    ID.AddPointer(Step);
    ID.AddPointer(L);
    void *IP = nullptr;
    const auto *PreAR =
        static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));

    // Give up if we don't already have the add recurrence we need because
    // actually constructing an add recurrence is relatively expensive.
    if (PreAR && PreAR->getNoWrapFlags(WrapType)) {  // proves (2)
      const SCEV *DeltaS = getConstant(StartC->getType(), Delta);
      ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
      const SCEV *Limit = ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(
          DeltaS, &Pred, this);
      if (Limit && isKnownPredicate(Pred, PreAR, Limit))  // proves (1)
        return true;
    }
  }

  return false;
}

const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
                                               Type *Ty) {
  assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
         "This is not an extending conversion!");
  assert(isSCEVable(Ty) &&
         "This is not a conversion to a SCEVable type!");
  Ty = getEffectiveSCEVType(Ty);

  // Fold if the operand is constant.
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
    return getConstant(
        cast<ConstantInt>(ConstantExpr::getZExt(SC->getValue(), Ty)));

  // zext(zext(x)) --> zext(x)
  if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
    return getZeroExtendExpr(SZ->getOperand(), Ty);

  // Before doing any expensive analysis, check to see if we've already
  // computed a SCEV for this Op and Ty.
  FoldingSetNodeID ID;
  ID.AddInteger(scZeroExtend);
  ID.AddPointer(Op);
  ID.AddPointer(Ty);
  void *IP = nullptr;
  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;

  // zext(trunc(x)) --> zext(x) or x or trunc(x)
  if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
    // It's possible the bits taken off by the truncate were all zero bits. If
    // so, we should be able to simplify this further.
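    // (illustrative: if %x is an i32 known to lie in [0, 100), then
    // (zext i8 (trunc i32 %x to i8) to i32) folds back to %x, because the
    // truncate discarded only zero bits)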
    const SCEV *X = ST->getOperand();
    ConstantRange CR = getUnsignedRange(X);
    unsigned TruncBits = getTypeSizeInBits(ST->getType());
    unsigned NewBits = getTypeSizeInBits(Ty);
    if (CR.truncate(TruncBits).zeroExtend(NewBits).contains(
            CR.zextOrTrunc(NewBits)))
      return getTruncateOrZeroExtend(X, Ty);
  }
1517 
1518  // If the input value is a chrec scev, and we can prove that the value
1519  // did not overflow the old, smaller, value, we can zero extend all of the
1520  // operands (often constants). This allows analysis of something like
1521  // this: for (unsigned char X = 0; X < 100; ++X) { int Y = X; }
1522  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
1523  if (AR->isAffine()) {
1524  const SCEV *Start = AR->getStart();
1525  const SCEV *Step = AR->getStepRecurrence(*this);
1526  unsigned BitWidth = getTypeSizeInBits(AR->getType());
1527  const Loop *L = AR->getLoop();
1528 
1529  if (!AR->hasNoUnsignedWrap()) {
1530  auto NewFlags = proveNoWrapViaConstantRanges(AR);
1531  const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(NewFlags);
1532  }
1533 
1534  // If we have special knowledge that this addrec won't overflow,
1535  // we don't need to do any further analysis.
1536  if (AR->hasNoUnsignedWrap())
1537  return getAddRecExpr(
1538  getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
1539  getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
1540 
1541  // Check whether the backedge-taken count is SCEVCouldNotCompute.
1542  // Note that this serves two purposes: It filters out loops that are
1543  // simply not analyzable, and it covers the case where this code is
1544  // being called from within backedge-taken count analysis, such that
1545  // attempting to ask for the backedge-taken count would likely result
1546  // in infinite recursion. In the later case, the analysis code will
1547  // cope with a conservative value, and it will take care to purge
1548  // that value once it has finished.
1549  const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
1550  if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
1551  // Manually compute the final value for AR, checking for
1552  // overflow.
1553 
1554  // Check whether the backedge-taken count can be losslessly casted to
1555  // the addrec's type. The count is always unsigned.
1556  const SCEV *CastedMaxBECount =
1557  getTruncateOrZeroExtend(MaxBECount, Start->getType());
1558  const SCEV *RecastedMaxBECount =
1559  getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
1560  if (MaxBECount == RecastedMaxBECount) {
1561  Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
1562  // Check whether Start+Step*MaxBECount has no unsigned overflow.
1563  const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step);
1564  const SCEV *ZAdd = getZeroExtendExpr(getAddExpr(Start, ZMul), WideTy);
1565  const SCEV *WideStart = getZeroExtendExpr(Start, WideTy);
1566  const SCEV *WideMaxBECount =
1567  getZeroExtendExpr(CastedMaxBECount, WideTy);
1568  const SCEV *OperandExtendedAdd =
1569  getAddExpr(WideStart,
1570  getMulExpr(WideMaxBECount,
1571  getZeroExtendExpr(Step, WideTy)));
1572  if (ZAdd == OperandExtendedAdd) {
1573  // Cache knowledge of AR NUW, which is propagated to this AddRec.
1574  const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
1575  // Return the expression with the addrec on the outside.
1576  return getAddRecExpr(
1577  getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
1578  getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
1579  }
1580  // Similar to above, only this time treat the step value as signed.
1581  // This covers loops that count down.
1582  OperandExtendedAdd =
1583  getAddExpr(WideStart,
1584  getMulExpr(WideMaxBECount,
1585  getSignExtendExpr(Step, WideTy)));
1586  if (ZAdd == OperandExtendedAdd) {
1587  // Cache knowledge of AR NW, which is propagated to this AddRec.
1588  // Negative step causes unsigned wrap, but it still can't self-wrap.
1589  const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
1590  // Return the expression with the addrec on the outside.
1591  return getAddRecExpr(
1592  getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
1593  getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
1594  }
1595  }
1596  }
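// Worked instance of the check above (illustrative, not from this file):
// for an i8 addrec with Start = 250, Step = 1, MaxBECount = 10, the narrow
// computation gives ZAdd = zext i16 (250 + 10) = 4 because the i8 add
// wraps, while OperandExtendedAdd = 250 + 10*1 = 260 in i16; 4 != 260, so
// NUW is correctly not inferred. With MaxBECount = 5 both sides are 255,
// the comparison succeeds, and the addrec is proven <nuw>.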
1597 
1598  // Normally, in the cases we can prove no-overflow via a
1599  // backedge guarding condition, we can also compute a backedge
1600  // taken count for the loop. The exceptions are assumptions and
1601  // guards present in the loop -- SCEV is not great at exploiting
1602  // these to compute max backedge taken counts, but can still use
1603  // these to prove lack of overflow. Use this fact to avoid
1604  // doing extra work that may not pay off.
1605  if (!isa<SCEVCouldNotCompute>(MaxBECount) || HasGuards ||
1606  !AC.assumptions().empty()) {
1607  // If the backedge is guarded by a comparison with the pre-inc
1608  // value the addrec is safe. Also, if the entry is guarded by
1609  // a comparison with the start value and the backedge is
1610  // guarded by a comparison with the post-inc value, the addrec
1611  // is safe.
1612  if (isKnownPositive(Step)) {
1613  const SCEV *N = getConstant(APInt::getMinValue(BitWidth) -
1614  getUnsignedRange(Step).getUnsignedMax());
1615  if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) ||
1616  (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_ULT, Start, N) &&
1617  isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT,
1618  AR->getPostIncExpr(*this), N))) {
1619  // Cache knowledge of AR NUW, which is propagated to this
1620  // AddRec.
1621  const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
1622  // Return the expression with the addrec on the outside.
1623  return getAddRecExpr(
1624  getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
1625  getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
1626  }
1627  } else if (isKnownNegative(Step)) {
1628  const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) -
1629  getSignedRange(Step).getSignedMin());
1630  if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) ||
1631  (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_UGT, Start, N) &&
1632  isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT,
1633  AR->getPostIncExpr(*this), N))) {
1634  // Cache knowledge of AR NW, which is propagated to this
1635  // AddRec. Negative step causes unsigned wrap, but it
1636  // still can't self-wrap.
1637  const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
1638  // Return the expression with the addrec on the outside.
1639  return getAddRecExpr(
1640  getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
1641  getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
1642  }
1643  }
1644  }
1645 
1646  if (proveNoWrapByVaryingStart<SCEVZeroExtendExpr>(Start, Step, L)) {
1647  const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
1648  return getAddRecExpr(
1649  getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
1650  getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
1651  }
1652  }
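// Illustrative recap (not from this file): for the loop in the comment
// above, for (unsigned char X = 0; X < 100; ++X) { int Y = X; }, X is the
// i8 affine addrec {0,+,1}<%loop>. Once one of the proofs above
// establishes NUW, zext i32 ({0,+,1}<%loop>) is rewritten into the i32
// addrec {0,+,1}<nuw><%loop>, so Y remains analyzable as an induction
// expression rather than an opaque cast of one.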
1653 
1654  if (auto *SA = dyn_cast<SCEVAddExpr>(Op)) {
1655  // zext((A + B + ...)<nuw>) --> (zext(A) + zext(B) + ...)<nuw>
1656  if (SA->hasNoUnsignedWrap()) {
1657  // If the addition does not unsign overflow then we can, by definition,
1658  // commute the zero extension with the addition operation.
1659  SmallVector<const SCEV *, 4> Ops;
1660  for (const auto *Op : SA->operands())
1661  Ops.push_back(getZeroExtendExpr(Op, Ty));
1662  return getAddExpr(Ops, SCEV::FlagNUW);
1663  }
1664  }
1665 
1666  // The cast wasn't folded; create an explicit cast node.
1667  // Recompute the insert position, as it may have been invalidated.
1668  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
1669  SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator),
1670  Op, Ty);
1671  UniqueSCEVs.InsertNode(S, IP);
1672  return S;
1673 }
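// Usage sketch (hypothetical ScalarEvolution &SE, Value *V, LLVMContext
// &Ctx; not part of this file):
//
//   const SCEV *S = SE.getSCEV(V); // say V is an i8 value
//   const SCEV *Wide = SE.getZeroExtendExpr(S, Type::getInt32Ty(Ctx));
//
// Wide is folded when possible; otherwise it is the uniqued
// SCEVZeroExtendExpr node created above, so pointer equality with other
// structurally identical SCEVs holds.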
1674 
1675 const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
1676  Type *Ty) {
1677  assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
1678  "This is not an extending conversion!");
1679  assert(isSCEVable(Ty) &&
1680  "This is not a conversion to a SCEVable type!");
1681  Ty = getEffectiveSCEVType(Ty);
1682 
1683  // Fold if the operand is constant.
1684  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
1685  return getConstant(
1686  cast<ConstantInt>(ConstantExpr::getSExt(SC->getValue(), Ty)));
1687 
1688  // sext(sext(x)) --> sext(x)
1689  if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
1690  return getSignExtendExpr(SS->getOperand(), Ty);
1691 
1692  // sext(zext(x)) --> zext(x)
1693  if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
1694  return getZeroExtendExpr(SZ->getOperand(), Ty);
1695 
1696  // Before doing any expensive analysis, check to see if we've already
1697  // computed a SCEV for this Op and Ty.
1698  FoldingSetNodeID ID;
1699  ID.AddInteger(scSignExtendExpr);
1700  ID.AddPointer(Op);
1701  ID.AddPointer(Ty);
1702  void *IP = nullptr;
1703  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
1704 
1705  // sext(trunc(x)) --> sext(x) or x or trunc(x)
1706  if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
1707  // It's possible the bits taken off by the truncate were all sign bits. If
1708  // so, we should be able to simplify this further.
1709  const SCEV *X = ST->getOperand();
1710  ConstantRange CR = getSignedRange(X);
1711  unsigned TruncBits = getTypeSizeInBits(ST->getType());
1712  unsigned NewBits = getTypeSizeInBits(Ty);
1713  if (CR.truncate(TruncBits).signExtend(NewBits).contains(
1714  CR.sextOrTrunc(NewBits)))
1715  return getTruncateOrSignExtend(X, Ty);
1716  }
1717 
1718  // sext(C1 + (C2 * x)) --> C1 + sext(C2 * x) if C1 < C2
1719  if (auto *SA = dyn_cast<SCEVAddExpr>(Op)) {
1720  if (SA->getNumOperands() == 2) {
1721  auto *SC1 = dyn_cast<SCEVConstant>(SA->getOperand(0));
1722  auto *SMul = dyn_cast<SCEVMulExpr>(SA->getOperand(1));
1723  if (SMul && SC1) {
1724  if (auto *SC2 = dyn_cast<SCEVConstant>(SMul->getOperand(0))) {
1725  const APInt &C1 = SC1->getAPInt();
1726  const APInt &C2 = SC2->getAPInt();
1727  if (C1.isStrictlyPositive() && C2.isStrictlyPositive() &&
1728  C2.ugt(C1) && C2.isPowerOf2())
1729  return getAddExpr(getSignExtendExpr(SC1, Ty),
1730  getSignExtendExpr(SMul, Ty));
1731  }
1732  }
1733  }
1734 
1735  // sext((A + B + ...)<nsw>) --> (sext(A) + sext(B) + ...)<nsw>
1736  if (SA->hasNoSignedWrap()) {
1737  // If the addition does not sign overflow then we can, by definition,
1738  // commute the sign extension with the addition operation.
1739  SmallVector<const SCEV *, 4> Ops;
1740  for (const auto *Op : SA->operands())
1741  Ops.push_back(getSignExtendExpr(Op, Ty));
1742  return getAddExpr(Ops, SCEV::FlagNSW);
1743  }
1744  }
1745  // If the input value is a chrec scev, and we can prove that the value
1746  // did not overflow the old, smaller value, we can sign extend all of the
1747  // operands (often constants). This allows analysis of something like
1748  // this: for (signed char X = 0; X < 100; ++X) { int Y = X; }
1749  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
1750  if (AR->isAffine()) {
1751  const SCEV *Start = AR->getStart();
1752  const SCEV *Step = AR->getStepRecurrence(*this);
1753  unsigned BitWidth = getTypeSizeInBits(AR->getType());
1754  const Loop *L = AR->getLoop();
1755 
1756  if (!AR->hasNoSignedWrap()) {
1757  auto NewFlags = proveNoWrapViaConstantRanges(AR);
1758  const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(NewFlags);
1759  }
1760 
1761  // If we have special knowledge that this addrec won't overflow,
1762  // we don't need to do any further analysis.
1763  if (AR->hasNoSignedWrap())
1764  return getAddRecExpr(
1765  getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
1766  getSignExtendExpr(Step, Ty), L, SCEV::FlagNSW);
1767 
1768  // Check whether the backedge-taken count is SCEVCouldNotCompute.
1769  // Note that this serves two purposes: It filters out loops that are
1770  // simply not analyzable, and it covers the case where this code is
1771  // being called from within backedge-taken count analysis, such that
1772  // attempting to ask for the backedge-taken count would likely result
1773  // in infinite recursion. In the latter case, the analysis code will
1774  // cope with a conservative value, and it will take care to purge
1775  // that value once it has finished.
1776  const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
1777  if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
1778  // Manually compute the final value for AR, checking for
1779  // overflow.
1780 
1781  // Check whether the backedge-taken count can be losslessly cast to
1782  // the addrec's type. The count is always unsigned.
1783  const SCEV *CastedMaxBECount =
1784  getTruncateOrZeroExtend(MaxBECount, Start->getType());
1785  const SCEV *RecastedMaxBECount =
1786  getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
1787  if (MaxBECount == RecastedMaxBECount) {
1788  Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
1789  // Check whether Start+Step*MaxBECount has no signed overflow.
1790  const SCEV *SMul = getMulExpr(CastedMaxBECount, Step);
1791  const SCEV *SAdd = getSignExtendExpr(getAddExpr(Start, SMul), WideTy);
1792  const SCEV *WideStart = getSignExtendExpr(Start, WideTy);
1793  const SCEV *WideMaxBECount =
1794  getZeroExtendExpr(CastedMaxBECount, WideTy);
1795  const SCEV *OperandExtendedAdd =
1796  getAddExpr(WideStart,
1797  getMulExpr(WideMaxBECount,
1798  getSignExtendExpr(Step, WideTy)));
1799  if (SAdd == OperandExtendedAdd) {
1800  // Cache knowledge of AR NSW, which is propagated to this AddRec.
1801  const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
1802  // Return the expression with the addrec on the outside.
1803  return getAddRecExpr(
1804  getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
1805  getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
1806  }
1807  // Similar to above, only this time treat the step value as unsigned.
1808  // This covers loops that count up with an unsigned step.
1809  OperandExtendedAdd =
1810  getAddExpr(WideStart,
1811  getMulExpr(WideMaxBECount,
1812  getZeroExtendExpr(Step, WideTy)));
1813  if (SAdd == OperandExtendedAdd) {
1814  // If AR wraps around then
1815  //
1816  // abs(Step) * MaxBECount > unsigned-max(AR->getType())
1817  // => SAdd != OperandExtendedAdd
1818  //
1819  // Thus (AR is not NW => SAdd != OperandExtendedAdd) <=>
1820  // (SAdd == OperandExtendedAdd => AR is NW)
1821 
1822  const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
1823 
1824  // Return the expression with the addrec on the outside.
1825  return getAddRecExpr(
1826  getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
1827  getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
1828  }
1829  }
1830  }
1831 
1832  // Normally, in the cases we can prove no-overflow via a
1833  // backedge guarding condition, we can also compute a backedge
1834  // taken count for the loop. The exceptions are assumptions and
1835  // guards present in the loop -- SCEV is not great at exploiting
1836  // these to compute max backedge taken counts, but can still use
1837  // these to prove lack of overflow. Use this fact to avoid
1838  // doing extra work that may not pay off.
1839 
1840  if (!isa<SCEVCouldNotCompute>(MaxBECount) || HasGuards ||
1841  !AC.assumptions().empty()) {
1842  // If the backedge is guarded by a comparison with the pre-inc
1843  // value the addrec is safe. Also, if the entry is guarded by
1844  // a comparison with the start value and the backedge is
1845  // guarded by a comparison with the post-inc value, the addrec
1846  // is safe.
1847  ICmpInst::Predicate Pred;
1848  const SCEV *OverflowLimit =
1849  getSignedOverflowLimitForStep(Step, &Pred, this);
1850  if (OverflowLimit &&
1851  (isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) ||
1852  (isLoopEntryGuardedByCond(L, Pred, Start, OverflowLimit) &&
1853  isLoopBackedgeGuardedByCond(L, Pred, AR->getPostIncExpr(*this),
1854  OverflowLimit)))) {
1855  // Cache knowledge of AR NSW, then propagate NSW to the wide AddRec.
1856  const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
1857  return getAddRecExpr(
1858  getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
1859  getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
1860  }
1861  }
1862 
1863  // If Start and Step are constants, check if we can apply this
1864  // transformation:
1865  // sext{C1,+,C2} --> C1 + sext{0,+,C2} if C1 < C2
1866  auto *SC1 = dyn_cast<SCEVConstant>(Start);
1867  auto *SC2 = dyn_cast<SCEVConstant>(Step);
1868  if (SC1 && SC2) {
1869  const APInt &C1 = SC1->getAPInt();
1870  const APInt &C2 = SC2->getAPInt();
1871  if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && C2.ugt(C1) &&
1872  C2.isPowerOf2()) {
1873  Start = getSignExtendExpr(Start, Ty);
1874  const SCEV *NewAR = getAddRecExpr(getZero(AR->getType()), Step, L,
1875  AR->getNoWrapFlags());
1876  return getAddExpr(Start, getSignExtendExpr(NewAR, Ty));
1877  }
1878  }
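// Concrete instance of the transformation above (illustrative, not from
// this file): sext i32 ({2,+,4}<%L> of type i8) becomes
// 2 + sext i32 ({0,+,4}<%L>), which requires 0 < C1 = 2 < C2 = 4 with C2 a
// power of two, so that hoisting the constant offset out of the extension
// cannot change where the recurrence crosses a sign boundary.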
1879 
1880  if (proveNoWrapByVaryingStart<SCEVSignExtendExpr>(Start, Step, L)) {
1881  const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
1882  return getAddRecExpr(
1883  getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
1884  getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
1885  }
1886  }
1887 
1888  // If the input value is provably non-negative and we could not simplify
1889  // away the sext, build a zext instead.
1890  if (isKnownNonNegative(Op))
1891  return getZeroExtendExpr(Op, Ty);
1892 
1893  // The cast wasn't folded; create an explicit cast node.
1894  // Recompute the insert position, as it may have been invalidated.
1895  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
1896  SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator),
1897  Op, Ty);
1898  UniqueSCEVs.InsertNode(S, IP);
1899  return S;
1900 }
1901 
1902 /// getAnyExtendExpr - Return a SCEV for the given operand extended with
1903 /// unspecified bits out to the given type.
1904 ///
1905 const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
1906  Type *Ty) {
1907  assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
1908  "This is not an extending conversion!");
1909  assert(isSCEVable(Ty) &&
1910  "This is not a conversion to a SCEVable type!");
1911  Ty = getEffectiveSCEVType(Ty);
1912 
1913  // Sign-extend negative constants.
1914  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
1915  if (SC->getAPInt().isNegative())
1916  return getSignExtendExpr(Op, Ty);
1917 
1918  // Peel off a truncate cast.
1919  if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Op)) {
1920  const SCEV *NewOp = T->getOperand();
1921  if (getTypeSizeInBits(NewOp->getType()) < getTypeSizeInBits(Ty))
1922  return getAnyExtendExpr(NewOp, Ty);
1923  return getTruncateOrNoop(NewOp, Ty);
1924  }
1925 
1926  // Next try a zext cast. If the cast is folded, use it.
1927  const SCEV *ZExt = getZeroExtendExpr(Op, Ty);
1928  if (!isa<SCEVZeroExtendExpr>(ZExt))
1929  return ZExt;
1930 
1931  // Next try a sext cast. If the cast is folded, use it.
1932  const SCEV *SExt = getSignExtendExpr(Op, Ty);
1933  if (!isa<SCEVSignExtendExpr>(SExt))
1934  return SExt;
1935 
1936  // Force the cast to be folded into the operands of an addrec.
1937  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) {
1938  SmallVector<const SCEV *, 4> Ops;
1939  for (const SCEV *Op : AR->operands())
1940  Ops.push_back(getAnyExtendExpr(Op, Ty));
1941  return getAddRecExpr(Ops, AR->getLoop(), SCEV::FlagNW);
1942  }
1943 
1944  // If the expression is obviously signed, use the sext cast value.
1945  if (isa<SCEVSMaxExpr>(Op))
1946  return SExt;
1947 
1948  // Absent any other information, use the zext cast value.
1949  return ZExt;
1950 }
1951 
1952 /// Process the given Ops list, which is a list of operands to be added under
1953 /// the given scale, and update the given map. This is a helper function for
1954 /// getAddExpr. As an example of what it does, given a sequence of operands
1955 /// that would form an add expression like this:
1956 ///
1957 /// m + n + 13 + (A * (o + p + (B * (q + m + 29)))) + r + (-1 * r)
1958 ///
1959 /// where A and B are constants, update the map with these values:
1960 ///
1961 /// (m, 1+A*B), (n, 1), (o, A), (p, A), (q, A*B), (r, 0)
1962 ///
1963 /// and add 13 + A*B*29 to AccumulatedConstant.
1964 /// This will allow getAddExpr to produce this:
1965 ///
1966 /// 13+A*B*29 + n + (m * (1+A*B)) + ((o + p) * A) + (q * A*B)
1967 ///
1968 /// This form often exposes folding opportunities that are hidden in
1969 /// the original operand list.
1970 ///
1971 /// Return true iff it appears that any interesting folding opportunities
1972 /// may be exposed. This helps getAddExpr short-circuit extra work in
1973 /// the common case where no interesting opportunities are present, and
1974 /// is also used as a check to avoid infinite recursion.
1975 ///
1976 static bool
1977 CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,
1978  SmallVectorImpl<const SCEV *> &NewOps,
1979  APInt &AccumulatedConstant,
1980  const SCEV *const *Ops, size_t NumOperands,
1981  const APInt &Scale,
1982  ScalarEvolution &SE) {
1983  bool Interesting = false;
1984 
1985  // Iterate over the add operands. They are sorted, with constants first.
1986  unsigned i = 0;
1987  while (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
1988  ++i;
1989  // Pull a buried constant out to the outside.
1990  if (Scale != 1 || AccumulatedConstant != 0 || C->getValue()->isZero())
1991  Interesting = true;
1992  AccumulatedConstant += Scale * C->getAPInt();
1993  }
1994 
1995  // Next comes everything else. We're especially interested in multiplies
1996  // here, but they're in the middle, so just visit the rest with one loop.
1997  for (; i != NumOperands; ++i) {
1998  const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[i]);
1999  if (Mul && isa<SCEVConstant>(Mul->getOperand(0))) {
2000  APInt NewScale =
2001  Scale * cast<SCEVConstant>(Mul->getOperand(0))->getAPInt();
2002  if (Mul->getNumOperands() == 2 && isa<SCEVAddExpr>(Mul->getOperand(1))) {
2003  // A multiplication of a constant with another add; recurse.
2004  const SCEVAddExpr *Add = cast<SCEVAddExpr>(Mul->getOperand(1));
2005  Interesting |=
2006  CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
2007  Add->op_begin(), Add->getNumOperands(),
2008  NewScale, SE);
2009  } else {
2010  // A multiplication of a constant with some other value. Update
2011  // the map.
2012  SmallVector<const SCEV *, 4> MulOps(Mul->op_begin()+1, Mul->op_end());
2013  const SCEV *Key = SE.getMulExpr(MulOps);
2014  auto Pair = M.insert({Key, NewScale});
2015  if (Pair.second) {
2016  NewOps.push_back(Pair.first->first);
2017  } else {
2018  Pair.first->second += NewScale;
2019  // The map already had an entry for this value, which may indicate
2020  // a folding opportunity.
2021  Interesting = true;
2022  }
2023  }
2024  } else {
2025  // An ordinary operand. Update the map.
2026  std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair =
2027  M.insert({Ops[i], Scale});
2028  if (Pair.second) {
2029  NewOps.push_back(Pair.first->first);
2030  } else {
2031  Pair.first->second += Scale;
2032  // The map already had an entry for this value, which may indicate
2033  // a folding opportunity.
2034  Interesting = true;
2035  }
2036  }
2037  }
2038 
2039  return Interesting;
2040 }
2041 
2042 // We're trying to construct a SCEV of type `Type' with `Ops' as operands and
2043 // `OldFlags' as can't-wrap behavior. Infer a more aggressive set of
2044 // can't-overflow flags for the operation if possible.
2045 static SCEV::NoWrapFlags
2046 StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type,
2047  const SmallVectorImpl<const SCEV *> &Ops,
2048  SCEV::NoWrapFlags Flags) {
2049  using namespace std::placeholders;
2050  typedef OverflowingBinaryOperator OBO;
2051 
2052  bool CanAnalyze =
2053  Type == scAddExpr || Type == scAddRecExpr || Type == scMulExpr;
2054  (void)CanAnalyze;
2055  assert(CanAnalyze && "don't call from other places!");
2056 
2057  int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
2058  SCEV::NoWrapFlags SignOrUnsignWrap =
2059  ScalarEvolution::maskFlags(Flags, SignOrUnsignMask);
2060 
2061  // If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
2062  auto IsKnownNonNegative = [&](const SCEV *S) {
2063  return SE->isKnownNonNegative(S);
2064  };
2065 
2066  if (SignOrUnsignWrap == SCEV::FlagNSW && all_of(Ops, IsKnownNonNegative))
2067  Flags =
2068  ScalarEvolution::setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask);
2069 
2070  SignOrUnsignWrap = ScalarEvolution::maskFlags(Flags, SignOrUnsignMask);
2071 
2072  if (SignOrUnsignWrap != SignOrUnsignMask && Type == scAddExpr &&
2073  Ops.size() == 2 && isa<SCEVConstant>(Ops[0])) {
2074 
2075  // (A + C) --> (A + C)<nsw> if the addition does not sign overflow
2076  // (A + C) --> (A + C)<nuw> if the addition does not unsign overflow
2077 
2078  const APInt &C = cast<SCEVConstant>(Ops[0])->getAPInt();
2079  if (!(SignOrUnsignWrap & SCEV::FlagNSW)) {
2080  auto NSWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
2081  Instruction::Add, C, OBO::NoSignedWrap);
2082  if (NSWRegion.contains(SE->getSignedRange(Ops[1])))
2083  Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW);
2084  }
2085  if (!(SignOrUnsignWrap & SCEV::FlagNUW)) {
2086  auto NUWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
2087  Instruction::Add, C, OBO::NoUnsignedWrap);
2088  if (NUWRegion.contains(SE->getUnsignedRange(Ops[1])))
2089  Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW);
2090  }
2091  }
2092 
2093  return Flags;
2094 }
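// Example of the strengthening above (illustrative, not from this file):
// for i8 operands A in [0, 50] and B in [0, 60], an add carrying only
// <nsw> is upgraded to <nuw><nsw>, since both operands are known
// non-negative and a signed-no-overflow sum (<= 127) cannot wrap unsigned
// either. Likewise (10 + A) gains <nuw> because the unsigned range of A
// lies inside makeGuaranteedNoWrapRegion(Add, 10, NoUnsignedWrap), i.e.
// [0, 246) for i8.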
2095 
2096 /// Get a canonical add expression, or something simpler if possible.
2097 const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
2098  SCEV::NoWrapFlags Flags) {
2099  assert(!(Flags & ~(SCEV::FlagNUW | SCEV::FlagNSW)) &&
2100  "only nuw or nsw allowed");
2101  assert(!Ops.empty() && "Cannot get empty add!");
2102  if (Ops.size() == 1) return Ops[0];
2103 #ifndef NDEBUG
2104  Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
2105  for (unsigned i = 1, e = Ops.size(); i != e; ++i)
2106  assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
2107  "SCEVAddExpr operand types don't match!");
2108 #endif
2109 
2110  // Sort by complexity, this groups all similar expression types together.
2111  GroupByComplexity(Ops, &LI);
2112 
2113  Flags = StrengthenNoWrapFlags(this, scAddExpr, Ops, Flags);
2114 
2115  // If there are any constants, fold them together.
2116  unsigned Idx = 0;
2117  if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
2118  ++Idx;
2119  assert(Idx < Ops.size());
2120  while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
2121  // We found two constants, fold them together!
2122  Ops[0] = getConstant(LHSC->getAPInt() + RHSC->getAPInt());
2123  if (Ops.size() == 2) return Ops[0];
2124  Ops.erase(Ops.begin()+1); // Erase the folded element
2125  LHSC = cast<SCEVConstant>(Ops[0]);
2126  }
2127 
2128  // If we are left with a constant zero being added, strip it off.
2129  if (LHSC->getValue()->isZero()) {
2130  Ops.erase(Ops.begin());
2131  --Idx;
2132  }
2133 
2134  if (Ops.size() == 1) return Ops[0];
2135  }
2136 
2137  // Okay, check to see if the same value occurs in the operand list more than
2138  // once. If so, merge them together into a multiply expression. Since we
2139  // sorted the list, these values are required to be adjacent.
2140  Type *Ty = Ops[0]->getType();
2141  bool FoundMatch = false;
2142  for (unsigned i = 0, e = Ops.size(); i != e-1; ++i)
2143  if (Ops[i] == Ops[i+1]) { // X + Y + Y --> X + Y*2
2144  // Scan ahead to count how many equal operands there are.
2145  unsigned Count = 2;
2146  while (i+Count != e && Ops[i+Count] == Ops[i])
2147  ++Count;
2148  // Merge the values into a multiply.
2149  const SCEV *Scale = getConstant(Ty, Count);
2150  const SCEV *Mul = getMulExpr(Scale, Ops[i]);
2151  if (Ops.size() == Count)
2152  return Mul;
2153  Ops[i] = Mul;
2154  Ops.erase(Ops.begin()+i+1, Ops.begin()+i+Count);
2155  --i; e -= Count - 1;
2156  FoundMatch = true;
2157  }
2158  if (FoundMatch)
2159  return getAddExpr(Ops, Flags);
2160 
2161  // Check for truncates. If all the operands are truncated from the same
2162  // type, see if factoring out the truncate would permit the result to be
2163  // folded. E.g., trunc(x) + m*trunc(n) --> trunc(x + trunc(m)*n)
2164  // if the contents of the resulting outer trunc fold to something simple.
2165  for (; Idx < Ops.size() && isa<SCEVTruncateExpr>(Ops[Idx]); ++Idx) {
2166  const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(Ops[Idx]);
2167  Type *DstType = Trunc->getType();
2168  Type *SrcType = Trunc->getOperand()->getType();
2169  SmallVector<const SCEV *, 8> LargeOps;
2170  bool Ok = true;
2171  // Check all the operands to see if they can be represented in the
2172  // source type of the truncate.
2173  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
2174  if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Ops[i])) {
2175  if (T->getOperand()->getType() != SrcType) {
2176  Ok = false;
2177  break;
2178  }
2179  LargeOps.push_back(T->getOperand());
2180  } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
2181  LargeOps.push_back(getAnyExtendExpr(C, SrcType));
2182  } else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Ops[i])) {
2183  SmallVector<const SCEV *, 8> LargeMulOps;
2184  for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) {
2185  if (const SCEVTruncateExpr *T =
2186  dyn_cast<SCEVTruncateExpr>(M->getOperand(j))) {
2187  if (T->getOperand()->getType() != SrcType) {
2188  Ok = false;
2189  break;
2190  }
2191  LargeMulOps.push_back(T->getOperand());
2192  } else if (const auto *C = dyn_cast<SCEVConstant>(M->getOperand(j))) {
2193  LargeMulOps.push_back(getAnyExtendExpr(C, SrcType));
2194  } else {
2195  Ok = false;
2196  break;
2197  }
2198  }
2199  if (Ok)
2200  LargeOps.push_back(getMulExpr(LargeMulOps));
2201  } else {
2202  Ok = false;
2203  break;
2204  }
2205  }
2206  if (Ok) {
2207  // Evaluate the expression in the larger type.
2208  const SCEV *Fold = getAddExpr(LargeOps, Flags);
2209  // If it folds to something simple, use it. Otherwise, don't.
2210  if (isa<SCEVConstant>(Fold) || isa<SCEVUnknown>(Fold))
2211  return getTruncateExpr(Fold, DstType);
2212  }
2213  }
2214 
2215  // Skip past any other cast SCEVs.
2216  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddExpr)
2217  ++Idx;
2218 
2219  // If there are add operands they would be next.
2220  if (Idx < Ops.size()) {
2221  bool DeletedAdd = false;
2222  while (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[Idx])) {
2223  // If we have an add, expand the add operands onto the end of the operands
2224  // list.
2225  Ops.erase(Ops.begin()+Idx);
2226  Ops.append(Add->op_begin(), Add->op_end());
2227  DeletedAdd = true;
2228  }
2229 
2230  // If we deleted at least one add, we added operands to the end of the list,
2231  // and they are not necessarily sorted. Recurse to resort and resimplify
2232  // any operands we just acquired.
2233  if (DeletedAdd)
2234  return getAddExpr(Ops);
2235  }
2236 
2237  // Skip over the add expression until we get to a multiply.
2238  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
2239  ++Idx;
2240 
2241  // Check to see if there are any folding opportunities present with
2242  // operands multiplied by constant values.
2243  if (Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx])) {
2244  uint64_t BitWidth = getTypeSizeInBits(Ty);
2245  DenseMap<const SCEV *, APInt> M;
2246  SmallVector<const SCEV *, 8> NewOps;
2247  APInt AccumulatedConstant(BitWidth, 0);
2248  if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
2249  Ops.data(), Ops.size(),
2250  APInt(BitWidth, 1), *this)) {
2251  struct APIntCompare {
2252  bool operator()(const APInt &LHS, const APInt &RHS) const {
2253  return LHS.ult(RHS);
2254  }
2255  };
2256 
2257  // Some interesting folding opportunity is present, so it's worthwhile to
2258  // re-generate the operands list. Group the operands by constant scale,
2259  // to avoid multiplying by the same constant scale multiple times.
2260  std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare> MulOpLists;
2261  for (const SCEV *NewOp : NewOps)
2262  MulOpLists[M.find(NewOp)->second].push_back(NewOp);
2263  // Re-generate the operands list.
2264  Ops.clear();
2265  if (AccumulatedConstant != 0)
2266  Ops.push_back(getConstant(AccumulatedConstant));
2267  for (auto &MulOp : MulOpLists)
2268  if (MulOp.first != 0)
2269  Ops.push_back(getMulExpr(getConstant(MulOp.first),
2270  getAddExpr(MulOp.second)));
2271  if (Ops.empty())
2272  return getZero(Ty);
2273  if (Ops.size() == 1)
2274  return Ops[0];
2275  return getAddExpr(Ops);
2276  }
2277  }
2278 
2279  // If we are adding something to a multiply expression, check whether the
2280  // something is already an operand of the multiply. If so, merge it into
2281  // the multiply.
2282  for (; Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx]); ++Idx) {
2283  const SCEVMulExpr *Mul = cast<SCEVMulExpr>(Ops[Idx]);
2284  for (unsigned MulOp = 0, e = Mul->getNumOperands(); MulOp != e; ++MulOp) {
2285  const SCEV *MulOpSCEV = Mul->getOperand(MulOp);
2286  if (isa<SCEVConstant>(MulOpSCEV))
2287  continue;
2288  for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp)
2289  if (MulOpSCEV == Ops[AddOp]) {
2290  // Fold W + X + (X * Y * Z) --> W + (X * ((Y*Z)+1))
2291  const SCEV *InnerMul = Mul->getOperand(MulOp == 0);
2292  if (Mul->getNumOperands() != 2) {
2293  // If the multiply has more than two operands, we must get the
2294  // Y*Z term.
2295  SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
2296  Mul->op_begin()+MulOp);
2297  MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
2298  InnerMul = getMulExpr(MulOps);
2299  }
2300  const SCEV *One = getOne(Ty);
2301  const SCEV *AddOne = getAddExpr(One, InnerMul);
2302  const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV);
2303  if (Ops.size() == 2) return OuterMul;
2304  if (AddOp < Idx) {
2305  Ops.erase(Ops.begin()+AddOp);
2306  Ops.erase(Ops.begin()+Idx-1);
2307  } else {
2308  Ops.erase(Ops.begin()+Idx);
2309  Ops.erase(Ops.begin()+AddOp-1);
2310  }
2311  Ops.push_back(OuterMul);
2312  return getAddExpr(Ops);
2313  }
2314 
2315  // Check this multiply against other multiplies being added together.
2316  for (unsigned OtherMulIdx = Idx+1;
2317  OtherMulIdx < Ops.size() && isa<SCEVMulExpr>(Ops[OtherMulIdx]);
2318  ++OtherMulIdx) {
2319  const SCEVMulExpr *OtherMul = cast<SCEVMulExpr>(Ops[OtherMulIdx]);
2320  // If MulOp occurs in OtherMul, we can fold the two multiplies
2321  // together.
2322  for (unsigned OMulOp = 0, e = OtherMul->getNumOperands();
2323  OMulOp != e; ++OMulOp)
2324  if (OtherMul->getOperand(OMulOp) == MulOpSCEV) {
2325  // Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E))
2326  const SCEV *InnerMul1 = Mul->getOperand(MulOp == 0);
2327  if (Mul->getNumOperands() != 2) {
2328  SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
2329  Mul->op_begin()+MulOp);
2330  MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
2331  InnerMul1 = getMulExpr(MulOps);
2332  }
2333  const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0);
2334  if (OtherMul->getNumOperands() != 2) {
2335  SmallVector<const SCEV *, 4> MulOps(OtherMul->op_begin(),
2336  OtherMul->op_begin()+OMulOp);
2337  MulOps.append(OtherMul->op_begin()+OMulOp+1, OtherMul->op_end());
2338  InnerMul2 = getMulExpr(MulOps);
2339  }
2340  const SCEV *InnerMulSum = getAddExpr(InnerMul1,InnerMul2);
2341  const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum);
2342  if (Ops.size() == 2) return OuterMul;
2343  Ops.erase(Ops.begin()+Idx);
2344  Ops.erase(Ops.begin()+OtherMulIdx-1);
2345  Ops.push_back(OuterMul);
2346  return getAddExpr(Ops);
2347  }
2348  }
2349  }
2350  }
2351 
2352  // If there are any add recurrences in the operands list, see if any other
2353  // added values are loop invariant. If so, we can fold them into the
2354  // recurrence.
2355  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
2356  ++Idx;
2357 
2358  // Scan over all recurrences, trying to fold loop invariants into them.
2359  for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
2360  // Scan all of the other operands to this add and add them to the vector if
2361  // they are loop invariant w.r.t. the recurrence.
2362  SmallVector<const SCEV *, 8> LIOps;
2363  const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
2364  const Loop *AddRecLoop = AddRec->getLoop();
2365  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
2366  if (isLoopInvariant(Ops[i], AddRecLoop)) {
2367  LIOps.push_back(Ops[i]);
2368  Ops.erase(Ops.begin()+i);
2369  --i; --e;
2370  }
2371 
2372  // If we found some loop invariants, fold them into the recurrence.
2373  if (!LIOps.empty()) {
2374  // NLI + LI + {Start,+,Step} --> NLI + {LI+Start,+,Step}
2375  LIOps.push_back(AddRec->getStart());
2376 
2377  SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
2378  AddRec->op_end());
2379  // This follows from the fact that the no-wrap flags on the outer add
2380  // expression are applicable on the 0th iteration, when the add recurrence
2381  // will be equal to its start value.
2382  AddRecOps[0] = getAddExpr(LIOps, Flags);
2383 
2384  // Build the new addrec. Propagate the NUW and NSW flags if both the
2385  // outer add and the inner addrec are guaranteed to have no overflow.
2386  // Always propagate NW.
2387  Flags = AddRec->getNoWrapFlags(setFlags(Flags, SCEV::FlagNW));
2388  const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop, Flags);
2389 
2390  // If all of the other operands were loop invariant, we are done.
2391  if (Ops.size() == 1) return NewRec;
2392 
2393  // Otherwise, add the folded AddRec by the non-invariant parts.
2394  for (unsigned i = 0;; ++i)
2395  if (Ops[i] == AddRec) {
2396  Ops[i] = NewRec;
2397  break;
2398  }
2399  return getAddExpr(Ops);
2400  }
2401 
2402  // Okay, if there weren't any loop invariants to be folded, check to see if
2403  // there are multiple AddRec's with the same loop induction variable being
2404  // added together. If so, we can fold them.
2405  for (unsigned OtherIdx = Idx+1;
2406  OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
2407  ++OtherIdx)
2408  if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
2409  // Other + {A,+,B}<L> + {C,+,D}<L> --> Other + {A+C,+,B+D}<L>
2410  SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
2411  AddRec->op_end());
2412  for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
2413  ++OtherIdx)
2414  if (const auto *OtherAddRec = dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]))
2415  if (OtherAddRec->getLoop() == AddRecLoop) {
2416  for (unsigned i = 0, e = OtherAddRec->getNumOperands();
2417  i != e; ++i) {
2418  if (i >= AddRecOps.size()) {
2419  AddRecOps.append(OtherAddRec->op_begin()+i,
2420  OtherAddRec->op_end());
2421  break;
2422  }
2423  AddRecOps[i] = getAddExpr(AddRecOps[i],
2424  OtherAddRec->getOperand(i));
2425  }
2426  Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
2427  }
2428  // Step size has changed, so we cannot guarantee no self-wraparound.
2429  Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap);
2430  return getAddExpr(Ops);
2431  }
2432 
2433  // Otherwise couldn't fold anything into this recurrence. Move onto the
2434  // next one.
2435  }
2436 
2437  // Okay, it looks like we really DO need an add expr. Check to see if we
2438  // already have one, otherwise create a new one.
2439  FoldingSetNodeID ID;
2440  ID.AddInteger(scAddExpr);
2441  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
2442  ID.AddPointer(Ops[i]);
2443  void *IP = nullptr;
2444  SCEVAddExpr *S =
2445  static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
2446  if (!S) {
2447  const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
2448  std::uninitialized_copy(Ops.begin(), Ops.end(), O);
2449  S = new (SCEVAllocator) SCEVAddExpr(ID.Intern(SCEVAllocator),
2450  O, Ops.size());
2451  UniqueSCEVs.InsertNode(S, IP);
2452  }
2453  S->setNoWrapFlags(Flags);
2454  return S;
2455 }
2456 
2457 static uint64_t umul_ov(uint64_t i, uint64_t j, bool &Overflow) {
2458  uint64_t k = i*j;
2459  if (j > 1 && k / j != i) Overflow = true;
2460  return k;
2461 }
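// For example (illustrative): umul_ov(1ULL << 33, 1ULL << 33, Ov) returns
// 0 and sets Ov, since the true 2^66 product wraps and k / j == 0 != i.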
2462 
2463 /// Compute the result of "n choose k", the binomial coefficient. If an
2464 /// intermediate computation overflows, Overflow will be set and the return will
2465 /// be garbage. Overflow is not cleared on absence of overflow.
2466 static uint64_t Choose(uint64_t n, uint64_t k, bool &Overflow) {
2467  // We use the multiplicative formula:
2468  // n(n-1)(n-2)...(n-(k-1)) / k(k-1)(k-2)...1.
2469  // At each iteration, we take the n-th term of the numerator and divide by the
2470  // (k-n)th term of the denominator. This division will always produce an
2471  // integral result, and helps reduce the chance of overflow in the
2472  // intermediate computations. However, we can still overflow even when the
2473  // final result would fit.
2474 
2475  if (n == 0 || n == k) return 1;
2476  if (k > n) return 0;
2477 
2478  if (k > n/2)
2479  k = n-k;
2480 
2481  uint64_t r = 1;
2482  for (uint64_t i = 1; i <= k; ++i) {
2483  r = umul_ov(r, n-(i-1), Overflow);
2484  r /= i;
2485  }
2486  return r;
2487 }
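// Worked example (illustrative, not from this file): Choose(6, 2, Ov)
// keeps k = 2 (already <= n/2) and computes r = 1*6/1 = 6, then
// r = 6*5/2 = 15. After step i, r equals C(n, i) exactly, so the division
// by i is always exact: a product of i consecutive integers is divisible
// by i!.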
2488 
2489 /// Determine if any of the operands in this SCEV are a constant or if
2490 /// any of the add or multiply expressions in this SCEV contain a constant.
2491 static bool containsConstantSomewhere(const SCEV *StartExpr) {
2492  SmallVector<const SCEV *, 4> Ops;
2493  Ops.push_back(StartExpr);
2494  while (!Ops.empty()) {
2495  const SCEV *CurrentExpr = Ops.pop_back_val();
2496  if (isa<SCEVConstant>(*CurrentExpr))
2497  return true;
2498 
2499  if (isa<SCEVAddExpr>(*CurrentExpr) || isa<SCEVMulExpr>(*CurrentExpr)) {
2500  const auto *CurrentNAry = cast<SCEVNAryExpr>(CurrentExpr);
2501  Ops.append(CurrentNAry->op_begin(), CurrentNAry->op_end());
2502  }
2503  }
2504  return false;
2505 }
2506 
2507 /// Get a canonical multiply expression, or something simpler if possible.
2508 const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
2509  SCEV::NoWrapFlags Flags) {
2510  assert(Flags == maskFlags(Flags, SCEV::FlagNUW | SCEV::FlagNSW) &&
2511  "only nuw or nsw allowed");
2512  assert(!Ops.empty() && "Cannot get empty mul!");
2513  if (Ops.size() == 1) return Ops[0];
2514 #ifndef NDEBUG
2515  Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
2516  for (unsigned i = 1, e = Ops.size(); i != e; ++i)
2517  assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
2518  "SCEVMulExpr operand types don't match!");
2519 #endif
2520 
2521  // Sort by complexity, this groups all similar expression types together.
2522  GroupByComplexity(Ops, &LI);
2523 
2524  Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags);
2525 
2526  // If there are any constants, fold them together.
2527  unsigned Idx = 0;
2528  if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
2529 
2530  // C1*(C2+V) -> C1*C2 + C1*V
2531  if (Ops.size() == 2)
2532  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1]))
2533  // If any of Add's ops are Adds or Muls with a constant,
2534  // apply this transformation as well.
2535  if (Add->getNumOperands() == 2)
2536  if (containsConstantSomewhere(Add))
2537  return getAddExpr(getMulExpr(LHSC, Add->getOperand(0)),
2538  getMulExpr(LHSC, Add->getOperand(1)));
2539 
2540  ++Idx;
2541  while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
2542  // We found two constants, fold them together!
2543  ConstantInt *Fold =
2544  ConstantInt::get(getContext(), LHSC->getAPInt() * RHSC->getAPInt());
2545  Ops[0] = getConstant(Fold);
2546  Ops.erase(Ops.begin()+1); // Erase the folded element
2547  if (Ops.size() == 1) return Ops[0];
2548  LHSC = cast<SCEVConstant>(Ops[0]);
2549  }
2550 
2551  // If we are left with a constant one being multiplied, strip it off.
2552  if (cast<SCEVConstant>(Ops[0])->getValue()->equalsInt(1)) {
2553  Ops.erase(Ops.begin());
2554  --Idx;
2555  } else if (cast<SCEVConstant>(Ops[0])->getValue()->isZero()) {
2556  // If we have a multiply of zero, it will always be zero.
2557  return Ops[0];
2558  } else if (Ops[0]->isAllOnesValue()) {
2559  // If we have a mul by -1 of an add, try distributing the -1 among the
2560  // add operands.
2561  if (Ops.size() == 2) {
2562  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) {
2563  SmallVector<const SCEV *, 4> NewOps;
2564  bool AnyFolded = false;
2565  for (const SCEV *AddOp : Add->operands()) {
2566  const SCEV *Mul = getMulExpr(Ops[0], AddOp);
2567  if (!isa<SCEVMulExpr>(Mul)) AnyFolded = true;
2568  NewOps.push_back(Mul);
2569  }
2570  if (AnyFolded)
2571  return getAddExpr(NewOps);
2572  } else if (const auto *AddRec = dyn_cast<SCEVAddRecExpr>(Ops[1])) {
2573  // Negation preserves a recurrence's no self-wrap property.
2574  SmallVector<const SCEV *, 4> Operands;
2575  for (const SCEV *AddRecOp : AddRec->operands())
2576  Operands.push_back(getMulExpr(Ops[0], AddRecOp));
2577 
2578  return getAddRecExpr(Operands, AddRec->getLoop(),
2579  AddRec->getNoWrapFlags(SCEV::FlagNW));
2580  }
2581  }
2582  }
2583 
2584  if (Ops.size() == 1)
2585  return Ops[0];
2586  }
2587 
2588  // Skip over the add expression until we get to a multiply.
2589  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
2590  ++Idx;
2591 
2592  // If there are mul operands inline them all into this expression.
2593  if (Idx < Ops.size()) {
2594  bool DeletedMul = false;
2595  while (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) {
2596  if (Ops.size() > MulOpsInlineThreshold)
2597  break;
2598  // If we have a mul, expand the mul operands onto the end of the operands
2599  // list.
2600  Ops.erase(Ops.begin()+Idx);
2601  Ops.append(Mul->op_begin(), Mul->op_end());
2602  DeletedMul = true;
2603  }
2604 
2605  // If we deleted at least one mul, we added operands to the end of the list,
2606  // and they are not necessarily sorted. Recurse to resort and resimplify
2607  // any operands we just acquired.
2608  if (DeletedMul)
2609  return getMulExpr(Ops);
2610  }
2611 
2612  // If there are any add recurrences in the operands list, see if any other
2613  // added values are loop invariant. If so, we can fold them into the
2614  // recurrence.
2615  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
2616  ++Idx;
2617 
2618  // Scan over all recurrences, trying to fold loop invariants into them.
2619  for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
2620  // Scan all of the other operands to this mul and add them to the vector if
2621  // they are loop invariant w.r.t. the recurrence.
2622  SmallVector<const SCEV *, 8> LIOps;
2623  const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
2624  const Loop *AddRecLoop = AddRec->getLoop();
2625  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
2626  if (isLoopInvariant(Ops[i], AddRecLoop)) {
2627  LIOps.push_back(Ops[i]);
2628  Ops.erase(Ops.begin()+i);
2629  --i; --e;
2630  }
2631 
2632  // If we found some loop invariants, fold them into the recurrence.
2633  if (!LIOps.empty()) {
2634  // NLI * LI * {Start,+,Step} --> NLI * {LI*Start,+,LI*Step}
2635  SmallVector<const SCEV *, 4> NewOps;
2636  NewOps.reserve(AddRec->getNumOperands());
2637  const SCEV *Scale = getMulExpr(LIOps);
2638  for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
2639  NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i)));
2640 
2641  // Build the new addrec. Propagate the NUW and NSW flags if both the
2642  // outer mul and the inner addrec are guaranteed to have no overflow.
2643  //
2644  // No self-wrap cannot be guaranteed after changing the step size, but
2645  // will be inferred if either NUW or NSW is true.
2646  Flags = AddRec->getNoWrapFlags(clearFlags(Flags, SCEV::FlagNW));
2647  const SCEV *NewRec = getAddRecExpr(NewOps, AddRecLoop, Flags);
2648 
2649  // If all of the other operands were loop invariant, we are done.
2650  if (Ops.size() == 1) return NewRec;
2651 
2652  // Otherwise, multiply the folded AddRec by the non-invariant parts.
2653  for (unsigned i = 0;; ++i)
2654  if (Ops[i] == AddRec) {
2655  Ops[i] = NewRec;
2656  break;
2657  }
2658  return getMulExpr(Ops);
2659  }
2660 
2661  // Okay, if there weren't any loop invariants to be folded, check to see if
2662  // there are multiple AddRec's with the same loop induction variable being
2663  // multiplied together. If so, we can fold them.
2664 
2665  // {A1,+,A2,+,...,+,An}<L> * {B1,+,B2,+,...,+,Bn}<L>
2666  // = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [
2667  // choose(x, 2x)*choose(2x-y, x-z)*A_{y-z}*B_z
2668  // ]]],+,...up to x=2n}.
2669  // Note that the arguments to choose() are always integers with values
2670  // known at compile time, never SCEV objects.
2671  //
2672  // The implementation avoids pointless extra computations when the two
2673  // addrec's are of different length (mathematically, it's equivalent to
2674  // an infinite stream of zeros on the right).
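// Worked instance of the formula above for two affine addrecs
// (illustrative, not from this file):
//
//   {A1,+,A2}<L> * {B1,+,B2}<L>
//     = {A1*B1,+,A1*B2 + A2*B1 + A2*B2,+,2*A2*B2}<L>
//
// since f(n) = A1 + A2*n times g(n) = B1 + B2*n is quadratic in the
// iteration number, and a degree-2 chrec {c0,+,c1,+,c2} evaluates to
// c0 + c1*n + c2*n*(n-1)/2.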
2675  bool OpsModified = false;
2676  for (unsigned OtherIdx = Idx+1;
2677  OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
2678  ++OtherIdx) {
2679  const SCEVAddRecExpr *OtherAddRec =
2680  dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]);
2681  if (!OtherAddRec || OtherAddRec->getLoop() != AddRecLoop)
2682  continue;
2683 
2684  bool Overflow = false;
2685  Type *Ty = AddRec->getType();
2686  bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64;
2687  SmallVector<const SCEV*, 7> AddRecOps;
2688  for (int x = 0, xe = AddRec->getNumOperands() +
2689  OtherAddRec->getNumOperands() - 1; x != xe && !Overflow; ++x) {
2690  const SCEV *Term = getZero(Ty);
2691  for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) {
2692  uint64_t Coeff1 = Choose(x, 2*x - y, Overflow);
2693  for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1),
2694  ze = std::min(x+1, (int)OtherAddRec->getNumOperands());
2695  z < ze && !Overflow; ++z) {
2696  uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow);
2697  uint64_t Coeff;
2698  if (LargerThan64Bits)
2699  Coeff = umul_ov(Coeff1, Coeff2, Overflow);
2700  else
2701  Coeff = Coeff1*Coeff2;
2702  const SCEV *CoeffTerm = getConstant(Ty, Coeff);
2703  const SCEV *Term1 = AddRec->getOperand(y-z);
2704  const SCEV *Term2 = OtherAddRec->getOperand(z);
2705  Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1,Term2));
2706  }
2707  }
2708  AddRecOps.push_back(Term);
2709  }
2710  if (!Overflow) {
2711  const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRec->getLoop(),
2712  SCEV::FlagAnyWrap);
2713  if (Ops.size() == 2) return NewAddRec;
2714  Ops[Idx] = NewAddRec;
2715  Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
2716  OpsModified = true;
2717  AddRec = dyn_cast<SCEVAddRecExpr>(NewAddRec);
2718  if (!AddRec)
2719  break;
2720  }
2721  }
2722  if (OpsModified)
2723  return getMulExpr(Ops);
2724 
2725  // Otherwise couldn't fold anything into this recurrence. Move onto the
2726  // next one.
2727  }
2728 
2729  // Okay, it looks like we really DO need a mul expr. Check to see if we
2730  // already have one, otherwise create a new one.
2731  FoldingSetNodeID ID;
2732  ID.AddInteger(scMulExpr);
2733  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
2734  ID.AddPointer(Ops[i]);
2735  void *IP = nullptr;
2736  SCEVMulExpr *S =
2737  static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
2738  if (!S) {
2739  const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
2740  std::uninitialized_copy(Ops.begin(), Ops.end(), O);
2741  S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator),
2742  O, Ops.size());
2743  UniqueSCEVs.InsertNode(S, IP);
2744  }
2745  S->setNoWrapFlags(Flags);
2746  return S;
2747 }
2748 
2749 /// Get a canonical unsigned division expression, or something simpler if
2750 /// possible.
2751 const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
2752  const SCEV *RHS) {
2753  assert(getEffectiveSCEVType(LHS->getType()) ==
2754  getEffectiveSCEVType(RHS->getType()) &&
2755  "SCEVUDivExpr operand types don't match!");
2756 
2757  if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
2758  if (RHSC->getValue()->equalsInt(1))
2759  return LHS; // X udiv 1 --> x
2760  // If the denominator is zero, the result of the udiv is undefined. Don't
2761  // try to analyze it, because the resolution chosen here may differ from
2762  // the resolution chosen in other parts of the compiler.
2763  if (!RHSC->getValue()->isZero()) {
2764  // Determine if the division can be folded into the operands of
2765  // the numerator (LHS).
2766  // TODO: Generalize this to non-constants by using known-bits information.
2767  Type *Ty = LHS->getType();
2768  unsigned LZ = RHSC->getAPInt().countLeadingZeros();
2769  unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ - 1;
2770  // For non-power-of-two values, effectively round the value up to the
2771  // nearest power of two.
2772  if (!RHSC->getAPInt().isPowerOf2())
2773  ++MaxShiftAmt;
2774  IntegerType *ExtTy =
2775  IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt);
2776  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS))
2777  if (const SCEVConstant *Step =
2778  dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this))) {
2779  // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded.
2780  const APInt &StepInt = Step->getAPInt();
2781  const APInt &DivInt = RHSC->getAPInt();
2782  if (!StepInt.urem(DivInt) &&
2783  getZeroExtendExpr(AR, ExtTy) ==
2784  getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
2785  getZeroExtendExpr(Step, ExtTy),
2786  AR->getLoop(), SCEV::FlagAnyWrap)) {
2787  SmallVector<const SCEV *, 4> Operands;
2788  for (const SCEV *Op : AR->operands())
2789  Operands.push_back(getUDivExpr(Op, RHS));
2790  return getAddRecExpr(Operands, AR->getLoop(), SCEV::FlagNW);
2791  }
2792  // Get a canonical UDivExpr for a recurrence.
2793  // {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0.
2794  // We can currently only fold X%N if X is constant.
2795  const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart());
2796  if (StartC && !DivInt.urem(StepInt) &&
2797  getZeroExtendExpr(AR, ExtTy) ==
2798  getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
2799  getZeroExtendExpr(Step, ExtTy),
2800  AR->getLoop(), SCEV::FlagAnyWrap)) {
2801  const APInt &StartInt = StartC->getAPInt();
2802  const APInt &StartRem = StartInt.urem(StepInt);
2803  if (StartRem != 0)
2804  LHS = getAddRecExpr(getConstant(StartInt - StartRem), Step,
2805  AR->getLoop(), SCEV::FlagNW);
2806  }
2807  }
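// Examples for the two recurrence folds above (illustrative, not from
// this file): {8,+,4}<%L> /u 2 folds to {4,+,2}<%L>, since
// StepInt % DivInt == 0 and the zext identity shows the addrec cannot
// wrap; and {5,+,4}<%L> /u 8 is rewritten with Y = 5 - (5 urem 4) = 4 to
// {4,+,4}<%L> /u 8, safe because DivInt % StepInt == 0 means dropping the
// sub-step remainder from the start changes no quotient.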
2808  // (A*B)/C --> A*(B/C) if safe and B/C can be folded.
2809  if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) {
2810  SmallVector<const SCEV *, 4> Operands;
2811  for (const SCEV *Op : M->operands())
2812  Operands.push_back(getZeroExtendExpr(Op, ExtTy));
2813  if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands))
2814  // Find an operand that's safely divisible.
2815  for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
2816  const SCEV *Op = M->getOperand(i);
2817  const SCEV *Div = getUDivExpr(Op, RHSC);
2818  if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) {
2819  Operands = SmallVector<const SCEV *, 4>(M->op_begin(),
2820  M->op_end());
2821  Operands[i] = Div;
2822  return getMulExpr(Operands);
2823  }
2824  }
2825  }
2826  // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded.
2827  if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(LHS)) {
2828  SmallVector<const SCEV *, 4> Operands;
2829  for (const SCEV *Op : A->operands())
2830  Operands.push_back(getZeroExtendExpr(Op, ExtTy));
2831  if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) {
2832  Operands.clear();
2833  for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) {
2834  const SCEV *Op = getUDivExpr(A->getOperand(i), RHS);
2835  if (isa<SCEVUDivExpr>(Op) ||
2836  getMulExpr(Op, RHS) != A->getOperand(i))
2837  break;
2838  Operands.push_back(Op);
2839  }
2840  if (Operands.size() == A->getNumOperands())
2841  return getAddExpr(Operands);
2842  }
2843  }
2844 
2845  // Fold if both operands are constant.
2846  if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
2847  Constant *LHSCV = LHSC->getValue();
2848  Constant *RHSCV = RHSC->getValue();
2849  return getConstant(cast<ConstantInt>(ConstantExpr::getUDiv(LHSCV,
2850  RHSCV)));
2851  }
2852  }
2853  }
2854 
2856  ID.AddInteger(scUDivExpr);
2857  ID.AddPointer(LHS);
2858  ID.AddPointer(RHS);
2859  void *IP = nullptr;
2860  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
2861  SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator),
2862  LHS, RHS);
2863  UniqueSCEVs.InsertNode(S, IP);
2864  return S;
2865 }
2866 
2867 static const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) {
2868  APInt A = C1->getAPInt().abs();
2869  APInt B = C2->getAPInt().abs();
2870  uint32_t ABW = A.getBitWidth();
2871  uint32_t BBW = B.getBitWidth();
2872 
2873  if (ABW > BBW)
2874  B = B.zext(ABW);
2875  else if (ABW < BBW)
2876  A = A.zext(BBW);
2877 
2878  return APIntOps::GreatestCommonDivisor(A, B);
2879 }
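// For example (illustrative): for i8 constants -12 and 8, abs() yields 12
// and 8, the bit widths already match, and
// APIntOps::GreatestCommonDivisor(12, 8) returns 4.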
2880 
2881 /// Get a canonical unsigned division expression, or something simpler if
2882 /// possible. There is no representation for an exact udiv in SCEV IR, but we
2883 /// can attempt to remove factors from the LHS and RHS. We can't do this when
2884 /// it's not exact because the udiv may be clearing bits.
2885 const SCEV *ScalarEvolution::getUDivExactExpr(const SCEV *LHS,
2886  const SCEV *RHS) {
2887  // TODO: we could try to find factors in all sorts of things, but for now we
2888  // just deal with u/exact (multiply, constant). See SCEVDivision towards the
2889  // end of this file for inspiration.
2890 
2891  const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS);
2892  if (!Mul)
2893  return getUDivExpr(LHS, RHS);
2894 
2895  if (const SCEVConstant *RHSCst = dyn_cast<SCEVConstant>(RHS)) {
2896  // If the mulexpr multiplies by a constant, then that constant must be the
2897  // first element of the mulexpr.
2898  if (const auto *LHSCst = dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
2899  if (LHSCst == RHSCst) {
2900  SmallVector<const SCEV *, 2> Operands;
2901  Operands.append(Mul->op_begin() + 1, Mul->op_end());
2902  return getMulExpr(Operands);
2903  }
2904 
2905  // We can't just assume that LHSCst divides RHSCst cleanly; it could be
2906  // that there's a factor provided by one of the other terms. We need to
2907  // check.
2908  APInt Factor = gcd(LHSCst, RHSCst);
2909  if (!Factor.isIntN(1)) {
2910  LHSCst =
2911  cast<SCEVConstant>(getConstant(LHSCst->getAPInt().udiv(Factor)));
2912  RHSCst =
2913  cast<SCEVConstant>(getConstant(RHSCst->getAPInt().udiv(Factor)));
2914  SmallVector<const SCEV *, 2> Operands;
2915  Operands.push_back(LHSCst);
2916  Operands.append(Mul->op_begin() + 1, Mul->op_end());
2917  LHS = getMulExpr(Operands);
2918  RHS = RHSCst;
2919  Mul = dyn_cast<SCEVMulExpr>(LHS);
2920  if (!Mul)
2921  return getUDivExactExpr(LHS, RHS);
2922  }
2923  }
2924  }
2925 
2926  for (int i = 0, e = Mul->getNumOperands(); i != e; ++i) {
2927  if (Mul->getOperand(i) == RHS) {
2928  SmallVector<const SCEV *, 2> Operands;
2929  Operands.append(Mul->op_begin(), Mul->op_begin() + i);
2930  Operands.append(Mul->op_begin() + i + 1, Mul->op_end());
2931  return getMulExpr(Operands);
2932  }
2933  }
2934 
2935  return getUDivExpr(LHS, RHS);
2936 }
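// Example (illustrative, not from this file): with LHS = (6 * %x) and
// RHS = 4, the gcd 2 is divided out of both constants, leaving
// (3 * %x) /u 2. This is sound only because the caller promises the
// division is exact; a plain udiv may clear low bits, as noted above.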
2937 
2938 /// Get an add recurrence expression for the specified loop. Simplify the
2939 /// expression as much as possible.
2940 const SCEV *ScalarEvolution::getAddRecExpr(const SCEV *Start, const SCEV *Step,
2941  const Loop *L,
2942  SCEV::NoWrapFlags Flags) {
2943  SmallVector<const SCEV *, 4> Operands;
2944  Operands.push_back(Start);
2945  if (const SCEVAddRecExpr *StepChrec = dyn_cast<SCEVAddRecExpr>(Step))
2946  if (StepChrec->getLoop() == L) {
2947  Operands.append(StepChrec->op_begin(), StepChrec->op_end());
2948  return getAddRecExpr(Operands, L, maskFlags(Flags, SCEV::FlagNW));
2949  }
2950 
2951  Operands.push_back(Step);
2952  return getAddRecExpr(Operands, L, Flags);
2953 }
2954 
2955 /// Get an add recurrence expression for the specified loop. Simplify the
2956 /// expression as much as possible.
2957 const SCEV *
2958 ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
2959  const Loop *L, SCEV::NoWrapFlags Flags) {
2960  if (Operands.size() == 1) return Operands[0];
2961 #ifndef NDEBUG
2962  Type *ETy = getEffectiveSCEVType(Operands[0]->getType());
2963  for (unsigned i = 1, e = Operands.size(); i != e; ++i)
2964  assert(getEffectiveSCEVType(Operands[i]->getType()) == ETy &&
2965  "SCEVAddRecExpr operand types don't match!");
2966  for (unsigned i = 0, e = Operands.size(); i != e; ++i)
2967  assert(isLoopInvariant(Operands[i], L) &&
2968  "SCEVAddRecExpr operand is not loop-invariant!");
2969 #endif
2970 
2971  if (Operands.back()->isZero()) {
2972  Operands.pop_back();
2973  return getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); // {X,+,0} --> X
2974  }
2975 
2976  // It's tempting to want to call getMaxBackedgeTakenCount here and
2977  // use that information to infer NUW and NSW flags. However, computing a
2978  // BE count requires calling getAddRecExpr, so we may not yet have a
2979  // meaningful BE count at this point (and if we don't, we'd be stuck
2980  // with a SCEVCouldNotCompute as the cached BE count).
2981 
2982  Flags = StrengthenNoWrapFlags(this, scAddRecExpr, Operands, Flags);
2983 
2984  // Canonicalize nested AddRecs by nesting them in order of loop depth.
2985  if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) {
2986  const Loop *NestedLoop = NestedAR->getLoop();
2987  if (L->contains(NestedLoop)
2988  ? (L->getLoopDepth() < NestedLoop->getLoopDepth())
2989  : (!NestedLoop->contains(L) &&
2990  DT.dominates(L->getHeader(), NestedLoop->getHeader()))) {
2991  SmallVector<const SCEV *, 4> NestedOperands(NestedAR->op_begin(),
2992  NestedAR->op_end());
2993  Operands[0] = NestedAR->getStart();
2994  // AddRecs require their operands be loop-invariant with respect to their
2995  // loops. Don't perform this transformation if it would break this
2996  // requirement.
2997  bool AllInvariant = all_of(
2998  Operands, [&](const SCEV *Op) { return isLoopInvariant(Op, L); });
2999 
3000  if (AllInvariant) {
3001  // Create a recurrence for the outer loop with the same step size.
3002  //
3003  // The outer recurrence keeps its NW flag but only keeps NUW/NSW if the
3004  // inner recurrence has the same property.
3005  SCEV::NoWrapFlags OuterFlags =
3006  maskFlags(Flags, SCEV::FlagNW | NestedAR->getNoWrapFlags());
3007 
3008  NestedOperands[0] = getAddRecExpr(Operands, L, OuterFlags);
3009  AllInvariant = all_of(NestedOperands, [&](const SCEV *Op) {
3010  return isLoopInvariant(Op, NestedLoop);
3011  });
3012 
3013  if (AllInvariant) {
3014  // Ok, both add recurrences are valid after the transformation.
3015  //
3016  // The inner recurrence keeps its NW flag but only keeps NUW/NSW if
3017  // the outer recurrence has the same property.
3018  SCEV::NoWrapFlags InnerFlags =
3019  maskFlags(NestedAR->getNoWrapFlags(), SCEV::FlagNW | Flags);
3020  return getAddRecExpr(NestedOperands, NestedLoop, InnerFlags);
3021  }
3022  }
3023  // Reset Operands to its original state.
3024  Operands[0] = NestedAR;
3025  }
3026  }
3027 
3028  // Okay, it looks like we really DO need an addrec expr. Check to see if we
3029  // already have one, otherwise create a new one.
3030  FoldingSetNodeID ID;
3031  ID.AddInteger(scAddRecExpr);
3032  for (unsigned i = 0, e = Operands.size(); i != e; ++i)
3033  ID.AddPointer(Operands[i]);
3034  ID.AddPointer(L);
3035  void *IP = nullptr;
3036  SCEVAddRecExpr *S =
3037  static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
3038  if (!S) {
3039  const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Operands.size());
3040  std::uninitialized_copy(Operands.begin(), Operands.end(), O);
3041  S = new (SCEVAllocator) SCEVAddRecExpr(ID.Intern(SCEVAllocator),
3042  O, Operands.size(), L);
3043  UniqueSCEVs.InsertNode(S, IP);
3044  }
3045  S->setNoWrapFlags(Flags);
3046  return S;
3047 }
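// Worked example (illustrative, not part of the original source): with L1
// containing L2, a request for {{0,+,1}<L2>,+,1}<L1> is canonicalized by the
// code above into {{0,+,1}<L1>,+,1}<L2>: the recurrence for the shallower
// loop L1 becomes the start of the recurrence for the deeper loop L2,
// provided every operand stays loop-invariant where required.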
3048 
3049 const SCEV *
3050 ScalarEvolution::getGEPExpr(GEPOperator *GEP,
3051  const SmallVectorImpl<const SCEV *> &IndexExprs) {
3052  const SCEV *BaseExpr = getSCEV(GEP->getPointerOperand());
3053  // getSCEV(Base)->getType() has the same address space as Base->getType()
3054  // because SCEV::getType() preserves the address space.
3055  Type *IntPtrTy = getEffectiveSCEVType(BaseExpr->getType());
3056  // FIXME(PR23527): Don't blindly transfer the inbounds flag from the GEP
3057  // instruction to its SCEV, because the Instruction may be guarded by control
3058  // flow and the no-overflow bits may not be valid for the expression in any
3059  // context. This can be fixed similarly to how these flags are handled for
3060  // adds.
3061  SCEV::NoWrapFlags Wrap = GEP->isInBounds() ? SCEV::FlagNSW
3062  : SCEV::FlagAnyWrap;
3063 
3064  const SCEV *TotalOffset = getZero(IntPtrTy);
3065  // The array size is unimportant. The first thing we do on CurTy is getting
3066  // its element type.
3067  Type *CurTy = ArrayType::get(GEP->getSourceElementType(), 0);
3068  for (const SCEV *IndexExpr : IndexExprs) {
3069  // Compute the (potentially symbolic) offset in bytes for this index.
3070  if (StructType *STy = dyn_cast<StructType>(CurTy)) {
3071  // For a struct, add the member offset.
3072  ConstantInt *Index = cast<SCEVConstant>(IndexExpr)->getValue();
3073  unsigned FieldNo = Index->getZExtValue();
3074  const SCEV *FieldOffset = getOffsetOfExpr(IntPtrTy, STy, FieldNo);
3075 
3076  // Add the field offset to the running total offset.
3077  TotalOffset = getAddExpr(TotalOffset, FieldOffset);
3078 
3079  // Update CurTy to the type of the field at Index.
3080  CurTy = STy->getTypeAtIndex(Index);
3081  } else {
3082  // Update CurTy to its element type.
3083  CurTy = cast<SequentialType>(CurTy)->getElementType();
3084  // For an array, add the element offset, explicitly scaled.
3085  const SCEV *ElementSize = getSizeOfExpr(IntPtrTy, CurTy);
3086  // Getelementptr indices are signed.
3087  IndexExpr = getTruncateOrSignExtend(IndexExpr, IntPtrTy);
3088 
3089  // Multiply the index by the element size to compute the element offset.
3090  const SCEV *LocalOffset = getMulExpr(IndexExpr, ElementSize, Wrap);
3091 
3092  // Add the element offset to the running total offset.
3093  TotalOffset = getAddExpr(TotalOffset, LocalOffset);
3094  }
3095  }
3096 
3097  // Add the total offset from all the GEP indices to the base.
3098  return getAddExpr(BaseExpr, TotalOffset, Wrap);
3099 }
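// Worked example (illustrative; assumes 64-bit pointers and a struct of two
// i32 fields): for "getelementptr inbounds %struct* %S, i64 %i, i32 1" the
// loop above accumulates TotalOffset = (8 * %i) + 4, so the result is
// (%S + (8 * %i) + 4), with no-wrap flags on the adds and muls only because
// the GEP is inbounds.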
3100 
3101 const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS,
3102  const SCEV *RHS) {
3103  SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
3104  return getSMaxExpr(Ops);
3105 }
3106 
3107 const SCEV *
3108 ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
3109  assert(!Ops.empty() && "Cannot get empty smax!");
3110  if (Ops.size() == 1) return Ops[0];
3111 #ifndef NDEBUG
3112  Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
3113  for (unsigned i = 1, e = Ops.size(); i != e; ++i)
3114  assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
3115  "SCEVSMaxExpr operand types don't match!");
3116 #endif
3117 
3118  // Sort by complexity; this groups all similar expression types together.
3119  GroupByComplexity(Ops, &LI);
3120 
3121  // If there are any constants, fold them together.
3122  unsigned Idx = 0;
3123  if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
3124  ++Idx;
3125  assert(Idx < Ops.size());
3126  while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
3127  // We found two constants, fold them together!
3128  ConstantInt *Fold = ConstantInt::get(
3129  getContext(), APIntOps::smax(LHSC->getAPInt(), RHSC->getAPInt()));
3130  Ops[0] = getConstant(Fold);
3131  Ops.erase(Ops.begin()+1); // Erase the folded element
3132  if (Ops.size() == 1) return Ops[0];
3133  LHSC = cast<SCEVConstant>(Ops[0]);
3134  }
3135 
3136  // If we are left with a constant minimum-int, strip it off.
3137  if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(true)) {
3138  Ops.erase(Ops.begin());
3139  --Idx;
3140  } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(true)) {
3141  // If we have an smax with a constant maximum-int, it will always be
3142  // maximum-int.
3143  return Ops[0];
3144  }
3145 
3146  if (Ops.size() == 1) return Ops[0];
3147  }
3148 
3149  // Find the first SMax
3150  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scSMaxExpr)
3151  ++Idx;
3152 
3153  // Check to see if one of the operands is an SMax. If so, expand its operands
3154  // onto our operand list, and recurse to simplify.
3155  if (Idx < Ops.size()) {
3156  bool DeletedSMax = false;
3157  while (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(Ops[Idx])) {
3158  Ops.erase(Ops.begin()+Idx);
3159  Ops.append(SMax->op_begin(), SMax->op_end());
3160  DeletedSMax = true;
3161  }
3162 
3163  if (DeletedSMax)
3164  return getSMaxExpr(Ops);
3165  }
3166 
3167  // Okay, check to see if the same value occurs in the operand list twice. If
3168  // so, delete one. Since we sorted the list, these values are required to
3169  // be adjacent.
3170  for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
3171  // X smax Y smax Y --> X smax Y
3172  // X smax Y --> X, if X is always greater than or equal to Y
3173  if (Ops[i] == Ops[i+1] ||
3174  isKnownPredicate(ICmpInst::ICMP_SGE, Ops[i], Ops[i+1])) {
3175  Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2);
3176  --i; --e;
3177  } else if (isKnownPredicate(ICmpInst::ICMP_SLE, Ops[i], Ops[i+1])) {
3178  Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
3179  --i; --e;
3180  }
3181 
3182  if (Ops.size() == 1) return Ops[0];
3183 
3184  assert(!Ops.empty() && "Reduced smax down to nothing!");
3185 
3186  // Okay, it looks like we really DO need an smax expr. Check to see if we
3187  // already have one, otherwise create a new one.
3188  FoldingSetNodeID ID;
3189  ID.AddInteger(scSMaxExpr);
3190  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
3191  ID.AddPointer(Ops[i]);
3192  void *IP = nullptr;
3193  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
3194  const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
3195  std::uninitialized_copy(Ops.begin(), Ops.end(), O);
3196  SCEV *S = new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator),
3197  O, Ops.size());
3198  UniqueSCEVs.InsertNode(S, IP);
3199  return S;
3200 }
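// Worked example (illustrative, not part of the original source):
// getSMaxExpr({3, %x, smax(5, %y)}) first inlines the nested smax, then
// folds the constants 3 and 5 to 5, yielding smax(5, %x, %y) (operand
// order is determined by GroupByComplexity).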
3201 
3203  const SCEV *RHS) {
3204  SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
3205  return getUMaxExpr(Ops);
3206 }
3207 
3208 const SCEV *
3209 ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
3210  assert(!Ops.empty() && "Cannot get empty umax!");
3211  if (Ops.size() == 1) return Ops[0];
3212 #ifndef NDEBUG
3213  Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
3214  for (unsigned i = 1, e = Ops.size(); i != e; ++i)
3215  assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
3216  "SCEVUMaxExpr operand types don't match!");
3217 #endif
3218 
3219  // Sort by complexity; this groups all similar expression types together.
3220  GroupByComplexity(Ops, &LI);
3221 
3222  // If there are any constants, fold them together.
3223  unsigned Idx = 0;
3224  if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
3225  ++Idx;
3226  assert(Idx < Ops.size());
3227  while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
3228  // We found two constants, fold them together!
3229  ConstantInt *Fold = ConstantInt::get(
3230  getContext(), APIntOps::umax(LHSC->getAPInt(), RHSC->getAPInt()));
3231  Ops[0] = getConstant(Fold);
3232  Ops.erase(Ops.begin()+1); // Erase the folded element
3233  if (Ops.size() == 1) return Ops[0];
3234  LHSC = cast<SCEVConstant>(Ops[0]);
3235  }
3236 
3237  // If we are left with a constant minimum-int, strip it off.
3238  if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(false)) {
3239  Ops.erase(Ops.begin());
3240  --Idx;
3241  } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(false)) {
3242  // If we have an umax with a constant maximum-int, it will always be
3243  // maximum-int.
3244  return Ops[0];
3245  }
3246 
3247  if (Ops.size() == 1) return Ops[0];
3248  }
3249 
3250  // Find the first UMax
3251  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scUMaxExpr)
3252  ++Idx;
3253 
3254  // Check to see if one of the operands is a UMax. If so, expand its operands
3255  // onto our operand list, and recurse to simplify.
3256  if (Idx < Ops.size()) {
3257  bool DeletedUMax = false;
3258  while (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(Ops[Idx])) {
3259  Ops.erase(Ops.begin()+Idx);
3260  Ops.append(UMax->op_begin(), UMax->op_end());
3261  DeletedUMax = true;
3262  }
3263 
3264  if (DeletedUMax)
3265  return getUMaxExpr(Ops);
3266  }
3267 
3268  // Okay, check to see if the same value occurs in the operand list twice. If
3269  // so, delete one. Since we sorted the list, these values are required to
3270  // be adjacent.
3271  for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
3272  // X umax Y umax Y --> X umax Y
3273  // X umax Y --> X, if X is always greater than or equal to Y
3274  if (Ops[i] == Ops[i+1] ||
3275  isKnownPredicate(ICmpInst::ICMP_UGE, Ops[i], Ops[i+1])) {
3276  Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2);
3277  --i; --e;
3278  } else if (isKnownPredicate(ICmpInst::ICMP_ULE, Ops[i], Ops[i+1])) {
3279  Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
3280  --i; --e;
3281  }
3282 
3283  if (Ops.size() == 1) return Ops[0];
3284 
3285  assert(!Ops.empty() && "Reduced umax down to nothing!");
3286 
3287  // Okay, it looks like we really DO need a umax expr. Check to see if we
3288  // already have one, otherwise create a new one.
3289  FoldingSetNodeID ID;
3290  ID.AddInteger(scUMaxExpr);
3291  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
3292  ID.AddPointer(Ops[i]);
3293  void *IP = nullptr;
3294  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
3295  const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
3296  std::uninitialized_copy(Ops.begin(), Ops.end(), O);
3297  SCEV *S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator),
3298  O, Ops.size());
3299  UniqueSCEVs.InsertNode(S, IP);
3300  return S;
3301 }
3302 
3303 const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS,
3304  const SCEV *RHS) {
3305  // ~smax(~x, ~y) == smin(x, y).
3306  return getNotSCEV(getSMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
3307 }
3308 
3309 const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
3310  const SCEV *RHS) {
3311  // ~umax(~x, ~y) == umin(x, y)
3312  return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
3313 }
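// Sanity check of the identity above on 4-bit values (illustrative):
// for x = 3 (0011) and y = 5 (0101), ~x = 12 and ~y = 10, so
// umax(12, 10) = 12 and ~12 = 3 = umin(3, 5).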
3314 
3315 const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) {
3316  // We can bypass creating a target-independent
3317  // constant expression and then folding it back into a ConstantInt.
3318  // This is just a compile-time optimization.
3319  return getConstant(IntTy, getDataLayout().getTypeAllocSize(AllocTy));
3320 }
3321 
3322 const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy,
3323  StructType *STy,
3324  unsigned FieldNo) {
3325  // We can bypass creating a target-independent
3326  // constant expression and then folding it back into a ConstantInt.
3327  // This is just a compile-time optimization.
3328  return getConstant(
3329  IntTy, getDataLayout().getStructLayout(STy)->getElementOffset(FieldNo));
3330 }
3331 
3332 const SCEV *ScalarEvolution::getUnknown(Value *V) {
3333  // Don't attempt to do anything other than create a SCEVUnknown object
3334  // here. createSCEV only calls getUnknown after checking for all other
3335  // interesting possibilities, and any other code that calls getUnknown
3336  // is doing so in order to hide a value from SCEV canonicalization.
3337 
3338  FoldingSetNodeID ID;
3339  ID.AddInteger(scUnknown);
3340  ID.AddPointer(V);
3341  void *IP = nullptr;
3342  if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) {
3343  assert(cast<SCEVUnknown>(S)->getValue() == V &&
3344  "Stale SCEVUnknown in uniquing map!");
3345  return S;
3346  }
3347  SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V, this,
3348  FirstUnknown);
3349  FirstUnknown = cast<SCEVUnknown>(S);
3350  UniqueSCEVs.InsertNode(S, IP);
3351  return S;
3352 }
3353 
3354 //===----------------------------------------------------------------------===//
3355 // Basic SCEV Analysis and PHI Idiom Recognition Code
3356 //
3357 
3358 /// Test if values of the given type are analyzable within the SCEV
3359 /// framework. This primarily includes integer types, and it can optionally
3360 /// include pointer types if the ScalarEvolution class has access to
3361 /// target-specific information.
3362 bool ScalarEvolution::isSCEVable(Type *Ty) const {
3363  // Integers and pointers are always SCEVable.
3364  return Ty->isIntegerTy() || Ty->isPointerTy();
3365 }
3366 
3367 /// Return the size in bits of the specified type, for which isSCEVable must
3368 /// return true.
3369 uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const {
3370  assert(isSCEVable(Ty) && "Type is not SCEVable!");
3371  return getDataLayout().getTypeSizeInBits(Ty);
3372 }
3373 
3374 /// Return a type with the same bitwidth as the given type and which represents
3375 /// how SCEV will treat the given type, for which isSCEVable must return
3376 /// true. For pointer types, this is the pointer-sized integer type.
3377 Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const {
3378  assert(isSCEVable(Ty) && "Type is not SCEVable!");
3379 
3380  if (Ty->isIntegerTy())
3381  return Ty;
3382 
3383  // The only other supported type is pointer.
3384  assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!");
3385  return getDataLayout().getIntPtrType(Ty);
3386 }
3387 
3388 const SCEV *ScalarEvolution::getCouldNotCompute() {
3389  return CouldNotCompute.get();
3390 }
3391 
3392 bool ScalarEvolution::checkValidity(const SCEV *S) const {
3393  bool ContainsNulls = SCEVExprContains(S, [](const SCEV *S) {
3394  auto *SU = dyn_cast<SCEVUnknown>(S);
3395  return SU && SU->getValue() == nullptr;
3396  });
3397 
3398  return !ContainsNulls;
3399 }
3400 
3401 bool ScalarEvolution::containsAddRecurrence(const SCEV *S) {
3402  HasRecMapType::iterator I = HasRecMap.find(S);
3403  if (I != HasRecMap.end())
3404  return I->second;
3405 
3406  bool FoundAddRec = SCEVExprContains(S, isa<SCEVAddRecExpr, const SCEV *>);
3407  HasRecMap.insert({S, FoundAddRec});
3408  return FoundAddRec;
3409 }
3410 
3411 /// Try to split a SCEVAddExpr into a pair of {SCEV, ConstantInt}.
3412 /// If \p S is a SCEVAddExpr and is composed of a sub SCEV S' and an
3413 /// offset I, then return {S', I}, else return {\p S, nullptr}.
3414 static std::pair<const SCEV *, ConstantInt *> splitAddExpr(const SCEV *S) {
3415  const auto *Add = dyn_cast<SCEVAddExpr>(S);
3416  if (!Add)
3417  return {S, nullptr};
3418 
3419  if (Add->getNumOperands() != 2)
3420  return {S, nullptr};
3421 
3422  auto *ConstOp = dyn_cast<SCEVConstant>(Add->getOperand(0));
3423  if (!ConstOp)
3424  return {S, nullptr};
3425 
3426  return {Add->getOperand(1), ConstOp->getValue()};
3427 }
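// Worked example (illustrative, not part of the original source): for
// S = (5 + %x), the constant is the first add operand after
// canonicalization, so splitAddExpr returns {%x, 5}; any non-add or
// multi-term expression comes back as {S, nullptr}.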
3428 
3429 /// Return the ValueOffsetPair set for \p S. \p S can be represented
3430 /// by the value and offset from any ValueOffsetPair in the set.
3431 SetVector<ScalarEvolution::ValueOffsetPair> *
3432 ScalarEvolution::getSCEVValues(const SCEV *S) {
3433  ExprValueMapType::iterator SI = ExprValueMap.find_as(S);
3434  if (SI == ExprValueMap.end())
3435  return nullptr;
3436 #ifndef NDEBUG
3437  if (VerifySCEVMap) {
3438  // Check there is no dangling Value in the set returned.
3439  for (const auto &VE : SI->second)
3440  assert(ValueExprMap.count(VE.first));
3441  }
3442 #endif
3443  return &SI->second;
3444 }
3445 
3446 /// Erase Value from ValueExprMap and ExprValueMap. ValueExprMap.erase(V)
3447 /// cannot be used separately. eraseValueFromMap should be used to remove
3448 /// V from ValueExprMap and ExprValueMap at the same time.
3449 void ScalarEvolution::eraseValueFromMap(Value *V) {
3450  ValueExprMapType::iterator I = ValueExprMap.find_as(V);
3451  if (I != ValueExprMap.end()) {
3452  const SCEV *S = I->second;
3453  // Remove {V, 0} from the set of ExprValueMap[S]
3454  if (SetVector<ValueOffsetPair> *SV = getSCEVValues(S))
3455  SV->remove({V, nullptr});
3456 
3457  // Remove {V, Offset} from the set of ExprValueMap[Stripped]
3458  const SCEV *Stripped;
3459  ConstantInt *Offset;
3460  std::tie(Stripped, Offset) = splitAddExpr(S);
3461  if (Offset != nullptr) {
3462  if (SetVector<ValueOffsetPair> *SV = getSCEVValues(Stripped))
3463  SV->remove({V, Offset});
3464  }
3465  ValueExprMap.erase(V);
3466  }
3467 }
3468 
3469 /// Return an existing SCEV if it exists, otherwise analyze the expression and
3470 /// create a new one.
3471 const SCEV *ScalarEvolution::getSCEV(Value *V) {
3472  assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
3473 
3474  const SCEV *S = getExistingSCEV(V);
3475  if (S == nullptr) {
3476  S = createSCEV(V);
3477  // During PHI resolution, it is possible to create two SCEVs for the same
3478  // V, so we need to double-check whether V->S was inserted into
3479  // ValueExprMap before inserting S->{V, 0} into ExprValueMap.
3480  std::pair<ValueExprMapType::iterator, bool> Pair =
3481  ValueExprMap.insert({SCEVCallbackVH(V, this), S});
3482  if (Pair.second) {
3483  ExprValueMap[S].insert({V, nullptr});
3484 
3485  // If S == Stripped + Offset, add Stripped -> {V, Offset} into
3486  // ExprValueMap.
3487  const SCEV *Stripped = S;
3488  ConstantInt *Offset = nullptr;
3489  std::tie(Stripped, Offset) = splitAddExpr(S);
3490  // If Stripped is a SCEVUnknown, don't bother to save
3491  // Stripped -> {V, offset}. It doesn't simplify and sometimes even
3492  // increases the complexity of the expansion code.
3493  // If V is GetElementPtrInst, don't save Stripped -> {V, offset}
3494  // because it may generate add/sub instead of GEP in SCEV expansion.
3495  if (Offset != nullptr && !isa<SCEVUnknown>(Stripped) &&
3496  !isa<GetElementPtrInst>(V))
3497  ExprValueMap[Stripped].insert({V, Offset});
3498  }
3499  }
3500  return S;
3501 }
3502 
3503 const SCEV *ScalarEvolution::getExistingSCEV(Value *V) {
3504  assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
3505 
3506  ValueExprMapType::iterator I = ValueExprMap.find_as(V);
3507  if (I != ValueExprMap.end()) {
3508  const SCEV *S = I->second;
3509  if (checkValidity(S))
3510  return S;
3511  eraseValueFromMap(V);
3512  forgetMemoizedResults(S);
3513  }
3514  return nullptr;
3515 }
3516 
3517 /// Return a SCEV corresponding to -V = -1*V
3518 ///
3519 const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V,
3520  SCEV::NoWrapFlags Flags) {
3521  if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
3522  return getConstant(
3523  cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue())));
3524 
3525  Type *Ty = V->getType();
3526  Ty = getEffectiveSCEVType(Ty);
3527  return getMulExpr(
3528  V, getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))), Flags);
3529 }
3530 
3531 /// Return a SCEV corresponding to ~V = -1-V
3532 const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
3533  if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
3534  return getConstant(
3535  cast<ConstantInt>(ConstantExpr::getNot(VC->getValue())));
3536 
3537  Type *Ty = V->getType();
3538  Ty = getEffectiveSCEVType(Ty);
3539  const SCEV *AllOnes =
3540  getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty)));
3541  return getMinusSCEV(AllOnes, V);
3542 }
3543 
3544 const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
3545  SCEV::NoWrapFlags Flags) {
3546  // Fast path: X - X --> 0.
3547  if (LHS == RHS)
3548  return getZero(LHS->getType());
3549 
3550  // We represent LHS - RHS as LHS + (-1)*RHS. This transformation
3551  // makes it so that we cannot make much use of NUW.
3552  auto AddFlags = SCEV::FlagAnyWrap;
3553  const bool RHSIsNotMinSigned =
3554  !getSignedRange(RHS).getSignedMin().isMinSignedValue();
3555  if (maskFlags(Flags, SCEV::FlagNSW) == SCEV::FlagNSW) {
3556  // Let M be the minimum representable signed value. Then (-1)*RHS
3557  // signed-wraps if and only if RHS is M. That can happen even for
3558  // a NSW subtraction because e.g. (-1)*M signed-wraps even though
3559  // -1 - M does not. So to transfer NSW from LHS - RHS to LHS +
3560  // (-1)*RHS, we need to prove that RHS != M.
3561  //
3562  // If LHS is non-negative and we know that LHS - RHS does not
3563  // signed-wrap, then RHS cannot be M. So we can rule out signed-wrap
3564  // either by proving that RHS > M or that LHS >= 0.
3565  if (RHSIsNotMinSigned || isKnownNonNegative(LHS)) {
3566  AddFlags = SCEV::FlagNSW;
3567  }
3568  }
3569 
3570  // FIXME: Find a correct way to transfer NSW to (-1)*M when LHS -
3571  // RHS is NSW and LHS >= 0.
3572  //
3573  // The difficulty here is that the NSW flag may have been proven
3574  // relative to a loop that is to be found in a recurrence in LHS and
3575  // not in RHS. Applying NSW to (-1)*M may then let the NSW have a
3576  // larger scope than intended.
3577  auto NegFlags = RHSIsNotMinSigned ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
3578 
3579  return getAddExpr(LHS, getNegativeSCEV(RHS, NegFlags), AddFlags);
3580 }
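// Worked example of the caveat above (illustrative, i8): if RHS could be
// -128 (the minimum signed value M), then (-1) * RHS signed-wraps back to
// -128 even when LHS - RHS is nsw, which is why nsw is transferred to the
// add only once RHS != M is known (RHSIsNotMinSigned) or LHS is known
// non-negative.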
3581 
3582 const SCEV *
3583 ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, Type *Ty) {
3584  Type *SrcTy = V->getType();
3585  assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
3586  (Ty->isIntegerTy() || Ty->isPointerTy()) &&
3587  "Cannot truncate or zero extend with non-integer arguments!");
3588  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
3589  return V; // No conversion
3590  if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
3591  return getTruncateExpr(V, Ty);
3592  return getZeroExtendExpr(V, Ty);
3593 }
3594 
3595 const SCEV *
3596 ScalarEvolution::getTruncateOrSignExtend(const SCEV *V,
3597  Type *Ty) {
3598  Type *SrcTy = V->getType();
3599  assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
3600  (Ty->isIntegerTy() || Ty->isPointerTy()) &&
3601  "Cannot truncate or zero extend with non-integer arguments!");
3602  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
3603  return V; // No conversion
3604  if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
3605  return getTruncateExpr(V, Ty);
3606  return getSignExtendExpr(V, Ty);
3607 }
3608 
3609 const SCEV *
3610 ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, Type *Ty) {
3611  Type *SrcTy = V->getType();
3612  assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
3613  (Ty->isIntegerTy() || Ty->isPointerTy()) &&
3614  "Cannot noop or zero extend with non-integer arguments!");
3615  assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
3616  "getNoopOrZeroExtend cannot truncate!");
3617  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
3618  return V; // No conversion
3619  return getZeroExtendExpr(V, Ty);
3620 }
3621 
3622 const SCEV *
3623 ScalarEvolution::getNoopOrSignExtend(const SCEV *V, Type *Ty) {
3624  Type *SrcTy = V->getType();
3625  assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
3626  (Ty->isIntegerTy() || Ty->isPointerTy()) &&
3627  "Cannot noop or sign extend with non-integer arguments!");
3628  assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
3629  "getNoopOrSignExtend cannot truncate!");
3630  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
3631  return V; // No conversion
3632  return getSignExtendExpr(V, Ty);
3633 }
3634 
3635 const SCEV *
3637  Type *SrcTy = V->getType();
3638  assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
3639  (Ty->isIntegerTy() || Ty->isPointerTy()) &&
3640  "Cannot noop or any extend with non-integer arguments!");
3641  assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
3642  "getNoopOrAnyExtend cannot truncate!");
3643  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
3644  return V; // No conversion
3645  return getAnyExtendExpr(V, Ty);
3646 }
3647 
3648 const SCEV *
3649 ScalarEvolution::getTruncateOrNoop(const SCEV *V, Type *Ty) {
3650  Type *SrcTy = V->getType();
3651  assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
3652  (Ty->isIntegerTy() || Ty->isPointerTy()) &&
3653  "Cannot truncate or noop with non-integer arguments!");
3654  assert(getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) &&
3655  "getTruncateOrNoop cannot extend!");
3656  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
3657  return V; // No conversion
3658  return getTruncateExpr(V, Ty);
3659 }
3660 
3661 const SCEV *ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV *LHS,
3662  const SCEV *RHS) {
3663  const SCEV *PromotedLHS = LHS;
3664  const SCEV *PromotedRHS = RHS;
3665 
3666  if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
3667  PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
3668  else
3669  PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType());
3670 
3671  return getUMaxExpr(PromotedLHS, PromotedRHS);
3672 }
3673 
3674 const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS,
3675  const SCEV *RHS) {
3676  const SCEV *PromotedLHS = LHS;
3677  const SCEV *PromotedRHS = RHS;
3678 
3679  if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
3680  PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
3681  else
3682  PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType());
3683 
3684  return getUMinExpr(PromotedLHS, PromotedRHS);
3685 }
3686 
3687 const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) {
3688  // A pointer operand may evaluate to a nonpointer expression, such as null.
3689  if (!V->getType()->isPointerTy())
3690  return V;
3691 
3692  if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(V)) {
3693  return getPointerBase(Cast->getOperand());
3694  } else if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(V)) {
3695  const SCEV *PtrOp = nullptr;
3696  for (const SCEV *NAryOp : NAry->operands()) {
3697  if (NAryOp->getType()->isPointerTy()) {
3698  // Cannot find the base of an expression with multiple pointer operands.
3699  if (PtrOp)
3700  return V;
3701  PtrOp = NAryOp;
3702  }
3703  }
3704  if (!PtrOp)
3705  return V;
3706  return getPointerBase(PtrOp);
3707  }
3708  return V;
3709 }
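// Worked example (illustrative, not part of the original source): for
// V = (%base + (4 * %i)) where %base is the only pointer-typed operand,
// the recursion above returns getPointerBase(%base), i.e. %base itself,
// since a SCEVUnknown pointer is its own base.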
3710 
3711 /// Push users of the given Instruction onto the given Worklist.
3712 static void
3713 PushDefUseChildren(Instruction *I,
3714  SmallVectorImpl<Instruction *> &Worklist) {
3715  // Push the def-use children onto the Worklist stack.
3716  for (User *U : I->users())
3717  Worklist.push_back(cast<Instruction>(U));
3718 }
3719 
3720 void ScalarEvolution::forgetSymbolicName(Instruction *PN, const SCEV *SymName) {
3721  SmallVector<Instruction *, 16> Worklist;
3722  PushDefUseChildren(PN, Worklist);
3723 
3724  SmallPtrSet<Instruction *, 8> Visited;
3725  Visited.insert(PN);
3726  while (!Worklist.empty()) {
3727  Instruction *I = Worklist.pop_back_val();
3728  if (!Visited.insert(I).second)
3729  continue;
3730 
3731  auto It = ValueExprMap.find_as(static_cast<Value *>(I));
3732  if (It != ValueExprMap.end()) {
3733  const SCEV *Old = It->second;
3734 
3735  // Short-circuit the def-use traversal if the symbolic name
3736  // ceases to appear in expressions.
3737  if (Old != SymName && !hasOperand(Old, SymName))
3738  continue;
3739 
3740  // SCEVUnknown for a PHI either means that it has an unrecognized
3741  // structure, it's a PHI that's in the process of being computed
3742  // by createNodeForPHI, or it's a single-value PHI. In the first case,
3743  // additional loop trip count information isn't going to change anything.
3744  // In the second case, createNodeForPHI will perform the necessary
3745  // updates on its own when it gets to that point. In the third, we do
3746  // want to forget the SCEVUnknown.
3747  if (!isa<PHINode>(I) ||
3748  !isa<SCEVUnknown>(Old) ||
3749  (I != PN && Old == SymName)) {
3750  eraseValueFromMap(It->first);
3751  forgetMemoizedResults(Old);
3752  }
3753  }
3754 
3755  PushDefUseChildren(I, Worklist);
3756  }
3757 }
3758 
3759 namespace {
3760 class SCEVInitRewriter : public SCEVRewriteVisitor<SCEVInitRewriter> {
3761 public:
3762  static const SCEV *rewrite(const SCEV *S, const Loop *L,
3763  ScalarEvolution &SE) {
3764  SCEVInitRewriter Rewriter(L, SE);
3765  const SCEV *Result = Rewriter.visit(S);
3766  return Rewriter.isValid() ? Result : SE.getCouldNotCompute();
3767  }
3768 
3769  SCEVInitRewriter(const Loop *L, ScalarEvolution &SE)
3770  : SCEVRewriteVisitor(SE), L(L), Valid(true) {}
3771 
3772  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
3773  if (!(SE.getLoopDisposition(Expr, L) == ScalarEvolution::LoopInvariant))
3774  Valid = false;
3775  return Expr;
3776  }
3777 
3778  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
3779  // Only allow AddRecExprs for this loop.
3780  if (Expr->getLoop() == L)
3781  return Expr->getStart();
3782  Valid = false;
3783  return Expr;
3784  }
3785 
3786  bool isValid() { return Valid; }
3787 
3788 private:
3789  const Loop *L;
3790  bool Valid;
3791 };
3792 
3793 class SCEVShiftRewriter : public SCEVRewriteVisitor<SCEVShiftRewriter> {
3794 public:
3795  static const SCEV *rewrite(const SCEV *S, const Loop *L,
3796  ScalarEvolution &SE) {
3797  SCEVShiftRewriter Rewriter(L, SE);
3798  const SCEV *Result = Rewriter.visit(S);
3799  return Rewriter.isValid() ? Result : SE.getCouldNotCompute();
3800  }
3801 
3802  SCEVShiftRewriter(const Loop *L, ScalarEvolution &SE)
3803  : SCEVRewriteVisitor(SE), L(L), Valid(true) {}
3804 
3805  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
3806  // Only allow AddRecExprs for this loop.
3807  if (!(SE.getLoopDisposition(Expr, L) == ScalarEvolution::LoopInvariant))
3808  Valid = false;
3809  return Expr;
3810  }
3811 
3812  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
3813  if (Expr->getLoop() == L && Expr->isAffine())
3814  return SE.getMinusSCEV(Expr, Expr->getStepRecurrence(SE));
3815  Valid = false;
3816  return Expr;
3817  }
3818  bool isValid() { return Valid; }
3819 
3820 private:
3821  const Loop *L;
3822  bool Valid;
3823 };
3824 } // end anonymous namespace
3825 
3826 SCEV::NoWrapFlags
3827 ScalarEvolution::proveNoWrapViaConstantRanges(const SCEVAddRecExpr *AR) {
3828  if (!AR->isAffine())
3829  return SCEV::FlagAnyWrap;
3830 
3831  typedef OverflowingBinaryOperator OBO;
3832  SCEV::NoWrapFlags Result = SCEV::FlagAnyWrap;
3833 
3834  if (!AR->hasNoSignedWrap()) {
3835  ConstantRange AddRecRange = getSignedRange(AR);
3836  ConstantRange IncRange = getSignedRange(AR->getStepRecurrence(*this));
3837 
3838  auto NSWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
3839  Instruction::Add, IncRange, OBO::NoSignedWrap);
3840  if (NSWRegion.contains(AddRecRange))
3841  Result = ScalarEvolution::setFlags(Result, SCEV::FlagNSW);
3842  }
3843 
3844  if (!AR->hasNoUnsignedWrap()) {
3845  ConstantRange AddRecRange = getUnsignedRange(AR);
3846  ConstantRange IncRange = getUnsignedRange(AR->getStepRecurrence(*this));
3847 
3848  auto NUWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
3849  Instruction::Add, IncRange, OBO::NoUnsignedWrap);
3850  if (NUWRegion.contains(AddRecRange))
3851  Result = ScalarEvolution::setFlags(Result, SCEV::FlagNUW);
3852  }
3853 
3854  return Result;
3855 }
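// Worked example (illustrative, not part of the original source): for
// AR = {0,+,1} over i8 with a proven signed range of [0, 100), the step's
// range is {1} and the guaranteed-no-wrap region for "add nsw" with that
// increment is [-128, 127), which contains [0, 100), so FlagNSW is set
// here even if the IR increment lacked the nsw bit.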
3856 
3857 namespace {
3858 /// Represents an abstract binary operation. This may exist as a
3859 /// normal instruction or constant expression, or may have been
3860 /// derived from an expression tree.
3861 struct BinaryOp {
3862  unsigned Opcode;
3863  Value *LHS;
3864  Value *RHS;
3865  bool IsNSW;
3866  bool IsNUW;
3867 
3868  /// Op is set if this BinaryOp corresponds to a concrete LLVM instruction or
3869  /// constant expression.
3870  Operator *Op;
3871 
3872  explicit BinaryOp(Operator *Op)
3873  : Opcode(Op->getOpcode()), LHS(Op->getOperand(0)), RHS(Op->getOperand(1)),
3874  IsNSW(false), IsNUW(false), Op(Op) {
3875  if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(Op)) {
3876  IsNSW = OBO->hasNoSignedWrap();
3877  IsNUW = OBO->hasNoUnsignedWrap();
3878  }
3879  }
3880 
3881  explicit BinaryOp(unsigned Opcode, Value *LHS, Value *RHS, bool IsNSW = false,
3882  bool IsNUW = false)
3883  : Opcode(Opcode), LHS(LHS), RHS(RHS), IsNSW(IsNSW), IsNUW(IsNUW),
3884  Op(nullptr) {}
3885 };
3886 }
3887 
3888 
3889 /// Try to map \p V into a BinaryOp, and return \c None on failure.
3890 static Optional<BinaryOp> MatchBinaryOp(Value *V, DominatorTree &DT) {
3891  auto *Op = dyn_cast<Operator>(V);
3892  if (!Op)
3893  return None;
3894 
3895  // Implementation detail: all the cleverness here should happen without
3896  // creating new SCEV expressions -- our caller knows tricks to avoid creating
3897  // SCEV expressions when possible, and we should not break that.
3898 
3899  switch (Op->getOpcode()) {
3900  case Instruction::Add:
3901  case Instruction::Sub:
3902  case Instruction::Mul:
3903  case Instruction::UDiv:
3904  case Instruction::And:
3905  case Instruction::Or:
3906  case Instruction::AShr:
3907  case Instruction::Shl:
3908  return BinaryOp(Op);
3909 
3910  case Instruction::Xor:
3911  if (auto *RHSC = dyn_cast<ConstantInt>(Op->getOperand(1)))
3912  // If the RHS of the xor is a signbit, then this is just an add.
3913  // Instcombine turns add of signbit into xor as a strength reduction step.
3914  if (RHSC->getValue().isSignBit())
3915  return BinaryOp(Instruction::Add, Op->getOperand(0), Op->getOperand(1));
3916  return BinaryOp(Op);
3917 
3918  case Instruction::LShr:
3919  // Turn logical shift right of a constant into an unsigned divide.
3920  if (ConstantInt *SA = dyn_cast<ConstantInt>(Op->getOperand(1))) {
3921  uint32_t BitWidth = cast<IntegerType>(Op->getType())->getBitWidth();
3922 
3923  // If the shift count is not less than the bitwidth, the result of
3924  // the shift is undefined. Don't try to analyze it, because the
3925  // resolution chosen here may differ from the resolution chosen in
3926  // other parts of the compiler.
3927  if (SA->getValue().ult(BitWidth)) {
3928  Constant *X =
3929  ConstantInt::get(SA->getContext(),
3930  APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
3931  return BinaryOp(Instruction::UDiv, Op->getOperand(0), X);
3932  }
3933  }
3934  return BinaryOp(Op);
3935 
3936  case Instruction::ExtractValue: {
3937  auto *EVI = cast<ExtractValueInst>(Op);
3938  if (EVI->getNumIndices() != 1 || EVI->getIndices()[0] != 0)
3939  break;
3940 
3941  auto *CI = dyn_cast<CallInst>(EVI->getAggregateOperand());
3942  if (!CI)
3943  break;
3944 
3945  if (auto *F = CI->getCalledFunction())
3946  switch (F->getIntrinsicID()) {
3947  case Intrinsic::sadd_with_overflow:
3948  case Intrinsic::uadd_with_overflow: {
3949  if (!isOverflowIntrinsicNoWrap(cast<IntrinsicInst>(CI), DT))
3950  return BinaryOp(Instruction::Add, CI->getArgOperand(0),
3951  CI->getArgOperand(1));
3952 
3953  // Now that we know that all uses of the arithmetic-result component of
3954  // CI are guarded by the overflow check, we can go ahead and pretend
3955  // that the arithmetic is non-overflowing.
3956  if (F->getIntrinsicID() == Intrinsic::sadd_with_overflow)
3957  return BinaryOp(Instruction::Add, CI->getArgOperand(0),
3958  CI->getArgOperand(1), /* IsNSW = */ true,
3959  /* IsNUW = */ false);
3960  else
3961  return BinaryOp(Instruction::Add, CI->getArgOperand(0),
3962  CI->getArgOperand(1), /* IsNSW = */ false,
3963  /* IsNUW*/ true);
3964  }
3965 
3966  case Intrinsic::ssub_with_overflow:
3967  case Intrinsic::usub_with_overflow:
3968  return BinaryOp(Instruction::Sub, CI->getArgOperand(0),
3969  CI->getArgOperand(1));
3970 
3971  case Intrinsic::smul_with_overflow:
3972  case Intrinsic::umul_with_overflow:
3973  return BinaryOp(Instruction::Mul, CI->getArgOperand(0),
3974  CI->getArgOperand(1));
3975  default:
3976  break;
3977  }
3978  }
3979 
3980  default:
3981  break;
3982  }
3983 
3984  return None;
3985 }
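// Worked examples (illustrative, not part of the original source): on i8,
// "xor %x, -128" flips only the sign bit and is mapped to
// BinaryOp(Add, %x, -128); "lshr %x, 3" is mapped to BinaryOp(UDiv, %x, 8),
// matching the rewrites described in the Xor and LShr cases above.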
3986 
3987 const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) {
3988  const Loop *L = LI.getLoopFor(PN->getParent());
3989  if (!L || L->getHeader() != PN->getParent())
3990  return nullptr;
3991 
3992  // The loop may have multiple entrances or multiple exits; we can analyze
3993  // this phi as an addrec if it has a unique entry value and a unique
3994  // backedge value.
3995  Value *BEValueV = nullptr, *StartValueV = nullptr;
3996  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
3997  Value *V = PN->getIncomingValue(i);
3998  if (L->contains(PN->getIncomingBlock(i))) {
3999  if (!BEValueV) {
4000  BEValueV = V;
4001  } else if (BEValueV != V) {
4002  BEValueV = nullptr;
4003  break;
4004  }
4005  } else if (!StartValueV) {
4006  StartValueV = V;
4007  } else if (StartValueV != V) {
4008  StartValueV = nullptr;
4009  break;
4010  }
4011  }
4012  if (BEValueV && StartValueV) {
4013  // While we are analyzing this PHI node, handle its value symbolically.
4014  const SCEV *SymbolicName = getUnknown(PN);
4015  assert(ValueExprMap.find_as(PN) == ValueExprMap.end() &&
4016  "PHI node already processed?");
4017  ValueExprMap.insert({SCEVCallbackVH(PN, this), SymbolicName});
4018 
4019  // Using this symbolic name for the PHI, analyze the value coming around
4020  // the back-edge.
4021  const SCEV *BEValue = getSCEV(BEValueV);
4022 
4023  // NOTE: If BEValue is loop invariant, we know that the PHI node just
4024  // has a special value for the first iteration of the loop.
4025 
4026  // If the value coming around the backedge is an add with the symbolic
4027  // value we just inserted, then we found a simple induction variable!
4028  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BEValue)) {
4029  // If there is a single occurrence of the symbolic value, replace it
4030  // with a recurrence.
4031  unsigned FoundIndex = Add->getNumOperands();
4032  for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
4033  if (Add->getOperand(i) == SymbolicName)
4034  if (FoundIndex == e) {
4035  FoundIndex = i;
4036  break;
4037  }
4038 
4039  if (FoundIndex != Add->getNumOperands()) {
4040  // Create an add with everything but the specified operand.
4041  SmallVector<const SCEV *, 8> Ops;
4042  for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
4043  if (i != FoundIndex)
4044  Ops.push_back(Add->getOperand(i));
4045  const SCEV *Accum = getAddExpr(Ops);
4046 
4047  // This is not a valid addrec if the step amount is varying each
4048  // loop iteration, but is not itself an addrec in this loop.
4049  if (isLoopInvariant(Accum, L) ||
4050  (isa<SCEVAddRecExpr>(Accum) &&
4051  cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
4052  SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
4053 
4054  if (auto BO = MatchBinaryOp(BEValueV, DT)) {
4055  if (BO->Opcode == Instruction::Add && BO->LHS == PN) {
4056  if (BO->IsNUW)
4057  Flags = setFlags(Flags, SCEV::FlagNUW);
4058  if (BO->IsNSW)
4059  Flags = setFlags(Flags, SCEV::FlagNSW);
4060  }
4061  } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(BEValueV)) {
4062  // If the increment is an inbounds GEP, then we know the address
4063  // space cannot be wrapped around. We cannot make any guarantee
4064  // about signed or unsigned overflow because pointers are
4065  // unsigned but we may have a negative index from the base
4066  // pointer. We can guarantee that no unsigned wrap occurs if the
4067  // indices form a positive value.
4068  if (GEP->isInBounds() && GEP->getOperand(0) == PN) {
4069  Flags = setFlags(Flags, SCEV::FlagNW);
4070 
4071  const SCEV *Ptr = getSCEV(GEP->getPointerOperand());
4072  if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr)))
4073  Flags = setFlags(Flags, SCEV::FlagNUW);
4074  }
4075 
4076  // We cannot transfer nuw and nsw flags from subtraction
4077  // operations -- sub nuw X, Y is not the same as add nuw X, -Y
4078  // for instance.
4079  }
4080 
4081  const SCEV *StartVal = getSCEV(StartValueV);
4082  const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);
4083 
4084  // Okay, for the entire analysis of this edge we assumed the PHI
4085  // to be symbolic. We now need to go back and purge all of the
4086  // entries for the scalars that use the symbolic expression.
4087  forgetSymbolicName(PN, SymbolicName);
4088  ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
4089 
4090  // We can add Flags to the post-inc expression only if we
4091  // know that it is *undefined behavior* for BEValueV to
4092  // overflow.
4093  if (auto *BEInst = dyn_cast<Instruction>(BEValueV))
4094  if (isLoopInvariant(Accum, L) && isAddRecNeverPoison(BEInst, L))
4095  (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags);
4096 
4097  return PHISCEV;
4098  }
4099  }
4100  } else {
4101  // Otherwise, this could be a loop like this:
4102  // i = 0; for (j = 1; ..; ++j) { .... i = j; }
4103  // In this case, j = {1,+,1} and BEValue is j.
4104  // Because the other in-value of i (0) fits the evolution of BEValue
4105  // i really is an addrec evolution.
4106  //
4107  // We can generalize this saying that i is the shifted value of BEValue
4108  // by one iteration:
4109  // PHI(f(0), f({1,+,1})) --> f({0,+,1})
4110  const SCEV *Shifted = SCEVShiftRewriter::rewrite(BEValue, L, *this);
4111  const SCEV *Start = SCEVInitRewriter::rewrite(Shifted, L, *this);
4112  if (Shifted != getCouldNotCompute() &&
4113  Start != getCouldNotCompute()) {
4114  const SCEV *StartVal = getSCEV(StartValueV);
4115  if (Start == StartVal) {
4116  // Okay, for the entire analysis of this edge we assumed the PHI
4117  // to be symbolic. We now need to go back and purge all of the
4118  // entries for the scalars that use the symbolic expression.
4119  forgetSymbolicName(PN, SymbolicName);
4120  ValueExprMap[SCEVCallbackVH(PN, this)] = Shifted;
4121  return Shifted;
4122  }
4123  }
4124  }
4125 
4126  // Remove the temporary PHI node SCEV that has been inserted while intending
4127  // to create an AddRecExpr for this PHI node. We cannot keep this temporary,
4128  // as it would prevent later (possibly simpler) SCEV expressions from being
4129  // added to the ValueExprMap.
4130  eraseValueFromMap(PN);
4131  }
4132 
4133  return nullptr;
4134 }
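// Worked example (illustrative, not part of the original source): for the
// canonical induction variable
//   %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
//   %iv.next = add nsw i32 %iv, 1
// BEValue is (%iv + 1), Accum is the loop-invariant 1, and the nsw bit is
// picked up via MatchBinaryOp, so the PHI becomes {0,+,1}<nsw><%loop>.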
4135 
4136 // Checks if the SCEV S is available at BB. S is considered available at BB
4137 // if S can be materialized at BB without introducing a fault.
4138 static bool IsAvailableOnEntry(const Loop *L, DominatorTree &DT, const SCEV *S,
4139  BasicBlock *BB) {
4140  struct CheckAvailable {
4141  bool TraversalDone = false;
4142  bool Available = true;
4143 
4144  const Loop *L = nullptr; // The loop BB is in (can be nullptr)
4145  BasicBlock *BB = nullptr;
4146  DominatorTree &DT;
4147 
4148  CheckAvailable(const Loop *L, BasicBlock *BB, DominatorTree &DT)
4149  : L(L), BB(BB), DT(DT) {}
4150 
4151  bool setUnavailable() {
4152  TraversalDone = true;
4153  Available = false;
4154  return false;
4155  }
4156 
4157  bool follow(const SCEV *S) {
4158  switch (S->getSCEVType()) {
4159  case scConstant: case scTruncate: case scZeroExtend: case scSignExtend:
4160  case scAddExpr: case scMulExpr: case scUMaxExpr: case scSMaxExpr:
4161  // These expressions are available if their operand(s) is/are.
4162  return true;
4163 
4164  case scAddRecExpr: {
4165  // We allow add recurrences on the loop that BB is in, or on some
4166  // outer loop. This guarantees availability because the value of the
4167  // add recurrence at BB is simply the "current" value of the induction
4168  // variable. We can relax this in the future; for instance an add
4169  // recurrence on a sibling dominating loop is also available at BB.
4170  const auto *ARLoop = cast<SCEVAddRecExpr>(S)->getLoop();
4171  if (L && (ARLoop == L || ARLoop->contains(L)))
4172  return true;
4173 
4174  return setUnavailable();
4175  }
4176 
4177  case scUnknown: {
4178  // For SCEVUnknown, we check for simple dominance.
4179  const auto *SU = cast<SCEVUnknown>(S);
4180  Value *V = SU->getValue();
4181 
4182  if (isa<Argument>(V))
4183  return false;
4184 
4185  if (isa<Instruction>(V) && DT.dominates(cast<Instruction>(V), BB))
4186  return false;
4187 
4188  return setUnavailable();
4189  }
4190 
4191  case scUDivExpr:
4192  case scCouldNotCompute:
4193  // We do not try to be smart about these at all.
4194  return setUnavailable();
4195  }
4196  llvm_unreachable("switch should be fully covered!");
4197  }
4198 
4199  bool isDone() { return TraversalDone; }
4200  };
4201 
4202  CheckAvailable CA(L, BB, DT);
4203  SCEVTraversal<CheckAvailable> ST(CA);
4204 
4205  ST.visitAll(S);
4206  return CA.Available;
4207 }
4208 
4209 // Try to match a control flow sequence that branches out at BI and merges back
4210 // at Merge into a "C ? LHS : RHS" select pattern. Return true on a successful
4211 // match.
4212 static bool BrPHIToSelect(DominatorTree &DT, BranchInst *BI, PHINode *Merge,
4213  Value *&C, Value *&LHS, Value *&RHS) {
4214  C = BI->getCondition();
4215 
4216  BasicBlockEdge LeftEdge(BI->getParent(), BI->getSuccessor(0));
4217  BasicBlockEdge RightEdge(BI->getParent(), BI->getSuccessor(1));
4218 
4219  if (!LeftEdge.isSingleEdge())
4220  return false;
4221 
4222  assert(RightEdge.isSingleEdge() && "Follows from LeftEdge.isSingleEdge()");
4223 
4224  Use &LeftUse = Merge->getOperandUse(0);
4225  Use &RightUse = Merge->getOperandUse(1);
4226 
4227  if (DT.dominates(LeftEdge, LeftUse) && DT.dominates(RightEdge, RightUse)) {
4228  LHS = LeftUse;
4229  RHS = RightUse;
4230  return true;
4231  }
4232 
4233  if (DT.dominates(LeftEdge, RightUse) && DT.dominates(RightEdge, LeftUse)) {
4234  LHS = RightUse;
4235  RHS = LeftUse;
4236  return true;
4237  }
4238 
4239  return false;
4240 }
4241 
4242 const SCEV *ScalarEvolution::createNodeFromSelectLikePHI(PHINode *PN) {
4243  auto IsReachable =
4244  [&](BasicBlock *BB) { return DT.isReachableFromEntry(BB); };
4245  if (PN->getNumIncomingValues() == 2 && all_of(PN->blocks(), IsReachable)) {
4246  const Loop *L = LI.getLoopFor(PN->getParent());
4247 
4248  // We don't want to break LCSSA, even in a SCEV expression tree.
4249  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
4250  if (LI.getLoopFor(PN->getIncomingBlock(i)) != L)
4251  return nullptr;
4252 
4253  // Try to match
4254  //
4255  // br %cond, label %left, label %right
4256  // left:
4257  // br label %merge
4258  // right:
4259  // br label %merge
4260  // merge:
4261  // V = phi [ %x, %left ], [ %y, %right ]
4262  //
4263  // as "select %cond, %x, %y"
4264 
4265  BasicBlock *IDom = DT[PN->getParent()]->getIDom()->getBlock();
4266  assert(IDom && "At least the entry block should dominate PN");
4267 
4268  auto *BI = dyn_cast<BranchInst>(IDom->getTerminator());
4269  Value *Cond = nullptr, *LHS = nullptr, *RHS = nullptr;
4270 
4271  if (BI && BI->isConditional() &&
4272  BrPHIToSelect(DT, BI, PN, Cond, LHS, RHS) &&
4273  IsAvailableOnEntry(L, DT, getSCEV(LHS), PN->getParent()) &&
4274  IsAvailableOnEntry(L, DT, getSCEV(RHS), PN->getParent()))
4275  return createNodeForSelectOrPHI(PN, Cond, LHS, RHS);
4276  }
4277 
4278  return nullptr;
4279 }
4280 
4281 const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
4282  if (const SCEV *S = createAddRecFromPHI(PN))
4283  return S;
4284 
4285  if (const SCEV *S = createNodeFromSelectLikePHI(PN))
4286  return S;
4287 
4288  // If the PHI has a single incoming value, follow that value, unless the
4289  // PHI's incoming blocks are in a different loop, in which case doing so
4290  // risks breaking LCSSA form. Instcombine would normally zap these, but
4291  // it doesn't have DominatorTree information, so it may miss cases.
4292  if (Value *V = SimplifyInstruction(PN, getDataLayout(), &TLI, &DT, &AC))
4293  if (LI.replacementPreservesLCSSAForm(PN, V))
4294  return getSCEV(V);
4295 
4296  // If it's not a loop phi, we can't handle it yet.
4297  return getUnknown(PN);
4298 }
4299 
4300 const SCEV *ScalarEvolution::createNodeForSelectOrPHI(Instruction *I,
4301  Value *Cond,
4302  Value *TrueVal,
4303  Value *FalseVal) {
4304  // Handle "constant" branch or select. This can occur for instance when a
4305  // loop pass transforms an inner loop and moves on to process the outer loop.
4306  if (auto *CI = dyn_cast<ConstantInt>(Cond))
4307  return getSCEV(CI->isOne() ? TrueVal : FalseVal);
4308 
4309  // Try to match some simple smax or umax patterns.
4310  auto *ICI = dyn_cast<ICmpInst>(Cond);
4311  if (!ICI)
4312  return getUnknown(I);
4313 
4314  Value *LHS = ICI->getOperand(0);
4315  Value *RHS = ICI->getOperand(1);
4316 
4317  switch (ICI->getPredicate()) {
4318  case ICmpInst::ICMP_SLT:
4319  case ICmpInst::ICMP_SLE:
4320  std::swap(LHS, RHS);
4321  LLVM_FALLTHROUGH;
4322  case ICmpInst::ICMP_SGT:
4323  case ICmpInst::ICMP_SGE:
4324  // a >s b ? a+x : b+x -> smax(a, b)+x
4325  // a >s b ? b+x : a+x -> smin(a, b)+x
4326  if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType())) {
4327  const SCEV *LS = getNoopOrSignExtend(getSCEV(LHS), I->getType());
4328  const SCEV *RS = getNoopOrSignExtend(getSCEV(RHS), I->getType());
4329  const SCEV *LA = getSCEV(TrueVal);
4330  const SCEV *RA = getSCEV(FalseVal);
4331  const SCEV *LDiff = getMinusSCEV(LA, LS);
4332  const SCEV *RDiff = getMinusSCEV(RA, RS);
4333  if (LDiff == RDiff)
4334  return getAddExpr(getSMaxExpr(LS, RS), LDiff);
4335  LDiff = getMinusSCEV(LA, RS);
4336  RDiff = getMinusSCEV(RA, LS);
4337  if (LDiff == RDiff)
4338  return getAddExpr(getSMinExpr(LS, RS), LDiff);
4339  }
4340  break;
4341  case ICmpInst::ICMP_ULT:
4342  case ICmpInst::ICMP_ULE:
4343  std::swap(LHS, RHS);
4344  LLVM_FALLTHROUGH;
4345  case ICmpInst::ICMP_UGT:
4346  case ICmpInst::ICMP_UGE:
4347  // a >u b ? a+x : b+x -> umax(a, b)+x
4348  // a >u b ? b+x : a+x -> umin(a, b)+x
4349  if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType())) {
4350  const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType());
4351  const SCEV *RS = getNoopOrZeroExtend(getSCEV(RHS), I->getType());
4352  const SCEV *LA = getSCEV(TrueVal);
4353  const SCEV *RA = getSCEV(FalseVal);
4354  const SCEV *LDiff = getMinusSCEV(LA, LS);
4355  const SCEV *RDiff = getMinusSCEV(RA, RS);
4356  if (LDiff == RDiff)
4357  return getAddExpr(getUMaxExpr(LS, RS), LDiff);
4358  LDiff = getMinusSCEV(LA, RS);
4359  RDiff = getMinusSCEV(RA, LS);
4360  if (LDiff == RDiff)
4361  return getAddExpr(getUMinExpr(LS, RS), LDiff);
4362  }
4363  break;
4364  case ICmpInst::ICMP_NE:
4365  // n != 0 ? n+x : 1+x -> umax(n, 1)+x
4366  if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType()) &&
4367  isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) {
4368  const SCEV *One = getOne(I->getType());
4369  const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType());
4370  const SCEV *LA = getSCEV(TrueVal);
4371  const SCEV *RA = getSCEV(FalseVal);
4372  const SCEV *LDiff = getMinusSCEV(LA, LS);
4373  const SCEV *RDiff = getMinusSCEV(RA, One);
4374  if (LDiff == RDiff)
4375  return getAddExpr(getUMaxExpr(One, LS), LDiff);
4376  }
4377  break;
4378  case ICmpInst::ICMP_EQ:
4379  // n == 0 ? 1+x : n+x -> umax(n, 1)+x
4380  if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType()) &&
4381  isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) {
4382  const SCEV *One = getOne(I->getType());
4383  const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType());
4384  const SCEV *LA = getSCEV(TrueVal);
4385  const SCEV *RA = getSCEV(FalseVal);
4386  const SCEV *LDiff = getMinusSCEV(LA, One);
4387  const SCEV *RDiff = getMinusSCEV(RA, LS);
4388  if (LDiff == RDiff)
4389  return getAddExpr(getUMaxExpr(One, LS), LDiff);
4390  }
4391  break;
4392  default:
4393  break;
4394  }
4395 
4396  return getUnknown(I);
4397 }
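// Worked example (illustrative, not part of the original source): for
//   %c = icmp sgt i32 %a, %b
//   %s = select i1 %c, i32 %a, i32 %b
// the ICMP_SGT case above sees LDiff == RDiff == 0 and returns
// smax(%a, %b); with the select arms swapped it returns smin(%a, %b).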
4398 
4399 /// Expand GEP instructions into add and multiply operations. This allows them
4400 /// to be analyzed by regular SCEV code.
4401 const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
4402  // Don't attempt to analyze GEPs over unsized objects.
4403  if (!GEP->getSourceElementType()->isSized())
4404  return getUnknown(GEP);
4405 
4406  SmallVector<const SCEV *, 4> IndexExprs;
4407  for (auto Index = GEP->idx_begin(); Index != GEP->idx_end(); ++Index)
4408  IndexExprs.push_back(getSCEV(*Index));
4409  return getGEPExpr(GEP, IndexExprs);
4410 }
4411 
4412 uint32_t
4414  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
4415  return C->getAPInt().countTrailingZeros();
4416 
4417  if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(S))
4418  return std::min(GetMinTrailingZeros(T->getOperand()),
4419  (uint32_t)getTypeSizeInBits(T->getType()));
4420 
4421  if (const SCEVZeroExtendExpr *E = dyn_cast<SCEVZeroExtendExpr>(S)) {
4422  uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
4423  return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ?
4424  getTypeSizeInBits(E->getType()) : OpRes;
4425  }
4426 
4427  if (const SCEVSignExtendExpr *E = dyn_cast<SCEVSignExtendExpr>(S)) {
4428  uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
4429  return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ?
4430  getTypeSizeInBits(E->getType()) : OpRes;
4431  }
4432 
4433  if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
4434  // The result is the min of all operands results.
4435  uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
4436  for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
4437  MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
4438  return MinOpRes;
4439  }
4440 
4441  if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
4442  // The result is the sum of all operands results.
4443  uint32_t SumOpRes = GetMinTrailingZeros(M->getOperand(0));
4444  uint32_t BitWidth = getTypeSizeInBits(M->getType());
4445  for (unsigned i = 1, e = M->getNumOperands();
4446  SumOpRes != BitWidth && i != e; ++i)
4447  SumOpRes = std::min(SumOpRes + GetMinTrailingZeros(M->getOperand(i)),
4448  BitWidth);
4449  return SumOpRes;
4450  }
4451 
4452  if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
4453  // The result is the min of all operands results.
4454  uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
4455  for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
4456  MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
4457  return MinOpRes;
4458  }
4459 
4460  if (const SCEVSMaxExpr *M = dyn_cast<SCEVSMaxExpr>(S)) {
4461  // The result is the min of all operands results.
4462  uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
4463  for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
4464  MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
4465  return MinOpRes;
4466  }
4467 
4468  if (const SCEVUMaxExpr *M = dyn_cast<SCEVUMaxExpr>(S)) {
4469  // The result is the min of all operands results.
4470  uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
4471  for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
4472  MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
4473  return MinOpRes;
4474  }
4475 
4476  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
4477  // For a SCEVUnknown, ask ValueTracking.
4478  unsigned BitWidth = getTypeSizeInBits(U->getType());
4479  APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
4480  computeKnownBits(U->getValue(), Zeros, Ones, getDataLayout(), 0, &AC,
4481  nullptr, &DT);
4482  return Zeros.countTrailingOnes();
4483  }
4484 
4485  // SCEVUDivExpr
4486  return 0;
4487 }
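// Worked example (illustrative, not part of the original source): for
// S = (4 * %x) with GetMinTrailingZeros(%x) == 1, the mul case above sums
// the operands' counts, so S has at least 2 + 1 == 3 trailing zero bits.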
4488 
4489 /// Helper method to assign a range to V from metadata present in the IR.
4490 static Optional<ConstantRange> GetRangeFromMetadata(Value *V) {
4491  if (Instruction *I = dyn_cast<Instruction>(V))
4492  if (MDNode *MD = I->getMetadata(LLVMContext::MD_range))
4493  return getConstantRangeFromMetadata(*MD);
4494 
4495  return None;
4496 }
4497 
4498 /// Determine the range for a particular SCEV. If SignHint is
4499 /// HINT_RANGE_UNSIGNED (resp. HINT_RANGE_SIGNED) then getRange prefers ranges
4500 /// with a "cleaner" unsigned (resp. signed) representation.
4501 ConstantRange
4502 ScalarEvolution::getRange(const SCEV *S,
4503  ScalarEvolution::RangeSignHint SignHint) {
4504  DenseMap<const SCEV *, ConstantRange> &Cache =
4505  SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED ? UnsignedRanges
4506  : SignedRanges;
4507 
4508  // See if we've computed this range already.
4509  DenseMap<const SCEV *, ConstantRange>::iterator I = Cache.find(S);
4510  if (I != Cache.end())
4511  return I->second;
4512 
4513  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
4514  return setRange(C, SignHint, ConstantRange(C->getAPInt()));
4515 
4516  unsigned BitWidth = getTypeSizeInBits(S->getType());
4517  ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);
4518 
4519  // If the value has known trailing zeros, the maximum value will have those
4520  // known trailing zeros as well.
4521  uint32_t TZ = GetMinTrailingZeros(S);
4522  if (TZ != 0) {
4523  if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED)
4524  ConservativeResult =
4525  ConstantRange(APInt::getMinValue(BitWidth),
4526  APInt::getMaxValue(BitWidth).lshr(TZ).shl(TZ) + 1);
4527  else
4528  ConservativeResult = ConstantRange(
4529  APInt::getSignedMinValue(BitWidth),
4530  APInt::getSignedMaxValue(BitWidth).ashr(TZ).shl(TZ) + 1);
4531  }
4532 
4533  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
4534  ConstantRange X = getRange(Add->getOperand(0), SignHint);
4535  for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
4536  X = X.add(getRange(Add->getOperand(i), SignHint));
4537  return setRange(Add, SignHint, ConservativeResult.intersectWith(X));
4538  }
4539 
4540  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
4541  ConstantRange X = getRange(Mul->getOperand(0), SignHint);
4542  for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
4543  X = X.multiply(getRange(Mul->getOperand(i), SignHint));
4544  return setRange(Mul, SignHint, ConservativeResult.intersectWith(X));
4545  }
4546 
4547  if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
4548  ConstantRange X = getRange(SMax->getOperand(0), SignHint);
4549  for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
4550  X = X.smax(getRange(SMax->getOperand(i), SignHint));
4551  return setRange(SMax, SignHint, ConservativeResult.intersectWith(X));
4552  }
4553 
4554  if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
4555  ConstantRange X = getRange(UMax->getOperand(0), SignHint);
4556  for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
4557  X = X.umax(getRange(UMax->getOperand(i), SignHint));
4558  return setRange(UMax, SignHint, ConservativeResult.intersectWith(X));
4559  }
4560 
4561  if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
4562  ConstantRange X = getRange(UDiv->getLHS(), SignHint);
4563  ConstantRange Y = getRange(UDiv->getRHS(), SignHint);
4564  return setRange(UDiv, SignHint,
4565  ConservativeResult.intersectWith(X.udiv(Y)));
4566  }
4567 
4568  if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
4569  ConstantRange X = getRange(ZExt->getOperand(), SignHint);
4570  return setRange(ZExt, SignHint,
4571  ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
4572  }
4573 
4574  if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
4575  ConstantRange X = getRange(SExt->getOperand(), SignHint);
4576  return setRange(SExt, SignHint,
4577  ConservativeResult.intersectWith(X.signExtend(BitWidth)));
4578  }
4579 
4580  if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
4581  ConstantRange X = getRange(Trunc->getOperand(), SignHint);
4582  return setRange(Trunc, SignHint,
4583  ConservativeResult.intersectWith(X.truncate(BitWidth)));
4584  }
4585 
4586  if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
4587  // If there's no unsigned wrap, the value will never be less than its
4588  // initial value.
4589  if (AddRec->hasNoUnsignedWrap())
4590  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart()))
4591  if (!C->getValue()->isZero())
4592  ConservativeResult = ConservativeResult.intersectWith(
4593  ConstantRange(C->getAPInt(), APInt(BitWidth, 0)));
4594 
4595  // If there's no signed wrap, and all the operands have the same sign or
4596  // zero, the value won't ever change sign.
4597  if (AddRec->hasNoSignedWrap()) {
4598  bool AllNonNeg = true;
4599  bool AllNonPos = true;
4600  for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
4601  if (!isKnownNonNegative(AddRec->getOperand(i))) AllNonNeg = false;
4602  if (!isKnownNonPositive(AddRec->getOperand(i))) AllNonPos = false;
4603  }
4604  if (AllNonNeg)
4605  ConservativeResult = ConservativeResult.intersectWith(
4606  ConstantRange(APInt(BitWidth, 0),
4607  APInt::getSignedMinValue(BitWidth)));
4608  else if (AllNonPos)
4609  ConservativeResult = ConservativeResult.intersectWith(
4610  ConstantRange(APInt::getSignedMinValue(BitWidth),
4611  APInt(BitWidth, 1)));
4612  }
4613 
4614  // TODO: non-affine addrec
4615  if (AddRec->isAffine()) {
4616  const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
4617  if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
4618  getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
4619  auto RangeFromAffine = getRangeForAffineAR(
4620  AddRec->getStart(), AddRec->getStepRecurrence(*this), MaxBECount,
4621  BitWidth);
4622  if (!RangeFromAffine.isFullSet())
4623  ConservativeResult =
4624  ConservativeResult.intersectWith(RangeFromAffine);
4625 
4626  auto RangeFromFactoring = getRangeViaFactoring(
4627  AddRec->getStart(), AddRec->getStepRecurrence(*this), MaxBECount,
4628  BitWidth);
4629  if (!RangeFromFactoring.isFullSet())
4630  ConservativeResult =
4631  ConservativeResult.intersectWith(RangeFromFactoring);
4632  }
4633  }
4634 
4635  return setRange(AddRec, SignHint, ConservativeResult);
4636  }
4637 
4638  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
4639  // Check if the IR explicitly contains !range metadata.
4640  Optional<ConstantRange> MDRange = GetRangeFromMetadata(U->getValue());
4641  if (MDRange.hasValue())
4642  ConservativeResult = ConservativeResult.intersectWith(MDRange.getValue());
4643 
4644  // Split here to avoid paying the compile-time cost of calling both
4645  // computeKnownBits and ComputeNumSignBits. This restriction can be lifted
4646  // if needed.
4647  const DataLayout &DL = getDataLayout();
4648  if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED) {
4649  // For a SCEVUnknown, ask ValueTracking.
4650  APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
4651  computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, &AC, nullptr, &DT);
4652  if (Ones != ~Zeros + 1)
4653  ConservativeResult =
4654  ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1));
4655  } else {
4656  assert(SignHint == ScalarEvolution::HINT_RANGE_SIGNED &&
4657  "generalize as needed!");
4658  unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, &AC, nullptr, &DT);
4659  if (NS > 1)
4660  ConservativeResult = ConservativeResult.intersectWith(
4661  ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
4662  APInt::getSignedMaxValue(BitWidth).ashr(NS - 1) + 1));
4663  }
4664 
4665  return setRange(U, SignHint, ConservativeResult);
4666  }
4667 
4668  return setRange(S, SignHint, ConservativeResult);
4669 }
4670 
4671 ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start,
4672  const SCEV *Step,
4673  const SCEV *MaxBECount,
4674  unsigned BitWidth) {
4675  assert(!isa<SCEVCouldNotCompute>(MaxBECount) &&
4676  getTypeSizeInBits(MaxBECount->getType()) <= BitWidth &&
4677  "Precondition!");
4678 
4679  ConstantRange Result(BitWidth, /* isFullSet = */ true);
4680 
4681  // Check for overflow. This must be done with ConstantRange arithmetic
4682  // because we could be called from within the ScalarEvolution overflow
4683  // checking code.
4684 
4685  MaxBECount = getNoopOrZeroExtend(MaxBECount, Start->getType());
4686  ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
4687  ConstantRange ZExtMaxBECountRange = MaxBECountRange.zextOrTrunc(BitWidth * 2);
4688 
4689  ConstantRange StepSRange = getSignedRange(Step);
4690  ConstantRange SExtStepSRange = StepSRange.sextOrTrunc(BitWidth * 2);
4691 
4692  ConstantRange StartURange = getUnsignedRange(Start);
4693  ConstantRange EndURange =
4694  StartURange.add(MaxBECountRange.multiply(StepSRange));
4695 
4696  // Check for unsigned overflow.
4697  ConstantRange ZExtStartURange = StartURange.zextOrTrunc(BitWidth * 2);
4698  ConstantRange ZExtEndURange = EndURange.zextOrTrunc(BitWidth * 2);
4699  if (ZExtStartURange.add(ZExtMaxBECountRange.multiply(SExtStepSRange)) ==
4700  ZExtEndURange) {
4701  APInt Min = APIntOps::umin(StartURange.getUnsignedMin(),
4702  EndURange.getUnsignedMin());
4703  APInt Max = APIntOps::umax(StartURange.getUnsignedMax(),
4704  EndURange.getUnsignedMax());
4705  bool IsFullRange = Min.isMinValue() && Max.isMaxValue();
4706  if (!IsFullRange)
4707  Result =
4708  Result.intersectWith(ConstantRange(Min, Max + 1));
4709  }
4710 
4711  ConstantRange StartSRange = getSignedRange(Start);
4712  ConstantRange EndSRange =
4713  StartSRange.add(MaxBECountRange.multiply(StepSRange));
4714 
4715  // Check for signed overflow. This must be done with ConstantRange
4716  // arithmetic because we could be called from within the ScalarEvolution
4717  // overflow checking code.
4718  ConstantRange SExtStartSRange = StartSRange.sextOrTrunc(BitWidth * 2);
4719  ConstantRange SExtEndSRange = EndSRange.sextOrTrunc(BitWidth * 2);
4720  if (SExtStartSRange.add(ZExtMaxBECountRange.multiply(SExtStepSRange)) ==
4721  SExtEndSRange) {
4722  APInt Min =
4723  APIntOps::smin(StartSRange.getSignedMin(), EndSRange.getSignedMin());
4724  APInt Max =
4725  APIntOps::smax(StartSRange.getSignedMax(), EndSRange.getSignedMax());
4726  bool IsFullRange = Min.isMinSignedValue() && Max.isMaxSignedValue();
4727  if (!IsFullRange)
4728  Result =
4729  Result.intersectWith(ConstantRange(Min, Max + 1));
4730  }
4731 
4732  return Result;
4733 }
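// Worked example for getRangeForAffineAR: with Start = 0, Step = 1 (in i8)
// and MaxBECount = 9, both overflow checks succeed, so the unsigned case
// gives [umin(0, 9), umax(0, 9) + 1) = [0, 10), the signed case gives the
// same range, and the result is [0, 10).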
4734 
4735 ConstantRange ScalarEvolution::getRangeViaFactoring(const SCEV *Start,
4736  const SCEV *Step,
4737  const SCEV *MaxBECount,
4738  unsigned BitWidth) {
4739  // RangeOf({C?A:B,+,C?P:Q}) == RangeOf(C?{A,+,P}:{B,+,Q})
4740  // == RangeOf({A,+,P}) union RangeOf({B,+,Q})
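  //
  // E.g., with Start = (%c ? 2 : 7) and Step = (%c ? 1 : 3), this computes
  // the union of the ranges of {2,+,1} and {7,+,3}.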
4741 
4742  struct SelectPattern {
4743  Value *Condition = nullptr;
4744  APInt TrueValue;
4745  APInt FalseValue;
4746 
4747  explicit SelectPattern(ScalarEvolution &SE, unsigned BitWidth,
4748  const SCEV *S) {
4749  Optional<unsigned> CastOp;
4750  APInt Offset(BitWidth, 0);
4751 
4752  assert(SE.getTypeSizeInBits(S->getType()) == BitWidth &&
4753  "Should be!");
4754 
4755  // Peel off a constant offset:
4756  if (auto *SA = dyn_cast<SCEVAddExpr>(S)) {
4757  // In the future we could consider being smarter here and handle
4758  // {Start+Step,+,Step} too.
4759  if (SA->getNumOperands() != 2 || !isa<SCEVConstant>(SA->getOperand(0)))
4760  return;
4761 
4762  Offset = cast<SCEVConstant>(SA->getOperand(0))->getAPInt();
4763  S = SA->getOperand(1);
4764  }
4765 
4766  // Peel off a cast operation
4767  if (auto *SCast = dyn_cast<SCEVCastExpr>(S)) {
4768  CastOp = SCast->getSCEVType();
4769  S = SCast->getOperand();
4770  }
4771 
4772  using namespace llvm::PatternMatch;
4773 
4774  auto *SU = dyn_cast<SCEVUnknown>(S);
4775  const APInt *TrueVal, *FalseVal;
4776  if (!SU ||
4777  !match(SU->getValue(), m_Select(m_Value(Condition), m_APInt(TrueVal),
4778  m_APInt(FalseVal)))) {
4779  Condition = nullptr;
4780  return;
4781  }
4782 
4783  TrueValue = *TrueVal;
4784  FalseValue = *FalseVal;
4785 
4786  // Re-apply the cast we peeled off earlier
4787  if (CastOp.hasValue())
4788  switch (*CastOp) {
4789  default:
4790  llvm_unreachable("Unknown SCEV cast type!");
4791 
4792  case scTruncate:
4793  TrueValue = TrueValue.trunc(BitWidth);
4794  FalseValue = FalseValue.trunc(BitWidth);
4795  break;
4796  case scZeroExtend:
4797  TrueValue = TrueValue.zext(BitWidth);
4798  FalseValue = FalseValue.zext(BitWidth);
4799  break;
4800  case scSignExtend:
4801  TrueValue = TrueValue.sext(BitWidth);
4802  FalseValue = FalseValue.sext(BitWidth);
4803  break;
4804  }
4805 
4806  // Re-apply the constant offset we peeled off earlier
4807  TrueValue += Offset;
4808  FalseValue += Offset;
4809  }
4810 
4811  bool isRecognized() { return Condition != nullptr; }
4812  };
4813 
4814  SelectPattern StartPattern(*this, BitWidth, Start);
4815  if (!StartPattern.isRecognized())
4816  return ConstantRange(BitWidth, /* isFullSet = */ true);
4817 
4818  SelectPattern StepPattern(*this, BitWidth, Step);
4819  if (!StepPattern.isRecognized())
4820  return ConstantRange(BitWidth, /* isFullSet = */ true);
4821 
4822  if (StartPattern.Condition != StepPattern.Condition) {
4823  // We don't handle this case today; but we could, by considering four
4824  // possibilities below instead of two. I'm not sure if there are cases where
4825  // that will help over what getRange already does, though.
4826  return ConstantRange(BitWidth, /* isFullSet = */ true);
4827  }
4828 
4829  // NB! Calling ScalarEvolution::getConstant is fine, but we should not try to
4830  // construct arbitrary general SCEV expressions here. This function is called
4831  // from deep in the call stack, and calling getSCEV (on a sext instruction,
4832  // say) can end up caching a suboptimal value.
4833 
4834  // FIXME: without the explicit `this` receiver below, MSVC errors out with
4835  // C2352 and C2512 (otherwise it isn't needed).
4836 
4837  const SCEV *TrueStart = this->getConstant(StartPattern.TrueValue);
4838  const SCEV *TrueStep = this->getConstant(StepPattern.TrueValue);
4839  const SCEV *FalseStart = this->getConstant(StartPattern.FalseValue);
4840  const SCEV *FalseStep = this->getConstant(StepPattern.FalseValue);
4841 
4842  ConstantRange TrueRange =
4843  this->getRangeForAffineAR(TrueStart, TrueStep, MaxBECount, BitWidth);
4844  ConstantRange FalseRange =
4845  this->getRangeForAffineAR(FalseStart, FalseStep, MaxBECount, BitWidth);
4846 
4847  return TrueRange.unionWith(FalseRange);
4848 }
4849 
4850 SCEV::NoWrapFlags ScalarEvolution::getNoWrapFlagsFromUB(const Value *V) {
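  // E.g., "%a = add nsw i32 %x, 1" yields FlagNSW, provided
  // isSCEVExprNeverPoison below proves the flag may be transferred to the
  // SCEV; a plain "add" returns FlagAnyWrap without that more expensive
  // check.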
4851  if (isa<ConstantExpr>(V)) return SCEV::FlagAnyWrap;
4852  const BinaryOperator *BinOp = cast<BinaryOperator>(V);
4853 
4854  // Return early if there are no flags to propagate to the SCEV.
4855  SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
4856  if (BinOp->hasNoUnsignedWrap())
4857  Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW);
4858  if (BinOp->hasNoSignedWrap())
4859  Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW);
4860  if (Flags == SCEV::FlagAnyWrap)
4861  return SCEV::FlagAnyWrap;
4862 
4863  return isSCEVExprNeverPoison(BinOp) ? Flags : SCEV::FlagAnyWrap;
4864 }
4865 
4866 bool ScalarEvolution::isSCEVExprNeverPoison(const Instruction *I) {
4867  // Here we check that I is in the header of the innermost loop containing I,
4868  // since we only deal with instructions in the loop header. The actual loop we
4869  // need to check later will come from an add recurrence, but getting that
4870  // requires computing the SCEV of the operands, which can be expensive. This
4871  // check we can do cheaply to rule out some cases early.
4872  Loop *InnermostContainingLoop = LI.getLoopFor(I->getParent());
4873  if (InnermostContainingLoop == nullptr ||
4874  InnermostContainingLoop->getHeader() != I->getParent())
4875  return false;
4876 
4877  // Only proceed if we can prove that I does not yield poison.
4878  if (!isKnownNotFullPoison(I)) return false;
4879 
4880  // At this point we know that if I is executed, then it does not wrap
4881  // according to at least one of NSW or NUW. If I is not executed, then we do
4882  // not know if the calculation that I represents would wrap. Multiple
4883  // instructions can map to the same SCEV. If we apply NSW or NUW from I to
4884  // the SCEV, we must guarantee no wrapping for that SCEV also when it is
4885  // derived from other instructions that map to the same SCEV. We cannot make
4886  // that guarantee for cases where I is not executed. So we need to find the
4887  // loop that I is considered in relation to and prove that I is executed for
4888  // every iteration of that loop. That implies that the value that I
4889  // calculates does not wrap anywhere in the loop, so then we can apply the
4890  // flags to the SCEV.
4891  //
4892  // We check isLoopInvariant to disambiguate in case we are adding recurrences
4893  // from different loops, so that we know which loop to prove that I is
4894  // executed in.
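  //
  // E.g., "%x = add nsw i32 %iv, 1" in the header of L and a plain
  // "%y = add i32 %iv, 1" elsewhere map to the same SCEV; tagging that SCEV
  // with <nsw> is only safe if %x executes (and hence cannot wrap) on every
  // iteration of L.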
4895  for (unsigned OpIndex = 0; OpIndex < I->getNumOperands(); ++OpIndex) {
4896  // I could be an extractvalue from a call to an overflow intrinsic.
4897  // TODO: We can do better here in some cases.
4898  if (!isSCEVable(I->getOperand(OpIndex)->getType()))
4899  return false;
4900  const SCEV *Op = getSCEV(I->getOperand(OpIndex));
4901  if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
4902  bool AllOtherOpsLoopInvariant = true;
4903  for (unsigned OtherOpIndex = 0; OtherOpIndex < I->getNumOperands();
4904  ++OtherOpIndex) {
4905  if (OtherOpIndex != OpIndex) {
4906  const SCEV *OtherOp = getSCEV(I->getOperand(OtherOpIndex));
4907  if (!isLoopInvariant(OtherOp, AddRec->getLoop())) {
4908  AllOtherOpsLoopInvariant = false;
4909  break;
4910  }
4911  }
4912  }
4913  if (AllOtherOpsLoopInvariant &&
4914  isGuaranteedToExecuteForEveryIteration(I, AddRec->getLoop()))
4915  return true;
4916  }
4917  }
4918  return false;
4919 }
4920 
4921 bool ScalarEvolution::isAddRecNeverPoison(const Instruction *I, const Loop *L) {
4922  // If we know that \c I can never be poison, period, then that's enough.
4923  if (isSCEVExprNeverPoison(I))
4924  return true;
4925 
4926  // For an add recurrence specifically, we assume that infinite loops without
4927  // side effects are undefined behavior, and then reason as follows:
4928  //
4929  // If the add recurrence is poison in any iteration, it is poison on all
4930  // future iterations (since incrementing poison yields poison). If the result
4931  // of the add recurrence is fed into the loop latch condition and the loop
4932  // does not contain any throws or exiting blocks other than the latch, we now
4933  // have the ability to "choose" whether the backedge is taken or not (by
4934  // choosing a sufficiently evil value for the poison feeding into the branch)
4935  // for every iteration including and after the one in which \p I first became
4936  // poison. There are two possibilities (let's call the iteration in which \p
4937  // I first became poison as K):
4938  //
4939  // 1. In the set of iterations including and after K, the loop body executes
4940  // no side effects. In this case executing the backedge an infinite number
4941  // of times will yield undefined behavior.
4942  //
4943  // 2. In the set of iterations including and after K, the loop body executes
4944  // at least one side effect. In this case, that specific instance of side
4945  // effect is control dependent on poison, which also yields undefined
4946  // behavior.
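  //
  // E.g., if the post-inc recurrence feeds only the latch test
  // "icmp eq i32 %iv.next, %n" and the body is free of side effects and
  // throws, either case applies, so poison in the recurrence would imply
  // undefined behavior, letting us assume it never occurs.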
4947 
4948  auto *ExitingBB = L->getExitingBlock();
4949  auto *LatchBB = L->getLoopLatch();
4950  if (!ExitingBB || !LatchBB || ExitingBB != LatchBB)
4951  return false;
4952 
4953  SmallPtrSet<const Instruction *, 16> Pushed;
4954  SmallVector<const Instruction *, 8> PoisonStack;
4955 
4956  // We start by assuming \c I, the post-inc add recurrence, is poison. Only
4957  // things that are known to be fully poison under that assumption go on the
4958  // PoisonStack.
4959  Pushed.insert(I);
4960  PoisonStack.push_back(I);
4961 
4962  bool LatchControlDependentOnPoison = false;
4963  while (!PoisonStack.empty() && !LatchControlDependentOnPoison) {
4964  const Instruction *Poison = PoisonStack.pop_back_val();
4965 
4966  for (auto *PoisonUser : Poison->users()) {
4967  if (propagatesFullPoison(cast<Instruction>(PoisonUser))) {
4968  if (Pushed.insert(cast<Instruction>(PoisonUser)).second)
4969  PoisonStack.push_back(cast<Instruction>(PoisonUser));
4970  } else if (auto *BI = dyn_cast<BranchInst>(PoisonUser)) {
4971  assert(BI->isConditional() && "Only possibility!");
4972  if (BI->getParent() == LatchBB) {
4973  LatchControlDependentOnPoison = true;
4974  break;
4975  }
4976  }
4977  }
4978  }
4979 
4980  return LatchControlDependentOnPoison && loopHasNoAbnormalExits(L);
4981 }
4982 
4983 ScalarEvolution::LoopProperties
4984 ScalarEvolution::getLoopProperties(const Loop *L) {
4985  typedef ScalarEvolution::LoopProperties LoopProperties;
4986 
4987  auto Itr = LoopPropertiesCache.find(L);
4988  if (Itr == LoopPropertiesCache.end()) {
4989  auto HasSideEffects = [](Instruction *I) {
4990  if (auto *SI = dyn_cast<StoreInst>(I))
4991  return !SI->isSimple();
4992 
4993  return I->mayHaveSideEffects();
4994  };
4995 
4996  LoopProperties LP = {/* HasNoAbnormalExits */ true,
4997  /*HasNoSideEffects*/ true};
4998 
4999  for (auto *BB : L->getBlocks())
5000  for (auto &I : *BB) {
5001  if (!isGuaranteedToTransferExecutionToSuccessor(&I))
5002  LP.HasNoAbnormalExits = false;
5003  if (HasSideEffects(&I))
5004  LP.HasNoSideEffects = false;
5005  if (!LP.HasNoAbnormalExits && !LP.HasNoSideEffects)
5006  break; // We're already as pessimistic as we can get.
5007  }
5008 
5009  auto InsertPair = LoopPropertiesCache.insert({L, LP});
5010  assert(InsertPair.second && "We just checked!");
5011  Itr = InsertPair.first;
5012  }
5013 
5014  return Itr->second;
5015 }
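// For illustration: a loop containing a may-throw call gets
// HasNoAbnormalExits = false (such a call is not guaranteed to transfer
// execution to its successor), while a loop containing a volatile or atomic
// store gets HasNoSideEffects = false via the non-simple-store check.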
5016 
5017 const SCEV *ScalarEvolution::createSCEV(Value *V) {
5018  if (!isSCEVable(V->getType()))
5019  return getUnknown(V);
5020 
5021  if (Instruction *I = dyn_cast<Instruction>(V)) {
5022  // Don't attempt to analyze instructions in blocks that aren't
5023  // reachable. Such instructions don't matter, and they aren't required
5024  // to obey basic rules for definitions dominating uses which this
5025  // analysis depends on.
5026  if (!DT.isReachableFromEntry(I->getParent()))
5027  return getUnknown(V);
5028  } else if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
5029  return getConstant(CI);
5030  else if (isa<ConstantPointerNull>(V))
5031  return getZero(V->getType());
5032  else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
5033  return GA->isInterposable() ? getUnknown(V) : getSCEV(GA->getAliasee());
5034  else if (!isa<ConstantExpr>(V))
5035  return getUnknown(V);
5036 
5037  Operator *U = cast<Operator>(V);
5038  if (auto BO = MatchBinaryOp(U, DT)) {
5039  switch (BO->Opcode) {
5040  case Instruction::Add: {
5041  // The simple thing to do would be to just call getSCEV on both operands
5042  // and call getAddExpr with the result. However if we're looking at a
5043  // bunch of things all added together, this can be quite inefficient,
5044  // because it leads to N-1 getAddExpr calls for N ultimate operands.
5045  // Instead, gather up all the operands and make a single getAddExpr call.
5046  // LLVM IR canonical form means we need only traverse the left operands.
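  // E.g., "%t = add i32 %a, %b" followed by "%u = add i32 %t, %c" (assuming
  // %t carries no wrap flags and has no cached SCEV) collects {%c, %b, %a}
  // and emits a single getAddExpr call.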
5047  SmallVector<const SCEV *, 4> AddOps;
5048  do {
5049  if (BO->Op) {
5050  if (auto *OpSCEV = getExistingSCEV(BO->Op)) {
5051  AddOps.push_back(OpSCEV);
5052  break;
5053  }
5054 
5055  // If a NUW or NSW flag can be applied to the SCEV for this
5056  // addition, then compute the SCEV for this addition by itself
5057  // with a separate call to getAddExpr. We need to do that
5058  // instead of pushing the operands of the addition onto AddOps,
5059  // since the flags are only known to apply to this particular
5060  // addition - they may not apply to other additions that can be
5061  // formed with operands from AddOps.
5062  const SCEV *RHS = getSCEV(BO->RHS);
5063  SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(BO->Op);
5064  if (Flags != SCEV::FlagAnyWrap) {
5065  const SCEV *LHS = getSCEV(BO->LHS);
5066  if (BO->Opcode == Instruction::Sub)
5067  AddOps.push_back(getMinusSCEV(LHS, RHS, Flags));
5068  else
5069  AddOps.push_back(getAddExpr(LHS, RHS, Flags));
5070  break;
5071  }
5072  }
5073 
5074  if (BO->Opcode == Instruction::Sub)
5075  AddOps.push_back(getNegativeSCEV(getSCEV(BO->RHS)));
5076  else
5077  AddOps.push_back(getSCEV(BO->RHS));
5078 
5079  auto NewBO = MatchBinaryOp(BO->LHS, DT);
5080  if (!NewBO || (NewBO->Opcode != Instruction::Add &&
5081  NewBO->Opcode != Instruction::Sub)) {
5082  AddOps.push_back(getSCEV(BO->LHS));
5083  break;
5084  }
5085  BO = NewBO;
5086  } while (true);
5087 
5088  return getAddExpr(AddOps);
5089  }
5090 
5091  case Instruction::Mul: {
5092  SmallVector<const SCEV *, 4> MulOps;
5093  do {
5094  if (BO->Op) {
5095  if (auto *OpSCEV = getExistingSCEV(BO->Op)) {
5096  MulOps.push_back(OpSCEV);
5097  break;
5098  }
5099 
5100  SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(BO->Op);
5101  if (Flags != SCEV::FlagAnyWrap) {
5102  MulOps.push_back(
5103  getMulExpr(getSCEV(BO->LHS), getSCEV(BO->RHS), Flags));
5104  break;
5105  }
5106  }
5107 
5108  MulOps.push_back(getSCEV(BO->RHS));
5109  auto NewBO = MatchBinaryOp(BO->LHS, DT);
5110  if (!NewBO || NewBO->Opcode != Instruction::Mul) {
5111  MulOps.push_back(getSCEV(BO->LHS));
5112  break;
5113  }
5114  BO = NewBO;
5115  } while (true);
5116 
5117  return getMulExpr(MulOps);
5118  }
5119  case Instruction::UDiv:
5120  return getUDivExpr(getSCEV(BO->LHS), getSCEV(BO->RHS));
5121  case Instruction::Sub: {
5122  SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
5123  if (BO->Op)
5124  Flags = getNoWrapFlagsFromUB(BO->Op);
5125  return getMinusSCEV(getSCEV(BO->LHS), getSCEV(BO->RHS), Flags);
5126  }
5127  case Instruction::And:
5128  // For an expression like x&255 that merely masks off the high bits,
5129  // use zext(trunc(x)) as the SCEV expression.
5130  if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS)) {
5131  if (CI->isNullValue())
5132  return getSCEV(BO->RHS);
5133  if (CI->isAllOnesValue())
5134  return getSCEV(BO->LHS);
5135  const APInt &A = CI->getValue();
5136 
5137  // Instcombine's ShrinkDemandedConstant may strip bits out of
5138  // constants, obscuring what would otherwise be a low-bits mask.
5139  // Use computeKnownBits to compute what ShrinkDemandedConstant
5140  // knew about to reconstruct a low-bits mask value.
5141  unsigned LZ = A.countLeadingZeros();
5142  unsigned TZ = A.countTrailingZeros();
5143  unsigned BitWidth = A.getBitWidth();
5144  APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
5145  computeKnownBits(BO->LHS, KnownZero, KnownOne, getDataLayout(),
5146  0, &AC, nullptr, &DT);
5147 
5148  APInt EffectiveMask =
5149  APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ);
5150  if ((LZ != 0 || TZ != 0) && !((~A & ~KnownZero) & EffectiveMask)) {
5151  const SCEV *MulCount = getConstant(ConstantInt::get(
5152  getContext(), APInt::getOneBitSet(BitWidth, TZ)));
5153  return getMulExpr(
5154  getZeroExtendExpr(
5155  getTruncateExpr(
5156  getUDivExactExpr(getSCEV(BO->LHS), MulCount),
5157  IntegerType::get(getContext(), BitWidth - LZ - TZ)),
5158  BO->LHS->getType()),
5159  MulCount);
5160  }
5161  }
5162  break;
5163 
5164  case Instruction::Or:
5165  // If the RHS of the Or is a constant, we may have something like:
5166  // X*4+1 which got turned into X*4|1. Handle this as an Add so loop
5167  // optimizations will transparently handle this case.
5168  //
5169  // In order for this transformation to be safe, the LHS must be of the
5170  // form X*(2^n) and the Or constant must be less than 2^n.
5171  if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS)) {
5172  const SCEV *LHS = getSCEV(BO->LHS);
5173  const APInt &CIVal = CI->getValue();
5174  if (GetMinTrailingZeros(LHS) >=
5175  (CIVal.getBitWidth() - CIVal.countLeadingZeros())) {
5176  // Build a plain add SCEV.
5177  const SCEV *S = getAddExpr(LHS, getSCEV(CI));
5178  // If the LHS of the add was an addrec and it has no-wrap flags,
5179  // transfer the no-wrap flags, since an or won't introduce a wrap.
5180  if (const SCEVAddRecExpr *NewAR = dyn_cast<SCEVAddRecExpr>(S)) {
5181  const SCEVAddRecExpr *OldAR = cast<SCEVAddRecExpr>(LHS);
5182  const_cast<SCEVAddRecExpr *>(NewAR)->setNoWrapFlags(
5183  OldAR->getNoWrapFlags());
5184  }
5185  return S;
5186  }
5187  }
5188  break;
5189 
5190  case Instruction::Xor:
5191  if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS)) {
5192  // If the RHS of xor is -1, then this is a not operation.
5193  if (CI->isAllOnesValue())
5194  return getNotSCEV(getSCEV(BO->LHS));
5195 
5196  // Model xor(and(x, C), C) as and(~x, C), if C is a low-bits mask.
5197  // This is a variant of the check for xor with -1, and it handles
5198  // the case where instcombine has trimmed non-demanded bits out
5199  // of an xor with -1.
5200  if (auto *LBO = dyn_cast<BinaryOperator>(BO->LHS))
5201  if (ConstantInt *LCI = dyn_cast<ConstantInt>(LBO->getOperand(1)))
5202  if (LBO->getOpcode() == Instruction::And &&
5203  LCI->getValue() == CI->getValue())
5204  if (const SCEVZeroExtendExpr *Z =
5205  dyn_cast<SCEVZeroExtendExpr>(getSCEV(BO->LHS))) {
5206  Type *UTy = BO->LHS->getType();
5207  const SCEV *Z0 = Z->getOperand();
5208  Type *Z0Ty = Z0->getType();
5209  unsigned Z0TySize = getTypeSizeInBits(Z0Ty);
5210 
5211  // If C is a low-bits mask, the zero extend is serving to
5212  // mask off the high bits. Complement the operand and
5213  // re-apply the zext.
5214  if (APIntOps::isMask(Z0TySize, CI->getValue()))
5215  return getZeroExtendExpr(getNotSCEV(Z0), UTy);
5216 
5217  // If C is a single bit, it may be in the sign-bit position
5218  // before the zero-extend. In this case, represent the xor
5219  // using an add, which is equivalent, and re-apply the zext.
5220  APInt Trunc = CI->getValue().trunc(Z0TySize);
5221  if (Trunc.zext(getTypeSizeInBits(UTy)) == CI->getValue() &&
5222  Trunc.isSignBit())
5223  return getZeroExtendExpr(getAddExpr(Z0, getConstant(Trunc)),
5224  UTy);
5225  }
5226  }
5227  break;
5228 
5229  case Instruction::Shl:
5230  // Turn shift left of a constant amount into a multiply.
5231  if (ConstantInt *SA = dyn_cast<ConstantInt>(BO->RHS)) {
5232  uint32_t BitWidth = cast<IntegerType>(SA->getType())->getBitWidth();
5233 
5234  // If the shift count is not less than the bitwidth, the result of
5235  // the shift is undefined. Don't try to analyze it, because the
5236  // resolution chosen here may differ from the resolution chosen in
5237  // other parts of the compiler.
5238  if (SA->getValue().uge(BitWidth))
5239  break;
5240 
5241  // It is currently not resolved how to interpret NSW for left
5242  // shift by BitWidth - 1, so we avoid applying flags in that
5243  // case. Remove this check (or this comment) once the situation
5244  // is resolved. See
5245  // http://lists.llvm.org/pipermail/llvm-dev/2015-April/084195.html
5246  // and http://reviews.llvm.org/D8890 .
5247  auto Flags = SCEV::FlagAnyWrap;
5248  if (BO->Op && SA->getValue().ult(BitWidth - 1))
5249  Flags = getNoWrapFlagsFromUB(BO->Op);
5250 
5252  APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
5253  return getMulExpr(getSCEV(BO->LHS), getSCEV(X), Flags);
5254  }
5255  break;
5256 
5257  case Instruction::AShr:
5258  // For a two-shift sext-inreg, use sext(trunc(x)) as the SCEV expression.
5259  if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS))
5260  if (Operator *L = dyn_cast<Operator>(BO->LHS))
5261  if (L->getOpcode() == Instruction::Shl &&
5262  L->getOperand(1) == BO->RHS) {
5263  uint64_t BitWidth = getTypeSizeInBits(BO->LHS->getType());
5264 
5265  // If the shift count is not less than the bitwidth, the result of
5266  // the shift is undefined. Don't try to analyze it, because the
5267  // resolution chosen here may differ from the resolution chosen in
5268  // other parts of the compiler.
5269  if (CI->getValue().uge(BitWidth))
5270  break;
5271 
5272  uint64_t Amt = BitWidth - CI->getZExtValue();
5273  if (Amt == BitWidth)
5274  return getSCEV(L->getOperand(0)); // shift by zero --> noop
5275  return getSignExtendExpr(
5276  getTruncateExpr(getSCEV(L->getOperand(0)),
5277  IntegerType::get(getContext(), Amt)),
5278  BO->LHS->getType());
5279  }
5280  break;
5281  }
5282  }
5283 
5284  switch (U->getOpcode()) {
5285  case Instruction::Trunc:
5286  return getTruncateExpr(getSCEV(U->getOperand(0)), U->getType());
5287 
5288  case Instruction::ZExt:
5289  return getZeroExtendExpr(getSCEV(U->getOperand(0)), U->getType());
5290 
5291  case Instruction::SExt:
5292  return getSignExtendExpr(getSCEV(U->getOperand(0)), U->getType());
5293 
5294  case Instruction::BitCast:
5295  // BitCasts are no-op casts so we just eliminate the cast.
5296  if (isSCEVable(U->getType()) && isSCEVable(U->getOperand(0)->getType()))
5297  return getSCEV(U->getOperand(0));
5298  break;
5299 
5300  // It's tempting to handle inttoptr and ptrtoint as no-ops, however this can
5301  // lead to pointer expressions which cannot safely be expanded to GEPs,
5302  // because ScalarEvolution doesn't respect the GEP aliasing rules when
5303  // simplifying integer expressions.
5304 
5305  case Instruction::GetElementPtr:
5306  return createNodeForGEP(cast<GEPOperator>(U));
5307 
5308  case Instruction::PHI:
5309  return createNodeForPHI(cast<PHINode>(U));
5310 
5311  case Instruction::Select:
5312  // U can also be a select constant expr, which we let fall through. Since
5313  // createNodeForSelect only works for a condition that is an `ICmpInst`, and
5314  // constant expressions cannot have instructions as operands, we'd have
5315  // returned getUnknown for a select constant expression anyway.
5316  if (isa<Instruction>(U))
5317  return createNodeForSelectOrPHI(cast<Instruction>(U), U->getOperand(0),
5318  U->getOperand(1), U->getOperand(2));
5319  break;
5320 
5321  case Instruction::Call:
5322  case Instruction::Invoke:
5323  if (Value *RV = CallSite(U).getReturnedArgOperand())
5324  return getSCEV(RV);
5325  break;
5326  }
5327 
5328  return getUnknown(V);
5329 }
5330 
5331 
5332 
5333 //===----------------------------------------------------------------------===//
5334 // Iteration Count Computation Code
5335 //
5336 
5337 static unsigned getConstantTripCount(const SCEVConstant *ExitCount) {
5338  if (!ExitCount)
5339  return 0;
5340 
5341  ConstantInt *ExitConst = ExitCount->getValue();
5342 
5343  // Guard against huge trip counts.
5344  if (ExitConst->getValue().getActiveBits() > 32)
5345  return 0;
5346 
5347  // In case of integer overflow, this returns 0, which is correct.
5348  return ((unsigned)ExitConst->getZExtValue()) + 1;
5349 }
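// E.g., a backedge-taken count of 99 yields a trip count of 100, while a
// backedge-taken count of 0xFFFFFFFF passes the active-bits guard but wraps
// the 32-bit addition and returns 0 ("unknown").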
5350 
5351 unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L) {
5352  if (BasicBlock *ExitingBB = L->getExitingBlock())
5353  return getSmallConstantTripCount(L, ExitingBB);
5354 
5355  // No trip count information for multiple exits.
5356  return 0;
5357 }
5358 
5359 unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L,
5360  BasicBlock *ExitingBlock) {
5361  assert(ExitingBlock && "Must pass a non-null exiting block!");
5362  assert(L->isLoopExiting(ExitingBlock) &&
5363  "Exiting block must actually branch out of the loop!");
5364  const SCEVConstant *ExitCount =
5365  dyn_cast<SCEVConstant>(getExitCount(L, ExitingBlock));
5366  return getConstantTripCount(ExitCount);
5367 }
5368 
5369 unsigned ScalarEvolution::getSmallConstantMaxTripCount(Loop *L) {
5370  const auto *MaxExitCount =
5371  dyn_cast<SCEVConstant>(getMaxBackedgeTakenCount(L));
5372  return getConstantTripCount(MaxExitCount);
5373 }
5374 
5375 unsigned ScalarEvolution::getSmallConstantTripMultiple(Loop *L) {
5376  if (BasicBlock *ExitingBB = L->getExitingBlock())
5377  return getSmallConstantTripMultiple(L, ExitingBB);
5378 
5379  // No trip multiple information for multiple exits.
5380  return 0;
5381 }
5382 
5383 /// Returns the largest constant divisor of the trip count of this loop as a
5384 /// normal unsigned value, if possible. This means that the actual trip count is
5385 /// always a multiple of the returned value (don't forget the trip count could
5386 /// very well be zero as well!).
5387 ///
5388 /// Returns 1 if the trip count is unknown or not guaranteed to be the
5389 /// multiple of a constant (which is also the case if the trip count is simply
5390 /// constant, use getSmallConstantTripCount for that case). Will also return 1
5391 /// if the trip count is very large (>= 2^32).
5392 ///
5393 /// As explained in the comments for getSmallConstantTripCount, this assumes
5394 /// that control exits the loop via ExitingBlock.
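/// For example, a backedge-taken count of (-1 + (4 * %n)) gives the trip
/// count expression (4 * %n), whose leading constant yields a multiple of 4.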
5395 unsigned
5396 ScalarEvolution::getSmallConstantTripMultiple(Loop *L,
5397  BasicBlock *ExitingBlock) {
5398  assert(ExitingBlock && "Must pass a non-null exiting block!");
5399  assert(L->isLoopExiting(ExitingBlock) &&
5400  "Exiting block must actually branch out of the loop!");
5401  const SCEV *ExitCount = getExitCount(L, ExitingBlock);
5402  if (ExitCount == getCouldNotCompute())
5403  return 1;
5404 
5405  // Get the trip count from the BE count by adding 1.
5406  const SCEV *TCMul = getAddExpr(ExitCount, getOne(ExitCount->getType()));
5407  // FIXME: SCEV distributes multiplication as V1*C1 + V2*C1. We could attempt
5408  // to factor simple cases.
5409  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(TCMul))
5410  TCMul = Mul->getOperand(0);
5411 
5412  const SCEVConstant *MulC = dyn_cast<SCEVConstant>(TCMul);
5413  if (!MulC)
5414  return 1;
5415 
5416  ConstantInt *Result = MulC->getValue();
5417 
5418  // Guard against huge trip counts (this requires checking
5419  // for zero to handle the case where the trip count == -1 and the
5420  // addition wraps).
5421  if (!Result || Result->getValue().getActiveBits() > 32 ||
5422  Result->getValue().getActiveBits() == 0)
5423  return 1;
5424 
5425  return (unsigned)Result->getZExtValue();
5426 }
5427 
5428 /// Get the expression for the number of loop iterations for which this loop is
5429 /// guaranteed not to exit via ExitingBlock. Otherwise return
5430 /// SCEVCouldNotCompute.
5431 const SCEV *ScalarEvolution::getExitCount(Loop *L, BasicBlock *ExitingBlock) {
5432  return getBackedgeTakenInfo(L).getExact(ExitingBlock, this);
5433 }
5434 
5435 const SCEV *
5436 ScalarEvolution::getPredicatedBackedgeTakenCount(const Loop *L,
5437  SCEVUnionPredicate &Preds) {
5438  return getPredicatedBackedgeTakenInfo(L).getExact(this, &Preds);
5439 }
5440 
5441 const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L) {
5442  return getBackedgeTakenInfo(L).getExact(this);
5443 }
5444 
5445 /// Similar to getBackedgeTakenCount, except return the least SCEV value that is
5446 /// known never to be less than the actual backedge taken count.
5447 const SCEV *ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) {
5448  return getBackedgeTakenInfo(L).getMax(this);
5449 }
5450 
5451 bool ScalarEvolution::isBackedgeTakenCountMaxOrZero(const Loop *L) {
5452  return getBackedgeTakenInfo(L).isMaxOrZero(this);
5453 }
5454 
5455 /// Push PHI nodes in the header of the given loop onto the given Worklist.
5456 static void
5457 PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) {
5458  BasicBlock *Header = L->getHeader();
5459 
5460  // Push all Loop-header PHIs onto the Worklist stack.
5461  for (BasicBlock::iterator I = Header->begin();
5462  PHINode *PN = dyn_cast<PHINode>(I); ++I)
5463  Worklist.push_back(PN);
5464 }
5465 
5466 const ScalarEvolution::BackedgeTakenInfo &
5467 ScalarEvolution::getPredicatedBackedgeTakenInfo(const Loop *L) {
5468  auto &BTI = getBackedgeTakenInfo(L);
5469  if (BTI.hasFullInfo())
5470  return BTI;
5471 
5472  auto Pair = PredicatedBackedgeTakenCounts.insert({L, BackedgeTakenInfo()});
5473 
5474  if (!Pair.second)
5475  return Pair.first->second;
5476 
5477  BackedgeTakenInfo Result =
5478  computeBackedgeTakenCount(L, /*AllowPredicates=*/true);
5479 
5480  return PredicatedBackedgeTakenCounts.find(L)->second = std::move(Result);
5481 }
5482 
5483 const ScalarEvolution::BackedgeTakenInfo &
5484 ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
5485  // Initially insert an invalid entry for this loop. If the insertion
5486  // succeeds, proceed to actually compute a backedge-taken count and
5487  // update the value. The temporary CouldNotCompute value tells SCEV
5488  // code elsewhere that it shouldn't attempt to request a new
5489  // backedge-taken count, which could result in infinite recursion.
5490  std::pair<DenseMap<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair =
5491  BackedgeTakenCounts.insert({L, BackedgeTakenInfo()});
5492  if (!Pair.second)
5493  return Pair.first->second;
5494 
5495  // computeBackedgeTakenCount may allocate memory for its result. Inserting it
5496  // into the BackedgeTakenCounts map transfers ownership. Otherwise, the result
5497  // must be cleared in this scope.
5498  BackedgeTakenInfo Result = computeBackedgeTakenCount(L);
5499 
5500  if (Result.getExact(this) != getCouldNotCompute()) {
5501  assert(isLoopInvariant(Result.getExact(this), L) &&
5502  isLoopInvariant(Result.getMax(this), L) &&
5503  "Computed backedge-taken count isn't loop invariant for loop!");
5504  ++NumTripCountsComputed;
5505  }
5506  else if (Result.getMax(this) == getCouldNotCompute() &&
5507  isa<PHINode>(L->getHeader()->begin())) {
5508  // Only count loops that have phi nodes as not being computable.
5509  ++NumTripCountsNotComputed;
5510  }
5511 
5512  // Now that we know more about the trip count for this loop, forget any
5513  // existing SCEV values for PHI nodes in this loop since they are only
5514  // conservative estimates made without the benefit of trip count
5515  // information. This is similar to the code in forgetLoop, except that
5516  // it handles SCEVUnknown PHI nodes specially.
5517  if (Result.hasAnyInfo()) {
5518  SmallVector<Instruction *, 16> Worklist;
5519  PushLoopPHIs(L, Worklist);
5520 
5521  SmallPtrSet<Instruction *, 8> Visited;
5522  while (!Worklist.empty()) {
5523  Instruction *I = Worklist.pop_back_val();
5524  if (!Visited.insert(I).second)
5525  continue;
5526 
5527  ValueExprMapType::iterator It =
5528  ValueExprMap.find_as(static_cast<Value *>(I));
5529  if (It != ValueExprMap.end()) {
5530  const SCEV *Old = It->second;
5531 
5532  // SCEVUnknown for a PHI either means that it has an unrecognized
5533  // structure, or it's a PHI that's in the progress of being computed
5534  // by createNodeForPHI. In the former case, additional loop trip
5535  // count information isn't going to change anything. In the later
5536  // case, createNodeForPHI will perform the necessary updates on its
5537  // own when it gets to that point.
5538  if (!isa<PHINode>(I) || !isa<SCEVUnknown>(Old)) {
5539  eraseValueFromMap(It->first);
5540  forgetMemoizedResults(Old);
5541  }
5542  if (PHINode *PN = dyn_cast<PHINode>(I))
5543  ConstantEvolutionLoopExitValue.erase(PN);
5544  }
5545 
5546  PushDefUseChildren(I, Worklist);
5547  }
5548  }
5549 
5550  // Re-lookup the insert position, since the call to
5551  // computeBackedgeTakenCount above could result in a
5552  // recursive call to getBackedgeTakenInfo (on a different
5553  // loop), which would invalidate the iterator computed
5554  // earlier.
5555  return BackedgeTakenCounts.find(L)->second = std::move(Result);
5556 }
5557 
5558 void ScalarEvolution::forgetLoop(const Loop *L) {
5559  // Drop any stored trip count value.
5560  auto RemoveLoopFromBackedgeMap =
5561  [L](DenseMap<const Loop *, BackedgeTakenInfo> &Map) {
5562  auto BTCPos = Map.find(L);
5563  if (BTCPos != Map.end()) {
5564  BTCPos->second.clear();
5565  Map.erase(BTCPos);
5566  }
5567  };
5568 
5569  RemoveLoopFromBackedgeMap(BackedgeTakenCounts);
5570  RemoveLoopFromBackedgeMap(PredicatedBackedgeTakenCounts);
5571 
5572  // Drop information about expressions based on loop-header PHIs.
5573  SmallVector<Instruction *, 16> Worklist;
5574  PushLoopPHIs(L, Worklist);
5575 
5576  SmallPtrSet<Instruction *, 8> Visited;
5577  while (!Worklist.empty()) {
5578  Instruction *I = Worklist.pop_back_val();
5579  if (!Visited.insert(I).second)
5580  continue;
5581 
5582  ValueExprMapType::iterator It =
5583  ValueExprMap.find_as(static_cast<Value *>(I));
5584  if (It != ValueExprMap.end()) {
5585  eraseValueFromMap(It->first);
5586  forgetMemoizedResults(It->second);
5587  if (PHINode *PN = dyn_cast<PHINode>(I))
5588  ConstantEvolutionLoopExitValue.erase(PN);
5589  }
5590 
5591  PushDefUseChildren(I, Worklist);
5592  }
5593 
5594  // Forget all contained loops too, to avoid dangling entries in the
5595  // ValuesAtScopes map.
5596  for (Loop *I : *L)
5597  forgetLoop(I);
5598 
5599  LoopPropertiesCache.erase(L);
5600 }
5601 
5602 void ScalarEvolution::forgetValue(Value *V) {
5603  Instruction *I = dyn_cast<Instruction>(V);
5604  if (!I) return;
5605 
5606  // Drop information about expressions based on loop-header PHIs.
5607  SmallVector<Instruction *, 16> Worklist;
5608  Worklist.push_back(I);
5609 
5610  SmallPtrSet<Instruction *, 8> Visited;
5611  while (!Worklist.empty()) {
5612  I = Worklist.pop_back_val();
5613  if (!Visited.insert(I).second)
5614  continue;
5615 
5616  ValueExprMapType::iterator It =
5617  ValueExprMap.find_as(static_cast<Value *>(I));
5618  if (It != ValueExprMap.end()) {
5619  eraseValueFromMap(It->first);
5620  forgetMemoizedResults(It->second);
5621  if (PHINode *PN = dyn_cast<PHINode>(I))
5622  ConstantEvolutionLoopExitValue.erase(PN);
5623  }
5624 
5625  PushDefUseChildren(I, Worklist);
5626  }
5627 }
5628 
5629 /// Get the exact loop backedge taken count considering all loop exits. A
5630 /// computable result can only be returned for loops with a single exit.
5631 /// Returning the minimum taken count among all exits is incorrect because one
5632 /// of the loop's exit limits may have been skipped. howFarToZero assumes that
5633 /// the limit of each loop test is never skipped. This is a valid assumption as
5634 /// long as the loop exits via that test. For precise results, it is the
5635 /// caller's responsibility to specify the relevant loop exit using
5636 /// getExact(ExitingBlock, SE).
5637 const SCEV *
5638 ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE,
5639  SCEVUnionPredicate *Preds) const {
5640  // If any exits were not computable, the loop is not computable.
5641  if (!isComplete() || ExitNotTaken.empty())
5642  return SE->getCouldNotCompute();
5643 
5644  const SCEV *BECount = nullptr;
5645  for (auto &ENT : ExitNotTaken) {
5646  assert(ENT.ExactNotTaken != SE->getCouldNotCompute() && "bad exit SCEV");
5647 
5648  if (!BECount)
5649  BECount = ENT.ExactNotTaken;
5650  else if (BECount != ENT.ExactNotTaken)
5651  return SE->getCouldNotCompute();
5652  if (Preds && !ENT.hasAlwaysTruePredicate())
5653  Preds->add(ENT.Predicate.get());
5654 
5655  assert((Preds || ENT.hasAlwaysTruePredicate()) &&
5656  "Predicate should be always true!");
5657  }
5658 
5659  assert(BECount && "Invalid not taken count for loop exit");
5660  return BECount;
5661 }
5662 
5663 /// Get the exact not taken count for this loop exit.
5664 const SCEV *
5665 ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock,
5666  ScalarEvolution *SE) const {
5667  for (auto &ENT : ExitNotTaken)
5668  if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePredicate())
5669  return ENT.ExactNotTaken;
5670 
5671  return SE->getCouldNotCompute();
5672 }
5673 
5674 /// getMax - Get the max backedge taken count for the loop.
5675 const SCEV *
5676 ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const {
5677  auto PredicateNotAlwaysTrue = [](const ExitNotTakenInfo &ENT) {
5678  return !ENT.hasAlwaysTruePredicate();
5679  };
5680 
5681  if (any_of(ExitNotTaken, PredicateNotAlwaysTrue) || !getMax())
5682  return SE->getCouldNotCompute();
5683 
5684  return getMax();
5685 }
5686 
5687 bool ScalarEvolution::BackedgeTakenInfo::isMaxOrZero(ScalarEvolution *SE) const {
5688  auto PredicateNotAlwaysTrue = [](const ExitNotTakenInfo &ENT) {
5689  return !ENT.hasAlwaysTruePredicate();
5690  };
5691  return MaxOrZero && !any_of(ExitNotTaken, PredicateNotAlwaysTrue);
5692 }
5693 
5694 bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S,
5695  ScalarEvolution *SE) const {
5696  if (getMax() && getMax() != SE->getCouldNotCompute() &&
5697  SE->hasOperand(getMax(), S))
5698  return true;
5699 
5700  for (auto &ENT : ExitNotTaken)
5701  if (ENT.ExactNotTaken != SE->getCouldNotCompute() &&
5702  SE->hasOperand(ENT.ExactNotTaken, S))
5703  return true;
5704 
5705  return false;
5706 }
5707 
5708 /// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each
5709 /// computable exit into a persistent ExitNotTakenInfo array.
5710 ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
5711  SmallVectorImpl<ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo>
5712  &&ExitCounts,
5713  bool Complete, const SCEV *MaxCount, bool MaxOrZero)
5714  : MaxAndComplete(MaxCount, Complete), MaxOrZero(MaxOrZero) {
5715  typedef ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo EdgeExitInfo;
5716  ExitNotTaken.reserve(ExitCounts.size());
5717  std::transform(
5718  ExitCounts.begin(), ExitCounts.end(), std::back_inserter(ExitNotTaken),
5719  [&](const EdgeExitInfo &EEI) {
5720  BasicBlock *ExitBB = EEI.first;
5721  const ExitLimit &EL = EEI.second;
5722  if (EL.Predicates.empty())
5723  return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, nullptr);
5724 
5725  std::unique_ptr<SCEVUnionPredicate> Predicate(new SCEVUnionPredicate);
5726  for (auto *Pred : EL.Predicates)
5727  Predicate->add(Pred);
5728 
5729  return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, std::move(Predicate));
5730  });
5731 }
5732 
5733 /// Invalidate this result and free the ExitNotTakenInfo array.
5734 void ScalarEvolution::BackedgeTakenInfo::clear() {
5735  ExitNotTaken.clear();
5736 }
5737 
5738 /// Compute the number of times the backedge of the specified loop will execute.
5739 ScalarEvolution::BackedgeTakenInfo
5740 ScalarEvolution::computeBackedgeTakenCount(const Loop *L,
5741  bool AllowPredicates) {
5742  SmallVector<BasicBlock *, 8> ExitingBlocks;
5743  L->getExitingBlocks(ExitingBlocks);
5744 
5745  typedef ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo EdgeExitInfo;
5746 
5747  SmallVector<EdgeExitInfo, 4> ExitCounts;
5748  bool CouldComputeBECount = true;
5749  BasicBlock *Latch = L->getLoopLatch(); // may be NULL.
5750  const SCEV *MustExitMaxBECount = nullptr;
5751  const SCEV *MayExitMaxBECount = nullptr;
5752  bool MustExitMaxOrZero = false;
5753 
5754  // Compute the ExitLimit for each loop exit. Use this to populate ExitCounts
5755  // and compute maxBECount.
5756  // Do a union of all the predicates here.
5757  for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
5758  BasicBlock *ExitBB = ExitingBlocks[i];
5759  ExitLimit EL = computeExitLimit(L, ExitBB, AllowPredicates);
5760 
5761  assert((AllowPredicates || EL.Predicates.empty()) &&
5762  "Predicated exit limit when predicates are not allowed!");
5763 
5764  // 1. For each exit that can be computed, add an entry to ExitCounts.
5765  // CouldComputeBECount is true only if all exits can be computed.
5766  if (EL.ExactNotTaken == getCouldNotCompute())
5767  // We couldn't compute an exact value for this exit, so
5768  // we won't be able to compute an exact value for the loop.
5769  CouldComputeBECount = false;
5770  else
5771  ExitCounts.emplace_back(ExitBB, EL);
5772 
5773  // 2. Derive the loop's MaxBECount from each exit's max number of
5774  // non-exiting iterations. Partition the loop exits into two kinds:
5775  // LoopMustExits and LoopMayExits.
5776  //
5777  // If the exit dominates the loop latch, it is a LoopMustExit otherwise it
5778  // is a LoopMayExit. If any computable LoopMustExit is found, then
5779  // MaxBECount is the minimum EL.MaxNotTaken of computable
5780  // LoopMustExits. Otherwise, MaxBECount is conservatively the maximum
5781  // EL.MaxNotTaken, where CouldNotCompute is considered greater than any
5782  // computable EL.MaxNotTaken.
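  //
  // E.g., a computable exit that dominates the latch with MaxNotTaken = 10
  // beats a non-dominating exit with MaxNotTaken = 20, so MaxBECount is 10;
  // if no computable exit dominated the latch, the conservative maximum 20
  // would be used instead.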
5783  if (EL.MaxNotTaken != getCouldNotCompute() && Latch &&
5784  DT.dominates(ExitBB, Latch)) {
5785  if (!MustExitMaxBECount) {
5786  MustExitMaxBECount = EL.MaxNotTaken;
5787  MustExitMaxOrZero = EL.MaxOrZero;
5788  } else {
5789  MustExitMaxBECount =
5790  getUMinFromMismatchedTypes(MustExitMaxBECount, EL.MaxNotTaken);
5791  }
5792  } else if (MayExitMaxBECount != getCouldNotCompute()) {
5793  if (!MayExitMaxBECount || EL.MaxNotTaken == getCouldNotCompute())
5794  MayExitMaxBECount = EL.MaxNotTaken;
5795  else {
5796  MayExitMaxBECount =
5797  getUMaxFromMismatchedTypes(MayExitMaxBECount, EL.MaxNotTaken);
5798  }
5799  }
5800  }
5801  const SCEV *MaxBECount = MustExitMaxBECount ? MustExitMaxBECount :
5802  (MayExitMaxBECount ? MayExitMaxBECount : getCouldNotCompute());
5803  // The loop backedge will be taken the maximum or zero times if there's
5804  // a single exit that must be taken the maximum or zero times.
5805  bool MaxOrZero = (MustExitMaxOrZero && ExitingBlocks.size() == 1);
5806  return BackedgeTakenInfo(std::move(ExitCounts), CouldComputeBECount,
5807  MaxBECount, MaxOrZero);
5808 }
5809 
5810 ScalarEvolution::ExitLimit
5811 ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock,
5812  bool AllowPredicates) {
5813 
5814  // Okay, we've chosen an exiting block. See what condition causes us to exit
5815  // at this block and remember the exit block and whether all other targets
5816  // lead to the loop header.
5817  bool MustExecuteLoopHeader = true;
5818  BasicBlock *Exit = nullptr;
5819  for (auto *SBB : successors(ExitingBlock))
5820  if (!L->contains(SBB)) {
5821  if (Exit) // Multiple exit successors.
5822  return getCouldNotCompute();
5823  Exit = SBB;
5824  } else if (SBB != L->getHeader()) {
5825  MustExecuteLoopHeader = false;
5826  }
5827 
5828  // At this point, we know we have a conditional branch that determines whether
5829  // the loop is exited. However, we don't know if the branch is executed each
5830  // time through the loop. If not, then the execution count of the branch will
5831  // not be equal to the trip count of the loop.
5832  //
5833  // Currently we check for this by checking to see if the Exit branch goes to
5834  // the loop header. If so, we know it will always execute the same number of
5835  // times as the loop. We also handle the case where the exit block *is* the
5836  // loop header. This is common for un-rotated loops.
5837  //
5838  // If both of those tests fail, walk up the unique predecessor chain to the
5839  // header, stopping if there is an edge that doesn't exit the loop. If the
5840  // header is reached, the execution count of the branch will be equal to the
5841  // trip count of the loop.
5842  //
5843  // More extensive analysis could be done to handle more cases here.
5844  //
5845  if (!MustExecuteLoopHeader && ExitingBlock != L->getHeader()) {
5846  // The simple checks failed, try climbing the unique predecessor chain
5847  // up to the header.
5848  bool Ok = false;
5849  for (BasicBlock *BB = ExitingBlock; BB; ) {
5850  BasicBlock *Pred = BB->getUniquePredecessor();
5851  if (!Pred)
5852  return getCouldNotCompute();
5853  TerminatorInst *PredTerm = Pred->getTerminator();
5854  for (const BasicBlock *PredSucc : PredTerm->successors()) {
5855  if (PredSucc == BB)
5856  continue;
5857  // If the predecessor has a successor that isn't BB and isn't
5858  // outside the loop, assume the worst.
5859  if (L->contains(PredSucc))
5860  return getCouldNotCompute();
5861  }
5862  if (Pred == L->getHeader()) {
5863  Ok = true;
5864  break;
5865  }
5866  BB = Pred;
5867  }
5868  if (!Ok)
5869  return getCouldNotCompute();
5870  }
5871 
5872  bool IsOnlyExit = (L->getExitingBlock() != nullptr);
5873  TerminatorInst *Term = ExitingBlock->getTerminator();
5874  if (BranchInst *BI = dyn_cast<BranchInst>(Term)) {
5875  assert(BI->isConditional() && "If unconditional, it can't be in loop!");
5876  // Proceed to the next level to examine the exit condition expression.
5877  return computeExitLimitFromCond(
5878  L, BI->getCondition(), BI->getSuccessor(0), BI->getSuccessor(1),
5879  /*ControlsExit=*/IsOnlyExit, AllowPredicates);
5880  }
5881 
5882  if (SwitchInst *SI = dyn_cast<SwitchInst>(Term))
5883  return computeExitLimitFromSingleExitSwitch(L, SI, Exit,
5884  /*ControlsExit=*/IsOnlyExit);
5885 
5886  return getCouldNotCompute();
5887 }
5888 
5889 ScalarEvolution::ExitLimit
5890 ScalarEvolution::computeExitLimitFromCond(const Loop *L,
5891  Value *ExitCond,
5892  BasicBlock *TBB,
5893  BasicBlock *FBB,
5894  bool ControlsExit,
5895  bool AllowPredicates) {
5896  // Check if the controlling expression for this loop is an And or Or.
5897  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) {
5898  if (BO->getOpcode() == Instruction::And) {
5899  // Recurse on the operands of the and.
5900  bool EitherMayExit = L->contains(TBB);
5901  ExitLimit EL0 = computeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
5902  ControlsExit && !EitherMayExit,
5903  AllowPredicates);
5904  ExitLimit EL1 = computeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
5905  ControlsExit && !EitherMayExit,
5906  AllowPredicates);
5907  const SCEV *BECount = getCouldNotCompute();
5908  const SCEV *MaxBECount = getCouldNotCompute();
5909  if (EitherMayExit) {
5910  // Both conditions must be true for the loop to continue executing.
5911  // Choose the less conservative count.
5912  if (EL0.ExactNotTaken == getCouldNotCompute() ||
5913  EL1.ExactNotTaken == getCouldNotCompute())
5914  BECount = getCouldNotCompute();
5915  else
5916  BECount =
5917  getUMinFromMismatchedTypes(EL0.ExactNotTaken, EL1.ExactNotTaken);
5918  if (EL0.MaxNotTaken == getCouldNotCompute())
5919  MaxBECount = EL1.MaxNotTaken;
5920  else if (EL1.MaxNotTaken == getCouldNotCompute())
5921  MaxBECount = EL0.MaxNotTaken;
5922  else
5923  MaxBECount =
5924  getUMinFromMismatchedTypes(EL0.MaxNotTaken, EL1.MaxNotTaken);
5925  } else {
5926  // Both conditions must be true at the same time for the loop to exit.
5927  // For now, be conservative.
5928  assert(L->contains(FBB) && "Loop block has no successor in loop!");
5929  if (EL0.MaxNotTaken == EL1.MaxNotTaken)
5930  MaxBECount = EL0.MaxNotTaken;
5931  if (EL0.ExactNotTaken == EL1.ExactNotTaken)
5932  BECount = EL0.ExactNotTaken;
5933  }
5934 
5935  // There are cases (e.g. PR26207) where computeExitLimitFromCond is able
5936  // to be more aggressive when computing BECount than when computing
5937  // MaxBECount. In these cases it is possible for EL0.ExactNotTaken and
5938  // EL1.ExactNotTaken to match, but for EL0.MaxNotTaken and EL1.MaxNotTaken
5939  // to not.
5940  if (isa<SCEVCouldNotCompute>(MaxBECount) &&
5941  !isa<SCEVCouldNotCompute>(BECount))
5942  MaxBECount = BECount;
5943 
5944  return ExitLimit(BECount, MaxBECount, false,
5945  {&EL0.Predicates, &EL1.Predicates});
5946  }
5947  if (BO->getOpcode() == Instruction::Or) {
5948  // Recurse on the operands of the or.
5949  bool EitherMayExit = L->contains(FBB);
5950  ExitLimit EL0 = computeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
5951  ControlsExit && !EitherMayExit,
5952  AllowPredicates);
5953  ExitLimit EL1 = computeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
5954  ControlsExit && !EitherMayExit,
5955  AllowPredicates);
5956  const SCEV *BECount = getCouldNotCompute();
5957  const SCEV *MaxBECount = getCouldNotCompute();
5958  if (EitherMayExit) {
5959  // Both conditions must be false for the loop to continue executing.
5960  // Choose the less conservative count.
5961  if (EL0.ExactNotTaken == getCouldNotCompute() ||
5962  EL1.ExactNotTaken == getCouldNotCompute())
5963  BECount = getCouldNotCompute();
5964  else
5965  BECount =
5966  getUMinFromMismatchedTypes(EL0.ExactNotTaken, EL1.ExactNotTaken);
5967  if (EL0.MaxNotTaken == getCouldNotCompute())
5968  MaxBECount = EL1.MaxNotTaken;
5969  else if (EL1.MaxNotTaken == getCouldNotCompute())
5970  MaxBECount = EL0.MaxNotTaken;
5971  else
5972  MaxBECount =
5973  getUMinFromMismatchedTypes(EL0.MaxNotTaken, EL1.MaxNotTaken);
5974  } else {
5975  // Both conditions must be false at the same time for the loop to exit.
5976  // For now, be conservative.
5977  assert(L->contains(TBB) && "Loop block has no successor in loop!");
5978  if (EL0.MaxNotTaken == EL1.MaxNotTaken)
5979  MaxBECount = EL0.MaxNotTaken;
5980  if (EL0.ExactNotTaken == EL1.ExactNotTaken)
5981  BECount = EL0.ExactNotTaken;
5982  }
5983 
5984  return ExitLimit(BECount, MaxBECount, false,
5985  {&EL0.Predicates, &EL1.Predicates});
5986  }
5987  }
5988 
5989  // With an icmp, it may be feasible to compute an exact backedge-taken count.
5990  // Proceed to the next level to examine the icmp.
5991  if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond)) {
5992  ExitLimit EL =
5993  computeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit);
5994  if (EL.hasFullInfo() || !AllowPredicates)
5995  return EL;
5996 
5997  // Try again, but use SCEV predicates this time.
5998  return computeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit,
5999  /*AllowPredicates=*/true);
6000  }
6001 
6002  // Check for a constant condition. These are normally stripped out by
6003  // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to
6004  // preserve the CFG and is temporarily leaving constant conditions
6005  // in place.
6006  if (ConstantInt *CI = dyn_cast<ConstantInt>(ExitCond)) {
6007  if (L->contains(FBB) == !CI->getZExtValue())
6008  // The backedge is always taken.
6009  return getCouldNotCompute();
6010  else
6011  // The backedge is never taken.
6012  return getZero(CI->getType());
6013  }
6014 
6015  // If it's not an integer or pointer comparison then compute it the hard way.
6016  return computeExitCountExhaustively(L, ExitCond, !L->contains(TBB));
6017 }
6018 
6019 ScalarEvolution::ExitLimit
6020 ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
6021  ICmpInst *ExitCond,
6022  BasicBlock *TBB,
6023  BasicBlock *FBB,
6024  bool ControlsExit,
6025  bool AllowPredicates) {
6026 
6027  // If the condition was "exit on true", convert it to "exit on false".
6028  ICmpInst::Predicate Cond;
6029  if (!L->contains(FBB))
6030  Cond = ExitCond->getPredicate();
6031  else
6032  Cond = ExitCond->getInversePredicate();
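  // For example, "br i1 (icmp eq %i, %n), label %exit, label %body" exits on
  // true, so we analyze the inverse predicate "icmp ne %i, %n", which holds
  // exactly when the backedge is taken.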
6033 
6034  // Handle common loops like: for (X = "string"; *X; ++X)
6035  if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0)))
6036  if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) {
6037  ExitLimit ItCnt =
6038  computeLoadConstantCompareExitLimit(LI, RHS, L, Cond);
6039  if (ItCnt.hasAnyInfo())
6040  return ItCnt;
6041  }
6042 
6043  const SCEV *LHS = getSCEV(ExitCond->getOperand(0));
6044  const SCEV *RHS = getSCEV(ExitCond->getOperand(1));
6045 
6046  // Try to evaluate any dependencies out of the loop.
6047  LHS = getSCEVAtScope(LHS, L);
6048  RHS = getSCEVAtScope(RHS, L);
6049 
6050  // At this point, we would like to compute for how many iterations of the
6051  // loop the predicate will evaluate to true for these inputs.
6052  if (isLoopInvariant(LHS, L) && !isLoopInvariant(RHS, L)) {
6053  // If only the LHS is loop-invariant, force it into the RHS.
6054  std::swap(LHS, RHS);
6055  Cond = ICmpInst::getSwappedPredicate(Cond);
6056  }
6057 
6058  // Simplify the operands before analyzing them.
6059  (void)SimplifyICmpOperands(Cond, LHS, RHS);
6060 
6061  // If we have a comparison of a chrec against a constant, try to use value
6062  // ranges to answer this query.
6063  if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS))
6064  if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS))
6065  if (AddRec->getLoop() == L) {
6066  // Form the constant range.
6067  ConstantRange CompRange =
6068  ConstantRange::makeExactICmpRegion(Cond, RHSC->getAPInt());
6069 
6070  const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this);
6071  if (!isa<SCEVCouldNotCompute>(Ret)) return Ret;
6072  }
6073 
6074  switch (Cond) {
6075  case ICmpInst::ICMP_NE: { // while (X != Y)
6076  // Convert to: while (X-Y != 0)
6077  ExitLimit EL = howFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit,
6078  AllowPredicates);
6079  if (EL.hasAnyInfo()) return EL;
6080  break;
6081  }
6082  case ICmpInst::ICMP_EQ: { // while (X == Y)
6083  // Convert to: while (X-Y == 0)
6084  ExitLimit EL = howFarToNonZero(getMinusSCEV(LHS, RHS), L);
6085  if (EL.hasAnyInfo()) return EL;
6086  break;
6087  }
6088  case ICmpInst::ICMP_SLT:
6089  case ICmpInst::ICMP_ULT: { // while (X < Y)
6090  bool IsSigned = Cond == ICmpInst::ICMP_SLT;
6091  ExitLimit EL = howManyLessThans(LHS, RHS, L, IsSigned, ControlsExit,
6092  AllowPredicates);
6093  if (EL.hasAnyInfo()) return EL;
6094  break;
6095  }
6096  case ICmpInst::ICMP_SGT:
6097  case ICmpInst::ICMP_UGT: { // while (X > Y)
6098  bool IsSigned = Cond == ICmpInst::ICMP_SGT;
6099  ExitLimit EL =
6100  howManyGreaterThans(LHS, RHS, L, IsSigned, ControlsExit,
6101  AllowPredicates);
6102  if (EL.hasAnyInfo()) return EL;
6103  break;
6104  }
6105  default:
6106  break;
6107  }
6108 
6109  auto *ExhaustiveCount =
6110  computeExitCountExhaustively(L, ExitCond, !L->contains(TBB));
6111 
6112  if (!isa<SCEVCouldNotCompute>(ExhaustiveCount))
6113  return ExhaustiveCount;
6114 
6115  return computeShiftCompareExitLimit(ExitCond->getOperand(0),
6116  ExitCond->getOperand(1), L, Cond);
6117 }
6118 
6119 ScalarEvolution::ExitLimit
6120 ScalarEvolution::computeExitLimitFromSingleExitSwitch(const Loop *L,
6121  SwitchInst *Switch,
6122  BasicBlock *ExitingBlock,
6123  bool ControlsExit) {
6124  assert(!L->contains(ExitingBlock) && "Not an exiting block!");
6125 
6126  // Give up if the exit is the default dest of a switch.
6127  if (Switch->getDefaultDest() == ExitingBlock)
6128  return getCouldNotCompute();
6129 
6130  assert(L->contains(Switch->getDefaultDest()) &&
6131  "Default case must not exit the loop!");
6132  const SCEV *LHS = getSCEVAtScope(Switch->getCondition(), L);
6133  const SCEV *RHS = getConstant(Switch->findCaseDest(ExitingBlock));
6134 
6135  // while (X != Y) --> while (X-Y != 0)
6136  ExitLimit EL = howFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit);
6137  if (EL.hasAnyInfo())
6138  return EL;
6139 
6140  return getCouldNotCompute();
6141 }
6142 
6143 static ConstantInt *
6144 EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C,
6145  ScalarEvolution &SE) {
6146  const SCEV *InVal = SE.getConstant(C);
6147  const SCEV *Val = AddRec->evaluateAtIteration(InVal, SE);
6148  assert(isa<SCEVConstant>(Val) &&
6149  "Evaluation of SCEV at constant didn't fold correctly?");
6150  return cast<SCEVConstant>(Val)->getValue();
6151 }
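// For example, evaluating the affine chrec {5,+,3} at the constant iteration
// number 4 folds to 5 + 3*4 = 17; the assert above encodes the expectation
// that an evaluation with all-constant operands always folds this way.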
6152 
6153 /// Given an exit condition of 'icmp op load X, cst', try to see if we can
6154 /// compute the backedge execution count.
6155 ScalarEvolution::ExitLimit
6156 ScalarEvolution::computeLoadConstantCompareExitLimit(
6157  LoadInst *LI,
6158  Constant *RHS,
6159  const Loop *L,
6160  ICmpInst::Predicate predicate) {
6161 
6162  if (LI->isVolatile()) return getCouldNotCompute();
6163 
6164  // Check to see if the loaded pointer is a getelementptr of a global.
6165  // TODO: Use SCEV instead of manually grubbing with GEPs.
6166  GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0));
6167  if (!GEP) return getCouldNotCompute();
6168 
6169  // Make sure that it is really a constant global we are gepping, with an
6170  // initializer, and make sure the first IDX is really 0.
6171  GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0));
6172  if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() ||
6173  GEP->getNumOperands() < 3 || !isa<Constant>(GEP->getOperand(1)) ||
6174  !cast<Constant>(GEP->getOperand(1))->isNullValue())
6175  return getCouldNotCompute();
6176 
6177  // Okay, we allow one non-constant index into the GEP instruction.
6178  Value *VarIdx = nullptr;
6179  std::vector<Constant*> Indexes;
6180  unsigned VarIdxNum = 0;
6181  for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i)
6182  if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
6183  Indexes.push_back(CI);
6184  } else if (!isa<ConstantInt>(GEP->getOperand(i))) {
6185  if (VarIdx) return getCouldNotCompute(); // Multiple non-constant idx's.
6186  VarIdx = GEP->getOperand(i);
6187  VarIdxNum = i-2;
6188  Indexes.push_back(nullptr);
6189  }
6190 
6191  // Loop-invariant loads may be a byproduct of loop optimization. Skip them.
6192  if (!VarIdx)
6193  return getCouldNotCompute();
6194 
6195  // Okay, we know we have a (load (gep GV, 0, X)) comparison with a constant.
6196  // Check to see if X is a loop variant variable value now.
6197  const SCEV *Idx = getSCEV(VarIdx);
6198  Idx = getSCEVAtScope(Idx, L);
6199 
6200  // We can only recognize very limited forms of loop index expressions, in
6201  // particular, only affine AddRec's like {C1,+,C2}.
6202  const SCEVAddRecExpr *IdxExpr = dyn_cast<SCEVAddRecExpr>(Idx);
6203  if (!IdxExpr || !IdxExpr->isAffine() || isLoopInvariant(IdxExpr, L) ||
6204  !isa<SCEVConstant>(IdxExpr->getOperand(0)) ||
6205  !isa<SCEVConstant>(IdxExpr->getOperand(1)))
6206  return getCouldNotCompute();
6207 
6208  unsigned MaxSteps = MaxBruteForceIterations;
6209  for (unsigned IterationNum = 0; IterationNum != MaxSteps; ++IterationNum) {
6210  ConstantInt *ItCst = ConstantInt::get(
6211  cast<IntegerType>(IdxExpr->getType()), IterationNum);
6212  ConstantInt *Val = EvaluateConstantChrecAtConstant(IdxExpr, ItCst, *this);
6213 
6214  // Form the GEP offset.
6215  Indexes[VarIdxNum] = Val;
6216 
6217  Constant *Result = ConstantFoldLoadThroughGEPIndices(GV->getInitializer(),
6218  Indexes);
6219  if (!Result) break; // Cannot compute!
6220 
6221  // Evaluate the condition for this iteration.
6222  Result = ConstantExpr::getICmp(predicate, Result, RHS);
6223  if (!isa<ConstantInt>(Result)) break; // Couldn't decide for sure
6224  if (cast<ConstantInt>(Result)->getValue().isMinValue()) {
6225  ++NumArrayLenItCounts;
6226  return getConstant(ItCst); // Found terminating iteration!
6227  }
6228  }
6229  return getCouldNotCompute();
6230 }
6231 
6232 ScalarEvolution::ExitLimit ScalarEvolution::computeShiftCompareExitLimit(
6233  Value *LHS, Value *RHSV, const Loop *L, ICmpInst::Predicate Pred) {
6234  ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV);
6235  if (!RHS)
6236  return getCouldNotCompute();
6237 
6238  const BasicBlock *Latch = L->getLoopLatch();
6239  if (!Latch)
6240  return getCouldNotCompute();
6241 
6242  const BasicBlock *Predecessor = L->getLoopPredecessor();
6243  if (!Predecessor)
6244  return getCouldNotCompute();
6245 
6246  // Return true if V is of the form "LHS `shift_op` <positive constant>".
6247  // Return LHS in OutLHS and shift_op in OutOpCode.
6248  auto MatchPositiveShift =
6249  [](Value *V, Value *&OutLHS, Instruction::BinaryOps &OutOpCode) {
6250 
6251  using namespace PatternMatch;
6252 
6253  ConstantInt *ShiftAmt;
6254  if (match(V, m_LShr(m_Value(OutLHS), m_ConstantInt(ShiftAmt))))
6255  OutOpCode = Instruction::LShr;
6256  else if (match(V, m_AShr(m_Value(OutLHS), m_ConstantInt(ShiftAmt))))
6257  OutOpCode = Instruction::AShr;
6258  else if (match(V, m_Shl(m_Value(OutLHS), m_ConstantInt(ShiftAmt))))
6259  OutOpCode = Instruction::Shl;
6260  else
6261  return false;
6262 
6263  return ShiftAmt->getValue().isStrictlyPositive();
6264  };
6265 
6266  // Recognize a "shift recurrence" either of the form %iv or of %iv.shifted in
6267  //
6268  // loop:
6269  // %iv = phi i32 [ %iv.shifted, %loop ], [ %val, %preheader ]
6270  // %iv.shifted = lshr i32 %iv, <positive constant>
6271  //
6272  // Return true on a successful match. Return the corresponding PHI node (%iv
6273  // above) in PNOut and the opcode of the shift operation in OpCodeOut.
6274  auto MatchShiftRecurrence =
6275  [&](Value *V, PHINode *&PNOut, Instruction::BinaryOps &OpCodeOut) {
6276  Optional<Instruction::BinaryOps> PostShiftOpCode;
6277 
6278  {
6279  Instruction::BinaryOps OpC;
6280  Value *V;
6281 
6282  // If we encounter a shift instruction, "peel off" the shift operation,
6283  // and remember that we did so. Later when we inspect %iv's backedge
6284  // value, we will make sure that the backedge value uses the same
6285  // operation.
6286  //
6287  // Note: the peeled shift operation does not have to be the same
6288  // instruction as the one feeding into the PHI's backedge value. We only
6289  // really care about it being the same *kind* of shift instruction --
6290  // that's all that is required for our later inferences to hold.
6291  if (MatchPositiveShift(LHS, V, OpC)) {
6292  PostShiftOpCode = OpC;
6293  LHS = V;
6294  }
6295  }
6296 
6297  PNOut = dyn_cast<PHINode>(LHS);
6298  if (!PNOut || PNOut->getParent() != L->getHeader())
6299  return false;
6300 
6301  Value *BEValue = PNOut->getIncomingValueForBlock(Latch);
6302  Value *OpLHS;
6303 
6304  return
6305  // The backedge value for the PHI node must be a shift by a positive
6306  // amount
6307  MatchPositiveShift(BEValue, OpLHS, OpCodeOut) &&
6308 
6309  // of the PHI node itself
6310  OpLHS == PNOut &&
6311 
6312  // and the kind of shift should match the kind of shift we peeled
6313  // off, if any.
6314  (!PostShiftOpCode.hasValue() || *PostShiftOpCode == OpCodeOut);
6315  };
6316 
6317  PHINode *PN;
6318  Instruction::BinaryOps OpCode;
6319  if (!MatchShiftRecurrence(LHS, PN, OpCode))
6320  return getCouldNotCompute();
6321 
6322  const DataLayout &DL = getDataLayout();
6323 
6324  // The key rationale for this optimization is that for some kinds of shift
6325  // recurrences, the value of the recurrence "stabilizes" to either 0 or -1
6326  // within a finite number of iterations. If the condition guarding the
6327  // backedge (in the sense that the backedge is taken if the condition is true)
6328  // is false for the value the shift recurrence stabilizes to, then we know
6329  // that the backedge is taken only a finite number of times.
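  // For example, the i8 recurrence %iv = lshr %iv.prev, 1 starting at 7 takes
  // the values 7, 3, 1, 0 and then stays at 0, so a backedge guarded by
  // "%iv != 0" can be taken at most bitwidth(i8) = 8 times.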
6330 
6331  ConstantInt *StableValue = nullptr;
6332  switch (OpCode) {
6333  default:
6334  llvm_unreachable("Impossible case!");
6335 
6336  case Instruction::AShr: {
6337  // {K,ashr,<positive-constant>} stabilizes to signum(K) in at most
6338  // bitwidth(K) iterations.
6339  Value *FirstValue = PN->getIncomingValueForBlock(Predecessor);
6340  bool KnownZero, KnownOne;
6341  ComputeSignBit(FirstValue, KnownZero, KnownOne, DL, 0, nullptr,
6342  Predecessor->getTerminator(), &DT);
6343  auto *Ty = cast<IntegerType>(RHS->getType());
6344  if (KnownZero)
6345  StableValue = ConstantInt::get(Ty, 0);
6346  else if (KnownOne)
6347  StableValue = ConstantInt::get(Ty, -1, true);
6348  else
6349  return getCouldNotCompute();
6350 
6351  break;
6352  }
6353  case Instruction::LShr:
6354  case Instruction::Shl:
6355  // Both {K,lshr,<positive-constant>} and {K,shl,<positive-constant>}
6356  // stabilize to 0 in at most bitwidth(K) iterations.
6357  StableValue = ConstantInt::get(cast<IntegerType>(RHS->getType()), 0);
6358  break;
6359  }
6360 
6361  auto *Result =
6362  ConstantFoldCompareInstOperands(Pred, StableValue, RHS, DL, &TLI);
6363  assert(Result->getType()->isIntegerTy(1) &&
6364  "Otherwise cannot be an operand to a branch instruction");
6365 
6366  if (Result->isZeroValue()) {
6367  unsigned BitWidth = getTypeSizeInBits(RHS->getType());
6368  const SCEV *UpperBound =
6369  getConstant(getEffectiveSCEVType(RHS->getType()), BitWidth);
6370  return ExitLimit(getCouldNotCompute(), UpperBound, false);
6371  }
6372 
6373  return getCouldNotCompute();
6374 }
6375 
6376 /// Return true if we can constant fold an instruction of the specified type,
6377 /// assuming that all operands were constants.
6378 static bool CanConstantFold(const Instruction *I) {
6379  if (isa<BinaryOperator>(I) || isa<CmpInst>(I) ||
6380  isa<SelectInst>(I) || isa<CastInst>(I) || isa<GetElementPtrInst>(I) ||
6381  isa<LoadInst>(I))
6382  return true;
6383 
6384  if (const CallInst *CI = dyn_cast<CallInst>(I))
6385  if (const Function *F = CI->getCalledFunction())
6386  return canConstantFoldCallTo(F);
6387  return false;
6388 }
6389 
6390 /// Determine whether this instruction can constant evolve within this loop
6391 /// assuming its operands can all constant evolve.
6392 static bool canConstantEvolve(Instruction *I, const Loop *L) {
6393  // An instruction outside of the loop can't be derived from a loop PHI.
6394  if (!L->contains(I)) return false;
6395 
6396  if (isa<PHINode>(I)) {
6397  // We don't currently keep track of the control flow needed to evaluate
6398  // PHIs, so we cannot handle PHIs inside of loops.
6399  return L->getHeader() == I->getParent();
6400  }
6401 
6402  // If we won't be able to constant fold this expression even if the operands
6403  // are constants, bail early.
6404  return CanConstantFold(I);
6405 }
6406 
6407 /// getConstantEvolvingPHIOperands - Implement getConstantEvolvingPHI by
6408 /// recursing through each instruction operand until reaching a loop header phi.
6409 static PHINode *
6410 getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L,
6411  DenseMap<Instruction *, PHINode *> &PHIMap) {
6412
6413  // Otherwise, we can evaluate this instruction if all of its operands are
6414  // constant or derived from a PHI node themselves.
6415  PHINode *PHI = nullptr;
6416  for (Value *Op : UseInst->operands()) {
6417  if (isa<Constant>(Op)) continue;
6418 
6419  Instruction *OpInst = dyn_cast<Instruction>(Op);
6420  if (!OpInst || !canConstantEvolve(OpInst, L)) return nullptr;
6421 
6422  PHINode *P = dyn_cast<PHINode>(OpInst);
6423  if (!P)
6424  // If this operand is already visited, reuse the prior result.
6425  // We may have P != PHI if this is the deepest point at which the
6426  // inconsistent paths meet.
6427  P = PHIMap.lookup(OpInst);
6428  if (!P) {
6429  // Recurse and memoize the results, whether a phi is found or not.
6430  // This recursive call invalidates pointers into PHIMap.
6431  P = getConstantEvolvingPHIOperands(OpInst, L, PHIMap);
6432  PHIMap[OpInst] = P;
6433  }
6434  if (!P)
6435  return nullptr; // Not evolving from PHI
6436  if (PHI && PHI != P)
6437  return nullptr; // Evolving from multiple different PHIs.
6438  PHI = P;
6439  }
6440  // This is an expression evolving from a constant PHI!
6441  return PHI;
6442 }
6443 
6444 /// getConstantEvolvingPHI - Given an LLVM value and a loop, return a PHI node
6445 /// in the loop that V is derived from. We allow arbitrary operations along the
6446 /// way, but the operands of an operation must either be constants or a value
6447 /// derived from a constant PHI. If this expression does not fit with these
6448 /// constraints, return null.
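/// For example, given "%next = add i32 %phi, 3" where %phi is a header PHI of
/// L, %next evolves from %phi; an expression such as "add i32 %phi1, %phi2"
/// over two different header PHIs does not qualify.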
6449 static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
6450  Instruction *I = dyn_cast<Instruction>(V);
6451  if (!I || !canConstantEvolve(I, L)) return nullptr;
6452 
6453  if (PHINode *PN = dyn_cast<PHINode>(I))
6454  return PN;
6455 
6456  // Record non-constant instructions contained by the loop.
6457  DenseMap<Instruction *, PHINode *> PHIMap;
6458  return getConstantEvolvingPHIOperands(I, L, PHIMap);
6459 }
6460 
6461 /// EvaluateExpression - Given an expression that passes the
6462 /// getConstantEvolvingPHI predicate, evaluate its value assuming the PHI node
6463 /// in the loop has the value PHIVal. If we can't fold this expression for some
6464 /// reason, return null.
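/// For example, given the mapping {%phi -> i32 5}, the expression
/// "mul (add %phi, 1), 2" evaluates to i32 12.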
6465 static Constant *EvaluateExpression(Value *V, const Loop *L,
6466  DenseMap<Instruction *, Constant *> &Vals,
6467  const DataLayout &DL,
6468  const TargetLibraryInfo *TLI) {
6469  // Convenient constant check, but redundant for recursive calls.
6470  if (Constant *C = dyn_cast<Constant>(V)) return C;
6471  Instruction *I = dyn_cast<Instruction>(V);
6472  if (!I) return nullptr;
6473 
6474  if (Constant *C = Vals.lookup(I)) return C;
6475 
6476  // An instruction inside the loop depends on a value outside the loop that we
6477  // weren't given a mapping for, or a value such as a call inside the loop.
6478  if (!canConstantEvolve(I, L)) return nullptr;
6479 
6480  // An unmapped PHI can be due to a branch or another loop inside this loop,
6481  // or due to this not being the initial iteration through a loop where we
6482  // couldn't compute the evolution of this particular PHI last time.
6483  if (isa<PHINode>(I)) return nullptr;
6484 
6485  std::vector<Constant*> Operands(I->getNumOperands());
6486 
6487  for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
6488  Instruction *Operand = dyn_cast<Instruction>(I->getOperand(i));
6489  if (!Operand) {
6490  Operands[i] = dyn_cast<Constant>(I->getOperand(i));
6491  if (!Operands[i]) return nullptr;
6492  continue;
6493  }
6494  Constant *C = EvaluateExpression(Operand, L, Vals, DL, TLI);
6495  Vals[Operand] = C;
6496  if (!C) return nullptr;
6497  Operands[i] = C;
6498  }
6499 
6500  if (CmpInst *CI = dyn_cast<CmpInst>(I))
6501  return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
6502  Operands[1], DL, TLI);
6503  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
6504  if (!LI->isVolatile())
6505  return ConstantFoldLoadFromConstPtr(Operands[0], LI->getType(), DL);
6506  }
6507  return ConstantFoldInstOperands(I, Operands, DL, TLI);
6508 }
6509 
6510 
6511 // If every incoming value to PN except the one for BB is a specific Constant,
6512 // return that, else return nullptr.
6513 static Constant *getOtherIncomingValue(PHINode *PN, BasicBlock *BB) {
6514  Constant *IncomingVal = nullptr;
6515 
6516  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
6517  if (PN->getIncomingBlock(i) == BB)
6518  continue;
6519 
6520  auto *CurrentVal = dyn_cast<Constant>(PN->getIncomingValue(i));
6521  if (!CurrentVal)
6522  return nullptr;
6523 
6524  if (IncomingVal != CurrentVal) {
6525  if (IncomingVal)
6526  return nullptr;
6527  IncomingVal = CurrentVal;
6528  }
6529  }
6530 
6531  return IncomingVal;
6532 }
6533 
6534 /// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
6535 /// in the header of its containing loop, we know the loop executes a
6536 /// constant number of times, and the PHI node is just a recurrence
6537 /// involving constants, fold it.
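/// For example, given "x = 1; for (i = 0; i != 100; ++i) x = x*3+7;", every
/// step of the recurrence for x folds to a constant, so the exit value of x
/// can be obtained by simulating all 100 iterations.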
6538 Constant *
6539 ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
6540  const APInt &BEs,
6541  const Loop *L) {
6542  auto I = ConstantEvolutionLoopExitValue.find(PN);
6543  if (I != ConstantEvolutionLoopExitValue.end())
6544  return I->second;
6545 
6546  if (BEs.ugt(MaxBruteForceIterations))
6547  return ConstantEvolutionLoopExitValue[PN] = nullptr; // Not going to evaluate it.
6548 
6549  Constant *&RetVal = ConstantEvolutionLoopExitValue[PN];
6550 
6551  DenseMap<Instruction *, Constant *> CurrentIterVals;
6552  BasicBlock *Header = L->getHeader();
6553  assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!");
6554 
6555  BasicBlock *Latch = L->getLoopLatch();
6556  if (!Latch)
6557  return nullptr;
6558 
6559  for (auto &I : *Header) {
6560  PHINode *PHI = dyn_cast<PHINode>(&I);
6561  if (!PHI) break;
6562  auto *StartCST = getOtherIncomingValue(PHI, Latch);
6563  if (!StartCST) continue;
6564  CurrentIterVals[PHI] = StartCST;
6565  }
6566  if (!CurrentIterVals.count(PN))
6567  return RetVal = nullptr;
6568 
6569  Value *BEValue = PN->getIncomingValueForBlock(Latch);
6570 
6571  // Execute the loop symbolically to determine the exit value.
6572  if (BEs.getActiveBits() >= 32)
6573  return RetVal = nullptr; // More than 2^32-1 iterations?? Not doing it!
6574 
6575  unsigned NumIterations = BEs.getZExtValue(); // must be in range
6576  unsigned IterationNum = 0;
6577  const DataLayout &DL = getDataLayout();
6578  for (; ; ++IterationNum) {
6579  if (IterationNum == NumIterations)
6580  return RetVal = CurrentIterVals[PN]; // Got exit value!
6581 
6582  // Compute the value of the PHIs for the next iteration.
6583  // EvaluateExpression adds non-phi values to the CurrentIterVals map.
6584  DenseMap<Instruction *, Constant *> NextIterVals;
6585  Constant *NextPHI =
6586  EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);
6587  if (!NextPHI)
6588  return nullptr; // Couldn't evaluate!
6589  NextIterVals[PN] = NextPHI;
6590 
6591  bool StoppedEvolving = NextPHI == CurrentIterVals[PN];
6592 
6593  // Also evaluate the other PHI nodes. However, we don't get to stop if we
6594  // cease to be able to evaluate one of them or if they stop evolving,
6595  // because that doesn't necessarily prevent us from computing PN.
6596  SmallVector<std::pair<PHINode *, Constant *>, 8> PHIsToCompute;
6597  for (const auto &I : CurrentIterVals) {
6598  PHINode *PHI = dyn_cast<PHINode>(I.first);
6599  if (!PHI || PHI == PN || PHI->getParent() != Header) continue;
6600  PHIsToCompute.emplace_back(PHI, I.second);
6601  }
6602  // We use two distinct loops because EvaluateExpression may invalidate any
6603  // iterators into CurrentIterVals.
6604  for (const auto &I : PHIsToCompute) {
6605  PHINode *PHI = I.first;
6606  Constant *&NextPHI = NextIterVals[PHI];
6607  if (!NextPHI) { // Not already computed.
6608  Value *BEValue = PHI->getIncomingValueForBlock(Latch);
6609  NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);
6610  }
6611  if (NextPHI != I.second)
6612  StoppedEvolving = false;
6613  }
6614 
6615  // If all entries in CurrentIterVals == NextIterVals then we can stop
6616  // iterating, the loop can't continue to change.
6617  if (StoppedEvolving)
6618  return RetVal = CurrentIterVals[PN];
6619 
6620  CurrentIterVals.swap(NextIterVals);
6621  }
6622 }
6623 
6624 const SCEV *ScalarEvolution::computeExitCountExhaustively(const Loop *L,
6625  Value *Cond,
6626  bool ExitWhen) {
6627  PHINode *PN = getConstantEvolvingPHI(Cond, L);
6628  if (!PN) return getCouldNotCompute();
6629 
6630  // If the loop is canonicalized, the PHI will have exactly two entries.
6631  // That's the only form we support here.
6632  if (PN->getNumIncomingValues() != 2) return getCouldNotCompute();
6633 
6634  DenseMap<Instruction *, Constant *> CurrentIterVals;
6635  BasicBlock *Header = L->getHeader();
6636  assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!");
6637 
6638  BasicBlock *Latch = L->getLoopLatch();
6639  assert(Latch && "Should follow from NumIncomingValues == 2!");
6640 
6641  for (auto &I : *Header) {
6642  PHINode *PHI = dyn_cast<PHINode>(&I);
6643  if (!PHI)
6644  break;
6645  auto *StartCST = getOtherIncomingValue(PHI, Latch);
6646  if (!StartCST) continue;
6647  CurrentIterVals[PHI] = StartCST;
6648  }
6649  if (!CurrentIterVals.count(PN))
6650  return getCouldNotCompute();
6651 
6652  // Okay, we found a PHI node that defines the trip count of this loop. Execute
6653  // the loop symbolically to determine when the condition gets a value of
6654  // "ExitWhen".
6655  unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis.
6656  const DataLayout &DL = getDataLayout();
6657  for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){
6658  auto *CondVal = dyn_cast_or_null<ConstantInt>(
6659  EvaluateExpression(Cond, L, CurrentIterVals, DL, &TLI));
6660 
6661  // Couldn't symbolically evaluate.
6662  if (!CondVal) return getCouldNotCompute();
6663 
6664  if (CondVal->getValue() == uint64_t(ExitWhen)) {
6665  ++NumBruteForceTripCountsComputed;
6666  return getConstant(Type::getInt32Ty(getContext()), IterationNum);
6667  }
6668 
6669  // Update all the PHI nodes for the next iteration.
6670  DenseMap<Instruction *, Constant *> NextIterVals;
6671
6672  // Create a list of which PHIs we need to compute. We want to do this before
6673  // calling EvaluateExpression on them because that may invalidate iterators
6674  // into CurrentIterVals.
6675  SmallVector<PHINode *, 8> PHIsToCompute;
6676  for (const auto &I : CurrentIterVals) {
6677  PHINode *PHI = dyn_cast<PHINode>(I.first);
6678  if (!PHI || PHI->getParent() != Header) continue;
6679  PHIsToCompute.push_back(PHI);
6680  }
6681  for (PHINode *PHI : PHIsToCompute) {
6682  Constant *&NextPHI = NextIterVals[PHI];
6683  if (NextPHI) continue; // Already computed!
6684 
6685  Value *BEValue = PHI->getIncomingValueForBlock(Latch);
6686  NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);
6687  }
6688  CurrentIterVals.swap(NextIterVals);
6689  }
6690 
6691  // Too many iterations were needed to evaluate.
6692  return getCouldNotCompute();
6693 }
6694 
6695 const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
6696  SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values =
6697  ValuesAtScopes[V];
6698  // Check to see if we've folded this expression at this loop before.
6699  for (auto &LS : Values)
6700  if (LS.first == L)
6701  return LS.second ? LS.second : V;
6702 
6703  Values.emplace_back(L, nullptr);
6704 
6705  // Otherwise compute it.
6706  const SCEV *C = computeSCEVAtScope(V, L);
6707  for (auto &LS : reverse(ValuesAtScopes[V]))
6708  if (LS.first == L) {
6709  LS.second = C;
6710  break;
6711  }
6712  return C;
6713 }
6714 
6715 /// This builds up a Constant using the ConstantExpr interface. That way, we
6716 /// will return Constants for objects which aren't represented by a
6717 /// SCEVConstant, because SCEVConstant is restricted to ConstantInt.
6718 /// Returns NULL if the SCEV isn't representable as a Constant.
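/// For example, an add of a constant offset and a global's address is built
/// as a ConstantExpr getelementptr over an i8* bitcast of the global.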
6719 static Constant *BuildConstantFromSCEV(const SCEV *V) {
6720  switch (static_cast<SCEVTypes>(V->getSCEVType())) {
6721  case scCouldNotCompute:
6722  case scAddRecExpr:
6723  break;
6724  case scConstant:
6725  return cast<SCEVConstant>(V)->getValue();
6726  case scUnknown:
6727  return dyn_cast<Constant>(cast<SCEVUnknown>(V)->getValue());
6728  case scSignExtend: {
6729  const SCEVSignExtendExpr *SS = cast<SCEVSignExtendExpr>(V);
6730  if (Constant *CastOp = BuildConstantFromSCEV(SS->getOperand()))
6731  return ConstantExpr::getSExt(CastOp, SS->getType());
6732  break;
6733  }
6734  case scZeroExtend: {
6735  const SCEVZeroExtendExpr *SZ = cast<SCEVZeroExtendExpr>(V);
6736  if (Constant *CastOp = BuildConstantFromSCEV(SZ->getOperand()))
6737  return ConstantExpr::getZExt(CastOp, SZ->getType());
6738  break;
6739  }
6740  case scTruncate: {
6741  const SCEVTruncateExpr *ST = cast<SCEVTruncateExpr>(V);
6742  if (Constant *CastOp = BuildConstantFromSCEV(ST->getOperand()))
6743  return ConstantExpr::getTrunc(CastOp, ST->getType());
6744  break;
6745  }
6746  case scAddExpr: {
6747  const SCEVAddExpr *SA = cast<SCEVAddExpr>(V);
6748  if (Constant *C = BuildConstantFromSCEV(SA->getOperand(0))) {
6749  if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) {
6750  unsigned AS = PTy->getAddressSpace();
6751  Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
6752  C = ConstantExpr::getBitCast(C, DestPtrTy);
6753  }
6754  for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) {
6755  Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i));
6756  if (!C2) return nullptr;
6757 
6758  // First pointer!
6759  if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) {
6760  unsigned AS = C2->getType()->getPointerAddressSpace();
6761  std::swap(C, C2);
6762  Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
6763  // The offsets have been converted to bytes. We can add bytes to an
6764  // i8* by GEP with the byte count in the first index.
6765  C = ConstantExpr::getBitCast(C, DestPtrTy);
6766  }
6767 
6768  // Don't bother trying to sum two pointers. We probably can't
6769  // statically compute a load that results from it anyway.
6770  if (C2->getType()->isPointerTy())
6771  return nullptr;
6772 
6773  if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) {
6774  if (PTy->getElementType()->isStructTy())
6775  C2 = ConstantExpr::getIntegerCast(
6776  C2, Type::getInt32Ty(C->getContext()), true);
6777  C = ConstantExpr::getGetElementPtr(PTy->getElementType(), C, C2);
6778  } else
6779  C = ConstantExpr::getAdd(C, C2);
6780  }
6781  return C;
6782  }
6783  break;
6784  }
6785  case scMulExpr: {
6786  const SCEVMulExpr *SM = cast<SCEVMulExpr>(V);
6787  if (Constant *C = BuildConstantFromSCEV(SM->getOperand(0))) {
6788  // Don't bother with pointers at all.
6789  if (C->getType()->isPointerTy()) return nullptr;
6790  for (unsigned i = 1, e = SM->getNumOperands(); i != e; ++i) {
6791  Constant *C2 = BuildConstantFromSCEV(SM->getOperand(i));
6792  if (!C2 || C2->getType()->isPointerTy()) return nullptr;
6793  C = ConstantExpr::getMul(C, C2);
6794  }
6795  return C;
6796  }
6797  break;
6798  }
6799  case scUDivExpr: {
6800  const SCEVUDivExpr *SU = cast<SCEVUDivExpr>(V);
6801  if (Constant *LHS = BuildConstantFromSCEV(SU->getLHS()))
6802  if (Constant *RHS = BuildConstantFromSCEV(SU->getRHS()))
6803  if (LHS->getType() == RHS->getType())
6804  return ConstantExpr::getUDiv(LHS, RHS);
6805  break;
6806  }
6807  case scSMaxExpr:
6808  case scUMaxExpr:
6809  break; // TODO: smax, umax.
6810  }
6811  return nullptr;
6812 }
6813 
6814 const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
6815  if (isa<SCEVConstant>(V)) return V;
6816 
6817  // If this instruction is evolved from a constant-evolving PHI, compute the
6818  // exit value from the loop without using SCEVs.
6819  if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) {
6820  if (Instruction *I = dyn_cast<Instruction>(SU->getValue())) {
6821  const Loop *LI = this->LI[I->getParent()];
6822  if (LI && LI->getParentLoop() == L) // Looking for loop exit value.
6823  if (PHINode *PN = dyn_cast<PHINode>(I))
6824  if (PN->getParent() == LI->getHeader()) {
6825  // Okay, there is no closed form solution for the PHI node. Check
6826  // to see if the loop that contains it has a known backedge-taken
6827  // count. If so, we may be able to force computation of the exit
6828  // value.
6829  const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI);
6830  if (const SCEVConstant *BTCC =
6831  dyn_cast<SCEVConstant>(BackedgeTakenCount)) {
6832  // Okay, we know how many times the containing loop executes. If
6833  // this is a constant evolving PHI node, get the final value at
6834  // the specified iteration number.
6835  Constant *RV =
6836  getConstantEvolutionLoopExitValue(PN, BTCC->getAPInt(), LI);
6837  if (RV) return getSCEV(RV);
6838  }
6839  }
6840 
6841  // Okay, this is an expression that we cannot symbolically evaluate
6842  // into a SCEV. Check to see if it's possible to symbolically evaluate
6843  // the arguments into constants, and if so, try to constant propagate the
6844  // result. This is particularly useful for computing loop exit values.
6845  if (CanConstantFold(I)) {
6846  SmallVector<Constant *, 4> Operands;
6847  bool MadeImprovement = false;
6848  for (Value *Op : I->operands()) {
6849  if (Constant *C = dyn_cast<Constant>(Op)) {
6850  Operands.push_back(C);
6851  continue;
6852  }
6853 
6854  // If any of the operands is non-constant and if they are
6855  // non-integer and non-pointer, don't even try to analyze them
6856  // with scev techniques.
6857  if (!isSCEVable(Op->getType()))
6858  return V;
6859 
6860  const SCEV *OrigV = getSCEV(Op);
6861  const SCEV *OpV = getSCEVAtScope(OrigV, L);
6862  MadeImprovement |= OrigV != OpV;
6863 
6863
6864  Constant *C = BuildConstantFromSCEV(OpV);
6865  if (!C) return V;
6866  if (C->getType() != Op->getType())
6867  C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
6868  Op->getType(),
6869  false),
6870  C, Op->getType());
6871  Operands.push_back(C);
6872  }
6873 
6874  // Check to see if getSCEVAtScope actually made an improvement.
6875  if (MadeImprovement) {
6876  Constant *C = nullptr;
6877  const DataLayout &DL = getDataLayout();
6878  if (const CmpInst *CI = dyn_cast<CmpInst>(I))
6879  C = ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
6880  Operands[1], DL, &TLI);
6881  else if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
6882  if (!LI->isVolatile())
6883  C = ConstantFoldLoadFromConstPtr(Operands[0], LI->getType(), DL);
6884  } else
6885  C = ConstantFoldInstOperands(I, Operands, DL, &TLI);
6886  if (!C) return V;
6887  return getSCEV(C);
6888  }
6889  }
6890  }
6891 
6892  // This is some other type of SCEVUnknown, just return it.
6893  return V;
6894  }
6895 
6896  if (const SCEVCommutativeExpr *Comm = dyn_cast<SCEVCommutativeExpr>(V)) {
6897  // Avoid performing the look-up in the common case where the specified
6898  // expression has no loop-variant portions.
6899  for (unsigned i = 0, e = Comm->getNumOperands(); i != e; ++i) {
6900  const SCEV *OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
6901  if (OpAtScope != Comm->getOperand(i)) {
6902  // Okay, at least one of these operands is loop variant but might be
6903  // foldable. Build a new instance of the folded commutative expression.
6904  SmallVector<const SCEV *, 8> NewOps(Comm->op_begin(),
6905  Comm->op_begin()+i);
6906  NewOps.push_back(OpAtScope);
6907 
6908  for (++i; i != e; ++i) {
6909  OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
6910  NewOps.push_back(OpAtScope);
6911  }
6912  if (isa<SCEVAddExpr>(Comm))
6913  return getAddExpr(NewOps);
6914  if (isa<SCEVMulExpr>(Comm))
6915  return getMulExpr(NewOps);
6916  if (isa<SCEVSMaxExpr>(Comm))
6917  return getSMaxExpr(NewOps);
6918  if (isa<SCEVUMaxExpr>(Comm))
6919  return getUMaxExpr(NewOps);
6920  llvm_unreachable("Unknown commutative SCEV type!");
6921  }
6922  }
6923  // If we got here, all operands are loop invariant.
6924  return Comm;
6925  }
6926 
6927  if (const SCEVUDivExpr *Div = dyn_cast<SCEVUDivExpr>(V)) {
6928  const SCEV *LHS = getSCEVAtScope(Div->getLHS(), L);
6929  const SCEV *RHS = getSCEVAtScope(Div->getRHS(), L);
6930  if (LHS == Div->getLHS() && RHS == Div->getRHS())
6931  return Div; // must be loop invariant
6932  return getUDivExpr(LHS, RHS);
6933  }
6934 
6935  // If this is a loop recurrence for a loop that does not contain L, then we
6936  // are dealing with the final value computed by the loop.
6937  if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V)) {
6938  // First, attempt to evaluate each operand.
6939  // Avoid performing the look-up in the common case where the specified
6940  // expression has no loop-variant portions.
6941  for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
6942  const SCEV *OpAtScope = getSCEVAtScope(AddRec->getOperand(i), L);
6943  if (OpAtScope == AddRec->getOperand(i))
6944  continue;
6945 
6946  // Okay, at least one of these operands is loop variant but might be
6947  // foldable. Build a new instance of the folded commutative expression.
6948  SmallVector<const SCEV *, 8> NewOps(AddRec->op_begin(),
6949  AddRec->op_begin()+i);
6950  NewOps.push_back(OpAtScope);
6951  for (++i; i != e; ++i)
6952  NewOps.push_back(getSCEVAtScope(AddRec->getOperand(i), L));
6953 
6954  const SCEV *FoldedRec =
6955  getAddRecExpr(NewOps, AddRec->getLoop(),
6956  AddRec->getNoWrapFlags(SCEV::FlagNW));
6957  AddRec = dyn_cast<SCEVAddRecExpr>(FoldedRec);
6958  // The addrec may be folded to a nonrecurrence, for example, if the
6959  // induction variable is multiplied by zero after constant folding. Go
6960  // ahead and return the folded value.
6961  if (!AddRec)
6962  return FoldedRec;
6963  break;
6964  }
6965 
6966  // If the scope is outside the addrec's loop, evaluate it by using the
6967  // loop exit value of the addrec.
6968  if (!AddRec->getLoop()->contains(L)) {
6969  // To evaluate this recurrence, we need to know how many times the AddRec
6970  // loop iterates. Compute this now.
6971  const SCEV *BackedgeTakenCount = getBackedgeTakenCount(AddRec->getLoop());
6972  if (BackedgeTakenCount == getCouldNotCompute()) return AddRec;
6973 
6974  // Then, evaluate the AddRec.
6975  return AddRec->evaluateAtIteration(BackedgeTakenCount, *this);
6976  }
6977 
6978  return AddRec;
6979  }
6980 
6981  if (const SCEVZeroExtendExpr *Cast = dyn_cast<SCEVZeroExtendExpr>(V)) {
6982  const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
6983  if (Op == Cast->getOperand())
6984  return Cast; // must be loop invariant
6985  return getZeroExtendExpr(Op, Cast->getType());
6986  }
6987 
6988  if (const SCEVSignExtendExpr *Cast = dyn_cast<SCEVSignExtendExpr>(V)) {
6989  const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
6990  if (Op == Cast->getOperand())
6991  return Cast; // must be loop invariant
6992  return getSignExtendExpr(Op, Cast->getType());
6993  }
6994 
6995  if (const SCEVTruncateExpr *Cast = dyn_cast<SCEVTruncateExpr>(V)) {
6996  const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
6997  if (Op == Cast->getOperand())
6998  return Cast; // must be loop invariant
6999  return getTruncateExpr(Op, Cast->getType());
7000  }
7001 
7002  llvm_unreachable("Unknown SCEV type!");
7003 }
7004 
7005 const SCEV *ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) {
7006  return getSCEVAtScope(getSCEV(V), L);
7007 }
7008 
7009 /// Finds the minimum unsigned root of the following equation:
7010 ///
7011 /// A * X = B (mod N)
7012 ///
7013 /// where N = 2^BW and BW is the common bit width of A and B. The signedness of
7014 /// A and B isn't important.
7015 ///
7016 /// If the equation does not have a solution, SCEVCouldNotCompute is returned.
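/// For example, with BW = 8 (so N = 256), A = 4 and B = 12 give D = 4; the
/// multiplicative inverse of A/D = 1 modulo N/D = 64 is 1, so the minimum
/// root is X = (1 * 12) / 4 = 3, and indeed 4 * 3 == 12 (mod 256).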
7017 static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const APInt &B,
7018  ScalarEvolution &SE) {
7019  uint32_t BW = A.getBitWidth();
7020  assert(BW == B.getBitWidth() && "Bit widths must be the same.");
7021  assert(A != 0 && "A must be non-zero.");
7022 
7023  // 1. D = gcd(A, N)
7024  //
7025  // The gcd of A and N may have only one prime factor: 2. The number of
7026  // trailing zeros in A is its multiplicity
7027  uint32_t Mult2 = A.countTrailingZeros();
7028  // D = 2^Mult2
7029 
7030  // 2. Check if B is divisible by D.
7031  //
7032  // B is divisible by D if and only if the multiplicity of prime factor 2 for B
7033  // is not less than multiplicity of this prime factor for D.
7034  if (B.countTrailingZeros() < Mult2)
7035  return SE.getCouldNotCompute();
7036 
7037  // 3. Compute I: the multiplicative inverse of (A / D) in arithmetic
7038  // modulo (N / D).
7039  //
7040  // If D == 1, (N / D) == N == 2^BW, so we need one extra bit to represent
7041  // (N / D) in general. The inverse itself always fits into BW bits, though,
7042  // so we immediately truncate it.
7043  APInt AD = A.lshr(Mult2).zext(BW + 1); // AD = A / D
7044  APInt Mod(BW + 1, 0);
7045  Mod.setBit(BW - Mult2); // Mod = N / D
7046  APInt I = AD.multiplicativeInverse(Mod).trunc(BW);
7047 
7048  // 4. Compute the minimum unsigned root of the equation:
7049  // I * (B / D) mod (N / D)
7050  // To simplify the computation, we factor out the divide by D:
7051  // (I * B mod N) / D
7052  APInt Result = (I * B).lshr(Mult2);
7053 
7054  return SE.getConstant(Result);
7055 }
7056 
7057 /// Find the roots of the quadratic equation for the given quadratic chrec
7058 /// {L,+,M,+,N}. This returns either the two roots (which might be the same) or
7059 /// two SCEVCouldNotCompute objects.
7060 ///
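/// A quadratic chrec {L,+,M,+,N} evaluates at iteration X to
/// L + M*X + N*X*(X-1)/2, i.e. the polynomial (N/2)*X^2 + (M - N/2)*X + L,
/// which is the form solved below with the quadratic formula.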
7061 static Optional<std::pair<const SCEVConstant *, const SCEVConstant *>>
7062 SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
7063  assert(AddRec->getNumOperands() == 3 && "This is not a quadratic chrec!");
7064  const SCEVConstant *LC = dyn_cast<SCEVConstant>(AddRec->getOperand(0));
7065  const SCEVConstant *MC = dyn_cast<SCEVConstant>(AddRec->getOperand(1));
7066  const SCEVConstant *NC = dyn_cast<SCEVConstant>(AddRec->getOperand(2));
7067 
7068  // We currently can only solve this if the coefficients are constants.
7069  if (!LC || !MC || !NC)
7070  return None;
7071 
7072  uint32_t BitWidth = LC->getAPInt().getBitWidth();
7073  const APInt &L = LC->getAPInt();
7074  const APInt &M = MC->getAPInt();
7075  const APInt &N = NC->getAPInt();
7076  APInt Two(BitWidth, 2);
7077  APInt Four(BitWidth, 4);
7078 
7079  {
7080  using namespace APIntOps;
7081  const APInt& C = L;
7082  // Convert from chrec coefficients to polynomial coefficients AX^2+BX+C
7083  // The B coefficient is M-N/2
7084  APInt B(M);
7085  B -= sdiv(N,Two);
7086 
7087  // The A coefficient is N/2
7088  APInt A(N.sdiv(Two));
7089 
7090  // Compute the B^2-4ac term.
7091  APInt SqrtTerm(B);
7092  SqrtTerm *= B;
7093  SqrtTerm -= Four * (A * C);
7094 
7095  if (SqrtTerm.isNegative()) {
7096  // The loop is provably infinite.
7097  return None;
7098  }
7099 
7100  // Compute sqrt(B^2-4ac). This is guaranteed to be the nearest
7101  // integer value or else APInt::sqrt() will assert.
7102  APInt SqrtVal(SqrtTerm.sqrt());
7103 
7104  // Compute the two solutions for the quadratic formula.
7105  // The divisions must be performed as signed divisions.
7106  APInt NegB(-B);
7107  APInt TwoA(A << 1);
7108  if (TwoA.isMinValue())
7109  return None;
7110 
7111  LLVMContext &Context = SE.getContext();
7112 
7113  ConstantInt *Solution1 =
7114  ConstantInt::get(Context, (NegB + SqrtVal).sdiv(TwoA));
7115  ConstantInt *Solution2 =
7116  ConstantInt::get(Context, (NegB - SqrtVal).sdiv(TwoA));
7117 
7118  return std::make_pair(cast<SCEVConstant>(SE.getConstant(Solution1)),
7119  cast<SCEVConstant>(SE.getConstant(Solution2)));
7120  } // end APIntOps namespace
7121 }
7122 
7123 ScalarEvolution::ExitLimit
7124 ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
7125  bool AllowPredicates) {
7126 
7127  // This is only used for loops with an "x != y" exit test. The exit condition
7128  // is now expressed as a single expression, V = x-y. So the exit test is
7129  // effectively V != 0. We know and take advantage of the fact that this
7130  // expression is only used in a comparison-with-zero context.
7131 
7132  SmallPtrSet<const SCEVPredicate *, 4> Predicates;
7133  // If the value is a constant
7134  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
7135  // If the value is already zero, the branch will execute zero times.
7136  if (C->getValue()->isZero()) return C;
7137  return getCouldNotCompute(); // Otherwise it will loop infinitely.
7138  }
7139 
7140  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V);
7141  if (!AddRec && AllowPredicates)
7142  // Try to make this an AddRec using runtime tests, in the first X
7143  // iterations of this loop, where X is the SCEV expression found by the
7144  // algorithm below.
7145  AddRec = convertSCEVToAddRecWithPredicates(V, L, Predicates);
7146 
7147  if (!AddRec || AddRec->getLoop() != L)
7148  return getCouldNotCompute();
7149 
7150  // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of
7151  // the quadratic equation to solve it.
7152  if (AddRec->isQuadratic() && AddRec->getType()->isIntegerTy()) {
7153  if (auto Roots = SolveQuadraticEquation(AddRec, *this)) {
7154  const SCEVConstant *R1 = Roots->first;
7155  const SCEVConstant *R2 = Roots->second;
7156  // Pick the smallest positive root value.
7157  if (ConstantInt *CB = dyn_cast<ConstantInt>(ConstantExpr::getICmp(
7158  CmpInst::ICMP_ULT, R1->getValue(), R2->getValue()))) {
7159  if (!CB->getZExtValue())
7160  std::swap(R1, R2); // R1 is the minimum root now.
7161 
7162  // We can only use this value if the chrec ends up with an exact zero
7163  // value at this index. When solving for "X*X != 5", for example, we
7164  // should not accept a root of 2.
7165  const SCEV *Val = AddRec->evaluateAtIteration(R1, *this);
7166  if (Val->isZero())
7167  // We found a quadratic root!
7168  return ExitLimit(R1, R1, false, Predicates);
7169  }
7170  }
7171  return getCouldNotCompute();
7172  }
7173 
7174  // Otherwise we can only handle this if it is affine.
7175  if (!AddRec->isAffine())
7176  return getCouldNotCompute();
7177 
7178  // If this is an affine expression, the execution count of this branch is
7179  // the minimum unsigned root of the following equation:
7180  //
7181  // Start + Step*N = 0 (mod 2^BW)
7182  //
7183  // equivalent to:
7184  //
7185  // Step*N = -Start (mod 2^BW)
7186  //
7187  // where BW is the common bit width of Start and Step.
7188 
7189  // Get the initial value for the loop.
7190  const SCEV *Start = getSCEVAtScope(AddRec->getStart(), L->getParentLoop());
7191  const SCEV *Step = getSCEVAtScope(AddRec->getOperand(1), L->getParentLoop());
7192 
7193  // For now we handle only constant steps.
7194  //
7195  // TODO: Handle a nonconstant Step given AddRec<NUW>. If the
7196  // AddRec is NUW, then (in an unsigned sense) it cannot be counting up to wrap
7197  // to 0, it must be counting down to equal 0. Consequently, N = Start / -Step.
7198  // We have not yet seen any such cases.
7199  const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step);
7200  if (!StepC || StepC->getValue()->equalsInt(0))
7201  return getCouldNotCompute();
7202 
7203  // For positive steps (counting up until unsigned overflow):
7204  // N = -Start/Step (as unsigned)
7205  // For negative steps (counting down to zero):
7206  // N = Start/-Step
7207  // First compute the unsigned distance from zero in the direction of Step.
7208  bool CountDown = StepC->getAPInt().isNegative();
7209  const SCEV *Distance = CountDown ? Start : getNegativeSCEV(Start);
7210 
7211  // Handle unitary steps, which cannot wraparound.
7212  // 1*N = -Start; -1*N = Start (mod 2^BW), so:
7213  // N = Distance (as unsigned)
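  // For example, {7,+,-1} counts down and reaches zero after exactly 7
  // backedges, so the exact count is the start value itself.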
7214  if (StepC->getValue()->equalsInt(1) || StepC->getValue()->isAllOnesValue()) {
7215  APInt MaxBECount = getUnsignedRange(Distance).getUnsignedMax();
7216 
7217  // When a loop like "for (int i = 0; i != n; ++i) { /* body */ }" is rotated,
7218  // we end up with a loop whose backedge-taken count is n - 1. Detect this
7219  // case, and see if we can improve the bound.
7220  //
7221  // Explicitly handling this here is necessary because getUnsignedRange
7222  // isn't context-sensitive; it doesn't know that we only care about the
7223  // range inside the loop.
7224  const SCEV *Zero = getZero(Distance->getType());
7225  const SCEV *One = getOne(Distance->getType());
7226  const SCEV *DistancePlusOne = getAddExpr(Distance, One);
7227  if (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, DistancePlusOne, Zero)) {
7228  // If Distance + 1 doesn't overflow, we can compute the maximum distance
7229  // as "unsigned_max(Distance + 1) - 1".
7230  ConstantRange CR = getUnsignedRange(DistancePlusOne);
7231  MaxBECount = APIntOps::umin(MaxBECount, CR.getUnsignedMax() - 1);
7232  }
7233  return ExitLimit(Distance, getConstant(MaxBECount), false, Predicates);
7234  }
7235 
7236  // As a special case, handle the instance where Step is a positive power of
7237  // two. In this case, determining whether Step divides Distance evenly can be
7238  // done by counting and comparing the number of trailing zeros of Step and
7239  // Distance.
7240  if (!CountDown) {
7241  const APInt &StepV = StepC->getAPInt();
7242  // StepV.isPowerOf2() returns true if StepV is a positive power of two. It
7243  // also returns true if StepV is maximally negative (e.g., INT_MIN), but that
7244  // case is not handled as this code is guarded by !CountDown.
7245  if (StepV.isPowerOf2() &&
7246  GetMinTrailingZeros(Distance) >= StepV.countTrailingZeros()) {
7247  // Here we've constrained the equation to be of the form
7248  //
7249  // 2^(N + k) * Distance' = (StepV == 2^N) * X (mod 2^W) ... (0)
7250  //
7251  // where we're operating on a W bit wide integer domain and k is
7252  // non-negative. The smallest unsigned solution for X is the trip count.
7253  //
7254  // (0) is equivalent to:
7255  //
7256  // 2^(N + k) * Distance' - 2^N * X = L * 2^W
7257  // <=> 2^N(2^k * Distance' - X) = L * 2^(W - N) * 2^N
7258  // <=> 2^k * Distance' - X = L * 2^(W - N)
7259  // <=> 2^k * Distance' = L * 2^(W - N) + X ... (1)
7260  //
7261  // The smallest X satisfying (1) is unsigned remainder of dividing the LHS
7262  // by 2^(W - N).
7263  //
7264  // <=> X = 2^k * Distance' URem 2^(W - N) ... (2)
7265  //
7266  // E.g. say we're solving
7267  //
7268  // 2 * Val = 2 * X (in i8) ... (3)
7269  //
7270  // then from (2), we get X = Val URem i8 128 (k = 0 in this case).
7271  //
7272  // Note: It is tempting to solve (3) by setting X = Val, but Val is not
7273  // necessarily the smallest unsigned value of X that satisfies (3).
7274  // E.g. if Val is i8 -127 then the smallest value of X that satisfies (3)
7275  // is i8 1, not i8 -127
7276 
7277  const auto *ModuloResult = getUDivExactExpr(Distance, Step);
7278 
7279  // Since SCEV does not have a URem node, we construct one using a truncate
7280  // and a zero extend.
7281 
7282  unsigned NarrowWidth = StepV.getBitWidth() - StepV.countTrailingZeros();
7283  auto *NarrowTy = IntegerType::get(getContext(), NarrowWidth);
7284  auto *WideTy = Distance->getType();
7285 
7286  const SCEV *Limit =
7287  getZeroExtendExpr(getTruncateExpr(ModuloResult, NarrowTy), WideTy);
7288  return ExitLimit(Limit, Limit, false, Predicates);
7289  }
7290  }
7291 
7292  // If the condition controls loop exit (the loop exits only if the expression
7293  // is true) and the addition is no-wrap we can use unsigned divide to
7294  // compute the backedge count. In this case, the step may not divide the
7295  // distance, but we don't care because if the condition is "missed" the loop
7296  // will have undefined behavior due to wrapping.
7297  if (ControlsExit && AddRec->hasNoSelfWrap() &&
7298  loopHasNoAbnormalExits(AddRec->getLoop())) {
7299  const SCEV *Exact =
7300  getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
7301  return ExitLimit(Exact, Exact, false, Predicates);
7302  }
7303 
7304  // Then, try to solve the above equation provided that Start is constant.
7305  if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start)) {
7306  const SCEV *E = SolveLinEquationWithOverflow(
7307  StepC->getValue()->getValue(), -StartC->getValue()->getValue(), *this);
7308  return ExitLimit(E, E, false, Predicates);
7309  }
7310  return getCouldNotCompute();
7311 }
7312 
7313 ScalarEvolution::ExitLimit
7314 ScalarEvolution::howFarToNonZero(const SCEV *V, const Loop *L) {
7315  // Loops that look like: while (X == 0) are very strange indeed. We don't
7316  // handle them yet except for the trivial case. This could be expanded in the
7317  // future as needed.
7318 
7319  // If the value is a constant, check to see if it is known to be non-zero
7320  // already. If so, the backedge will execute zero times.
7321  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
7322  if (!C->getValue()->isNullValue())
7323  return getZero(C->getType());
7324  return getCouldNotCompute(); // Otherwise it will loop infinitely.
7325  }
7326 
7327  // We could implement others, but I really doubt anyone writes loops like
7328  // this, and if they did, they would already be constant folded.
7329  return getCouldNotCompute();
7330 }
7331 
7332 std::pair<BasicBlock *, BasicBlock *>
7333 ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) {
7334  // If the block has a unique predecessor, then there is no path from the
7335  // predecessor to the block that does not go through the direct edge
7336  // from the predecessor to the block.
7337  if (BasicBlock *Pred = BB->getSinglePredecessor())
7338  return {Pred, BB};
7339 
7340  // A loop's header is defined to be a block that dominates the loop.
7341  // If the header has a unique predecessor outside the loop, it must be
7342  // a block that has exactly one successor that can reach the loop.
7343  if (Loop *L = LI.getLoopFor(BB))
7344  return {L->getLoopPredecessor(), L->getHeader()};
7345 
7346  return {nullptr, nullptr};
7347 }
7348 
7349 /// SCEV structural equivalence is usually sufficient for testing whether two
7350 /// expressions are equal, however for the purposes of looking for a condition
7351 /// guarding a loop, it can be useful to be a little more general, since a
7352 /// front-end may have replicated the controlling expression.
7353 ///
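/// For instance, an operation SCEV models as an opaque SCEVUnknown (say, a
/// "xor %a, %b" binary operator) may have been emitted twice; the two
/// identical instructions compute the same value despite being distinct
/// SCEVs.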
7354 static bool HasSameValue(const SCEV *A, const SCEV *B) {
7355  // Quick check to see if they are the same SCEV.
7356  if (A == B) return true;
7357 
7358  auto ComputesEqualValues = [](const Instruction *A, const Instruction *B) {
7359  // Not all instructions that are "identical" compute the same value. For
7360  // instance, two distinct alloca instructions allocating the same type are
7361  // identical and do not read memory, yet compute distinct values.
7362  return A->isIdenticalTo(B) && (isa<BinaryOperator>(A) || isa<GetElementPtrInst>(A));
7363  };
7364 
7365  // Otherwise, if they're both SCEVUnknown, it's possible that they hold
7366  // two different instructions with the same value. Check for this case.
7367  if (const SCEVUnknown *AU = dyn_cast<SCEVUnknown>(A))
7368  if (const SCEVUnknown *BU = dyn_cast<SCEVUnknown>(B))
7369  if (const Instruction *AI = dyn_cast<Instruction>(AU->getValue()))
7370  if (const Instruction *BI = dyn_cast<Instruction>(BU->getValue()))
7371  if (ComputesEqualValues(AI, BI))
7372  return true;
7373 
7374  // Otherwise assume they may have a different value.
7375  return false;
7376 }
7377 
7378 bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
7379  const SCEV *&LHS, const SCEV *&RHS,
7380  unsigned Depth) {
7381  bool Changed = false;
7382 
7383  // If we hit the max recursion limit bail out.
7384  if (Depth >= 3)
7385  return false;
7386 
7387  // Canonicalize a constant to the right side.
7388  if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
7389  // Check for both operands constant.
7390  if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
7391  if (ConstantExpr::getICmp(Pred,
7392  LHSC->getValue(),
7393  RHSC->getValue())->isNullValue())
7394  goto trivially_false;
7395  else
7396  goto trivially_true;
7397  }
7398  // Otherwise swap the operands to put the constant on the right.
7399  std::swap(LHS, RHS);
7400  Pred = ICmpInst::getSwappedPredicate(Pred);
7401  Changed = true;
7402  }
7403 
7404  // If we're comparing an addrec with a value which is loop-invariant in the
7405  // addrec's loop, put the addrec on the left. Also make a dominance check,
7406  // as both operands could be addrecs loop-invariant in each other's loop.
7407  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS)) {
7408  const Loop *L = AR->getLoop();
7409  if (isLoopInvariant(LHS, L) && properlyDominates(LHS, L->getHeader())) {
7410  std::swap(LHS, RHS);
7411  Pred = ICmpInst::getSwappedPredicate(Pred);
7412  Changed = true;
7413  }
7414  }
7415 
7416  // If there's a constant operand, canonicalize comparisons with boundary
7417  // cases, and canonicalize *-or-equal comparisons to regular comparisons.
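  // For example, "x u>= 5" canonicalizes to "x u> 4", and "x s<= 7"
  // canonicalizes to "x s< 8".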
7418  if (const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS)) {
7419  const APInt &RA = RC->getAPInt();
7420 
7421  bool SimplifiedByConstantRange = false;
7422 
7423  if (!ICmpInst::isEquality(Pred)) {
7424  ConstantRange ExactCR = ConstantRange::makeExactICmpRegion(Pred, RA);
7425  if (ExactCR.isFullSet())
7426  goto trivially_true;
7427  else if (ExactCR.isEmptySet())
7428  goto trivially_false;
7429 
7430  APInt NewRHS;
7431  CmpInst::Predicate NewPred;
7432  if (ExactCR.getEquivalentICmp(NewPred, NewRHS) &&
7433  ICmpInst::isEquality(NewPred)) {
7434  // We were able to convert an inequality to an equality.
7435  Pred = NewPred;
7436  RHS = getConstant(NewRHS);
7437  Changed = SimplifiedByConstantRange = true;
7438  }
7439  }
7440 
7441  if (!SimplifiedByConstantRange) {
7442  switch (Pred) {
7443  default:
7444  break;
7445  case ICmpInst::ICMP_EQ:
7446  case ICmpInst::ICMP_NE:
7447  // Fold ((-1) * %a) + %b == 0 (equivalent to %b-%a == 0) into %a == %b.
7448  if (!RA)
7449  if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(LHS))
7450  if (const SCEVMulExpr *ME =
7451  dyn_cast<SCEVMulExpr>(AE->getOperand(0)))
7452  if (AE->getNumOperands() == 2 && ME->getNumOperands() == 2 &&
7453  ME->getOperand(0)->isAllOnesValue()) {
7454  RHS = AE->getOperand(1);
7455  LHS = ME->getOperand(1);
7456  Changed = true;
7457  }
7458  break;
7459 
7460 
7461  // The "Should have been caught earlier!" messages refer to the fact
7462  // that the ExactCR.isFullSet() or ExactCR.isEmptySet() check above
7463  // should have fired on the corresponding cases, and canonicalized the
7464  // check to trivially_true or trivially_false.
7465 
7466  case ICmpInst::ICMP_UGE:
7467  assert(!RA.isMinValue() && "Should have been caught earlier!");
7468  Pred = ICmpInst::ICMP_UGT;
7469  RHS = getConstant(RA - 1);
7470  Changed = true;
7471  break;
7472  case ICmpInst::ICMP_ULE:
7473  assert(!RA.isMaxValue() && "Should have been caught earlier!");
7474  Pred = ICmpInst::ICMP_ULT;
7475  RHS = getConstant(RA + 1);
7476  Changed = true;
7477  break;
7478  case ICmpInst::ICMP_SGE:
7479  assert(!RA.isMinSignedValue() && "Should have been caught earlier!");
7480  Pred = ICmpInst::ICMP_SGT;
7481  RHS = getConstant(RA - 1);
7482  Changed = true;
7483  break;
7484  case ICmpInst::ICMP_SLE:
7485  assert(!RA.isMaxSignedValue() && "Should have been caught earlier!");
7486  Pred = ICmpInst::ICMP_SLT;
7487  RHS = getConstant(RA + 1);
7488  Changed = true;
7489  break;
7490  }
7491  }
7492  }
7493 
7494  // Check for obvious equality.
7495  if (HasSameValue(LHS, RHS)) {
7496  if (ICmpInst::isTrueWhenEqual(Pred))
7497  goto trivially_true;
7498  if (ICmpInst::isFalseWhenEqual(Pred))
7499  goto trivially_false;
7500  }
7501 
7502  // If possible, canonicalize GE/LE comparisons to GT/LT comparisons, by
7503  // adding or subtracting 1 from one of the operands.
7504  switch (Pred) {
7505  case ICmpInst::ICMP_SLE:
7506  if (!getSignedRange(RHS).getSignedMax().isMaxSignedValue()) {
7507  RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
7508  SCEV::FlagNSW);
7509  Pred = ICmpInst::ICMP_SLT;
7510  Changed = true;
7511  } else if (!getSignedRange(LHS).getSignedMin().isMinSignedValue()) {
7512  LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS,
7513  SCEV::FlagNSW);
7514  Pred = ICmpInst::ICMP_SLT;
7515  Changed = true;
7516  }
7517  break;
7518  case ICmpInst::ICMP_SGE:
7519  if (!getSignedRange(RHS).getSignedMin().isMinSignedValue()) {
7520  RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS,
7521  SCEV::FlagNSW);
7522  Pred = ICmpInst::ICMP_SGT;
7523  Changed = true;
7524  } else if (!getSignedRange(LHS).getSignedMax().isMaxSignedValue()) {
7525  LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS,
7526  SCEV::FlagNSW);
7527  Pred = ICmpInst::ICMP_SGT;
7528  Changed = true;
7529  }
7530  break;
7531  case ICmpInst::ICMP_ULE:
7532  if (!getUnsignedRange(RHS).getUnsignedMax().isMaxValue()) {
7533  RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
7534  SCEV::FlagNUW);
7535  Pred = ICmpInst::ICMP_ULT;
7536  Changed = true;
7537  } else if (!getUnsignedRange(LHS).getUnsignedMin().isMinValue()) {
7538  LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS);
7539  Pred = ICmpInst::ICMP_ULT;
7540  Changed = true;
7541  }
7542  break;
7543  case ICmpInst::ICMP_UGE:
7544  if (!getUnsignedRange(RHS).getUnsignedMin().isMinValue()) {
7545  RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS);
7546  Pred = ICmpInst::ICMP_UGT;
7547  Changed = true;
7548  } else if (!getUnsignedRange(LHS).getUnsignedMax().isMaxValue()) {
7549  LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS,
7550  SCEV::FlagNUW);
7551  Pred = ICmpInst::ICMP_UGT;
7552  Changed = true;
7553  }
7554  break;
7555  default:
7556  break;
7557  }
7558 
7559  // TODO: More simplifications are possible here.
7560 
7561  // Recursively simplify until we either hit a recursion limit or nothing
7562  // changes.
7563  if (Changed)
7564  return SimplifyICmpOperands(Pred, LHS, RHS, Depth+1);
7565 
7566  return Changed;
7567 
7568 trivially_true:
7569  // Return 0 == 0.
7570  LHS = RHS = getConstant(ConstantInt::getFalse(getContext()));
7571  Pred = ICmpInst::ICMP_EQ;
7572  return true;
7573 
7574 trivially_false:
7575  // Return 0 != 0.
7576  LHS = RHS = getConstant(ConstantInt::getFalse(getContext()));
7577  Pred = ICmpInst::ICMP_NE;
7578  return true;
7579 }
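// A few canonicalizations the routine above performs, on hypothetical
// operands:
//
//   (5 s< %n)   -->  (%n s> 5)   constant moved to the right-hand side
//   (%n u<= 7)  -->  (%n u< 8)   *-or-equal made strict
//   (%n u>= 5)  -->  (%n u> 4)
//
// Re-running until nothing changes (bounded by Depth) yields a fixed point.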
7580 
7581 bool ScalarEvolution::isKnownNegative(const SCEV *S) {
7582  return getSignedRange(S).getSignedMax().isNegative();
7583 }
7584 
7585 bool ScalarEvolution::isKnownPositive(const SCEV *S) {
7586  return getSignedRange(S).getSignedMin().isStrictlyPositive();
7587 }
7588 
7589 bool ScalarEvolution::isKnownNonNegative(const SCEV *S) {
7590  return !getSignedRange(S).getSignedMin().isNegative();
7591 }
7592 
7593 bool ScalarEvolution::isKnownNonPositive(const SCEV *S) {
7594  return !getSignedRange(S).getSignedMax().isStrictlyPositive();
7595 }
7596 
7597 bool ScalarEvolution::isKnownNonZero(const SCEV *S) {
7598  return isKnownNegative(S) || isKnownPositive(S);
7599 }
7600 
7601 bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred,
7602  const SCEV *LHS, const SCEV *RHS) {
7603  // Canonicalize the inputs first.
7604  (void)SimplifyICmpOperands(Pred, LHS, RHS);
7605 
7606  // If LHS or RHS is an addrec, check to see if the condition is true in
7607  // every iteration of the loop.
7608  // If LHS and RHS are both addrec, both conditions must be true in
7609  // every iteration of the loop.
7610  const SCEVAddRecExpr *LAR = dyn_cast<SCEVAddRecExpr>(LHS);
7611  const SCEVAddRecExpr *RAR = dyn_cast<SCEVAddRecExpr>(RHS);
7612  bool LeftGuarded = false;
7613  bool RightGuarded = false;
7614  if (LAR) {
7615  const Loop *L = LAR->getLoop();
7616  if (isLoopEntryGuardedByCond(L, Pred, LAR->getStart(), RHS) &&
7617  isLoopBackedgeGuardedByCond(L, Pred, LAR->getPostIncExpr(*this), RHS)) {
7618  if (!RAR) return true;
7619  LeftGuarded = true;
7620  }
7621  }
7622  if (RAR) {
7623  const Loop *L = RAR->getLoop();
7624  if (isLoopEntryGuardedByCond(L, Pred, LHS, RAR->getStart()) &&
7625  isLoopBackedgeGuardedByCond(L, Pred, LHS, RAR->getPostIncExpr(*this))) {
7626  if (!LAR) return true;
7627  RightGuarded = true;
7628  }
7629  }
7630  if (LeftGuarded && RightGuarded)
7631  return true;
7632 
7633  if (isKnownPredicateViaSplitting(Pred, LHS, RHS))
7634  return true;
7635 
7636  // Otherwise see what can be done with known constant ranges.
7637  return isKnownPredicateViaConstantRanges(Pred, LHS, RHS);
7638 }
7639 
7640 bool ScalarEvolution::isMonotonicPredicate(const SCEVAddRecExpr *LHS,
7641  ICmpInst::Predicate Pred,
7642  bool &Increasing) {
7643  bool Result = isMonotonicPredicateImpl(LHS, Pred, Increasing);
7644 
7645 #ifndef NDEBUG
7646  // Verify an invariant: inverting the predicate should turn a monotonically
7647  // increasing change to a monotonically decreasing one, and vice versa.
7648  bool IncreasingSwapped;
7649  bool ResultSwapped = isMonotonicPredicateImpl(
7650  LHS, ICmpInst::getSwappedPredicate(Pred), IncreasingSwapped);
7651 
7652  assert(Result == ResultSwapped && "should be able to analyze both!");
7653  if (ResultSwapped)
7654  assert(Increasing == !IncreasingSwapped &&
7655  "monotonicity should flip as we flip the predicate");
7656 #endif
7657 
7658  return Result;
7659 }
7660 
7661 bool ScalarEvolution::isMonotonicPredicateImpl(const SCEVAddRecExpr *LHS,
7662  ICmpInst::Predicate Pred,
7663  bool &Increasing) {
7664 
7665  // A zero step value for LHS means the induction variable is essentially a
7666  // loop invariant value. We don't really depend on the predicate actually
7667  // flipping from false to true (for increasing predicates, and the other way
7668  // around for decreasing predicates), all we care about is that *if* the
7669  // predicate changes then it only changes from false to true.
7670  //
7671  // A zero step value in itself is not very useful, but there may be places
7672  // where SCEV can prove X >= 0 but not prove X > 0, so it is helpful to be
7673  // as general as possible.
7674 
7675  switch (Pred) {
7676  default:
7677  return false; // Conservative answer
7678 
7679  case ICmpInst::ICMP_UGT:
7680  case ICmpInst::ICMP_UGE:
7681  case ICmpInst::ICMP_ULT:
7682  case ICmpInst::ICMP_ULE:
7683  if (!LHS->hasNoUnsignedWrap())
7684  return false;
7685 
7686  Increasing = Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE;
7687  return true;
7688 
7689  case ICmpInst::ICMP_SGT:
7690  case ICmpInst::ICMP_SGE:
7691  case ICmpInst::ICMP_SLT:
7692  case ICmpInst::ICMP_SLE: {
7693  if (!LHS->hasNoSignedWrap())
7694  return false;
7695 
7696  const SCEV *Step = LHS->getStepRecurrence(*this);
7697 
7698  if (isKnownNonNegative(Step)) {
7699  Increasing = Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE;
7700  return true;
7701  }
7702 
7703  if (isKnownNonPositive(Step)) {
7704  Increasing = Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE;
7705  return true;
7706  }
7707 
7708  return false;
7709  }
7710 
7711  }
7712 
7713  llvm_unreachable("switch has default clause!");
7714 }
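// For illustration: with LHS = {0,+,1}<nuw>, Pred = ICMP_ULT sets Increasing
// to false, since "i u< %n" can only transition from true to false as i
// grows, while Pred = ICMP_UGT sets Increasing to true. Without the nuw flag
// the query conservatively fails.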
7715 
7716 bool ScalarEvolution::isLoopInvariantPredicate(
7717  ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L,
7718  ICmpInst::Predicate &InvariantPred, const SCEV *&InvariantLHS,
7719  const SCEV *&InvariantRHS) {
7720 
7721  // If there is a loop-invariant, force it into the RHS, otherwise bail out.
7722  if (!isLoopInvariant(RHS, L)) {
7723  if (!isLoopInvariant(LHS, L))
7724  return false;
7725 
7726  std::swap(LHS, RHS);
7727  Pred = ICmpInst::getSwappedPredicate(Pred);
7728  }
7729 
7730  const SCEVAddRecExpr *ArLHS = dyn_cast<SCEVAddRecExpr>(LHS);
7731  if (!ArLHS || ArLHS->getLoop() != L)
7732  return false;
7733 
7734  bool Increasing;
7735  if (!isMonotonicPredicate(ArLHS, Pred, Increasing))
7736  return false;
7737 
7738  // If the predicate "ArLHS `Pred` RHS" monotonically increases from false to
7739  // true as the loop iterates, and the backedge is control dependent on
7740  // "ArLHS `Pred` RHS" == true then we can reason as follows:
7741  //
7742  // * if the predicate was false in the first iteration then the predicate
7743  // is never evaluated again, since the loop exits without taking the
7744  // backedge.
7745  // * if the predicate was true in the first iteration then it will
7746  // continue to be true for all future iterations since it is
7747  // monotonically increasing.
7748  //
7749  // For both the above possibilities, we can replace the loop varying
7750  // predicate with its value on the first iteration of the loop (which is
7751  // loop invariant).
7752  //
7753  // A similar reasoning applies for a monotonically decreasing predicate, by
7754  // replacing true with false and false with true in the above two bullets.
7755 
7756  auto P = Increasing ? Pred : ICmpInst::getInversePredicate(Pred);
7757 
7758  if (!isLoopBackedgeGuardedByCond(L, P, LHS, RHS))
7759  return false;
7760 
7761  InvariantPred = Pred;
7762  InvariantLHS = ArLHS->getStart();
7763  InvariantRHS = RHS;
7764  return true;
7765 }
7766 
7767 bool ScalarEvolution::isKnownPredicateViaConstantRanges(
7768  ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) {
7769  if (HasSameValue(LHS, RHS))
7770  return ICmpInst::isTrueWhenEqual(Pred);
7771 
7772  // This code is split out from isKnownPredicate because it is called from
7773  // within isLoopEntryGuardedByCond.
7774 
7775  auto CheckRanges =
7776  [&](const ConstantRange &RangeLHS, const ConstantRange &RangeRHS) {
7777  return ConstantRange::makeSatisfyingICmpRegion(Pred, RangeRHS)
7778  .contains(RangeLHS);
7779  };
7780 
7781  // The check at the top of the function catches the case where the values are
7782  // known to be equal.
7783  if (Pred == CmpInst::ICMP_EQ)
7784  return false;
7785 
7786  if (Pred == CmpInst::ICMP_NE)
7787  return CheckRanges(getSignedRange(LHS), getSignedRange(RHS)) ||
7788  CheckRanges(getUnsignedRange(LHS), getUnsignedRange(RHS)) ||
7789  isKnownNonZero(getMinusSCEV(LHS, RHS));
7790 
7791  if (CmpInst::isSigned(Pred))
7792  return CheckRanges(getSignedRange(LHS), getSignedRange(RHS));
7793 
7794  return CheckRanges(getUnsignedRange(LHS), getUnsignedRange(RHS));
7795 }
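// For illustration (hypothetical ranges): if getUnsignedRange(LHS) = [0, 5)
// and getUnsignedRange(RHS) = [10, 20), then for ICMP_ULT the satisfying
// region is [0, 10), which contains [0, 5), so LHS u< RHS is known to hold.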
7796 
7797 bool ScalarEvolution::isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred,
7798  const SCEV *LHS,
7799  const SCEV *RHS) {
7800 
7801  // Match Result to (X + Y)<ExpectedFlags> where Y is a constant integer.
7802  // Return Y via OutY.
7803  auto MatchBinaryAddToConst =
7804  [this](const SCEV *Result, const SCEV *X, APInt &OutY,
7805  SCEV::NoWrapFlags ExpectedFlags) {
7806  const SCEV *NonConstOp, *ConstOp;
7807  SCEV::NoWrapFlags FlagsPresent;
7808 
7809  if (!splitBinaryAdd(Result, ConstOp, NonConstOp, FlagsPresent) ||
7810  !isa<SCEVConstant>(ConstOp) || NonConstOp != X)
7811  return false;
7812 
7813  OutY = cast<SCEVConstant>(ConstOp)->getAPInt();
7814  return (FlagsPresent & ExpectedFlags) == ExpectedFlags;
7815  };
7816 
7817  APInt C;
7818 
7819  switch (Pred) {
7820  default:
7821  break;
7822 
7823  case ICmpInst::ICMP_SGE:
7824  std::swap(LHS, RHS);
7825  case ICmpInst::ICMP_SLE:
7826  // X s<= (X + C)<nsw> if C >= 0
7827  if (MatchBinaryAddToConst(RHS, LHS, C, SCEV::FlagNSW) && C.isNonNegative())
7828  return true;
7829 
7830  // (X + C)<nsw> s<= X if C <= 0
7831  if (MatchBinaryAddToConst(LHS, RHS, C, SCEV::FlagNSW) &&
7832  !C.isStrictlyPositive())
7833  return true;
7834  break;
7835 
7836  case ICmpInst::ICMP_SGT:
7837  std::swap(LHS, RHS);
7838  case ICmpInst::ICMP_SLT:
7839  // X s< (X + C)<nsw> if C > 0
7840  if (MatchBinaryAddToConst(RHS, LHS, C, SCEV::FlagNSW) &&
7841  C.isStrictlyPositive())
7842  return true;
7843 
7844  // (X + C)<nsw> s< X if C < 0
7845  if (MatchBinaryAddToConst(LHS, RHS, C, SCEV::FlagNSW) && C.isNegative())
7846  return true;
7847  break;
7848  }
7849 
7850  return false;
7851 }
7852 
7853 bool ScalarEvolution::isKnownPredicateViaSplitting(ICmpInst::Predicate Pred,
7854  const SCEV *LHS,
7855  const SCEV *RHS) {
7856  if (Pred != ICmpInst::ICMP_ULT || ProvingSplitPredicate)
7857  return false;
7858 
7859  // Allowing an arbitrary number of activations of isKnownPredicateViaSplitting on
7860  // the stack can result in exponential time complexity.
7861  SaveAndRestore<bool> Restore(ProvingSplitPredicate, true);
7862 
7863  // If L >= 0 then I `ult` L <=> I >= 0 && I `slt` L
7864  //
7865  // To prove L >= 0 we use isKnownNonNegative whereas to prove I >= 0 we use
7866  // isKnownPredicate. isKnownPredicate is more powerful, but also more
7867  // expensive; and using isKnownNonNegative(RHS) is sufficient for most of the
7868  // interesting cases seen in practice. We can consider "upgrading" L >= 0 to
7869  // use isKnownPredicate later if needed.
7870  return isKnownNonNegative(RHS) &&
7871  isKnownPredicate(CmpInst::ICMP_SGE, LHS, getZero(LHS->getType())) &&
7872  isKnownPredicate(CmpInst::ICMP_SLT, LHS, RHS);
7873 }
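// For illustration: to show %i u< %n it suffices that %n is known
// non-negative, that %i s>= 0, and that %i s< %n. The two signed facts may
// each be provable from dominating conditions even when no useful unsigned
// range is known for %i itself.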
7874 
7875 bool ScalarEvolution::isImpliedViaGuard(BasicBlock *BB,
7876  ICmpInst::Predicate Pred,
7877  const SCEV *LHS, const SCEV *RHS) {
7878  // No need to even try if we know the module has no guards.
7879  if (!HasGuards)
7880  return false;
7881 
7882  return any_of(*BB, [&](Instruction &I) {
7883  using namespace llvm::PatternMatch;
7884 
7885  Value *Condition;
7886  return match(&I, m_Intrinsic<Intrinsic::experimental_guard>(
7887  m_Value(Condition))) &&
7888  isImpliedCond(Pred, LHS, RHS, Condition, false);
7889  });
7890 }
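// For instance, a block containing
//
//   call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
//
// only transfers control onward when %cond holds, so any comparison implied
// by %cond may be assumed below the guard.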
7891 
7892 /// isLoopBackedgeGuardedByCond - Test whether the backedge of the loop is
7893 /// protected by a conditional between LHS and RHS. This is used to
7894 /// eliminate casts.
7895 bool
7896 ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
7897  ICmpInst::Predicate Pred,
7898  const SCEV *LHS, const SCEV *RHS) {
7899  // Interpret a null as meaning no loop, where there is obviously no guard
7900  // (interprocedural conditions notwithstanding).
7901  if (!L) return true;
7902 
7903  if (isKnownPredicateViaConstantRanges(Pred, LHS, RHS))
7904  return true;
7905 
7906  BasicBlock *Latch = L->getLoopLatch();
7907  if (!Latch)
7908  return false;
7909 
7910  BranchInst *LoopContinuePredicate =
7911  dyn_cast<BranchInst>(Latch->getTerminator());
7912  if (LoopContinuePredicate && LoopContinuePredicate->isConditional() &&
7913  isImpliedCond(Pred, LHS, RHS,
7914  LoopContinuePredicate->getCondition(),
7915  LoopContinuePredicate->getSuccessor(0) != L->getHeader()))
7916  return true;
7917 
7918  // We don't want more than one activation of the following loops on the stack
7919  // -- that can lead to O(n!) time complexity.
7920  if (WalkingBEDominatingConds)
7921  return false;
7922 
7923  SaveAndRestore<bool> ClearOnExit(WalkingBEDominatingConds, true);
7924 
7925  // See if we can exploit a trip count to prove the predicate.
7926  const auto &BETakenInfo = getBackedgeTakenInfo(L);
7927  const SCEV *LatchBECount = BETakenInfo.getExact(Latch, this);
7928  if (LatchBECount != getCouldNotCompute()) {
7929  // We know that Latch branches back to the loop header exactly
7930  // LatchBECount times. This means the backedge condition at Latch is
7931  // equivalent to "{0,+,1} u< LatchBECount".
7932  Type *Ty = LatchBECount->getType();
7933  auto NoWrapFlags = SCEV::NoWrapFlags(SCEV::FlagNUW | SCEV::FlagNW);
7934  const SCEV *LoopCounter =
7935  getAddRecExpr(getZero(Ty), getOne(Ty), L, NoWrapFlags);
7936  if (isImpliedCond(Pred, LHS, RHS, ICmpInst::ICMP_ULT, LoopCounter,
7937  LatchBECount))
7938  return true;
7939  }
7940 
7941  // Check conditions due to any @llvm.assume intrinsics.
7942  for (auto &AssumeVH : AC.assumptions()) {
7943  if (!AssumeVH)
7944  continue;
7945  auto *CI = cast<CallInst>(AssumeVH);
7946  if (!DT.dominates(CI, Latch->getTerminator()))
7947  continue;
7948 
7949  if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false))
7950  return true;
7951  }
7952 
7953  // If the loop is not reachable from the entry block, we risk running into an
7954  // infinite loop as we walk up into the dom tree. These loops do not matter
7955  // anyway, so we just return a conservative answer when we see them.
7956  if (!DT.isReachableFromEntry(L->getHeader()))
7957  return false;
7958 
7959  if (isImpliedViaGuard(Latch, Pred, LHS, RHS))
7960  return true;
7961 
7962  for (DomTreeNode *DTN = DT[Latch], *HeaderDTN = DT[L->getHeader()];
7963  DTN != HeaderDTN; DTN = DTN->getIDom()) {
7964 
7965  assert(DTN && "should reach the loop header before reaching the root!");
7966 
7967  BasicBlock *BB = DTN->getBlock();
7968  if (isImpliedViaGuard(BB, Pred, LHS, RHS))
7969  return true;
7970 
7971  BasicBlock *PBB = BB->getSinglePredecessor();
7972  if (!PBB)
7973  continue;
7974 
7975  BranchInst *ContinuePredicate = dyn_cast<BranchInst>(PBB->getTerminator());
7976  if (!ContinuePredicate || !ContinuePredicate->isConditional())
7977  continue;
7978 
7979  Value *Condition = ContinuePredicate->getCondition();
7980 
7981  // If we have an edge `E` within the loop body that dominates the only
7982  // latch, the condition guarding `E` also guards the backedge. This
7983  // reasoning works only for loops with a single latch.
7984 
7985  BasicBlockEdge DominatingEdge(PBB, BB);
7986  if (DominatingEdge.isSingleEdge()) {
7987  // We're constructively (and conservatively) enumerating edges within the
7988  // loop body that dominate the latch. The dominator tree better agree
7989  // with us on this:
7990  assert(DT.dominates(DominatingEdge, Latch) && "should be!");
7991 
7992  if (isImpliedCond(Pred, LHS, RHS, Condition,
7993  BB != ContinuePredicate->getSuccessor(0)))
7994  return true;
7995  }
7996  }
7997 
7998  return false;
7999 }
8000 
8001 bool
8002 ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
8003  ICmpInst::Predicate Pred,
8004  const SCEV *LHS, const SCEV *RHS) {
8005  // Interpret a null as meaning no loop, where there is obviously no guard
8006  // (interprocedural conditions notwithstanding).
8007  if (!L) return false;
8008 
8009  if (isKnownPredicateViaConstantRanges(Pred, LHS, RHS))
8010  return true;
8011 
8012  // Starting at the loop predecessor, climb up the predecessor chain, as long
8013  // as there are predecessors that can be found that have unique successors
8014  // leading to the original header.
8015  for (std::pair<BasicBlock *, BasicBlock *>
8016  Pair(L->getLoopPredecessor(), L->getHeader());
8017  Pair.first;
8018  Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) {
8019 
8020  if (isImpliedViaGuard(Pair.first, Pred, LHS, RHS))
8021  return true;
8022 
8023  BranchInst *LoopEntryPredicate =
8024  dyn_cast<BranchInst>(Pair.first->getTerminator());
8025  if (!LoopEntryPredicate ||
8026  LoopEntryPredicate->isUnconditional())
8027  continue;
8028 
8029  if (isImpliedCond(Pred, LHS, RHS,
8030  LoopEntryPredicate->getCondition(),
8031  LoopEntryPredicate->getSuccessor(0) != Pair.second))
8032  return true;
8033  }
8034 
8035  // Check conditions due to any @llvm.assume intrinsics.
8036  for (auto &AssumeVH : AC.assumptions()) {
8037  if (!AssumeVH)
8038  continue;
8039  auto *CI = cast<CallInst>(AssumeVH);
8040  if (!DT.dominates(CI, L->getHeader()))
8041  continue;
8042 
8043  if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false))
8044  return true;
8045  }
8046 
8047  return false;
8048 }
8049 
8050 bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
8051  const SCEV *LHS, const SCEV *RHS,
8052  Value *FoundCondValue,
8053  bool Inverse) {
8054  if (!PendingLoopPredicates.insert(FoundCondValue).second)
8055  return false;
8056 
8057  auto ClearOnExit =
8058  make_scope_exit([&]() { PendingLoopPredicates.erase(FoundCondValue); });
8059 
8060  // Recursively handle And and Or conditions.
8061  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FoundCondValue)) {
8062  if (BO->getOpcode() == Instruction::And) {
8063  if (!Inverse)
8064  return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) ||
8065  isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse);
8066  } else if (BO->getOpcode() == Instruction::Or) {
8067  if (Inverse)
8068  return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) ||
8069  isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse);
8070  }
8071  }
8072 
8073  ICmpInst *ICI = dyn_cast<ICmpInst>(FoundCondValue);
8074  if (!ICI) return false;
8075 
8076  // Now that we found a conditional branch that dominates the loop or
8077  // controls the loop latch, check whether it is the comparison we want.
8078  ICmpInst::Predicate FoundPred;
8079  if (Inverse)
8080  FoundPred = ICI->getInversePredicate();
8081  else
8082  FoundPred = ICI->getPredicate();
8083 
8084  const SCEV *FoundLHS = getSCEV(ICI->getOperand(0));
8085  const SCEV *FoundRHS = getSCEV(ICI->getOperand(1));
8086 
8087  return isImpliedCond(Pred, LHS, RHS, FoundPred, FoundLHS, FoundRHS);
8088 }
8089 
8090 bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
8091  const SCEV *RHS,
8092  ICmpInst::Predicate FoundPred,
8093  const SCEV *FoundLHS,
8094  const SCEV *FoundRHS) {
8095  // Balance the types.
8096  if (getTypeSizeInBits(LHS->getType()) <
8097  getTypeSizeInBits(FoundLHS->getType())) {
8098  if (CmpInst::isSigned(Pred)) {
8099  LHS = getSignExtendExpr(LHS, FoundLHS->getType());
8100  RHS = getSignExtendExpr(RHS, FoundLHS->getType());
8101  } else {
8102  LHS = getZeroExtendExpr(LHS, FoundLHS->getType());
8103  RHS = getZeroExtendExpr(RHS, FoundLHS->getType());
8104  }
8105  } else if (getTypeSizeInBits(LHS->getType()) >
8106  getTypeSizeInBits(FoundLHS->getType())) {
8107  if (CmpInst::isSigned(FoundPred)) {
8108  FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType());
8109  FoundRHS = getSignExtendExpr(FoundRHS, LHS->getType());
8110  } else {
8111  FoundLHS = getZeroExtendExpr(FoundLHS, LHS->getType());
8112  FoundRHS = getZeroExtendExpr(FoundRHS, LHS->getType());
8113  }
8114  }
8115 
8116  // Canonicalize the query to match the way instcombine will have
8117  // canonicalized the comparison.
8118  if (SimplifyICmpOperands(Pred, LHS, RHS))
8119  if (LHS == RHS)
8120  return CmpInst::isTrueWhenEqual(Pred);
8121  if (SimplifyICmpOperands(FoundPred, FoundLHS, FoundRHS))
8122  if (FoundLHS == FoundRHS)
8123  return CmpInst::isFalseWhenEqual(FoundPred);
8124 
8125  // Check to see if we can make the LHS or RHS match.
8126  if (LHS == FoundRHS || RHS == FoundLHS) {
8127  if (isa<SCEVConstant>(RHS)) {
8128  std::swap(FoundLHS, FoundRHS);
8129  FoundPred = ICmpInst::getSwappedPredicate(FoundPred);
8130  } else {
8131  std::swap(LHS, RHS);
8132  Pred = ICmpInst::getSwappedPredicate(Pred);
8133  }
8134  }
8135 
8136  // Check whether the found predicate is the same as the desired predicate.
8137  if (FoundPred == Pred)
8138  return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS);
8139 
8140  // Check whether swapping the found predicate makes it the same as the
8141  // desired predicate.
8142  if (ICmpInst::getSwappedPredicate(FoundPred) == Pred) {
8143  if (isa<SCEVConstant>(RHS))
8144  return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS);
8145  else
8146  return isImpliedCondOperands(ICmpInst::getSwappedPredicate(Pred),
8147  RHS, LHS, FoundLHS, FoundRHS);
8148  }
8149 
8150  // Unsigned comparison is the same as signed comparison when both operands
8151  // are non-negative.
8152  if (CmpInst::isUnsigned(FoundPred) &&
8153  CmpInst::getSignedPredicate(FoundPred) == Pred &&
8154  isKnownNonNegative(FoundLHS) && isKnownNonNegative(FoundRHS))
8155  return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS);
8156 
8157  // Check if we can make progress by sharpening ranges.
8158  if (FoundPred == ICmpInst::ICMP_NE &&
8159  (isa<SCEVConstant>(FoundLHS) || isa<SCEVConstant>(FoundRHS))) {
8160 
8161  const SCEVConstant *C = nullptr;
8162  const SCEV *V = nullptr;
8163 
8164  if (isa<SCEVConstant>(FoundLHS)) {
8165  C = cast<SCEVConstant>(FoundLHS);
8166  V = FoundRHS;
8167  } else {
8168  C = cast<SCEVConstant>(FoundRHS);
8169  V = FoundLHS;
8170  }
8171 
8172  // The guarding predicate tells us that C != V. If the known range
8173  // of V is [C, t), we can sharpen the range to [C + 1, t). The
8174  // range we consider has to correspond to same signedness as the
8175  // predicate we're interested in folding.
8176 
8177  APInt Min = ICmpInst::isSigned(Pred) ?
8178  getSignedRange(V).getSignedMin() : getUnsignedRange(V).getUnsignedMin();
8179 
8180  if (Min == C->getAPInt()) {
8181  // Given (V >= Min && V != Min) we conclude V >= (Min + 1).
8182  // This is true even if (Min + 1) wraps around -- in case of
8183  // wraparound, (Min + 1) < Min, so (V >= Min => V >= (Min + 1)).
8184 
8185  APInt SharperMin = Min + 1;
8186 
8187  switch (Pred) {
8188  case ICmpInst::ICMP_SGE:
8189  case ICmpInst::ICMP_UGE:
8190  // We know V `Pred` SharperMin. If this implies LHS `Pred`
8191  // RHS, we're done.
8192  if (isImpliedCondOperands(Pred, LHS, RHS, V,
8193  getConstant(SharperMin)))
8194  return true;
8195 
8196  case ICmpInst::ICMP_SGT:
8197  case ICmpInst::ICMP_UGT:
8198  // We know from the range information that (V `Pred` Min ||
8199  // V == Min). We know from the guarding condition that !(V
8200  // == Min). This gives us
8201  //
8202  // V `Pred` Min || V == Min && !(V == Min)
8203  // => V `Pred` Min
8204  //
8205  // If V `Pred` Min implies LHS `Pred` RHS, we're done.
8206 
8207  if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(Min)))
8208  return true;
8209 
8210  default:
8211  // No change
8212  break;
8213  }
8214  }
8215  }
8216 
8217  // Check whether the actual condition is beyond sufficient.
8218  if (FoundPred == ICmpInst::ICMP_EQ)
8219  if (ICmpInst::isTrueWhenEqual(Pred))
8220  if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS))
8221  return true;
8222  if (Pred == ICmpInst::ICMP_NE)
8223  if (!ICmpInst::isTrueWhenEqual(FoundPred))
8224  if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS))
8225  return true;
8226 
8227  // Otherwise assume the worst.
8228  return false;
8229 }
8230 
8231 bool ScalarEvolution::splitBinaryAdd(const SCEV *Expr,
8232  const SCEV *&L, const SCEV *&R,
8233  SCEV::NoWrapFlags &Flags) {
8234  const auto *AE = dyn_cast<SCEVAddExpr>(Expr);
8235  if (!AE || AE->getNumOperands() != 2)
8236  return false;
8237 
8238  L = AE->getOperand(0);
8239  R = AE->getOperand(1);
8240  Flags = AE->getNoWrapFlags();
8241  return true;
8242 }
8243 
8244 Optional<APInt> ScalarEvolution::computeConstantDifference(const SCEV *More,
8245  const SCEV *Less) {
8246  // We avoid subtracting expressions here because this function is usually
8247  // fairly deep in the call stack (i.e. is called many times).
8248 
8249  if (isa<SCEVAddRecExpr>(Less) && isa<SCEVAddRecExpr>(More)) {
8250  const auto *LAR = cast<SCEVAddRecExpr>(Less);
8251  const auto *MAR = cast<SCEVAddRecExpr>(More);
8252 
8253  if (LAR->getLoop() != MAR->getLoop())
8254  return None;
8255 
8256  // We look at affine expressions only; not for correctness but to keep
8257  // getStepRecurrence cheap.
8258  if (!LAR->isAffine() || !MAR->isAffine())
8259  return None;
8260 
8261  if (LAR->getStepRecurrence(*this) != MAR->getStepRecurrence(*this))
8262  return None;
8263 
8264  Less = LAR->getStart();
8265  More = MAR->getStart();
8266 
8267  // fall through
8268  }
8269 
8270  if (isa<SCEVConstant>(Less) && isa<SCEVConstant>(More)) {
8271  const auto &M = cast<SCEVConstant>(More)->getAPInt();
8272  const auto &L = cast<SCEVConstant>(Less)->getAPInt();
8273  return M - L;
8274  }
8275 
8276  const SCEV *L, *R;
8277  SCEV::NoWrapFlags Flags;
8278  if (splitBinaryAdd(Less, L, R, Flags))
8279  if (const auto *LC = dyn_cast<SCEVConstant>(L))
8280  if (R == More)
8281  return -(LC->getAPInt());
8282 
8283  if (splitBinaryAdd(More, L, R, Flags))
8284  if (const auto *LC = dyn_cast<SCEVConstant>(L))
8285  if (R == Less)
8286  return LC->getAPInt();
8287 
8288  return None;
8289 }
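// For illustration (hypothetical expressions):
//   computeConstantDifference((5 + %x), %x) == 5, via splitBinaryAdd, and
//   computeConstantDifference({7,+,3}, {2,+,3}) == 5 for addrecs on the same
//   loop, since equal steps let us compare the start values directly.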
8290 
8291 bool ScalarEvolution::isImpliedCondOperandsViaNoOverflow(
8292  ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS,
8293  const SCEV *FoundLHS, const SCEV *FoundRHS) {
8294  if (Pred != CmpInst::ICMP_SLT && Pred != CmpInst::ICMP_ULT)
8295  return false;
8296 
8297  const auto *AddRecLHS = dyn_cast<SCEVAddRecExpr>(LHS);
8298  if (!AddRecLHS)
8299  return false;
8300 
8301  const auto *AddRecFoundLHS = dyn_cast<SCEVAddRecExpr>(FoundLHS);
8302  if (!AddRecFoundLHS)
8303  return false;
8304 
8305  // We'd like to let SCEV reason about control dependencies, so we constrain
8306  // both the inequalities to be about add recurrences on the same loop. This
8307  // way we can use isLoopEntryGuardedByCond later.
8308 
8309  const Loop *L = AddRecFoundLHS->getLoop();
8310  if (L != AddRecLHS->getLoop())
8311  return false;
8312 
8313  // FoundLHS u< FoundRHS u< -C => (FoundLHS + C) u< (FoundRHS + C) ... (1)
8314  //
8315  // FoundLHS s< FoundRHS s< INT_MIN - C => (FoundLHS + C) s< (FoundRHS + C)
8316  // ... (2)
8317  //
8318  // Informal proof for (2), assuming (1) [*]:
8319  //
8320  // We'll also assume (A s< B) <=> ((A + INT_MIN) u< (B + INT_MIN)) ... (3)[**]
8321  //
8322  // Then
8323  //
8324  // FoundLHS s< FoundRHS s< INT_MIN - C
8325  // <=> (FoundLHS + INT_MIN) u< (FoundRHS + INT_MIN) u< -C [ using (3) ]
8326  // <=> (FoundLHS + INT_MIN + C) u< (FoundRHS + INT_MIN + C) [ using (1) ]
8327  // <=> (FoundLHS + INT_MIN + C + INT_MIN) s<
8328  // (FoundRHS + INT_MIN + C + INT_MIN) [ using (3) ]
8329  // <=> FoundLHS + C s< FoundRHS + C
8330  //
8331  // [*]: (1) can be proved by ruling out overflow.
8332  //
8333  // [**]: This can be proved by analyzing all the four possibilities:
8334  // (A s< 0, B s< 0), (A s< 0, B s>= 0), (A s>= 0, B s< 0) and
8335  // (A s>= 0, B s>= 0).
8336  //
8337  // Note:
8338  // Despite (2), "FoundRHS s< INT_MIN - C" does not mean that "FoundRHS + C"
8339  // will not sign underflow. For instance, say FoundLHS = (i8 -128), FoundRHS
8340  // = (i8 -127) and C = (i8 -100). Then INT_MIN - C = (i8 -28), and FoundRHS
8341  // s< (INT_MIN - C). Lack of sign overflow / underflow in "FoundRHS + C" is
8342  // neither necessary nor sufficient to prove "(FoundLHS + C) s< (FoundRHS +
8343  // C)".
8344 
8345  Optional<APInt> LDiff = computeConstantDifference(LHS, FoundLHS);
8346  Optional<APInt> RDiff = computeConstantDifference(RHS, FoundRHS);
8347  if (!LDiff || !RDiff || *LDiff != *RDiff)
8348  return false;
8349 
8350  if (LDiff->isMinValue())
8351  return true;
8352 
8353  APInt FoundRHSLimit;
8354 
8355  if (Pred == CmpInst::ICMP_ULT) {
8356  FoundRHSLimit = -(*RDiff);
8357  } else {
8358  assert(Pred == CmpInst::ICMP_SLT && "Checked above!");
8359  FoundRHSLimit = APInt::getSignedMinValue(getTypeSizeInBits(RHS->getType())) - *RDiff;
8360  }
8361 
8362  // Try to prove (1) or (2), as needed.
8363  return isLoopEntryGuardedByCond(L, Pred, FoundRHS,
8364  getConstant(FoundRHSLimit));
8365 }
8366 
8367 bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
8368  const SCEV *LHS, const SCEV *RHS,
8369  const SCEV *FoundLHS,
8370  const SCEV *FoundRHS) {
8371  if (isImpliedCondOperandsViaRanges(Pred, LHS, RHS, FoundLHS, FoundRHS))
8372  return true;
8373 
8374  if (isImpliedCondOperandsViaNoOverflow(Pred, LHS, RHS, FoundLHS, FoundRHS))
8375  return true;
8376 
8377  return isImpliedCondOperandsHelper(Pred, LHS, RHS,
8378  FoundLHS, FoundRHS) ||
8379  // ~x < ~y --> x > y
8380  isImpliedCondOperandsHelper(Pred, LHS, RHS,
8381  getNotSCEV(FoundRHS),
8382  getNotSCEV(FoundLHS));
8383 }
8384 
8385 
8386 /// If Expr computes ~A, return A else return nullptr
8387 static const SCEV *MatchNotExpr(const SCEV *Expr) {
8388  const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Expr);
8389  if (!Add || Add->getNumOperands() != 2 ||
8390  !Add->getOperand(0)->isAllOnesValue())
8391  return nullptr;
8392 
8393  const SCEVMulExpr *AddRHS = dyn_cast<SCEVMulExpr>(Add->getOperand(1));
8394  if (!AddRHS || AddRHS->getNumOperands() != 2 ||
8395  !AddRHS->getOperand(0)->isAllOnesValue())
8396  return nullptr;
8397 
8398  return AddRHS->getOperand(1);
8399 }
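// For illustration: SCEV has no dedicated "not" node; ~A is built as
// (-1 + (-1 * A)), using the identity ~A == -A - 1, and MatchNotExpr
// recognizes exactly that shape.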
8400 
8401 
8402 /// Is MaybeMaxExpr an SMax or UMax of Candidate and some other values?
8403 template<typename MaxExprType>
8404 static bool IsMaxConsistingOf(const SCEV *MaybeMaxExpr,
8405  const SCEV *Candidate) {
8406  const MaxExprType *MaxExpr = dyn_cast<MaxExprType>(MaybeMaxExpr);
8407  if (!MaxExpr) return false;
8408 
8409  return find(MaxExpr->operands(), Candidate) != MaxExpr->op_end();
8410 }
8411 
8412 
8413 /// Is MaybeMinExpr an SMin or UMin of Candidate and some other values?
8414 template<typename MaxExprType>
8415 static bool IsMinConsistingOf(ScalarEvolution &SE,
8416  const SCEV *MaybeMinExpr,
8417  const SCEV *Candidate) {
8418  const SCEV *MaybeMaxExpr = MatchNotExpr(MaybeMinExpr);
8419  if (!MaybeMaxExpr)
8420  return false;
8421 
8422  return IsMaxConsistingOf<MaxExprType>(MaybeMaxExpr, SE.getNotSCEV(Candidate));
8423 }
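// For illustration: smin(A, B) is represented as ~smax(~A, ~B), so checking
// whether an expression is an SMin of Candidate reduces to stripping the
// outer "not" and searching for ~Candidate among the SMax operands.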
8424 
8425 static bool IsKnownPredicateViaAddRecStart(ScalarEvolution &SE,
8426  ICmpInst::Predicate Pred,
8427  const SCEV *LHS, const SCEV *RHS) {
8428 
8429  // If both sides are affine addrecs for the same loop, with equal
8430  // steps, and we know the recurrences don't wrap, then we only
8431  // need to check the predicate on the starting values.
8432 
8433  if (!ICmpInst::isRelational(Pred))
8434  return false;
8435 
8436  const SCEVAddRecExpr *LAR = dyn_cast<SCEVAddRecExpr>(LHS);
8437  if (!LAR)
8438  return false;
8439  const SCEVAddRecExpr *RAR = dyn_cast<SCEVAddRecExpr>(RHS);
8440  if (!RAR)
8441  return false;
8442  if (LAR->getLoop() != RAR->getLoop())
8443  return false;
8444  if (!LAR->isAffine() || !RAR->isAffine())
8445  return false;
8446 
8447  if (LAR->getStepRecurrence(SE) != RAR->getStepRecurrence(SE))
8448  return false;
8449 
8450  SCEV::NoWrapFlags NW = ICmpInst::isSigned(Pred) ?
8451  SCEV::FlagNSW : SCEV::FlagNUW;
8452  if (!LAR->getNoWrapFlags(NW) || !RAR->getNoWrapFlags(NW))
8453  return false;
8454 
8455  return SE.isKnownPredicate(Pred, LAR->getStart(), RAR->getStart());
8456 }
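// For illustration: {5,+,2}<nsw> s< {9,+,2}<nsw>, both over the same loop,
// reduces to 5 s< 9 -- the recurrences advance in lockstep and neither can
// wrap, so their relative order is decided by the starts alone.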
8457 
8458 /// Is LHS `Pred` RHS true on the virtue of LHS or RHS being a Min or Max
8459 /// expression?
8460 static bool IsKnownPredicateViaMinOrMax(ScalarEvolution &SE,
8461  ICmpInst::Predicate Pred,
8462  const SCEV *LHS, const SCEV *RHS) {
8463  switch (Pred) {
8464  default:
8465  return false;
8466 
8467  case ICmpInst::ICMP_SGE:
8468  std::swap(LHS, RHS);
8469  LLVM_FALLTHROUGH;
8470  case ICmpInst::ICMP_SLE:
8471  return
8472  // min(A, ...) <= A
8473  IsMinConsistingOf<SCEVSMaxExpr>(SE, LHS, RHS) ||
8474  // A <= max(A, ...)
8475  IsMaxConsistingOf<SCEVSMaxExpr>(RHS, LHS);
8476 
8477  case ICmpInst::ICMP_UGE:
8478  std::swap(LHS, RHS);
8479  LLVM_FALLTHROUGH;
8480  case ICmpInst::ICMP_ULE:
8481  return
8482  // min(A, ...) <= A
8483  IsMinConsistingOf<SCEVUMaxExpr>(SE, LHS, RHS) ||
8484  // A <= max(A, ...)
8485  IsMaxConsistingOf<SCEVUMaxExpr>(RHS, LHS);
8486  }
8487 
8488  llvm_unreachable("covered switch fell through?!");
8489 }
8490 
8491 bool
8492 ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
8493  const SCEV *LHS, const SCEV *RHS,
8494  const SCEV *FoundLHS,
8495  const SCEV *FoundRHS) {
8496  auto IsKnownPredicateFull =
8497  [this](ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) {
8498  return isKnownPredicateViaConstantRanges(Pred, LHS, RHS) ||
8499  IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS) ||
8500  IsKnownPredicateViaAddRecStart(*this, Pred, LHS, RHS) ||
8501  isKnownPredicateViaNoOverflow(Pred, LHS, RHS);
8502  };
8503 
8504  switch (Pred) {
8505  default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
8506  case ICmpInst::ICMP_EQ:
8507  case ICmpInst::ICMP_NE:
8508  if (HasSameValue(LHS, FoundLHS) && HasSameValue(RHS, FoundRHS))
8509  return true;
8510  break;
8511  case ICmpInst::ICMP_SLT:
8512  case ICmpInst::ICMP_SLE:
8513  if (IsKnownPredicateFull(ICmpInst::ICMP_SLE, LHS, FoundLHS) &&
8514  IsKnownPredicateFull(ICmpInst::ICMP_SGE, RHS, FoundRHS))
8515  return true;
8516  break;
8517  case ICmpInst::ICMP_SGT:
8518  case ICmpInst::ICMP_SGE:
8519  if (IsKnownPredicateFull(ICmpInst::ICMP_SGE, LHS, FoundLHS) &&
8520  IsKnownPredicateFull(ICmpInst::ICMP_SLE, RHS, FoundRHS))
8521  return true;
8522  break;
8523  case ICmpInst::ICMP_ULT:
8524  case ICmpInst::ICMP_ULE:
8525  if (IsKnownPredicateFull(ICmpInst::ICMP_ULE, LHS, FoundLHS) &&
8526  IsKnownPredicateFull(ICmpInst::ICMP_UGE, RHS, FoundRHS))
8527  return true;
8528  break;
8529  case ICmpInst::ICMP_UGT:
8530  case ICmpInst::ICMP_UGE:
8531  if (IsKnownPredicateFull(ICmpInst::ICMP_UGE, LHS, FoundLHS) &&
8532  IsKnownPredicateFull(ICmpInst::ICMP_ULE, RHS, FoundRHS))
8533  return true;
8534  break;
8535  }
8536 
8537  return false;
8538 }
8539 
8540 bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred,
8541  const SCEV *LHS,
8542  const SCEV *RHS,
8543  const SCEV *FoundLHS,
8544  const SCEV *FoundRHS) {
8545  if (!isa<SCEVConstant>(RHS) || !isa<SCEVConstant>(FoundRHS))
8546  // The restriction on `FoundRHS` can be lifted easily -- it exists only to
8547  // reduce the compile time impact of this optimization.
8548  return false;
8549 
8550  Optional<APInt> Addend = computeConstantDifference(LHS, FoundLHS);
8551  if (!Addend)
8552  return false;
8553 
8554  APInt ConstFoundRHS = cast<SCEVConstant>(FoundRHS)->getAPInt();
8555 
8556  // `FoundLHSRange` is the range we know `FoundLHS` to be in by virtue of the
8557  // antecedent "`FoundLHS` `Pred` `FoundRHS`".
8558  ConstantRange FoundLHSRange =
8559  ConstantRange::makeAllowedICmpRegion(Pred, ConstFoundRHS);
8560 
8561  // Since `LHS` is `FoundLHS` + `Addend`, we can compute a range for `LHS`:
8562  ConstantRange LHSRange = FoundLHSRange.add(ConstantRange(*Addend));
8563 
8564  // We can also compute the range of values for `LHS` that satisfy the
8565  // consequent, "`LHS` `Pred` `RHS`":
8566  APInt ConstRHS = cast<SCEVConstant>(RHS)->getAPInt();
8567  ConstantRange SatisfyingLHSRange =
8568  ConstantRange::makeSatisfyingICmpRegion(Pred, ConstRHS);
8569 
8570  // The antecedent implies the consequent if every value of `LHS` that
8571  // satisfies the antecedent also satisfies the consequent.
8572  return SatisfyingLHSRange.contains(LHSRange);
8573 }
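// A worked instance with hypothetical constants: from "FoundLHS u< 10" we
// get FoundLHSRange = [0, 10); if LHS = FoundLHS + 2 then LHSRange = [2, 12),
// and for the consequent "LHS u< 12" the satisfying region [0, 12) contains
// [2, 12), so the implication holds.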
8574 
8575 bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride,
8576  bool IsSigned, bool NoWrap) {
8577  assert(isKnownPositive(Stride) && "Positive stride expected!");
8578 
8579  if (NoWrap) return false;
8580 
8581  unsigned BitWidth = getTypeSizeInBits(RHS->getType());
8582  const SCEV *One = getOne(Stride->getType());
8583 
8584  if (IsSigned) {
8585  APInt MaxRHS = getSignedRange(RHS).getSignedMax();
8586  APInt MaxValue = APInt::getSignedMaxValue(BitWidth);
8587  APInt MaxStrideMinusOne = getSignedRange(getMinusSCEV(Stride, One))
8588  .getSignedMax();
8589 
8590  // SMaxRHS + SMaxStrideMinusOne > SMaxValue => overflow!
8591  return (MaxValue - MaxStrideMinusOne).slt(MaxRHS);
8592  }
8593 
8594  APInt MaxRHS = getUnsignedRange(RHS).getUnsignedMax();
8595  APInt MaxValue = APInt::getMaxValue(BitWidth);
8596  APInt MaxStrideMinusOne = getUnsignedRange(getMinusSCEV(Stride, One))
8597  .getUnsignedMax();
8598 
8599  // UMaxRHS + UMaxStrideMinusOne > UMaxValue => overflow!
8600  return (MaxValue - MaxStrideMinusOne).ult(MaxRHS);
8601 }
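// For illustration (i8, unsigned, hypothetical ranges): with the unsigned
// max of RHS = 250 and stride = 10, MaxValue - MaxStrideMinusOne is
// 255 - 9 = 246 u< 250, so the IV could step from just below 250 to beyond
// 255 and wrap; we conservatively report possible overflow.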
8602 
8603 bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride,
8604  bool IsSigned, bool NoWrap) {
8605  if (NoWrap) return false;
8606 
8607  unsigned BitWidth = getTypeSizeInBits(RHS->getType());
8608  const SCEV *One = getOne(Stride->getType());
8609 
8610  if (IsSigned) {
8611  APInt MinRHS = getSignedRange(RHS).getSignedMin();
8612  APInt MinValue = APInt::getSignedMinValue(BitWidth);
8613  APInt MaxStrideMinusOne = getSignedRange(getMinusSCEV(Stride, One))
8614  .getSignedMax();
8615 
8616  // SMinRHS - SMaxStrideMinusOne < SMinValue => overflow!
8617  return (MinValue + MaxStrideMinusOne).sgt(MinRHS);
8618  }
8619 
8620  APInt MinRHS = getUnsignedRange(RHS).getUnsignedMin();
8621  APInt MinValue = APInt::getMinValue(BitWidth);
8622  APInt MaxStrideMinusOne = getUnsignedRange(getMinusSCEV(Stride, One))
8623  .getUnsignedMax();
8624 
8625  // UMinRHS - UMaxStrideMinusOne < UMinValue => overflow!
8626  return (MinValue + MaxStrideMinusOne).ugt(MinRHS);
8627 }
8628 
8629 const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step,
8630  bool Equality) {
8631  const SCEV *One = getOne(Step->getType());
8632  Delta = Equality ? getAddExpr(Delta, Step)
8633  : getAddExpr(Delta, getMinusSCEV(Step, One));
8634  return getUDivExpr(Delta, Step);
8635 }
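// For illustration: with Delta = 8 and Step = 2, Equality == false gives
// (8 + (2 - 1)) /u 2 = 4, i.e. Delta divided by Step rounded up, while
// Equality == true gives (8 + 2) /u 2 = 5, also counting the iteration where
// the IV lands exactly on the bound.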
8636 
8637 ScalarEvolution::ExitLimit
8638 ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
8639  const Loop *L, bool IsSigned,
8640  bool ControlsExit, bool AllowPredicates) {
8641  SmallVector<const SCEVPredicate *, 4> Predicates;
8642  // We handle only IV < Invariant
8643  if (!isLoopInvariant(RHS, L))
8644  return getCouldNotCompute();
8645 
8646  const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
8647  bool PredicatedIV = false;
8648 
8649  if (!IV && AllowPredicates) {
8650  // Try to make this an AddRec using runtime tests, in the first X
8651  // iterations of this loop, where X is the SCEV expression found by the
8652  // algorithm below.
8653  IV = convertSCEVToAddRecWithPredicates(LHS, L, Predicates);
8654  PredicatedIV = true;
8655  }
8656 
8657  // Avoid weird loops
8658  if (!IV || IV->getLoop() != L || !IV->isAffine())
8659  return getCouldNotCompute();
8660 
8661  bool NoWrap = ControlsExit &&
8662  IV->getNoWrapFlags(IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW);
8663 
8664  const SCEV *Stride = IV->getStepRecurrence(*this);
8665 
8666  bool PositiveStride = isKnownPositive(Stride);
8667 
8668  // Avoid negative or zero stride values.
8669  if (!PositiveStride) {
8670  // We can compute the correct backedge taken count for loops with unknown
8671  // strides if we can prove that the loop is not an infinite loop with side
8672  // effects. Here's the loop structure we are trying to handle -
8673  //
8674  // i = start
8675  // do {
8676  // A[i] = i;
8677  // i += s;
8678  // } while (i < end);
8679  //
8680  // The backedge taken count for such loops is evaluated as -
8681  // (max(end, start + stride) - start - 1) /u stride
8682  //
8683  // The additional preconditions that we need to check to prove correctness
8684  // of the above formula are as follows -
8685  //
8686  // a) IV is either nuw or nsw depending upon signedness (indicated by the
8687  // NoWrap flag).
8688  // b) loop is single exit with no side effects.
8689  //
8690  //
8691  // Precondition a) implies that if the stride is negative, this is a single
8692  // trip loop. The backedge taken count formula reduces to zero in this case.
8693  //
8694  // Precondition b) implies that the unknown stride cannot be zero otherwise
8695  // we have UB.
8696  //
8697  // The positive stride case is the same as isKnownPositive(Stride) returning
8698  // true (original behavior of the function).
8699  //
8700  // We want to make sure that the stride is truly unknown as there are edge
8701  // cases where ScalarEvolution propagates no wrap flags to the
8702  // post-increment/decrement IV even though the increment/decrement operation
8703  // itself is wrapping. The computed backedge taken count may be wrong in
8704  // such cases. This is prevented by checking that the stride is not known to
8705  // be either positive or non-positive. For example, no wrap flags are
8706  // propagated to the post-increment IV of this loop with a trip count of 2 -
8707  //
8708  // unsigned char i;
8709  // for(i=127; i<128; i+=129)
8710  // A[i] = i;
8711  //
8712  if (PredicatedIV || !NoWrap || isKnownNonPositive(Stride) ||
8713  !loopHasNoSideEffects(L))
8714  return getCouldNotCompute();
8715 
8716  } else if (!Stride->isOne() &&
8717  doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap))
8718  // Avoid proven overflow cases: this will ensure that the backedge taken
8719  // count will not generate any unsigned overflow. Relaxed no-overflow
8720  // conditions exploit NoWrapFlags, allowing optimization in the presence
8721  // of undefined behavior, as in C.
8722  return getCouldNotCompute();
8723 
8724  ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SLT
8725  : ICmpInst::ICMP_ULT;
8726  const SCEV *Start = IV->getStart();
8727  const SCEV *End = RHS;
8728  // If the backedge is taken at least once, then it will be taken
8729  // (End-Start)/Stride times (rounded up to a multiple of Stride), where Start
8730  // is the LHS value of the less-than comparison the first time it is evaluated
8731  // and End is the RHS.
8732  const SCEV *BECountIfBackedgeTaken =
8733  computeBECount(getMinusSCEV(End, Start), Stride, false);
8734  // If the loop entry is guarded by the result of the backedge test of the
8735  // first loop iteration, then we know the backedge will be taken at least
8736  // once and so the backedge taken count is as above. If not then we use the
8737  // expression (max(End,Start)-Start)/Stride to describe the backedge count,
8738  // as if the backedge is taken at least once max(End,Start) is End and so the
8739  // result is as above, and if not max(End,Start) is Start so we get a backedge
8740  // count of zero.
8741  const SCEV *BECount;
8742  if (isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(Start, Stride), RHS))
8743  BECount = BECountIfBackedgeTaken;
8744  else {
8745  End = IsSigned ? getSMaxExpr(RHS, Start) : getUMaxExpr(RHS, Start);
8746  BECount = computeBECount(getMinusSCEV(End, Start), Stride, false);
8747  }
8748 
8749  const SCEV *MaxBECount;
8750  bool MaxOrZero = false;
8751  if (isa<SCEVConstant>(BECount))
8752  MaxBECount = BECount;
8753  else if (isa<SCEVConstant>(BECountIfBackedgeTaken)) {
8754  // If we know exactly how many times the backedge will be taken if it's
8755  // taken at least once, then the backedge count will either be that or
8756  // zero.
8757  MaxBECount = BECountIfBackedgeTaken;
8758  MaxOrZero = true;
8759  } else {
8760  // Calculate the maximum backedge count based on the range of values
8761  // permitted by Start, End, and Stride.
8762  APInt MinStart = IsSigned ? getSignedRange(Start).getSignedMin()
8763  : getUnsignedRange(Start).getUnsignedMin();
8764 
8765  unsigned BitWidth = getTypeSizeInBits(LHS->getType());
8766 
8767  APInt StrideForMaxBECount;
8768 
8769  if (PositiveStride)
8770  StrideForMaxBECount =
8771  IsSigned ? getSignedRange(Stride).getSignedMin()
8772  : getUnsignedRange(Stride).getUnsignedMin();
8773  else
8774  // Using a stride of 1 is safe when computing max backedge taken count for
8775  // a loop with unknown stride.
8776  StrideForMaxBECount = APInt(BitWidth, 1, IsSigned);
8777 
8778  APInt Limit =
8779  IsSigned ? APInt::getSignedMaxValue(BitWidth) - (StrideForMaxBECount - 1)
8780  : APInt::getMaxValue(BitWidth) - (StrideForMaxBECount - 1);
8781 
8782  // Although End can be a MAX expression we estimate MaxEnd considering only
8783  // the case End = RHS. This is safe because in the other case (End - Start)
8784  // is zero, leading to a zero maximum backedge taken count.
8785  APInt MaxEnd =
8786  IsSigned ? APIntOps::smin(getSignedRange(RHS).getSignedMax(), Limit)
8787  : APIntOps::umin(getUnsignedRange(RHS).getUnsignedMax(), Limit);
8788 
8789  MaxBECount = computeBECount(getConstant(MaxEnd - MinStart),
8790  getConstant(StrideForMaxBECount), false);
8791  }
8792 
8793  if (isa<SCEVCouldNotCompute>(MaxBECount))
8794  MaxBECount = BECount;
8795 
8796  return ExitLimit(BECount, MaxBECount, MaxOrZero, Predicates);
8797 }
8798 
8799 ScalarEvolution::ExitLimit
8800 ScalarEvolution::howManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
8801  const Loop *L, bool IsSigned,
8802  bool ControlsExit, bool AllowPredicates) {
8803  SmallVector<const SCEVPredicate *, 4> Predicates;
8804  // We handle only IV > Invariant
8805  if (!isLoopInvariant(RHS, L))
8806  return getCouldNotCompute();
8807 
8808  const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
8809  if (!IV && AllowPredicates)
8810  // Try to make this an AddRec using runtime tests, in the first X
8811  // iterations of this loop, where X is the SCEV expression found by the
8812  // algorithm below.
8813  IV = convertSCEVToAddRecWithPredicates(LHS, L, Predicates);
8814 
8815  // Avoid weird loops
8816  if (!IV || IV->getLoop() != L || !IV->isAffine())
8817  return getCouldNotCompute();
8818 
8819  bool NoWrap = ControlsExit &&
8820  IV->getNoWrapFlags(IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW);
8821 
8822  const SCEV *Stride = getNegativeSCEV(IV->getStepRecurrence(*this));
8823 
8824  // Avoid negative or zero stride values
8825  if (!isKnownPositive(Stride))
8826  return getCouldNotCompute();
8827 
8828  // Avoid proven overflow cases: this will ensure that the backedge taken count
8829  // will not generate any unsigned overflow. Relaxed no-overflow conditions
8830  // exploit NoWrapFlags, allowing optimization in the presence of undefined
8831  // behavior, as in C.
8832  if (!Stride->isOne() && doesIVOverflowOnGT(RHS, Stride, IsSigned, NoWrap))
8833  return getCouldNotCompute();
8834 
8835  ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SGT
8836  : ICmpInst::ICMP_UGT;
8837 
8838  const SCEV *Start = IV->getStart();
8839  const SCEV *End = RHS;
8840  if (!isLoopEntryGuardedByCond(L, Cond, getAddExpr(Start, Stride), RHS))
8841  End = IsSigned ? getSMinExpr(RHS, Start) : getUMinExpr(RHS, Start);
8842 
8843  const SCEV *BECount = computeBECount(getMinusSCEV(Start, End), Stride, false);
8844 
8845  APInt MaxStart = IsSigned ? getSignedRange(Start).getSignedMax()
8846  : getUnsignedRange(Start).getUnsignedMax();
8847 
8848  APInt MinStride = IsSigned ? getSignedRange(Stride).getSignedMin()
8849  : getUnsignedRange(Stride).getUnsignedMin();
8850 
8851  unsigned BitWidth = getTypeSizeInBits(LHS->getType());
8852  APInt Limit = IsSigned ? APInt::getSignedMinValue(BitWidth) + (MinStride - 1)
8853  : APInt::getMinValue(BitWidth) + (MinStride - 1);
8854 
8855  // Although End can be a MIN expression we estimate MinEnd considering only
8856  // the case End = RHS. This is safe because in the other case (Start - End)
8857  // is zero, leading to a zero maximum backedge taken count.
8858  APInt MinEnd =
8859  IsSigned ? APIntOps::smax(getSignedRange(RHS).getSignedMin(), Limit)
8860  : APIntOps::umax(getUnsignedRange(RHS).getUnsignedMin(), Limit);
8861 
8862 
8863  const SCEV *MaxBECount = getCouldNotCompute();
8864  if (isa<SCEVConstant>(BECount))
8865  MaxBECount = BECount;
8866  else
8867  MaxBECount = computeBECount(getConstant(MaxStart - MinEnd),
8868  getConstant(MinStride), false);
8869 
8870  if (isa<SCEVCouldNotCompute>(MaxBECount))
8871  MaxBECount = BECount;
8872 
8873  return ExitLimit(BECount, MaxBECount, false, Predicates);
8874 }
8875 
8876 const SCEV *SCEVAddRecExpr::getNumIterationsInRange(const ConstantRange &Range,
8877  ScalarEvolution &SE) const {
8878  if (Range.isFullSet()) // Infinite loop.
8879  return SE.getCouldNotCompute();
8880 
8881  // If the start is a non-zero constant, shift the range to simplify things.
8882  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(getStart()))
8883  if (!SC->getValue()->isZero()) {
8884  SmallVector<const SCEV *, 4> Operands(op_begin(), op_end());
8885  Operands[0] = SE.getZero(SC->getType());
8886  const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop(),
8887  getNoWrapFlags(FlagNW));
8888  if (const auto *ShiftedAddRec = dyn_cast<SCEVAddRecExpr>(Shifted))
8889  return ShiftedAddRec->getNumIterationsInRange(
8890  Range.subtract(SC->getAPInt()), SE);
8891  // This is strange and shouldn't happen.
8892  return SE.getCouldNotCompute();
8893  }
8894 
8895  // The only time we can solve this is when we have all constant indices.
8896  // Otherwise, we cannot determine the overflow conditions.
8897  if (any_of(operands(), [](const SCEV *Op) { return !isa<SCEVConstant>(Op); }))
8898  return SE.getCouldNotCompute();
8899 
8900  // Okay at this point we know that all elements of the chrec are constants and
8901  // that the start element is zero.
8902 
8903  // First check to see if the range contains zero. If not, the first
8904  // iteration exits.
8905  unsigned BitWidth = SE.getTypeSizeInBits(getType());
8906  if (!Range.contains(APInt(BitWidth, 0)))
8907  return SE.getZero(getType());
8908 
8909  if (isAffine()) {
8910  // If this is an affine expression then we have this situation:
8911  // Solve {0,+,A} in Range === Ax in Range
8912 
8913  // We know that zero is in the range. If A is positive then we know that
8914  // the upper value of the range must be the first possible exit value.
8915  // If A is negative then the lower of the range is the last possible loop
8916  // value. Also note that we already checked for a full range.
8917  APInt One(BitWidth,1);
8918  APInt A = cast<SCEVConstant>(getOperand(1))->getAPInt();
8919  APInt End = A.sge(One) ? (Range.getUpper() - One) : Range.getLower();
8920 
8921  // The exit value should be (End+A)/A.
8922  APInt ExitVal = (End + A).udiv(A);
8923  ConstantInt *ExitValue = ConstantInt::get(SE.getContext(), ExitVal);
8924 
8925  // Evaluate at the exit value. If we really did fall out of the valid
8926  // range, then we computed our trip count, otherwise wrap around or other
8927  // things must have happened.
8928  ConstantInt *Val = EvaluateConstantChrecAtConstant(this, ExitValue, SE);
8929  if (Range.contains(Val->getValue()))
8930  return SE.getCouldNotCompute(); // Something strange happened
8931 
8932  // Ensure that the previous value is in the range. This is a sanity check.
8933  assert(Range.contains(
8934  EvaluateConstantChrecAtConstant(this,
8935  ConstantInt::get(SE.getContext(), ExitVal - One), SE)->getValue()) &&
8936  "Linear scev computation is off in a bad way!");
8937  return SE.getConstant(ExitValue);
8938  } else if (isQuadratic()) {
8939  // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of the
8940  // quadratic equation to solve it. To do this, we must frame our problem in
8941  // terms of figuring out when zero is crossed, instead of when
8942  // Range.getUpper() is crossed.
8943  SmallVector<const SCEV *, 4> NewOps(op_begin(), op_end());
8944  NewOps[0] = SE.getNegativeSCEV(SE.getConstant(Range.getUpper()));
8945  const SCEV *NewAddRec = SE.getAddRecExpr(NewOps, getLoop(), FlagAnyWrap);
8946 
8947  // Next, solve the constructed addrec
8948  if (auto Roots =
8949  SolveQuadraticEquation(cast<SCEVAddRecExpr>(NewAddRec), SE)) {
8950  const SCEVConstant *R1 = Roots->first;
8951  const SCEVConstant *R2 = Roots->second;
8952  // Pick the smallest positive root value.
8953  if (ConstantInt *CB = dyn_cast<ConstantInt>(ConstantExpr::getICmp(
8954  ICmpInst::ICMP_ULT, R1->getValue(), R2->getValue()))) {
8955  if (!CB->getZExtValue())
8956  std::swap(R1, R2); // R1 is the minimum root now.
8957 
8958  // Make sure the root is not off by one. The returned iteration should
8959  // not be in the range, but the previous one should be. When solving
8960  // for "X*X < 5", for example, we should not return a root of 2.
8961  ConstantInt *R1Val =
8962  EvaluateConstantChrecAtConstant(this, R1->getValue(), SE);
8963  if (Range.contains(R1Val->getValue())) {
8964  // The next iteration must be out of the range...
8965  ConstantInt *NextVal =
8966  ConstantInt::get(SE.getContext(), R1->getAPInt() + 1);
8967 
8968  R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
8969  if (!Range.contains(R1Val->getValue()))
8970  return SE.getConstant(NextVal);
8971  return SE.getCouldNotCompute(); // Something strange happened
8972  }
8973 
8974  // If R1 was not in the range, then it is a good return value. Make
8975  // sure that R1-1 WAS in the range though, just in case.
8976  ConstantInt *NextVal =
8977  ConstantInt::get(SE.getContext(), R1->getAPInt() - 1);
8978  R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
8979  if (Range.contains(R1Val->getValue()))
8980  return R1;
8981  return SE.getCouldNotCompute(); // Something strange happened
8982  }
8983  }
8984  }
8985 
8986  return SE.getCouldNotCompute();
8987 }
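// Editorial note, not part of the original source: a worked instance of the
// linear case above. For the affine AddRec {0,+,3} and the half-open range
// [0, 10), A = 3 and End = Upper - 1 = 9, so ExitVal = (End + A) /u A
// = (9 + 3) / 3 = 4. Evaluating the recurrence at 4 gives 12, which lies
// outside the range, while the sanity check at ExitVal - 1 = 3 gives 9,
// which is still inside -- exactly the two conditions checked above.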
8988 
8989 // Return true when S contains at least one undef value.
8990 static inline bool containsUndefs(const SCEV *S) {
8991  return SCEVExprContains(S, [](const SCEV *S) {
8992  if (const auto *SU = dyn_cast<SCEVUnknown>(S))
8993  return isa<UndefValue>(SU->getValue());
8994  else if (const auto *SC = dyn_cast<SCEVConstant>(S))
8995  return isa<UndefValue>(SC->getValue());
8996  return false;
8997  });
8998 }
8999 
9000 namespace {
9001 // Collect all steps of SCEV expressions.
9002 struct SCEVCollectStrides {
9003  ScalarEvolution &SE;
9004  SmallVectorImpl<const SCEV *> &Strides;
9005 
9006  SCEVCollectStrides(ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &S)
9007  : SE(SE), Strides(S) {}
9008 
9009  bool follow(const SCEV *S) {
9010  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
9011  Strides.push_back(AR->getStepRecurrence(SE));
9012  return true;
9013  }
9014  bool isDone() const { return false; }
9015 };
9016 
9017 // Collect all SCEVUnknown and SCEVMulExpr expressions.
9018 struct SCEVCollectTerms {
9019  SmallVectorImpl<const SCEV *> &Terms;
9020 
9021  SCEVCollectTerms(SmallVectorImpl<const SCEV *> &T)
9022  : Terms(T) {}
9023 
9024  bool follow(const SCEV *S) {
9025  if (isa<SCEVUnknown>(S) || isa<SCEVMulExpr>(S) ||
9026  isa<SCEVSignExtendExpr>(S)) {
9027  if (!containsUndefs(S))
9028  Terms.push_back(S);
9029 
9030  // Stop recursion: once we collected a term, do not walk its operands.
9031  return false;
9032  }
9033 
9034  // Keep looking.
9035  return true;
9036  }
9037  bool isDone() const { return false; }
9038 };
9039 
9040 // Check if a SCEV contains an AddRecExpr.
9041 struct SCEVHasAddRec {
9042  bool &ContainsAddRec;
9043 
9044  SCEVHasAddRec(bool &ContainsAddRec) : ContainsAddRec(ContainsAddRec) {
9045  ContainsAddRec = false;
9046  }
9047 
9048  bool follow(const SCEV *S) {
9049  if (isa<SCEVAddRecExpr>(S)) {
9050  ContainsAddRec = true;
9051 
9052  // Stop recursion: once we found an AddRec, do not walk its operands.
9053  return false;
9054  }
9055 
9056  // Keep looking.
9057  return true;
9058  }
9059  bool isDone() const { return false; }
9060 };
9061 
9062 // Find factors that are multiplied with an expression that (possibly as a
9063 // subexpression) contains an AddRecExpr. In the expression:
9064 //
9065 // 8 * (100 + %p * %q * (%a + {0, +, 1}_loop))
9066 //
9067 // "%p * %q" are factors multiplied by the expression "(%a + {0, +, 1}_loop)"
9068 // that contains the AddRec {0, +, 1}_loop. %p * %q are likely to be array size
9069 // parameters as they form a product with an induction variable.
9070 //
9071 // This collector expects all array size parameters to be in the same MulExpr.
9072 // It might be necessary to later add support for collecting parameters that are
9073 // spread over different nested MulExpr.
9074 struct SCEVCollectAddRecMultiplies {
9075  SmallVectorImpl<const SCEV *> &Terms;
9076  ScalarEvolution &SE;
9077 
9078  SCEVCollectAddRecMultiplies(SmallVectorImpl<const SCEV *> &T, ScalarEvolution &SE)
9079  : Terms(T), SE(SE) {}
9080 
9081  bool follow(const SCEV *S) {
9082  if (auto *Mul = dyn_cast<SCEVMulExpr>(S)) {
9083  bool HasAddRec = false;
9084  SmallVector<const SCEV *, 0> Operands;
9085  for (auto Op : Mul->operands()) {
9086  if (isa<SCEVUnknown>(Op)) {
9087  Operands.push_back(Op);
9088  } else {
9089  bool ContainsAddRec;
9090  SCEVHasAddRec ContainsAddRecVisitor(ContainsAddRec);
9091  visitAll(Op, ContainsAddRecVisitor);
9092  HasAddRec |= ContainsAddRec;
9093  }
9094  }
9095  if (Operands.size() == 0)
9096  return true;
9097 
9098  if (!HasAddRec)
9099  return false;
9100 
9101  Terms.push_back(SE.getMulExpr(Operands));
9102  // Stop recursion: once we collected a term, do not walk its operands.
9103  return false;
9104  }
9105 
9106  // Keep looking.
9107  return true;
9108  }
9109  bool isDone() const { return false; }
9110 };
9111 }
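// Editorial walk-through, not part of the original source, tracing the
// collectors above on the expression from SCEVCollectAddRecMultiplies'
// comment, 8 * (100 + %p * %q * (%a + {0,+,1}_loop)): the outer MulExpr
// {8, (100 + ...)} contributes no SCEVUnknown operands, so the visitor keeps
// descending; the inner MulExpr {%p, %q, (%a + {0,+,1}_loop)} pushes the
// unknowns %p and %q, detects the AddRec in its remaining operand, and
// records the single term %p * %q as a likely array-size parameter.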
9112 
9113 /// Find parametric terms in this SCEVAddRecExpr. We first look for parameters in
9114 /// two places:
9115 /// 1) The strides of AddRec expressions.
9116 /// 2) Unknowns that are multiplied with AddRec expressions.
9117 void ScalarEvolution::collectParametricTerms(
9118  const SCEV *Expr, SmallVectorImpl<const SCEV *> &Terms) {
9119  SmallVector<const SCEV *, 4> Strides;
9120  SCEVCollectStrides StrideCollector(*this, Strides);
9121  visitAll(Expr, StrideCollector);
9122 
9123  DEBUG({
9124  dbgs() << "Strides:\n";
9125  for (const SCEV *S : Strides)
9126  dbgs() << *S << "\n";
9127  });
9128 
9129  for (const SCEV *S : Strides) {
9130  SCEVCollectTerms TermCollector(Terms);
9131  visitAll(S, TermCollector);
9132  }
9133 
9134  DEBUG({
9135  dbgs() << "Terms:\n";
9136  for (const SCEV *T : Terms)
9137  dbgs() << *T << "\n";
9138  });
9139 
9140  SCEVCollectAddRecMultiplies MulCollector(Terms, *this);
9141  visitAll(Expr, MulCollector);
9142 }
9143 
9144 static bool findArrayDimensionsRec(ScalarEvolution &SE,
9145  SmallVectorImpl<const SCEV *> &Terms,
9146  SmallVectorImpl<const SCEV *> &Sizes) {
9147  int Last = Terms.size() - 1;
9148  const SCEV *Step = Terms[Last];
9149 
9150  // End of recursion.
9151  if (Last == 0) {
9152  if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Step)) {
9153  SmallVector<const SCEV *, 2> Qs;
9154  for (const SCEV *Op : M->operands())
9155  if (!isa<SCEVConstant>(Op))
9156  Qs.push_back(Op);
9157 
9158  Step = SE.getMulExpr(Qs);
9159  }
9160 
9161  Sizes.push_back(Step);
9162  return true;
9163  }
9164 
9165  for (const SCEV *&Term : Terms) {
9166  // Normalize the terms before the next call to findArrayDimensionsRec.
9167  const SCEV *Q, *R;
9168  SCEVDivision::divide(SE, Term, Step, &Q, &R);
9169 
9170  // Bail out when GCD does not evenly divide one of the terms.
9171  if (!R->isZero())
9172  return false;
9173 
9174  Term = Q;
9175  }
9176 
9177  // Remove all SCEVConstants.
9178  Terms.erase(
9179  remove_if(Terms, [](const SCEV *E) { return isa<SCEVConstant>(E); }),
9180  Terms.end());
9181 
9182  if (Terms.size() > 0)
9183  if (!findArrayDimensionsRec(SE, Terms, Sizes))
9184  return false;
9185 
9186  Sizes.push_back(Step);
9187  return true;
9188 }
9189 
9190 
9191 // Returns true when one of the SCEVs of Terms contains a SCEVUnknown parameter.
9192 static bool containsParameters(SmallVectorImpl<const SCEV *> &Terms) {
9193  for (const SCEV *T : Terms)
9194  if (SCEVExprContains(T, isa<SCEVUnknown, const SCEV *>))
9195  return true;
9196  return false;
9197 }
9198 
9199 // Return the number of product terms in S.
9200 static inline int numberOfTerms(const SCEV *S) {
9201  if (const SCEVMulExpr *Expr = dyn_cast<SCEVMulExpr>(S))
9202  return Expr->getNumOperands();
9203  return 1;
9204 }
9205 
9206 static const SCEV *removeConstantFactors(ScalarEvolution &SE, const SCEV *T) {
9207  if (isa<SCEVConstant>(T))
9208  return nullptr;
9209 
9210  if (isa<SCEVUnknown>(T))
9211  return T;
9212 
9213  if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(T)) {
9214  SmallVector<const SCEV *, 2> Factors;
9215  for (const SCEV *Op : M->operands())
9216  if (!isa<SCEVConstant>(Op))
9217  Factors.push_back(Op);
9218 
9219  return SE.getMulExpr(Factors);
9220  }
9221 
9222  return T;
9223 }
9224 
9225 /// Return the size of an element read or written by Inst.
9226 const SCEV *ScalarEvolution::getElementSize(Instruction *Inst) {
9227  Type *Ty;
9228  if (StoreInst *Store = dyn_cast<StoreInst>(Inst))
9229  Ty = Store->getValueOperand()->getType();
9230  else if (LoadInst *Load = dyn_cast<LoadInst>(Inst))
9231  Ty = Load->getType();
9232  else
9233  return nullptr;
9234 
9235  Type *ETy = getEffectiveSCEVType(PointerType::getUnqual(Ty));
9236  return getSizeOfExpr(ETy, Ty);
9237 }
9238 
9239 void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms,
9240  SmallVectorImpl<const SCEV *> &Sizes,
9241  const SCEV *ElementSize) const {
9242  if (Terms.size() < 1 || !ElementSize)
9243  return;
9244 
9245  // Early return when Terms do not contain parameters: we do not delinearize
9246  // non parametric SCEVs.
9247  if (!containsParameters(Terms))
9248  return;
9249 
9250  DEBUG({
9251  dbgs() << "Terms:\n";
9252  for (const SCEV *T : Terms)
9253  dbgs() << *T << "\n";
9254  });
9255 
9256  // Remove duplicates.
9257  std::sort(Terms.begin(), Terms.end());
9258  Terms.erase(std::unique(Terms.begin(), Terms.end()), Terms.end());
9259 
9260  // Put larger terms first.
9261  std::sort(Terms.begin(), Terms.end(), [](const SCEV *LHS, const SCEV *RHS) {
9262  return numberOfTerms(LHS) > numberOfTerms(RHS);
9263  });
9264 
9265  ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
9266 
9267  // Try to divide all terms by the element size. If term is not divisible by
9268  // element size, proceed with the original term.
9269  for (const SCEV *&Term : Terms) {
9270  const SCEV *Q, *R;
9271  SCEVDivision::divide(SE, Term, ElementSize, &Q, &R);
9272  if (!Q->isZero())
9273  Term = Q;
9274  }
9275 
9276  SmallVector<const SCEV *, 4> NewTerms;
9277 
9278  // Remove constant factors.
9279  for (const SCEV *T : Terms)
9280  if (const SCEV *NewT = removeConstantFactors(SE, T))
9281  NewTerms.push_back(NewT);
9282 
9283  DEBUG({
9284  dbgs() << "Terms after sorting:\n";
9285  for (const SCEV *T : NewTerms)
9286  dbgs() << *T << "\n";
9287  });
9288 
9289  if (NewTerms.empty() ||
9290  !findArrayDimensionsRec(SE, NewTerms, Sizes)) {
9291  Sizes.clear();
9292  return;
9293  }
9294 
9295  // The last element to be pushed into Sizes is the size of an element.
9296  Sizes.push_back(ElementSize);
9297 
9298  DEBUG({
9299  dbgs() << "Sizes:\n";
9300  for (const SCEV *S : Sizes)
9301  dbgs() << *S << "\n";
9302  });
9303 }
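// Editorial walk-through, not part of the original source, matching the
// example in delinearize's documentation below: starting from
// Terms = {8 * %m * %o, 8 * %o} and ElementSize = 8, the division loop above
// rewrites the terms to {%m * %o, %o}; findArrayDimensionsRec then peels off
// the GCD-like Step %o (leaving {%m}) and finally %m, so the function
// produces Sizes = {%m, %o, 8}.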
9304 
9305 void ScalarEvolution::computeAccessFunctions(
9306  const SCEV *Expr, SmallVectorImpl<const SCEV *> &Subscripts,
9307  SmallVectorImpl<const SCEV *> &Sizes) {
9308 
9309  // Early exit in case this SCEV is not an affine multivariate function.
9310  if (Sizes.empty())
9311  return;
9312 
9313  if (auto *AR = dyn_cast<SCEVAddRecExpr>(Expr))
9314  if (!AR->isAffine())
9315  return;
9316 
9317  const SCEV *Res = Expr;
9318  int Last = Sizes.size() - 1;
9319  for (int i = Last; i >= 0; i--) {
9320  const SCEV *Q, *R;
9321  SCEVDivision::divide(*this, Res, Sizes[i], &Q, &R);
9322 
9323  DEBUG({
9324  dbgs() << "Res: " << *Res << "\n";
9325  dbgs() << "Sizes[i]: " << *Sizes[i] << "\n";
9326  dbgs() << "Res divided by Sizes[i]:\n";
9327  dbgs() << "Quotient: " << *Q << "\n";
9328  dbgs() << "Remainder: " << *R << "\n";
9329  });
9330 
9331  Res = Q;
9332 
9333  // Do not record the last subscript corresponding to the size of elements in
9334  // the array.
9335  if (i == Last) {
9336 
9337  // Bail out if the remainder is too complex.
9338  if (isa<SCEVAddRecExpr>(R)) {
9339  Subscripts.clear();
9340  Sizes.clear();
9341  return;
9342  }
9343 
9344  continue;
9345  }
9346 
9347  // Record the access function for the current subscript.
9348  Subscripts.push_back(R);
9349  }
9350 
9351  // Also push in last position the remainder of the last division: it will be
9352  // the access function of the innermost dimension.
9353  Subscripts.push_back(Res);
9354 
9355  std::reverse(Subscripts.begin(), Subscripts.end());
9356 
9357  DEBUG({
9358  dbgs() << "Subscripts:\n";
9359  for (const SCEV *S : Subscripts)
9360  dbgs() << *S << "\n";
9361  });
9362 }
9363 
9364 /// Splits the SCEV into two vectors of SCEVs representing the subscripts and
9365 /// sizes of an array access. Returns the remainder of the delinearization that
9366 /// is the offset start of the array. The SCEV->delinearize algorithm computes
9367 /// the multiples of SCEV coefficients: that is a pattern matching of sub-
9368 /// expressions in the stride and base of a SCEV corresponding to the
9369 /// computation of a GCD (greatest common divisor) of base and stride. When
9370 /// SCEV->delinearize fails, it returns the SCEV unchanged.
9371 ///
9372 /// For example: when analyzing the memory access A[i][j][k] in this loop nest
9373 ///
9374 /// void foo(long n, long m, long o, double A[n][m][o]) {
9375 ///
9376 /// for (long i = 0; i < n; i++)
9377 /// for (long j = 0; j < m; j++)
9378 /// for (long k = 0; k < o; k++)
9379 /// A[i][j][k] = 1.0;
9380 /// }
9381 ///
9382 /// the delinearization input is the following AddRec SCEV:
9383 ///
9384 /// AddRec: {{{%A,+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
9385 ///
9386 /// From this SCEV, we are able to say that the base offset of the access is %A
9387 /// because it appears as an offset that does not divide any of the strides in
9388 /// the loops:
9389 ///
9390 /// CHECK: Base offset: %A
9391 ///
9392 /// and then SCEV->delinearize determines the size of some of the dimensions of
9393 /// the array as these are the multiples by which the strides are happening:
9394 ///
9395 /// CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) bytes.
9396 ///
9397 /// Note that the outermost dimension remains of UnknownSize because there are
9398 /// no strides that would help identify the size of the last dimension: when
9399 /// the array has been statically allocated, one could compute the size of that
9400 /// dimension by dividing the overall size of the array by the size of the known
9401 /// dimensions: %m * %o * 8.
9402 ///
9403 /// Finally delinearize provides the access functions for the array reference
9404 /// that corresponds to A[i][j][k] of the above C testcase:
9405 ///
9406 /// CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>]
9407 ///
9408 /// The testcases are checking the output of a function pass:
9409 /// DelinearizationPass that walks through all loads and stores of a function
9410 /// asking for the SCEV of the memory access with respect to all enclosing
9411 /// loops, calling SCEV->delinearize on that and printing the results.
9412 
9413 void ScalarEvolution::delinearize(const SCEV *Expr,
9414  SmallVectorImpl<const SCEV *> &Subscripts,
9415  SmallVectorImpl<const SCEV *> &Sizes,
9416  const SCEV *ElementSize) {
9417  // First step: collect parametric terms.
9418  SmallVector<const SCEV *, 4> Terms;
9419  collectParametricTerms(Expr, Terms);
9420 
9421  if (Terms.empty())
9422  return;
9423 
9424  // Second step: find subscript sizes.
9425  findArrayDimensions(Terms, Sizes, ElementSize);
9426 
9427  if (Sizes.empty())
9428  return;
9429 
9430  // Third step: compute the access functions for each subscript.
9431  computeAccessFunctions(Expr, Subscripts, Sizes);
9432 
9433  if (Subscripts.empty())
9434  return;
9435 
9436  DEBUG({
9437  dbgs() << "succeeded to delinearize " << *Expr << "\n";
9438  dbgs() << "ArrayDecl[UnknownSize]";
9439  for (const SCEV *S : Sizes)
9440  dbgs() << "[" << *S << "]";
9441 
9442  dbgs() << "\nArrayRef";
9443  for (const SCEV *S : Subscripts)
9444  dbgs() << "[" << *S << "]";
9445  dbgs() << "\n";
9446  });
9447 }
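// A minimal usage sketch, editorial and not part of this file, in the spirit
// of the DelinearizationPass described above. "SE" is a ScalarEvolution
// instance, "Inst" a load or store in loop "L", and getPointerOperand is an
// assumed helper returning the access's pointer operand.
//
//   const SCEV *AccessFn = SE.getSCEVAtScope(getPointerOperand(Inst), L);
//   SmallVector<const SCEV *, 3> Subscripts, Sizes;
//   SE.delinearize(AccessFn, Subscripts, Sizes, SE.getElementSize(Inst));
//   if (!Subscripts.empty())
//     ; // Subscripts[i] is now the access function of dimension i.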
9448 
9449 //===----------------------------------------------------------------------===//
9450 // SCEVCallbackVH Class Implementation
9451 //===----------------------------------------------------------------------===//
9452 
9453 void ScalarEvolution::SCEVCallbackVH::deleted() {
9454  assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");
9455  if (PHINode *PN = dyn_cast<PHINode>(getValPtr()))
9456  SE->ConstantEvolutionLoopExitValue.erase(PN);
9457  SE->eraseValueFromMap(getValPtr());
9458  // this now dangles!
9459 }
9460 
9461 void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) {
9462  assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");
9463 
9464  // Forget all the expressions associated with users of the old value,
9465  // so that future queries will recompute the expressions using the new
9466  // value.
9467  Value *Old = getValPtr();
9468  SmallVector<User *, 16> Worklist(Old->user_begin(), Old->user_end());
9469  SmallPtrSet<User *, 8> Visited;
9470  while (!Worklist.empty()) {
9471  User *U = Worklist.pop_back_val();
9472  // Deleting the Old value will cause this to dangle. Postpone
9473  // that until everything else is done.
9474  if (U == Old)
9475  continue;
9476  if (!Visited.insert(U).second)
9477  continue;
9478  if (PHINode *PN = dyn_cast<PHINode>(U))
9479  SE->ConstantEvolutionLoopExitValue.erase(PN);
9480  SE->eraseValueFromMap(U);
9481  Worklist.insert(Worklist.end(), U->user_begin(), U->user_end());
9482  }
9483  // Delete the Old value.
9484  if (PHINode *PN = dyn_cast<PHINode>(Old))
9485  SE->ConstantEvolutionLoopExitValue.erase(PN);
9486  SE->eraseValueFromMap(Old);
9487  // this now dangles!
9488 }
9489 
9490 ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
9491  : CallbackVH(V), SE(se) {}
9492 
9493 //===----------------------------------------------------------------------===//
9494 // ScalarEvolution Class Implementation
9495 //===----------------------------------------------------------------------===//
9496 
9497 ScalarEvolution::ScalarEvolution(Function &F, TargetLibraryInfo &TLI,
9498  AssumptionCache &AC, DominatorTree &DT,
9499  LoopInfo &LI)
9500  : F(F), TLI(TLI), AC(AC), DT(DT), LI(LI),
9501  CouldNotCompute(new SCEVCouldNotCompute()),
9502  WalkingBEDominatingConds(false), ProvingSplitPredicate(false),
9503  ValuesAtScopes(64), LoopDispositions(64), BlockDispositions(64),
9504  FirstUnknown(nullptr) {
9505 
9506  // To use guards for proving predicates, we need to scan every instruction in
9507  // relevant basic blocks, and not just terminators. Doing this is a waste of
9508  // time if the IR does not actually contain any calls to
9509  // @llvm.experimental.guard, so do a quick check and remember this beforehand.
9510  //
9511  // This pessimizes the case where a pass that preserves ScalarEvolution wants
9512  // to _add_ guards to the module when there weren't any before, and wants
9513  // ScalarEvolution to optimize based on those guards. For now we prefer to be
9514  // efficient in lieu of being smart in that rather obscure case.
9515 
9516  auto *GuardDecl = F.getParent()->getFunction(
9517  Intrinsic::getName(Intrinsic::experimental_guard));
9518  HasGuards = GuardDecl && !GuardDecl->use_empty();
9519 }
9520 
9521 ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg)
9522  : F(Arg.F), HasGuards(Arg.HasGuards), TLI(Arg.TLI), AC(Arg.AC), DT(Arg.DT),
9523  LI(Arg.LI), CouldNotCompute(std::move(Arg.CouldNotCompute)),
9524  ValueExprMap(std::move(Arg.ValueExprMap)),
9525  PendingLoopPredicates(std::move(Arg.PendingLoopPredicates)),
9526  WalkingBEDominatingConds(false), ProvingSplitPredicate(false),
9527  BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)),
9528  PredicatedBackedgeTakenCounts(
9529  std::move(Arg.PredicatedBackedgeTakenCounts)),
9530  ConstantEvolutionLoopExitValue(
9531  std::move(Arg.ConstantEvolutionLoopExitValue)),
9532  ValuesAtScopes(std::move(Arg.ValuesAtScopes)),
9533  LoopDispositions(std::move(Arg.LoopDispositions)),
9534  LoopPropertiesCache(std::move(Arg.LoopPropertiesCache)),
9535  BlockDispositions(std::move(Arg.BlockDispositions)),
9536  UnsignedRanges(std::move(Arg.UnsignedRanges)),
9537  SignedRanges(std::move(Arg.SignedRanges)),
9538  UniqueSCEVs(std::move(Arg.UniqueSCEVs)),
9539  UniquePreds(std::move(Arg.UniquePreds)),
9540  SCEVAllocator(std::move(Arg.SCEVAllocator)),
9541  FirstUnknown(Arg.FirstUnknown) {
9542  Arg.FirstUnknown = nullptr;
9543 }
9544 
9545 ScalarEvolution::~ScalarEvolution() {
9546  // Iterate through all the SCEVUnknown instances and call their
9547  // destructors, so that they release their references to their values.
9548  for (SCEVUnknown *U = FirstUnknown; U;) {
9549  SCEVUnknown *Tmp = U;
9550  U = U->Next;
9551  Tmp->~SCEVUnknown();
9552  }
9553  FirstUnknown = nullptr;
9554 
9555  ExprValueMap.clear();
9556  ValueExprMap.clear();
9557  HasRecMap.clear();
9558 
9559  // Free any extra memory created for ExitNotTakenInfo in the unlikely event
9560  // that a loop had multiple computable exits.
9561  for (auto &BTCI : BackedgeTakenCounts)
9562  BTCI.second.clear();
9563  for (auto &BTCI : PredicatedBackedgeTakenCounts)
9564  BTCI.second.clear();
9565 
9566  assert(PendingLoopPredicates.empty() && "isImpliedCond garbage");
9567  assert(!WalkingBEDominatingConds && "isLoopBackedgeGuardedByCond garbage!");
9568  assert(!ProvingSplitPredicate && "ProvingSplitPredicate garbage!");
9569 }
9570 
9571 bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) {
9572  return !isa<SCEVCouldNotCompute>(getBackedgeTakenCount(L));
9573 }
9574 
9575 static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
9576  const Loop *L) {
9577  // Print all inner loops first
9578  for (Loop *I : *L)
9579  PrintLoopInfo(OS, SE, I);
9580 
9581  OS << "Loop ";
9582  L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
9583  OS << ": ";
9584 
9585  SmallVector<BasicBlock *, 8> ExitBlocks;
9586  L->getExitBlocks(ExitBlocks);
9587  if (ExitBlocks.size() != 1)
9588  OS << "<multiple exits> ";
9589 
9591  OS << "backedge-taken count is " << *SE->getBackedgeTakenCount(L);
9592  } else {
9593  OS << "Unpredictable backedge-taken count. ";
9594  }
9595 
9596  OS << "\n"
9597  "Loop ";
9598  L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
9599  OS << ": ";
9600 
9601  if (!isa<SCEVCouldNotCompute>(SE->getMaxBackedgeTakenCount(L))) {
9602  OS << "max backedge-taken count is " << *SE->getMaxBackedgeTakenCount(L);
9603  if (SE->isBackedgeTakenCountMaxOrZero(L))
9604  OS << ", actual taken count either this or zero.";
9605  } else {
9606  OS << "Unpredictable max backedge-taken count. ";
9607  }
9608 
9609  OS << "\n"
9610  "Loop ";
9611  L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
9612  OS << ": ";
9613 
9614  SCEVUnionPredicate Pred;
9615  auto PBT = SE->getPredicatedBackedgeTakenCount(L, Pred);
9616  if (!isa<SCEVCouldNotCompute>(PBT)) {
9617  OS << "Predicated backedge-taken count is " << *PBT << "\n";
9618  OS << " Predicates:\n";
9619  Pred.print(OS, 4);
9620  } else {
9621  OS << "Unpredictable predicated backedge-taken count. ";
9622  }
9623  OS << "\n";
9624 }
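// For orientation, an editorial sketch (not in the original source) of what
// this printer emits for a simple counted loop, following the stream
// insertions above; the loop header name is illustrative:
//
//   Loop %for.body: backedge-taken count is 99
//   Loop %for.body: max backedge-taken count is 99
//   Loop %for.body: Predicated backedge-taken count is 99
//    Predicates:
//
// with "Unpredictable ..." variants whenever a count cannot be computed.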
9625 
9626 static const char *loopDispositionToStr(ScalarEvolution::LoopDisposition LD) {
9627  switch (LD) {
9628  case ScalarEvolution::LoopVariant:
9629  return "Variant";
9630  case ScalarEvolution::LoopInvariant:
9631  return "Invariant";
9632  case ScalarEvolution::LoopComputable:
9633  return "Computable";
9634  }
9635  llvm_unreachable("Unknown ScalarEvolution::LoopDisposition kind!");
9636 }
9637 
9638 void ScalarEvolution::print(raw_ostream &OS) const {
9639  // ScalarEvolution's implementation of the print method is to print
9640  // out SCEV values of all instructions that are interesting. Doing
9641  // this potentially causes it to create new SCEV objects though,
9642  // which technically conflicts with the const qualifier. This isn't
9643  // observable from outside the class though, so casting away the
9644  // const isn't dangerous.
9645  ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
9646 
9647  OS << "Classifying expressions for: ";
9648  F.printAsOperand(OS, /*PrintType=*/false);
9649  OS << "\n";
9650  for (Instruction &I : instructions(F))
9651  if (isSCEVable(I.getType()) && !isa<CmpInst>(I)) {
9652  OS << I << '\n';
9653  OS << " --> ";
9654  const SCEV *SV = SE.getSCEV(&I);
9655  SV->print(OS);
9656  if (!isa<SCEVCouldNotCompute>(SV)) {
9657  OS << " U: ";
9658  SE.getUnsignedRange(SV).print(OS);
9659  OS << " S: ";
9660  SE.getSignedRange(SV).print(OS);
9661  }
9662 
9663  const Loop *L = LI.getLoopFor(I.getParent());
9664 
9665  const SCEV *AtUse = SE.getSCEVAtScope(SV, L);
9666  if (AtUse != SV) {
9667  OS << " --> ";
9668  AtUse->print(OS);
9669  if (!isa<SCEVCouldNotCompute>(AtUse)) {
9670  OS << " U: ";
9671  SE.getUnsignedRange(AtUse).print(OS);
9672  OS << " S: ";
9673  SE.getSignedRange(AtUse).print(OS);
9674  }
9675  }
9676 
9677  if (L) {
9678  OS << "\t\t" "Exits: ";
9679  const SCEV *ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop());
9680  if (!SE.isLoopInvariant(ExitValue, L)) {
9681  OS << "<<Unknown>>";
9682  } else {
9683  OS << *ExitValue;
9684  }
9685 
9686  bool First = true;
9687  for (auto *Iter = L; Iter; Iter = Iter->getParentLoop()) {
9688  if (First) {
9689  OS << "\t\t" "LoopDispositions: { ";
9690  First = false;
9691  } else {
9692  OS << ", ";
9693  }
9694 
9695  Iter->getHeader()->printAsOperand(OS, /*PrintType=*/false);
9696  OS << ": " << loopDispositionToStr(SE.getLoopDisposition(SV, Iter));
9697  }
9698 
9699  for (auto *InnerL : depth_first(L)) {
9700  if (InnerL == L)
9701  continue;
9702  if (First) {
9703  OS << "\t\t" "LoopDispositions: { ";
9704  First = false;
9705  } else {
9706  OS << ", ";
9707  }
9708 
9709  InnerL->getHeader()->printAsOperand(OS, /*PrintType=*/false);
9710  OS << ": " << loopDispositionToStr(SE.getLoopDisposition(SV, InnerL));
9711  }
9712 
9713  OS << " }";
9714  }
9715 
9716  OS << "\n";
9717  }
9718 
9719  OS << "Determining loop execution counts for: ";
9720  F.printAsOperand(OS, /*PrintType=*/false);
9721  OS << "\n";
9722  for (Loop *I : LI)
9723  PrintLoopInfo(OS, &SE, I);
9724 }
9725 
9726 ScalarEvolution::LoopDisposition
9727 ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) {
9728  auto &Values = LoopDispositions[S];
9729  for (auto &V : Values) {
9730  if (V.getPointer() == L)
9731  return V.getInt();
9732  }
9733  Values.emplace_back(L, LoopVariant);
9734  LoopDisposition D = computeLoopDisposition(S, L);
9735  auto &Values2 = LoopDispositions[S];
9736  for (auto &V : make_range(Values2.rbegin(), Values2.rend())) {
9737  if (V.getPointer() == L) {
9738  V.setInt(D);
9739  break;
9740  }
9741  }
9742  return D;
9743 }
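// Editorial note, not part of the original source: the provisional
// (L, LoopVariant) entry pushed above before calling computeLoopDisposition
// makes the query self-terminating -- if the computation recurses back to
// the same (S, L) pair it finds the conservative LoopVariant answer instead
// of looping forever; the reverse scan afterwards overwrites that
// placeholder with the computed disposition.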
9744 
9745 ScalarEvolution::LoopDisposition
9746 ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
9747  switch (static_cast<SCEVTypes>(S->getSCEVType())) {
9748  case scConstant:
9749  return LoopInvariant;
9750  case scTruncate:
9751  case scZeroExtend:
9752  case scSignExtend:
9753  return getLoopDisposition(cast<SCEVCastExpr>(S)->getOperand(), L);
9754  case scAddRecExpr: {
9755  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);
9756 
9757  // If L is the addrec's loop, it's computable.
9758  if (AR->getLoop() == L)
9759  return LoopComputable;
9760 
9761  // Add recurrences are never invariant in the function-body (null loop).
9762  if (!L)
9763  return LoopVariant;
9764 
9765  // This recurrence is variant w.r.t. L if L contains AR's loop.
9766  if (L->contains(AR->getLoop()))
9767  return LoopVariant;
9768 
9769  // This recurrence is invariant w.r.t. L if AR's loop contains L.
9770  if (AR->getLoop()->contains(L))
9771  return LoopInvariant;
9772 
9773  // This recurrence is variant w.r.t. L if any of its operands
9774  // are variant.
9775  for (auto *Op : AR->operands())
9776  if (!isLoopInvariant(Op, L))
9777  return LoopVariant;
9778 
9779  // Otherwise it's loop-invariant.
9780  return LoopInvariant;
9781  }
9782  case scAddExpr:
9783  case scMulExpr:
9784  case scUMaxExpr:
9785  case scSMaxExpr: {
9786  bool HasVarying = false;
9787  for (auto *Op : cast<SCEVNAryExpr>(S)->operands()) {
9788  LoopDisposition D = getLoopDisposition(Op, L);
9789  if (D == LoopVariant)
9790  return LoopVariant;
9791  if (D == LoopComputable)
9792  HasVarying = true;
9793  }
9794  return HasVarying ? LoopComputable : LoopInvariant;
9795  }
9796  case scUDivExpr: {
9797  const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
9798  LoopDisposition LD = getLoopDisposition(UDiv->getLHS(), L);
9799  if (LD == LoopVariant)
9800  return LoopVariant;
9801  LoopDisposition RD = getLoopDisposition(UDiv->getRHS(), L);
9802  if (RD == LoopVariant)
9803  return LoopVariant;
9804  return (LD == LoopInvariant && RD == LoopInvariant) ?
9805  LoopInvariant : LoopComputable;
9806  }
9807  case scUnknown:
9808  // All non-instruction values are loop invariant. All instructions are loop
9809  // invariant if they are not contained in the specified loop.
9810  // Instructions are never considered invariant in the function body
9811  // (null loop) because they are defined within the "loop".
9812  if (auto *I = dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue()))
9813  return (L && !L->contains(I)) ? LoopInvariant : LoopVariant;
9814  return LoopInvariant;
9815  case scCouldNotCompute:
9816  llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
9817  }
9818  llvm_unreachable("Unknown SCEV kind!");
9819 }
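// Editorial example, not part of the original source: for S = {0,+,1}<%L>,
// the rules above give LoopComputable w.r.t. %L itself, LoopInvariant w.r.t.
// any loop nested inside %L (S only steps on %L's backedge), and LoopVariant
// w.r.t. any loop that contains %L.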
9820 
9821 bool ScalarEvolution::isLoopInvariant(const SCEV *S, const Loop *L) {
9822  return getLoopDisposition(S, L) == LoopInvariant;
9823 }
9824 
9825 bool ScalarEvolution::hasComputableLoopEvolution(const SCEV *S, const Loop *L) {
9826  return getLoopDisposition(S, L) == LoopComputable;
9827 }
9828 
9829 ScalarEvolution::BlockDisposition
9830 ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) {
9831  auto &Values = BlockDispositions[S];
9832  for (auto &V : Values) {
9833  if (V.getPointer() == BB)
9834  return V.getInt();
9835  }
9836  Values.emplace_back(BB, DoesNotDominateBlock);
9837  BlockDisposition D = computeBlockDisposition(S, BB);
9838  auto &Values2 = BlockDispositions[S];
9839  for (auto &V : make_range(Values2.rbegin(), Values2.rend())) {
9840  if (V.getPointer() == BB) {
9841  V.setInt(D);
9842  break;
9843  }
9844  }
9845  return D;
9846 }
9847 
9848 ScalarEvolution::BlockDisposition
9849 ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
9850  switch (static_cast<SCEVTypes>(S->getSCEVType())) {
9851  case scConstant:
9852  return ProperlyDominatesBlock;
9853  case scTruncate:
9854  case scZeroExtend:
9855  case scSignExtend:
9856  return getBlockDisposition(cast<SCEVCastExpr>(S)->getOperand(), BB);
9857  case scAddRecExpr: {
9858  // This uses a "dominates" query instead of "properly dominates" query
9859  // to test for proper dominance too, because the instruction which
9860  // produces the addrec's value is a PHI, and a PHI effectively properly
9861  // dominates its entire containing block.
9862  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);
9863  if (!DT.dominates(AR->getLoop()->getHeader(), BB))
9864  return DoesNotDominateBlock;
9865 
9866  // Fall through into SCEVNAryExpr handling.
9867  LLVM_FALLTHROUGH;
9868  }
9869  case scAddExpr:
9870  case scMulExpr:
9871  case scUMaxExpr:
9872  case scSMaxExpr: {
9873  const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
9874  bool Proper = true;
9875  for (const SCEV *NAryOp : NAry->operands()) {
9876  BlockDisposition D = getBlockDisposition(NAryOp, BB);
9877  if (D == DoesNotDominateBlock)
9878  return DoesNotDominateBlock;
9879  if (D == DominatesBlock)
9880  Proper = false;
9881  }
9882  return Proper ? ProperlyDominatesBlock : DominatesBlock;
9883  }
9884  case scUDivExpr: {
9885  const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
9886  const SCEV *LHS = UDiv->getLHS(), *RHS = UDiv->getRHS();
9887  BlockDisposition LD = getBlockDisposition(LHS, BB);
9888  if (LD == DoesNotDominateBlock)
9889  return DoesNotDominateBlock;
9890  BlockDisposition RD = getBlockDisposition(RHS, BB);
9891  if (RD == DoesNotDominateBlock)
9892  return DoesNotDominateBlock;
9893  return (LD == ProperlyDominatesBlock && RD == ProperlyDominatesBlock) ?
9894  ProperlyDominatesBlock : DominatesBlock;
9895  }
9896  case scUnknown:
9897  if (Instruction *I =
9898  dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue())) {
9899  if (I->getParent() == BB)
9900  return DominatesBlock;
9901  if (DT.properlyDominates(I->getParent(), BB))
9902  return ProperlyDominatesBlock;
9903  return DoesNotDominateBlock;
9904  }
9905  return ProperlyDominatesBlock;
9906  case scCouldNotCompute:
9907  llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
9908  }
9909  llvm_unreachable("Unknown SCEV kind!");
9910 }
9911 
9912 bool ScalarEvolution::dominates(const SCEV *S, const BasicBlock *BB) {
9913  return getBlockDisposition(S, BB) >= DominatesBlock;
9914 }
9915 
9916 bool ScalarEvolution::properlyDominates(const SCEV *S, const BasicBlock *BB) {
9917  return getBlockDisposition(S, BB) == ProperlyDominatesBlock;
9918 }
9919 
9920 bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const {
9921  return SCEVExprContains(S, [&](const SCEV *Expr) { return Expr == Op; });
9922 }
9923 
9924 void ScalarEvolution::forgetMemoizedResults(const SCEV *S) {
9925  ValuesAtScopes.erase(S);
9926  LoopDispositions.erase(S);
9927  BlockDispositions.erase(S);
9928  UnsignedRanges.erase(S);
9929  SignedRanges.erase(S);
9930  ExprValueMap.erase(S);
9931  HasRecMap.erase(S);
9932 
9933  auto RemoveSCEVFromBackedgeMap =
9934  [S, this](DenseMap<const Loop *, BackedgeTakenInfo> &Map) {
9935  for (auto I = Map.begin(), E = Map.end(); I != E;) {
9936  BackedgeTakenInfo &BEInfo = I->second;
9937  if (BEInfo.hasOperand(S, this)) {
9938  BEInfo.clear();
9939  Map.erase(I++);
9940  } else
9941  ++I;
9942  }
9943  };
9944 
9945  RemoveSCEVFromBackedgeMap(BackedgeTakenCounts);
9946  RemoveSCEVFromBackedgeMap(PredicatedBackedgeTakenCounts);
9947 }
9948 
9949 typedef DenseMap<const Loop *, std::string> VerifyMap;
9950 
9951 /// replaceSubString - Replaces all occurrences of From in Str with To.
9952 static void replaceSubString(std::string &Str, StringRef From, StringRef To) {
9953  size_t Pos = 0;
9954  while ((Pos = Str.find(From, Pos)) != std::string::npos) {
9955  Str.replace(Pos, From.size(), To.data(), To.size());
9956  Pos += To.size();
9957  }
9958 }
9959 
9960 /// getLoopBackedgeTakenCounts - Helper method for verifyAnalysis.
9961 static void
9962 getLoopBackedgeTakenCounts(Loop *L, VerifyMap &Map, ScalarEvolution &SE) {
9963  std::string &S = Map[L];
9964  if (S.empty()) {
9965  raw_string_ostream OS(S);
9966  SE.getBackedgeTakenCount(L)->print(OS);
9967 
9968  // false and 0 are semantically equivalent. This can happen in dead loops.
9969  replaceSubString(OS.str(), "false", "0");
9970  // Remove wrap flags, their use in SCEV is highly fragile.
9971  // FIXME: Remove this when SCEV gets smarter about them.
9972  replaceSubString(OS.str(), "<nw>", "");
9973  replaceSubString(OS.str(), "<nsw>", "");
9974  replaceSubString(OS.str(), "<nuw>", "");
9975  }
9976 
9977  for (auto *R : reverse(*L))
9978  getLoopBackedgeTakenCounts(R, Map, SE); // recurse.
9979 }
9980 
9981 void ScalarEvolution::verify() const {
9982  ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
9983 
9984  // Gather stringified backedge taken counts for all loops using SCEV's caches.
9985  // FIXME: It would be much better to store actual values instead of strings,
9986  // but SCEV pointers will change if we drop the caches.
9987  VerifyMap BackedgeDumpsOld, BackedgeDumpsNew;
9988  for (LoopInfo::reverse_iterator I = LI.rbegin(), E = LI.rend(); I != E; ++I)
9989  getLoopBackedgeTakenCounts(*I, BackedgeDumpsOld, SE);
9990 
9991  // Gather stringified backedge taken counts for all loops using a fresh
9992  // ScalarEvolution object.
9993  ScalarEvolution SE2(F, TLI, AC, DT, LI);
9994  for (LoopInfo::reverse_iterator I = LI.rbegin(), E = LI.rend(); I != E; ++I)
9995  getLoopBackedgeTakenCounts(*I, BackedgeDumpsNew, SE2);
9996 
9997  // Now compare whether they're the same with and without caches. This allows
9998  // verifying that no pass changed the cache.
9999  assert(BackedgeDumpsOld.size() == BackedgeDumpsNew.size() &&
10000  "New loops suddenly appeared!");
10001 
10002  for (VerifyMap::iterator OldI = BackedgeDumpsOld.begin(),
10003  OldE = BackedgeDumpsOld.end(),
10004  NewI = BackedgeDumpsNew.begin();
10005  OldI != OldE; ++OldI, ++NewI) {
10006  assert(OldI->first == NewI->first && "Loop order changed!");
10007 
10008  // Compare the stringified SCEVs. We don't care if undef backedge-taken
10009  // counts change.
10010  // FIXME: We currently ignore SCEV changes from/to CouldNotCompute. This
10011  // means that a pass is buggy or SCEV has to learn a new pattern but is
10012  // usually not harmful.
10013  if (OldI->second != NewI->second &&
10014  OldI->second.find("undef") == std::string::npos &&
10015  NewI->second.find("undef") == std::string::npos &&
10016  OldI->second != "***COULDNOTCOMPUTE***" &&
10017  NewI->second != "***COULDNOTCOMPUTE***") {
10018  dbgs() << "SCEVValidator: SCEV for loop '"
10019  << OldI->first->getHeader()->getName()
10020  << "' changed from '" << OldI->second
10021  << "' to '" << NewI->second << "'!\n";
10022  std::abort();
10023  }
10024  }
10025 
10026  // TODO: Verify more things.
10027 }
10028 
10029 bool ScalarEvolution::invalidate(
10030  Function &F, const PreservedAnalyses &PA,
10031  FunctionAnalysisManager::Invalidator &Inv) {
10032  // Invalidate the ScalarEvolution object whenever it isn't preserved or one
10033  // of its dependencies is invalidated.
10034  auto PAC = PA.getChecker<ScalarEvolutionAnalysis>();
10035  return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>()) ||
10036  Inv.invalidate<AssumptionAnalysis>(F, PA) ||
10037  Inv.invalidate<DominatorTreeAnalysis>(F, PA) ||
10038  Inv.invalidate<LoopAnalysis>(F, PA);
10039 }
10040 
10041 AnalysisKey ScalarEvolutionAnalysis::Key;
10042 
10043 ScalarEvolution ScalarEvolutionAnalysis::run(Function &F,
10044  FunctionAnalysisManager &AM) {
10045  return ScalarEvolution(F, AM.getResult<TargetLibraryAnalysis>(F),
10046  AM.getResult<AssumptionAnalysis>(F),
10047  AM.getResult<DominatorTreeAnalysis>(F),
10048  AM.getResult<LoopAnalysis>(F));
10049 }
10050 
10051 PreservedAnalyses
10052 ScalarEvolutionPrinterPass::run(Function &F, FunctionAnalysisManager &AM) {
10053  AM.getResult<ScalarEvolutionAnalysis>(F).print(OS);
10054  return PreservedAnalyses::all();
10055 }
10056 
10058  "Scalar Evolution Analysis", false, true)
10064  "Scalar Evolution Analysis", false, true)
10065 char ScalarEvolutionWrapperPass::ID = 0;
10066 
10067 ScalarEvolutionWrapperPass::ScalarEvolutionWrapperPass() : FunctionPass(ID) {
10068  initializeScalarEvolutionWrapperPassPass(*PassRegistry::getPassRegistry());
10069 }
10070 
10071 bool ScalarEvolutionWrapperPass::runOnFunction(Function &F) {
10072  SE.reset(new ScalarEvolution(
10073  F, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
10074  getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
10075  getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
10076  getAnalysis<LoopInfoWrapperPass>().getLoopInfo()));
10077  return false;
10078 }
10079 
10080 void ScalarEvolutionWrapperPass::releaseMemory() { SE.reset(); }
10081 
10082 void ScalarEvolutionWrapperPass::print(raw_ostream &OS, const Module *) const {
10083  SE->print(OS);
10084 }
10085 
10086 void ScalarEvolutionWrapperPass::verifyAnalysis() const {
10087  if (!VerifySCEV)
10088  return;
10089 
10090  SE->verify();
10091 }
10092 
10093 void ScalarEvolutionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
10094  AU.setPreservesAll();
10095  AU.addRequiredTransitive<AssumptionCacheTracker>();
10096  AU.addRequiredTransitive<LoopInfoWrapperPass>();
10097  AU.addRequiredTransitive<DominatorTreeWrapperPass>();
10098  AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
10099 }
10100 
10101 const SCEVPredicate *
10102 ScalarEvolution::getEqualPredicate(const SCEVUnknown *LHS,
10103  const SCEVConstant *RHS) {
10104  FoldingSetNodeID ID;
10105  // Unique this node based on the arguments
10106  ID.AddInteger(SCEVPredicate::P_Equal);
10107  ID.AddPointer(LHS);
10108  ID.AddPointer(RHS);
10109  void *IP = nullptr;
10110  if (const auto *S = UniquePreds.FindNodeOrInsertPos(ID, IP))
10111  return S;
10112  SCEVEqualPredicate *Eq = new (SCEVAllocator)
10113  SCEVEqualPredicate(ID.Intern(SCEVAllocator), LHS, RHS);
10114  UniquePreds.InsertNode(Eq, IP);
10115  return Eq;
10116 }
10117 
10118 const SCEVPredicate *ScalarEvolution::getWrapPredicate(
10119  const SCEVAddRecExpr *AR,
10120  SCEVWrapPredicate::IncrementWrapFlags AddedFlags) {
10121  FoldingSetNodeID ID;
10122  // Unique this node based on the arguments
10123  ID.AddInteger(SCEVPredicate::P_Wrap);
10124  ID.AddPointer(AR);
10125  ID.AddInteger(AddedFlags);
10126  void *IP = nullptr;
10127  if (const auto *S = UniquePreds.FindNodeOrInsertPos(ID, IP))
10128  return S;
10129  auto *OF = new (SCEVAllocator)
10130  SCEVWrapPredicate(ID.Intern(SCEVAllocator), AR, AddedFlags);
10131  UniquePreds.InsertNode(OF, IP);
10132  return OF;
10133 }
10134 
10135 namespace {
10136 
10137 class SCEVPredicateRewriter : public SCEVRewriteVisitor<SCEVPredicateRewriter> {
10138 public:
10139  /// Rewrites \p S in the context of a loop L and the SCEV predication
10140  /// infrastructure.
10141  ///
10142  /// If \p Pred is non-null, the SCEV expression is rewritten to respect the
10143  /// equivalences present in \p Pred.
10144  ///
10145  /// If \p NewPreds is non-null, rewrite is free to add further predicates to
10146  /// \p NewPreds such that the result will be an AddRecExpr.
10147  static const SCEV *rewrite(const SCEV *S, const Loop *L, ScalarEvolution &SE,
10148  SmallPtrSetImpl<const SCEVPredicate *> *NewPreds,
10149  SCEVUnionPredicate *Pred) {
10150  SCEVPredicateRewriter Rewriter(L, SE, NewPreds, Pred);
10151  return Rewriter.visit(S);
10152  }
10153 
10154  SCEVPredicateRewriter(const Loop *L, ScalarEvolution &SE,
10155  SmallPtrSetImpl<const SCEVPredicate *> *NewPreds,
10156  SCEVUnionPredicate *Pred)
10157  : SCEVRewriteVisitor(SE), NewPreds(NewPreds), Pred(Pred), L(L) {}
10158 
10159  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
10160  if (Pred) {
10161  auto ExprPreds = Pred->getPredicatesForExpr(Expr);
10162  for (auto *Pred : ExprPreds)
10163  if (const auto *IPred = dyn_cast<SCEVEqualPredicate>(Pred))
10164  if (IPred->getLHS() == Expr)
10165  return IPred->getRHS();
10166  }
10167 
10168  return Expr;
10169  }
10170 
10171  const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
10172  const SCEV *Operand = visit(Expr->getOperand());
10173  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Operand);
10174  if (AR && AR->getLoop() == L && AR->isAffine()) {
10175  // This couldn't be folded because the operand didn't have the nuw
10176  // flag. Add the nusw flag as an assumption that we could make.
10177  const SCEV *Step = AR->getStepRecurrence(SE);
10178  Type *Ty = Expr->getType();
10179  if (addOverflowAssumption(AR, SCEVWrapPredicate::IncrementNUSW))
10180  return SE.getAddRecExpr(SE.getZeroExtendExpr(AR->getStart(), Ty),
10181  SE.getSignExtendExpr(Step, Ty), L,
10182  AR->getNoWrapFlags());
10183  }
10184  return SE.getZeroExtendExpr(Operand, Expr->getType());
10185  }
10186 
10187  const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
10188  const SCEV *Operand = visit(Expr->getOperand());
10189  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Operand);
10190  if (AR && AR->getLoop() == L && AR->isAffine()) {
10191  // This couldn't be folded because the operand didn't have the nsw
10192  // flag. Add the nssw flag as an assumption that we could make.
10193  const SCEV *Step = AR->getStepRecurrence(SE);
10194  Type *Ty = Expr->getType();
10195  if (addOverflowAssumption(AR, SCEVWrapPredicate::IncrementNSSW))
10196  return SE.getAddRecExpr(SE.getSignExtendExpr(AR->getStart(), Ty),
10197  SE.getSignExtendExpr(Step, Ty), L,
10198  AR->getNoWrapFlags());
10199  }
10200  return SE.getSignExtendExpr(Operand, Expr->getType());
10201  }
10202 
10203 private:
10204  bool addOverflowAssumption(const SCEVAddRecExpr *AR,
10205  SCEVWrapPredicate::IncrementWrapFlags AddedFlags) {
10206  auto *A = SE.getWrapPredicate(AR, AddedFlags);
10207  if (!NewPreds) {
10208  // Check if we've already made this assumption.
10209  return Pred && Pred->implies(A);
10210  }
10211  NewPreds->insert(A);
10212  return true;
10213  }
10214 
10215  SmallPtrSetImpl<const SCEVPredicate *> *NewPreds;
10216  SCEVUnionPredicate *Pred;
10217  const Loop *L;
10218 };
10219 } // end anonymous namespace
10220 
10221 const SCEV *ScalarEvolution::rewriteUsingPredicate(const SCEV *S, const Loop *L,
10222  SCEVUnionPredicate &Preds) {
10223  return SCEVPredicateRewriter::rewrite(S, L, *this, nullptr, &Preds);
10224 }
10225 
10226 const SCEVAddRecExpr *ScalarEvolution::convertSCEVToAddRecWithPredicates(
10227  const SCEV *S, const Loop *L,
10228  SmallPtrSetImpl<const SCEVPredicate *> &Preds) {
10229 
10230  SmallPtrSet<const SCEVPredicate *, 4> TransformPreds;
10231  S = SCEVPredicateRewriter::rewrite(S, L, *this, &TransformPreds, nullptr);
10232  auto *AddRec = dyn_cast<SCEVAddRecExpr>(S);
10233 
10234  if (!AddRec)
10235  return nullptr;
10236 
10237  // Since the transformation was successful, we can now transfer the SCEV
10238  // predicates.
10239  for (auto *P : TransformPreds)
10240  Preds.insert(P);
10241 
10242  return AddRec;
10243 }
10244 
10245 /// SCEV predicates
10246 SCEVPredicate::SCEVPredicate(const FoldingSetNodeIDRef ID,
10247  SCEVPredicateKind Kind)
10248  : FastID(ID), Kind(Kind) {}
10249 
10250 SCEVEqualPredicate::SCEVEqualPredicate(const FoldingSetNodeIDRef ID,
10251  const SCEVUnknown *LHS,
10252  const SCEVConstant *RHS)
10253  : SCEVPredicate(ID, P_Equal), LHS(LHS), RHS(RHS) {}
10254 
10255 bool SCEVEqualPredicate::implies(const SCEVPredicate *N) const {
10256  const auto *Op = dyn_cast<SCEVEqualPredicate>(N);
10257 
10258  if (!Op)
10259  return false;
10260 
10261  return Op->LHS == LHS && Op->RHS == RHS;
10262 }
10263 
10264 bool SCEVEqualPredicate::isAlwaysTrue() const { return false; }
10265 
10266 const SCEV *SCEVEqualPredicate::getExpr() const { return LHS; }
10267 
10268 void SCEVEqualPredicate::print(raw_ostream &OS, unsigned Depth) const {
10269  OS.indent(Depth) << "Equal predicate: " << *LHS << " == " << *RHS << "\n";
10270 }
10271 
10272 SCEVWrapPredicate::SCEVWrapPredicate(const FoldingSetNodeIDRef ID,
10273  const SCEVAddRecExpr *AR,
10274  IncrementWrapFlags Flags)
10275  : SCEVPredicate(ID, P_Wrap), AR(AR), Flags(Flags) {}
10276 
10277 const SCEV *SCEVWrapPredicate::getExpr() const { return AR; }
10278 
10279 bool SCEVWrapPredicate::implies(const SCEVPredicate *N) const {
10280  const auto *Op = dyn_cast<SCEVWrapPredicate>(N);
10281 
10282  return Op && Op->AR == AR && setFlags(Flags, Op->Flags) == Flags;
10283 }
10284 
10285 bool SCEVWrapPredicate::isAlwaysTrue() const {
10286  SCEV::NoWrapFlags ScevFlags = AR->getNoWrapFlags();
10287  IncrementWrapFlags IFlags = Flags;
10288 
10289  if (ScalarEvolution::setFlags(ScevFlags, SCEV::FlagNSW) == ScevFlags)
10290  IFlags = clearFlags(IFlags, IncrementNSSW);
10291 
10292  return IFlags == IncrementAnyWrap;
10293 }
10294 
10295 void SCEVWrapPredicate::print(raw_ostream &OS, unsigned Depth) const {
10296  OS.indent(Depth) << *getExpr() << " Added Flags: ";
10298  OS << "<nusw>";
10300  OS << "<nssw>";
10301  OS << "\n";
10302 }
10303 
10304 SCEVWrapPredicate::IncrementWrapFlags
10305 SCEVWrapPredicate::getImpliedFlags(const SCEVAddRecExpr *AR,
10306  ScalarEvolution &SE) {
10307  IncrementWrapFlags ImpliedFlags = IncrementAnyWrap;
10308  SCEV::NoWrapFlags StaticFlags = AR->getNoWrapFlags();
10309 
10310  // We can safely transfer the NSW flag as NSSW.
10311  if (ScalarEvolution::setFlags(StaticFlags, SCEV::FlagNSW) == StaticFlags)
10312  ImpliedFlags = IncrementNSSW;
10313 
10314  if (ScalarEvolution::setFlags(StaticFlags, SCEV::FlagNUW) == StaticFlags) {
10315  // If the increment is positive, the SCEV NUW flag will also imply the
10316  // WrapPredicate NUSW flag.
10317  if (const auto *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(SE)))
10318  if (Step->getValue()->getValue().isNonNegative())
10319  ImpliedFlags = setFlags(ImpliedFlags, IncrementNUSW);
10320  }
10321 
10322  return ImpliedFlags;
10323 }
10324 
10325 /// Union predicates don't get cached so create a dummy set ID for it.
10326 SCEVUnionPredicate::SCEVUnionPredicate()
10327  : SCEVPredicate(FoldingSetNodeIDRef(nullptr, 0), P_Union) {}
10328 
10329 bool SCEVUnionPredicate::isAlwaysTrue() const {
10330  return all_of(Preds,
10331  [](const SCEVPredicate *I) { return I->isAlwaysTrue(); });
10332 }
10333 
10334 ArrayRef<const SCEVPredicate *>
10335 SCEVUnionPredicate::getPredicatesForExpr(const SCEV *Expr) {
10336  auto I = SCEVToPreds.find(Expr);
10337  if (I == SCEVToPreds.end())
10338  return ArrayRef<const SCEVPredicate *>();
10339  return I->second;
10340 }
10341 
10342 bool SCEVUnionPredicate::implies(const SCEVPredicate *N) const {
10343  if (const auto *Set = dyn_cast<SCEVUnionPredicate>(N))
10344  return all_of(Set->Preds,
10345  [this](const SCEVPredicate *I) { return this->implies(I); });
10346 
10347  auto ScevPredsIt = SCEVToPreds.find(N->getExpr());
10348  if (ScevPredsIt == SCEVToPreds.end())
10349  return false;
10350  auto &SCEVPreds = ScevPredsIt->second;
10351 
10352  return any_of(SCEVPreds,
10353  [N](const SCEVPredicate *I) { return I->implies(N); });
10354 }
10355 
10356 const SCEV *SCEVUnionPredicate::getExpr() const { return nullptr; }
10357 
10358 void SCEVUnionPredicate::print(raw_ostream &OS, unsigned Depth) const {
10359  for (auto Pred : Preds)
10360  Pred->print(OS, Depth);
10361 }
10362 
10363 void SCEVUnionPredicate::add(const SCEVPredicate *N) {
10364  if (const auto *Set = dyn_cast<SCEVUnionPredicate>(N)) {
10365  for (auto Pred : Set->Preds)
10366  add(Pred);
10367  return;
10368  }
10369 
10370  if (implies(N))
10371  return;
10372 
10373  const SCEV *Key = N->getExpr();
10374  assert(Key && "Only SCEVUnionPredicate doesn't have an "
10375  " associated expression!");
10376 
10377  SCEVToPreds[Key].push_back(N);
10378  Preds.push_back(N);
10379 }
10380 
10381 PredicatedScalarEvolution::PredicatedScalarEvolution(ScalarEvolution &SE,
10382  Loop &L)
10383  : SE(SE), L(L), Generation(0), BackedgeCount(nullptr) {}
10384 
10385 const SCEV *PredicatedScalarEvolution::getSCEV(Value *V) {
10386  const SCEV *Expr = SE.getSCEV(V);
10387  RewriteEntry &Entry = RewriteMap[Expr];
10388 
10389  // If we already have an entry and the version matches, return it.
10390  if (Entry.second && Generation == Entry.first)
10391  return Entry.second;
10392 
10393  // We found an entry but it's stale. Rewrite the stale entry
10394  // according to the current predicate.
10395  if (Entry.second)
10396  Expr = Entry.second;
10397 
10398  const SCEV *NewSCEV = SE.rewriteUsingPredicate(Expr, &L, Preds);
10399  Entry = {Generation, NewSCEV};
10400 
10401  return NewSCEV;
10402 }
10403 
10404 const SCEV *PredicatedScalarEvolution::getBackedgeTakenCount() {
10405  if (!BackedgeCount) {
10406  SCEVUnionPredicate BackedgePred;
10407  BackedgeCount = SE.getPredicatedBackedgeTakenCount(&L, BackedgePred);
10408  addPredicate(BackedgePred);
10409  }
10410  return BackedgeCount;
10411 }
10412 
10413 void PredicatedScalarEvolution::addPredicate(const SCEVPredicate &Pred) {
10414  if (Preds.implies(&Pred))
10415  return;
10416  Preds.add(&Pred);
10417  updateGeneration();
10418 }
10419 
10420 const SCEVUnionPredicate &PredicatedScalarEvolution::getUnionPredicate() const {
10421  return Preds;
10422 }
10423 
10424 void PredicatedScalarEvolution::updateGeneration() {
10425  // If the generation number wrapped recompute everything.
10426  if (++Generation == 0) {
10427  for (auto &II : RewriteMap) {
10428  const SCEV *Rewritten = II.second.second;
10429  II.second = {Generation, SE.rewriteUsingPredicate(Rewritten, &L, Preds)};
10430  }
10431  }
10432 }
10433 
10434 void PredicatedScalarEvolution::setNoOverflow(
10435  Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags) {
10436  const SCEV *Expr = getSCEV(V);
10437  const auto *AR = cast<SCEVAddRecExpr>(Expr);
10438 
10439  auto ImpliedFlags = SCEVWrapPredicate::getImpliedFlags(AR, SE);
10440 
10441  // Clear the statically implied flags.
10442  Flags = SCEVWrapPredicate::clearFlags(Flags, ImpliedFlags);
10443  addPredicate(*SE.getWrapPredicate(AR, Flags));
10444 
10445  auto II = FlagsMap.insert({V, Flags});
10446  if (!II.second)
10447  II.first->second = SCEVWrapPredicate::setFlags(Flags, II.first->second);
10448 }
10449 
10450 bool PredicatedScalarEvolution::hasNoOverflow(
10451  Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags) {
10452  const SCEV *Expr = getSCEV(V);
10453  const auto *AR = cast<SCEVAddRecExpr>(Expr);
10454 
10455  Flags = SCEVWrapPredicate::clearFlags(
10456  Flags, SCEVWrapPredicate::getImpliedFlags(AR, SE));
10457 
10458  auto II = FlagsMap.find(V);
10459 
10460  if (II != FlagsMap.end())
10461  Flags = SCEVWrapPredicate::clearFlags(Flags, II->second);
10462 
10463  return Flags == SCEVWrapPredicate::IncrementAnyWrap;
10464 }
10465 
10466 const SCEVAddRecExpr *PredicatedScalarEvolution::getAsAddRec(Value *V) {
10467  const SCEV *Expr = this->getSCEV(V);
10468  SmallPtrSet<const SCEVPredicate *, 4> NewPreds;
10469  auto *New = SE.convertSCEVToAddRecWithPredicates(Expr, &L, NewPreds);
10470 
10471  if (!New)
10472  return nullptr;
10473 
10474  for (auto *P : NewPreds)
10475  Preds.add(P);
10476 
10477  updateGeneration();
10478  RewriteMap[SE.getSCEV(V)] = {Generation, New};
10479  return New;
10480 }
10481 
10482 PredicatedScalarEvolution::PredicatedScalarEvolution(
10483  const PredicatedScalarEvolution &Init)
10484  : RewriteMap(Init.RewriteMap), SE(Init.SE), L(Init.L), Preds(Init.Preds),
10485  Generation(Init.Generation), BackedgeCount(Init.BackedgeCount) {
10486  for (const auto &I : Init.FlagsMap)
10487  FlagsMap.insert(I);
10488 }
10489 
10490 void PredicatedScalarEvolution::print(raw_ostream &OS, unsigned Depth) const {
10491  // For each block.
10492  for (auto *BB : L.getBlocks())
10493  for (auto &I : *BB) {
10494  if (!SE.isSCEVable(I.getType()))
10495  continue;
10496 
10497  auto *Expr = SE.getSCEV(&I);
10498  auto II = RewriteMap.find(Expr);
10499 
10500  if (II == RewriteMap.end())
10501  continue;
10502 
10503  // Don't print things that are not interesting.
10504  if (II->second.second == Expr)
10505  continue;
10506 
10507  OS.indent(Depth) << "[PSE]" << I << ":\n";
10508  OS.indent(Depth + 2) << *Expr << "\n";
10509  OS.indent(Depth + 2) << "--> " << *II->second.second << "\n";
10510  }
10511 }
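// A minimal usage sketch, editorial and not part of this file, for the
// PredicatedScalarEvolution interface implemented above. "SE" is an existing
// ScalarEvolution result, "L" a loop, and "Ptr" a pointer value whose
// evolution we want as an AddRec even if that requires runtime predicates.
//
//   PredicatedScalarEvolution PSE(SE, *L);
//   if (const SCEVAddRecExpr *AR = PSE.getAsAddRec(Ptr)) {
//     // AR is valid under PSE.getUnionPredicate(); a client such as loop
//     // versioning must emit runtime checks for those predicates.
//     const SCEVUnionPredicate &Preds = PSE.getUnionPredicate();
//     (void)AR; (void)Preds;
//   }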
NoWrapFlags getNoWrapFlags(NoWrapFlags Mask=NoWrapMask) const
MachineLoop * L
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type (if unknown returns 0).
const SCEV * getTruncateOrNoop(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
const NoneType None
Definition: None.h:23
const Use & getOperandUse(unsigned i) const
Definition: User.h:158
virtual const SCEV * getExpr() const =0
Returns the SCEV to which this predicate applies, or nullptr if this is a SCEVUnionPredicate.
const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
Return the value of this chain of recurrences at the specified iteration number.
static bool containsParameters(SmallVectorImpl< const SCEV * > &Terms)
void AddPointer(const void *Ptr)
Add* - Add various data types to Bit data.
Definition: FoldingSet.cpp:52
IncrementWrapFlags getFlags() const
Returns the set assumed no overflow flags.
APInt multiplicativeInverse(const APInt &modulo) const
Computes the multiplicative inverse of this APInt for a given modulo.
Definition: APInt.cpp:1354
void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
Type * getSourceElementType() const
Definition: Operator.cpp:9
APInt getSignedMin() const
Return the smallest signed value contained in the ConstantRange.
This routine provides some synthesis utilities to produce sequences of values.
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:102
static cl::opt< bool > VerifySCEV("verify-scev", cl::desc("Verify ScalarEvolution's backedge taken counts (slow)"))
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:513
IntegerType * getType() const
getType - Specialize the getType() method to always return an IntegerType, which reduces the amount o...
Definition: Constants.h:177
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:64
void setNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags)
Proves that V doesn't overflow by adding SCEV predicate.
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:870
The SCEV properly dominates the block.
bool invalidate(IRUnitT &IR, const PreservedAnalyses &PA)
Trigger the invalidation of some other analysis pass if not already handled and return whether it was...
Definition: PassManager.h:543
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
BasicBlock * getUniquePredecessor()
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:239
static const SCEV * SolveLinEquationWithOverflow(const APInt &A, const APInt &B, ScalarEvolution &SE)
Finds the minimum unsigned root of the following equation:
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
const SCEV * getGEPExpr(GEPOperator *GEP, const SmallVectorImpl< const SCEV * > &IndexExprs)
Returns an expression for a GEP.
SCEVCastExpr(const FoldingSetNodeIDRef ID, unsigned SCEVTy, const SCEV *op, Type *ty)
LLVMContext & Context
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1309
bool isOne() const
Return true if the expression is a constant one.
const SCEV * getExitCount(Loop *L, BasicBlock *ExitingBlock)
Get the expression for the number of loop iterations for which this loop is guaranteed not to exit vi...
const SCEV * getConstant(ConstantInt *V)
STATISTIC(NumFunctions,"Total number of functions")
bool hasValue() const
Definition: Optional.h:125
static const SCEV * getExtendAddRecStart(const SCEVAddRecExpr *AR, Type *Ty, ScalarEvolution *SE)
size_t i
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:162
ConstantRange sextOrTrunc(uint32_t BitWidth) const
Make this range have the bit width given by BitWidth.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds...
Definition: Compiler.h:450
APInt GreatestCommonDivisor(const APInt &Val1, const APInt &Val2)
Compute GCD of two APInt values.
Definition: APInt.cpp:801
static const SCEV * removeConstantFactors(ScalarEvolution &SE, const SCEV *T)
bool canConstantFoldCallTo(const Function *F)
canConstantFoldCallTo - Return true if its even possible to fold a call to the specified function...
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:52
LLVMContext & getContext() const
static void PushDefUseChildren(Instruction *I, SmallVectorImpl< Instruction * > &Worklist)
Push users of the given Instruction onto the given Worklist.
auto remove_if(R &&Range, UnaryPredicate P) -> decltype(std::begin(Range))
Provide wrappers to std::remove_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:776
SCEVUnionPredicate()
Union predicates don't get cached so create a dummy set ID for it.
bool isZero() const
Return true if the expression is a constant zero.
static bool BrPHIToSelect(DominatorTree &DT, BranchInst *BI, PHINode *Merge, Value *&C, Value *&LHS, Value *&RHS)
virtual bool implies(const SCEVPredicate *N) const =0
Returns true if this predicate implies N.
void setBit(unsigned bitPosition)
Set a given bit to 1.
Definition: APInt.cpp:553
bool isKnownNotFullPoison(const Instruction *PoisonI)
Return true if this function can prove that if PoisonI is executed and yields a full-poison value (al...
unsigned getNumOperands() const
Definition: User.h:167
static uint64_t Choose(uint64_t n, uint64_t k, bool &Overflow)
Compute the result of "n choose k", the binomial coefficient.
const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a sin...
The main scalar evolution driver.
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:329
This class represents a function call, abstracting a target machine's calling convention.
This file contains the declarations for metadata subclasses.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Get a value with low bits set.
Definition: APInt.h:536
IncrementWrapFlags
Similar to SCEV::NoWrapFlags, but with slightly different semantics for FlagNUSW. ...
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE, etc.
Definition: InstrTypes.h:984
An immutable pass that tracks lazily created AssumptionCache objects.
scalar Scalar Evolution false
bool isKnownNonNegative(const SCEV *S)
Test if the given expression is known to be non-negative.
static const SCEV * getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty, ScalarEvolution *SE)
raw_ostream & indent(unsigned NumSpaces)
indent - Insert 'NumSpaces' spaces.
unsigned less or equal
Definition: InstrTypes.h:906
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass...
unsigned less than
Definition: InstrTypes.h:905
bool getEquivalentICmp(CmpInst::Predicate &Pred, APInt &RHS) const
Set up Pred and RHS such that ConstantRange::makeExactICmpRegion(Pred, RHS) == *this.
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
Definition: PatternMatch.h:536
bool mayHaveSideEffects() const
Return true if the instruction may have side effects.
Definition: Instruction.h:450
static Constant * getGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList, bool InBounds=false, Optional< unsigned > InRangeIndex=None, Type *OnlyIfReducedTy=nullptr)
Getelementptr form.
Definition: Constants.h:1126
static LLVM_NODISCARD SCEV::NoWrapFlags setFlags(SCEV::NoWrapFlags Flags, SCEV::NoWrapFlags OnFlags)
std::vector< LoopT * >::const_reverse_iterator reverse_iterator
Definition: LoopInfo.h:566
bool isSigned() const
Determine if this instruction is using a signed comparison.
Definition: InstrTypes.h:1027
A cache of .assume calls within a function.
static void sdivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Definition: APInt.cpp:1965
bool properlyDominates(const SCEV *S, const BasicBlock *BB)
Return true if elements that makes up the given SCEV properly dominate the specified basic block...
static void PushLoopPHIs(const Loop *L, SmallVectorImpl< Instruction * > &Worklist)
Push PHI nodes in the header of the given loop onto the given Worklist.
uint32_t GetMinTrailingZeros(const SCEV *S)
Determine the minimum number of zero bits that S is guaranteed to end in (at every loop iteration)...
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
LLVM_NODISCARD detail::scope_exit< typename std::decay< Callable >::type > make_scope_exit(Callable &&F)
Definition: ScopeExit.h:47
APInt getSignedMax() const
Return the largest signed value contained in the ConstantRange.
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop...
Definition: LoopInfo.h:160
This class represents a truncation of an integer value to a smaller integer value.
bool isMask(unsigned numBits, const APInt &APIVal)
Definition: APInt.h:1812
INITIALIZE_PASS_BEGIN(ScalarEvolutionWrapperPass,"scalar-evolution","Scalar Evolution Analysis", false, true) INITIALIZE_PASS_END(ScalarEvolutionWrapperPass,"scalar-evolution","Scalar Evolution Analysis", false, true)
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:736
static ConstantInt * EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C, ScalarEvolution &SE)
LoopT * getParentLoop() const
Definition: LoopInfo.h:103
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:100
static const SCEV * getSignedOverflowLimitForStep(const SCEV *Step, ICmpInst::Predicate *Pred, ScalarEvolution *SE)
Metadata node.
Definition: Metadata.h:830
bool isLoopBackedgeGuardedByCond(const Loop *L, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test whether the backedge of the loop is protected by a conditional between LHS and RHS...
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:189
An instruction for reading from memory.
Definition: Instructions.h:164
static std::pair< const SCEV *, ConstantInt * > splitAddExpr(const SCEV *S)
Try to split a SCEVAddExpr into a pair of {SCEV, ConstantInt}.
FunctionType * getType(LLVMContext &Context, ID id, ArrayRef< Type * > Tys=None)
Return the function type for an intrinsic.
Definition: Function.cpp:905
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:575
static ConstantRange makeAllowedICmpRegion(CmpInst::Predicate Pred, const ConstantRange &Other)
Produce the smallest range such that all values that may satisfy the given predicate with any value c...
An object of this class is returned by queries that could not be answered.
void reserve(size_type N)
Definition: SmallVector.h:377
static bool HasSameValue(const SCEV *A, const SCEV *B)
SCEV structural equivalence is usually sufficient for testing whether two expressions are equal...
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
op_iterator idx_end()
Definition: Operator.h:398
const std::vector< BlockT * > & getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:139
const SCEV * getUMaxFromMismatchedTypes(const SCEV *LHS, const SCEV *RHS)
Promote the operands to the wider of the types using zero-extension, and then perform a umax operatio...
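A sketch of the equivalent composition from primitives documented elsewhere on this page (umaxMismatched is a hypothetical name; integer-typed SCEVs assumed):

    #include "llvm/Analysis/ScalarEvolution.h"
    using namespace llvm;

    // Sketch: widen the narrower operand by zero-extension, then take umax.
    static const SCEV *umaxMismatched(ScalarEvolution &SE, const SCEV *LHS,
                                      const SCEV *RHS) {
      Type *WideTy = SE.getTypeSizeInBits(LHS->getType()) >=
                             SE.getTypeSizeInBits(RHS->getType())
                         ? LHS->getType()
                         : RHS->getType();
      LHS = SE.getNoopOrZeroExtend(LHS, WideTy); // no-op on the wider operand
      RHS = SE.getNoopOrZeroExtend(RHS, WideTy);
      return SE.getUMaxExpr(LHS, RHS);
    }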
const SCEV * getExpr() const override
Returns the SCEV to which this predicate applies, or nullptr if this is a SCEVUnionPredicate.
static Optional< ConstantRange > GetRangeFromMetadata(Value *V)
Helper method to assign a range to V from metadata present in the IR.
BlockT * getHeader() const
Definition: LoopInfo.h:102
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:431
NodeT * getIDom(NodeT *BB) const
ConstantInt * findCaseDest(BasicBlock *BB)
Finds the unique case value for a given successor.
This is the base class for unary cast operator classes.
bool propagatesFullPoison(const Instruction *I)
Return true if this function can prove that I is guaranteed to yield full-poison (all bits poison) if...
ConstantRange smax(const ConstantRange &Other) const
Return a new range representing the possible values resulting from a signed maximum of a value in thi...
bool isOffsetOf(Type *&STy, Constant *&FieldNo) const
const SCEV * getStart() const
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:157
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:345
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:228
static Constant * getAdd(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2132
bool isKnownNonPositive(const SCEV *S)
Test if the given expression is known to be non-positive.
Predicate getSignedPredicate()
For example, ULT->SLT, ULE->SLE, UGT->SGT, UGE->SGE, SLT->Failed assert.
Definition: InstrTypes.h:1046
static void getLoopBackedgeTakenCounts(Loop *L, VerifyMap &Map, ScalarEvolution &SE)
getLoopBackedgeTakenCounts - Helper method for verifyAnalysis.
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:324
The SCEV is loop-invariant.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:172
static LLVM_NODISCARD SCEVWrapPredicate::IncrementWrapFlags setFlags(SCEVWrapPredicate::IncrementWrapFlags Flags, SCEVWrapPredicate::IncrementWrapFlags OnFlags)
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1865
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:41
void setValPtr(Value *P)
Definition: ValueHandle.h:340
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:53
void computeAccessFunctions(const SCEV *Expr, SmallVectorImpl< const SCEV * > &Subscripts, SmallVectorImpl< const SCEV * > &Sizes)
Return in Subscripts the access functions for each dimension in Sizes (third step of delinearization)...
bool isUnconditional() const
bool isAlwaysTrue() const override
Returns true if the predicate is always true.
ArrayRef< const SCEVPredicate * > getPredicatesForExpr(const SCEV *Expr)
Returns a reference to a vector containing all predicates which apply to Expr.
static Constant * getIntegerCast(Constant *C, Type *Ty, bool isSigned)
Create a ZExt, Bitcast or Trunc for integer -> integer casts.
Definition: Constants.cpp:1535
const SCEVPredicate * getWrapPredicate(const SCEVAddRecExpr *AR, SCEVWrapPredicate::IncrementWrapFlags AddedFlags)
bool isAlignOf(Type *&AllocTy) const
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:555
bool isIdenticalTo(const Instruction *I) const
Return true if the specified instruction is exactly identical to the current one. ...
ConstantRange truncate(uint32_t BitWidth) const
Return a new range in the specified integer type, which must be strictly smaller than the current typ...
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:143
const DataLayout & getDataLayout() const
Return the DataLayout associated with the module this SCEV instance is operating on.
Class to represent struct types.
Definition: DerivedTypes.h:199
void verifyAnalysis() const override
verifyAnalysis() - This member can be implemented by an analysis pass to check the state of analysis infor...
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.cpp:1122
const SCEV *const * op_iterator
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:32
static Constant * AddOne(Constant *C)
Add one to a Constant.
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:36
uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
static int CompareValueComplexity(SmallSet< std::pair< Value *, Value * >, 8 > &EqCache, const LoopInfo *const LI, Value *LV, Value *RV, unsigned Depth)
Compare the two values LV and RV in terms of their "complexity" where "complexity" is a partial (and ...
void AddInteger(signed I)
Definition: FoldingSet.cpp:61
static Constant * getSExt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:1573
This node is the base class for n'ary commutative operators.
bool isIntN(unsigned N) const
Check if this APInt has an N-bits unsigned integer value.
Definition: APInt.h:377
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:806
ConstantRange signExtend(uint32_t BitWidth) const
Return a new range in the specified integer type, which must be strictly larger than the current type...
op_iterator op_begin() const
void setNoWrapFlags(NoWrapFlags Flags)
Set flags for a non-recurrence without clearing previously set flags.
This node represents multiplication of some number of SCEVs.
void addPredicate(const SCEVPredicate &Pred)
Adds a new predicate.
ConstantRange multiply(const ConstantRange &Other) const
Return a new range representing the possible values resulting from a multiplication of a value in thi...
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:60
APInt lshr(const APInt &LHS, unsigned shiftAmt)
Logical right-shift function.
Definition: APInt.h:1892
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:154
static const SCEV * BinomialCoefficient(const SCEV *It, unsigned K, ScalarEvolution &SE, Type *ResultTy)
Compute BC(It, K). The result has width W. Assumes K > 0.
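For context, this is the standard chains-of-recurrences evaluation step: the value of an add recurrence {A_0,+,A_1,+,...,+,A_n} after It iterations is

    A_0*BC(It,0) + A_1*BC(It,1) + ... + A_n*BC(It,n),
    where BC(It,K) = It*(It-1)*...*(It-K+1) / K!

so evaluating an AddRec at a known iteration count reduces to computing these binomial terms at the result width W.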
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:241
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition: APInt.h:850
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
Definition: APInt.h:1788
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
Definition: APInt.h:1783
static Constant * getZExt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:1587
bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS...
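A small usage sketch (backedgeKnownTaken is a hypothetical helper; SE and L are assumed valid):

    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/Analysis/ScalarEvolution.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Sketch: is the backedge-taken count of L provably nonzero?
    static bool backedgeKnownTaken(ScalarEvolution &SE, const Loop *L) {
      const SCEV *BEC = SE.getBackedgeTakenCount(L);
      if (isa<SCEVCouldNotCompute>(BEC))
        return false;                    // SCEV could not analyze the loop
      return SE.isKnownPredicate(ICmpInst::ICMP_UGT, BEC,
                                 SE.getZero(BEC->getType()));
    }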
reverse_iterator rbegin() const
Definition: LoopInfo.h:134
static bool isPrivateLinkage(LinkageTypes Linkage)
Definition: GlobalValue.h:297
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:873
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
const SCEV * getOffsetOfExpr(Type *IntTy, StructType *STy, unsigned FieldNo)
Return an expression for offsetof on the given field with type IntTy.
unsigned getSmallConstantMaxTripCount(Loop *L)
Returns the upper bound of the loop trip count as a normal unsigned value.
ConstantRange getSignedRange(const SCEV *S)
Determine the signed range for a particular SCEV.
This node represents a polynomial recurrence on the trip count of the specified loop.
bool contains(const APInt &Val) const
Return true if the specified value is in the set.
bool implies(const SCEVPredicate *N) const override
Implementation of the SCEVPredicate interface.
void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
Definition: AsmWriter.cpp:3473
const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:121
APInt udiv(const APInt &LHS, const APInt &RHS)
Unsigned division function for APInt.
Definition: APInt.h:1911
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:269
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1101
BasicBlock * getSuccessor(unsigned i) const
const SCEV * getSizeOfExpr(Type *IntTy, Type *AllocTy)
Return an expression for sizeof AllocTy that is type IntTy.
const SCEVAddRecExpr * getAsAddRec(Value *V)
Attempts to produce an AddRecExpr for V by adding additional SCEV predicates.
Base class for the actual dominator tree node.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1279
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:75
APInt sdiv(const APInt &LHS, const APInt &RHS)
Signed division function for APInt.
Definition: APInt.h:1906
An instruction for storing to memory.
Definition: Instructions.h:300
static cl::opt< unsigned > MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden, cl::desc("Maximum number of iterations SCEV will ""symbolically execute a constant ""derived loop"), cl::init(100))
bool hasDefinitiveInitializer() const
hasDefinitiveInitializer - Whether the global variable has an initializer, and any other instances of...
bool isArrayTy() const
True if this is an instance of ArrayType.
Definition: Type.h:210
SelectClass_match< Cond, LHS, RHS > m_Select(const Cond &C, const LHS &L, const RHS &R)
Definition: PatternMatch.h:758
static Constant * getUDiv(Constant *C1, Constant *C2, bool isExact=false)
Definition: Constants.cpp:2165
APInt ashr(const APInt &LHS, unsigned shiftAmt)
Arithmetic right-shift function.
Definition: APInt.h:1885
Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE size_t size() const
size - Get the string size.
Definition: StringRef.h:135
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:96
const SCEV * getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
Class to represent pointers.
Definition: DerivedTypes.h:443
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.cpp:501
static ConstantRange makeSatisfyingICmpRegion(CmpInst::Predicate Pred, const ConstantRange &Other)
Produce the largest range such that all values in the returned range satisfy the given predicate with...
unsigned getNumIncomingValues() const
Return the number of incoming edges.
bool implies(const SCEVPredicate *N) const override
Returns true if this predicate implies N.
const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
Definition: Module.cpp:196
bool isLoopEntryGuardedByCond(const Loop *L, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test whether entry to the loop is protected by a conditional between LHS and RHS. ...
static const SCEV * MatchNotExpr(const SCEV *Expr)
If Expr computes ~A, return A else return nullptr.
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:1695
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:254
FoldingSetNodeID - This class is used to gather all the unique data bits of a node.
Definition: FoldingSet.h:316
ValTy * getReturnedArgOperand() const
Definition: CallSite.h:296
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
Definition: Instructions.h:830
const SCEV * getCouldNotCompute()
Visit all nodes in the expression tree using worklist traversal.
succ_range successors()
Definition: InstrTypes.h:280
ScalarEvolution run(Function &F, FunctionAnalysisManager &AM)
virtual bool isAlwaysTrue() const =0
Returns true if the predicate is always true.
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:395
bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
Definition: PatternMatch.h:530
bool erase(const KeyT &Val)
Definition: DenseMap.h:243
static bool isInternalLinkage(LinkageTypes Linkage)
Definition: GlobalValue.h:294
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values...
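A sketch of the usual consumer pattern (matchAffine is a hypothetical helper):

    #include "llvm/Analysis/ScalarEvolution.h"
    #include "llvm/Analysis/ScalarEvolutionExpressions.h"
    using namespace llvm;

    // Sketch: recover (Start, Step) from an affine AddRec {Start,+,Step}<L>.
    static bool matchAffine(ScalarEvolution &SE, const SCEV *S,
                            const SCEV *&Start, const SCEV *&Step) {
      const auto *AR = dyn_cast<SCEVAddRecExpr>(S);
      if (!AR || !AR->isAffine())
        return false;
      Start = AR->getStart();            // value on entry to the loop
      Step = AR->getStepRecurrence(SE);  // loop-invariant per-iteration step
      return true;
    }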
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Subclasses of this class are all able to terminate a basic block.
Definition: InstrTypes.h:52
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:107
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt...
Definition: PatternMatch.h:180
static cl::opt< bool > VerifySCEVMap("verify-scev-maps", cl::desc("Verify no dangling value in ScalarEvolution's ""ExprValueMap (slow)"))
ConstantRange intersectWith(const ConstantRange &CR) const
Return the range that results from the intersection of this range with another range.
This class defines a simple visitor class that may be used for various SCEV analysis purposes...
bool isSizeOf(Type *&AllocTy) const
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:916
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs...ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:653
bool isFullSet() const
Return true if this set contains all of the elements possible for this data-type. ...
ConstantRange getConstantRangeFromMetadata(const MDNode &RangeMD)
Parse out a conservative ConstantRange from !range metadata.
LLVM Basic Block Representation.
Definition: BasicBlock.h:51
This class represents a binary unsigned division operation.
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:48
Type * getType() const
Return the LLVM type of this SCEV expression.
LoopDisposition getLoopDisposition(const SCEV *S, const Loop *L)
Return the "disposition" of the given SCEV with respect to the given loop.
static void GroupByComplexity(SmallVectorImpl< const SCEV * > &Ops, LoopInfo *LI)
Given a list of SCEV objects, order them by their complexity, and group objects of the same complexit...
Conditional or Unconditional Branch instruction.
bool containsAddRecurrence(const SCEV *S)
Return true if the SCEV is a scAddRecExpr or it contains scAddRecExpr.
bool sge(const APInt &RHS) const
Signed greater or equal comparison.
Definition: APInt.h:1135
static bool IsMinConsistingOf(ScalarEvolution &SE, const SCEV *MaybeMinExpr, const SCEV *Candidate)
Is MaybeMinExpr an SMin or UMin of Candidate and some other values?
bool isEquality() const
Return true if this predicate is either EQ or NE.
static int CompareSCEVComplexity(SmallSet< std::pair< const SCEV *, const SCEV * >, 8 > &EqCacheSCEV, const LoopInfo *const LI, const SCEV *LHS, const SCEV *RHS, unsigned Depth=0)
This is an important base class in LLVM.
Definition: Constant.h:42
bool isMaxSignedValue() const
Determine if this is the largest signed value.
Definition: APInt.h:358
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:115
const SCEV * getOperand(unsigned i) const
void print(raw_ostream &OS, unsigned Depth=0) const override
Prints a textual representation of this predicate with an indentation of Depth.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:36
This file contains the declarations for the subclasses of Constant, which represent the different fla...
LLVM_ATTRIBUTE_RETURNS_NONNULL LLVM_ATTRIBUTE_RETURNS_NOALIAS void * Allocate(size_t Size, size_t Alignment)
Allocate space at the specified alignment.
Definition: Allocator.h:212
static unsigned getConstantTripCount(const SCEVConstant *ExitCount)
SCEVWrapPredicate(const FoldingSetNodeIDRef ID, const SCEVAddRecExpr *AR, IncrementWrapFlags Flags)
APInt Or(const APInt &LHS, const APInt &RHS)
Bitwise OR function for APInt.
Definition: APInt.h:1947
const SCEV * getExpr() const override
Implementation of the SCEVPredicate interface.
bool hasNoSignedWrap() const
Determine whether the no signed wrap flag is set.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:368
APInt Xor(const APInt &LHS, const APInt &RHS)
Bitwise XOR function for APInt.
Definition: APInt.h:1952
static cl::opt< unsigned > MulOpsInlineThreshold("scev-mulops-inline-threshold", cl::Hidden, cl::desc("Threshold for inlining multiplication operands into a SCEV"), cl::init(1000))
void add(const SCEVPredicate *N)
Adds a predicate to this union.
bool isZeroValue() const
Return true if the value is negative zero or null value.
Definition: Constants.cpp:70
void print(raw_ostream &OS, unsigned Depth) const override
Prints a textual representation of this predicate with an indentation of Depth.
static bool canConstantEvolve(Instruction *I, const Loop *L)
Determine whether this instruction can constant evolve within this loop assuming its operands can all...
void visitAll(const SCEV *Root)
const SCEV * getSMaxExpr(const SCEV *LHS, const SCEV *RHS)
bool isOverflowIntrinsicNoWrap(const IntrinsicInst *II, const DominatorTree &DT)
Returns true if the arithmetic part of the II 's result is used only along the paths control dependen...
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:484
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:939
const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
Represent the analysis usage information of a pass.
ConstantRange udiv(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an unsigned division of a value in...
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
Definition: PatternMatch.h:524
bool contains(const LoopT *L) const
Return true if the specified loop is contained within this loop.
Definition: LoopInfo.h:109
bool any_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:743
PreservedAnalysisChecker getChecker() const
Build a checker for this PreservedAnalyses and the specified analysis type.
Definition: PassManager.h:250
bool isFalseWhenEqual() const
This is just a convenience.
Definition: InstrTypes.h:1058
bool hasOperand(const SCEV *S, const SCEV *Op) const
Test whether the given SCEV has Op as a direct or indirect operand.
This instruction compares its operands according to the predicate given to the constructor.
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
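Because subtraction has no dedicated SCEV node, the documented A+B*-1 form amounts to the following composition (a sketch, not necessarily the exact in-tree code path):

    #include "llvm/Analysis/ScalarEvolution.h"
    using namespace llvm;

    // Sketch: LHS - RHS expressed as LHS + (-1 * RHS).
    static const SCEV *minusSketch(ScalarEvolution &SE, const SCEV *LHS,
                                   const SCEV *RHS) {
      return SE.getAddExpr(LHS, SE.getNegativeSCEV(RHS));
    }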
const SCEV * getMaxBackedgeTakenCount(const Loop *L)
Similar to getBackedgeTakenCount, except return the least SCEV value that is known never to be less t...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:880
Utility class for integer arithmetic operators which may exhibit overflow - Add, Sub, and Mul.
Definition: Operator.h:75
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1255
const SCEV * rewriteUsingPredicate(const SCEV *S, const Loop *L, SCEVUnionPredicate &A)
Re-writes the SCEV according to the Predicates in A.
bool SCEVExprContains(const SCEV *Root, PredTy Pred)
Return true if any node in Root satisfies the predicate Pred.
const SCEVAddRecExpr * getPostIncExpr(ScalarEvolution &SE) const
Return an expression representing the value of this expression one iteration of the loop ahead...
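For an affine recurrence this shift has a simple closed form in the chains-of-recurrences algebra:

    PostInc({A,+,B}<L>) = {A+B,+,B}<L>

that is, the same step started one increment later, which matches building it as the sum of the recurrence and its own step.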
const SCEV * getPredicatedBackedgeTakenCount(const Loop *L, SCEVUnionPredicate &Predicates)
Similar to getBackedgeTakenCount, except it will add a set of SCEV predicates to Predicates that are ...
Value * getPointerOperand()
Definition: Operator.h:401
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:52
bool isMaxValue() const
Determine if this is the largest unsigned value.
Definition: APInt.h:352
APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
Definition: APInt.cpp:1854
Value * getOperand(unsigned i) const
Definition: User.h:145
op_range operands()
Definition: User.h:213
unsigned getSmallConstantTripCount(Loop *L)
Returns the maximum trip count of the loop if it is a single-exit loop and we can compute a small max...
static Constant * getICmp(unsigned short pred, Constant *LHS, Constant *RHS, bool OnlyIfReduced=false)
get* - Return some common constants without having to specify the full Instruction::OPCODE identifier...
Definition: Constants.cpp:1948
iterator_range< block_iterator > blocks()
void print(raw_ostream &OS) const
Print out the bounds to a stream.
Class to represent integer types.
Definition: DerivedTypes.h:39
static LLVM_NODISCARD SCEV::NoWrapFlags clearFlags(SCEV::NoWrapFlags Flags, SCEV::NoWrapFlags OffFlags)
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:960
ConstantRange zeroExtend(uint32_t BitWidth) const
Return a new range in the specified integer type, which must be strictly larger than the current type...
bool isKnownNegative(const SCEV *S)
Test if the given expression is known to be negative.
static Constant * getNot(Constant *C)
Definition: Constants.cpp:2126
void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may affect its value...
MutableArrayRef< WeakVH > assumptions()
Access the list of assumption handles currently tracked for this function.
void setNoWrapFlags(NoWrapFlags Flags)
Set flags for a recurrence without clearing any previously set flags.
Constant * ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty, const DataLayout &DL)
ConstantFoldLoadFromConstPtr - Return the value that a load from C would produce if it is constant an...
static Constant * getAllOnesValue(Type *Ty)
Get the all ones value.
Definition: Constants.cpp:249
const SCEV * getLHS() const
const APInt & getAPInt() const
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:392
void findArrayDimensions(SmallVectorImpl< const SCEV * > &Terms, SmallVectorImpl< const SCEV * > &Sizes, const SCEV *ElementSize) const
Compute the array dimensions Sizes from the set of Terms extracted from the memory access function of...
bool isEmptySet() const
Return true if this set contains no members.
const SCEV * getUMinFromMismatchedTypes(const SCEV *LHS, const SCEV *RHS)
Promote the operands to the wider of the types using zero-extension, and then perform a umin operatio...
static bool IsAvailableOnEntry(const Loop *L, DominatorTree &DT, const SCEV *S, BasicBlock *BB)
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:213
iterator erase(const_iterator CI)
Definition: SmallVector.h:431
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:113
bool isNonConstantNegative() const
Return true if the specified scev is negated, but not a constant.
const SCEVAddRecExpr * convertSCEVToAddRecWithPredicates(const SCEV *S, const Loop *L, SmallPtrSetImpl< const SCEVPredicate * > &Preds)
Tries to convert the S expression to an AddRec expression, adding additional predicates to Preds as r...
static uint64_t umul_ov(uint64_t i, uint64_t j, bool &Overflow)
void print(raw_ostream &OS, unsigned Depth) const
Print the SCEV mappings done by the Predicated Scalar Evolution.
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:213
const SCEV * getNoopOrZeroExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldCompareInstOperands - Attempt to constant fold a compare instruction (icmp/fcmp) with the...
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:391
ConstantRange add(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an addition of a value in this ran...
op_iterator idx_begin()
Definition: Operator.h:396
std::string & str()
Flushes the stream contents to the target string and returns the string's reference.
Definition: raw_ostream.h:479
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
const SCEV * getRHS() const
const SCEV * getUDivExactExpr(const SCEV *LHS, const SCEV *RHS)
Get a canonical unsigned division expression, or something simpler if possible.
signed greater than
Definition: InstrTypes.h:907
bool isRelational() const
Return true if the predicate is relational (not EQ or NE).
The SCEV is loop-variant (unknown).
This class represents an assumption made using SCEV expressions which can be checked at run-time...
static Constant * EvaluateExpression(Value *V, const Loop *L, DenseMap< Instruction *, Constant * > &Vals, const DataLayout &DL, const TargetLibraryInfo *TLI)
EvaluateExpression - Given an expression that passes the getConstantEvolvingPHI predicate, evaluate its value assuming the PHI nodes in the loop take the constant values supplied in Vals.
void dump() const
This method is used for debugging.
static cl::opt< unsigned > MaxSCEVCompareDepth("scalar-evolution-max-scev-compare-depth", cl::Hidden, cl::desc("Maximum depth of recursive SCEV complexity comparisons"), cl::init(32))
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
Definition: APInt.h:1793
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:218
bool isConditional() const
const SCEV * getSMinExpr(const SCEV *LHS, const SCEV *RHS)
RetVal visitCouldNotCompute(const SCEVCouldNotCompute *S)
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1083
unsigned countTrailingZeros() const
Count the number of trailing zero bits.
Definition: APInt.cpp:703
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space...
Definition: DataLayout.cpp:709
A function analysis which provides an AssumptionCache.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
static int numberOfTerms(const SCEV *S)
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:234
bool hasNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags)
Returns true if we've proved that V doesn't wrap by means of a SCEV predicate.
Iterator for intrusive lists based on ilist_node.
static PHINode * getConstantEvolvingPHI(Value *V, const Loop *L)
getConstantEvolvingPHI - Given an LLVM value and a loop, return a PHI node in the loop that V is deri...
static ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
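A worked example (hypothetical driver code): over i8, the exact set of X satisfying "X u< 8" is the half-open range [0, 8):

    #include "llvm/IR/ConstantRange.h"
    #include "llvm/IR/InstrTypes.h"
    using namespace llvm;

    // Sketch: exact region for "X u< 8" over 8-bit values.
    static ConstantRange ultEightRegion() {
      APInt Eight(/*numBits=*/8, /*val=*/8);
      // Result is [0, 8): getLower() == 0, getUpper() == 8.
      return ConstantRange::makeExactICmpRegion(CmpInst::ICMP_ULT, Eight);
    }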
static const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2)
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the generic address space (address sp...
Definition: DerivedTypes.h:458
This is the shared class of boolean and integer constants.
Definition: Constants.h:88
bool isAlwaysTrue() const override
Implementation of the SCEVPredicate interface.
bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
auto find(R &&Range, const T &Val) -> decltype(std::begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:757
unsigned ComputeNumSignBits(const Value *Op, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr)
Return the number of times the sign bit of the register is replicated into the other bits...
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.cpp:533
bool hasNoUnsignedWrap() const
Determine whether the no unsigned wrap flag is set.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
void initializeScalarEvolutionWrapperPassPass(PassRegistry &)
reverse_iterator rend() const
Definition: LoopInfo.h:135
const SCEV * getTruncateExpr(const SCEV *Op, Type *Ty)
static LLVM_NODISCARD SCEVWrapPredicate::IncrementWrapFlags clearFlags(SCEVWrapPredicate::IncrementWrapFlags Flags, SCEVWrapPredicate::IncrementWrapFlags OffFlags)
Convenient IncrementWrapFlags manipulation methods.
const SCEV * getTruncateOrSignExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
void delinearize(const SCEV *Expr, SmallVectorImpl< const SCEV * > &Subscripts, SmallVectorImpl< const SCEV * > &Sizes, const SCEV *ElementSize)
Split this SCEVAddRecExpr into two vectors of SCEVs representing the subscripts and sizes of an array...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:843
bool isAllOnesValue() const
Return true if the expression is a constant all-ones value.
This is a utility class that provides an abstraction for the common functionality between Instruction...
Definition: Operator.h:33
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:230
Provides information about what library functions are available for the current target.
The SCEV dominates the block.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:175
unsigned getSmallConstantTripMultiple(Loop *L)
Returns the largest constant divisor of the trip count of the loop if it is a single-exit loop and we...
This class represents a range of values.
Definition: ConstantRange.h:45
bool isVolatile() const
Return true if this is a load from a volatile memory location.
Definition: Instructions.h:218
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
signed less than
Definition: InstrTypes.h:909
const APInt & getLower() const
Return the lower value for this range.
bool isTrueWhenEqual() const
This is just a convenience.
Definition: InstrTypes.h:1052
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:382
static LLVM_NODISCARD SCEVWrapPredicate::IncrementWrapFlags getImpliedFlags(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Returns the set of SCEVWrapPredicate no wrap flags implied by a SCEVAddRecExpr.
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition: APInt.h:337
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:1000
ScalarEvolution(Function &F, TargetLibraryInfo &TLI, AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI)
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator
Definition: DenseMap.h:62
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition: APInt.h:438
static Constant * getTrunc(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:1559
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:558
iterator_range< detail::value_sequence_iterator< ValueT > > seq(ValueT Begin, ValueT End)
Definition: Sequence.h:71
ConstantInt * getValue() const
void releaseMemory() override
releaseMemory() - This member can be implemented by a pass if it wants to be able to release its memo...
const SCEV * getUMaxExpr(const SCEV *LHS, const SCEV *RHS)
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void print(raw_ostream &OS, const Module *=nullptr) const override
print - Print out the internal state of the pass.
size_type count(const KeyT &Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:122
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:586
static Constant * getOtherIncomingValue(PHINode *PN, BasicBlock *BB)
signed less or equal
Definition: InstrTypes.h:910
LoopDisposition
An enum describing the relationship between a SCEV and a loop.
Class for arbitrary precision integers.
Definition: APInt.h:77
This node represents an addition of some number of SCEVs.
const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty)
SetVector< ValueOffsetPair > * getSCEVValues(const SCEV *S)
Return the Value set from which the SCEV expr is generated.
bool isConstant() const
If the value is a global constant, its value is immutable throughout the runtime execution of the pro...
Value * getIncomingValueForBlock(const BasicBlock *BB) const
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:195
void setPreservesAll()
Set by analyses that do not transform their input at all.
This class represents a signed maximum selection.
iterator_range< user_iterator > users()
Definition: Value.h:370
BasicBlock * getSinglePredecessor()
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:226
static const SCEV * getUnsignedOverflowLimitForStep(const SCEV *Step, ICmpInst::Predicate *Pred, ScalarEvolution *SE)
DenseMap< const Loop *, std::string > VerifyMap
static Constant * getCast(unsigned ops, Constant *C, Type *Ty, bool OnlyIfReduced=false)
Convenience function for getting a Cast operation.
Definition: Constants.cpp:1452
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Get a canonical add expression, or something simpler if possible.
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
Definition: APInt.h:1798
void visitAll(const SCEV *Root, SV &Visitor)
Use SCEVTraversal to visit all nodes in the given expression tree.
Value * getCondition() const
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:464
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:426
A utility class that uses RAII to save and restore the value of a variable.
bool isMinValue() const
Determine if this is the smallest unsigned value.
Definition: APInt.h:366
APInt And(const APInt &LHS, const APInt &RHS)
Bitwise AND function for APInt.
Definition: APInt.h:1942
SCEVPredicate(const SCEVPredicate &)=default
static Constant * BuildConstantFromSCEV(const SCEV *V)
This builds up a Constant using the ConstantExpr interface.
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:207
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Definition: Operator.h:49
Analysis pass that exposes the ScalarEvolution for a function.
ConstantRange umax(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an unsigned maximum of a value in ...
This class represents a zero extension of a small integer value to a larger integer value...
static Constant * getNeg(Constant *C, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2113
static Optional< std::pair< const SCEVConstant *, const SCEVConstant * > > SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE)
Find the roots of the quadratic equation for the given quadratic chrec {L,+,M,+,N}.
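For context, the underlying algebra (standard, not quoted from this file): by the binomial expansion above, {L,+,M,+,N} evaluated at iteration x is

    Value(x) = L + M*x + N*x*(x-1)/2

so Value(x) = 0 becomes, after multiplying through by 2,

    N*x^2 + (2M - N)*x + 2L = 0
    x = (-(2M - N) ± sqrt((2M - N)^2 - 8*N*L)) / (2*N)

and the function returns candidate integer roots as SCEVConstants, or None when no usable root exists.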
const SCEV * getBackedgeTakenCount()
Get the (predicated) backedge count for the analyzed loop.
static void replaceSubString(std::string &Str, StringRef From, StringRef To)
replaceSubString - Replaces all occurrences of From in Str with To.
const SCEVUnionPredicate & getUnionPredicate() const
void collectParametricTerms(const SCEV *Expr, SmallVectorImpl< const SCEV * > &Terms)
Collect parametric terms occurring in step expressions (first step of delinearization).
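The three numbered delinearization steps on this page compose as follows (a sketch; AccessFn and ElementSize are assumed inputs, e.g. from getSCEV on an address and getElementSize on the memory instruction):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Analysis/ScalarEvolution.h"
    using namespace llvm;

    // Sketch: terms -> array dimension sizes -> per-dimension subscripts.
    static void delinearizeSketch(ScalarEvolution &SE, const SCEV *AccessFn,
                                  const SCEV *ElementSize) {
      SmallVector<const SCEV *, 4> Terms, Sizes, Subscripts;
      SE.collectParametricTerms(AccessFn, Terms);             // step 1
      SE.findArrayDimensions(Terms, Sizes, ElementSize);      // step 2
      SE.computeAccessFunctions(AccessFn, Subscripts, Sizes); // step 3
    }

The single delinearize() entry point listed above bundles the same three steps.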
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:119
Value * getCondition() const
void emplace_back(ArgTypes &&...Args)
Definition: SmallVector.h:635
void ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr)
Determine whether the sign bit is known to be zero or one.
BasicBlock * getDefaultDest() const
unsigned size() const
Definition: DenseMap.h:83
The SCEV does not dominate the block.
FoldingSetNodeIDRef - This class describes a reference to an interned FoldingSetNodeID, which can be useful for storing node id data rather than using plain FoldingSetNodeIDs, since the 32-element SmallVector is often much larger than necessary, and the possibility of heap allocation means it requires a non-trivial destructor call.
Definition: FoldingSet.h:287
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition: APInt.h:372
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may affect Scalar...
pointer data()
Return a pointer to the vector's buffer, even if empty().
Definition: SmallVector.h:142
This class represents an analyzed expression in the program.
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:169
bool isInBounds() const
Test whether this is an inbounds GEP, as defined by LangRef.html.
Definition: Operator.h:385
iterator begin()
Definition: DenseMap.h:65
unsigned greater or equal
Definition: InstrTypes.h:904
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:368
static PHINode * getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L, DenseMap< Instruction *, PHINode * > &PHIMap)
getConstantEvolvingPHIOperands - Implement getConstantEvolvingPHI by recursing through each instructi...
static Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
TerminatorInst * getTerminator()
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:124
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:135
iterator end()
Definition: DenseMap.h:69
This class represents a sign extension of a small integer value to a larger integer value...
bool isSignBit() const
Check if the APInt's value is returned by getSignBit.
Definition: APInt.h:400
BlockDisposition
An enum describing the relationship between a SCEV and a basic block.
bool isKnownNonZero(const SCEV *S)
Test if the given expression is known to be non-zero.
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:606
BlockT * getLoopPredecessor() const
If the given loop's header has exactly one unique predecessor outside the loop, return it...
Definition: LoopInfoImpl.h:131
This class represents an unsigned maximum selection.
static LLVM_NODISCARD SCEV::NoWrapFlags maskFlags(SCEV::NoWrapFlags Flags, int Mask)
Convenient NoWrapFlags manipulation that hides enum casts and is visible in the ScalarEvolution name ...
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:287
static bool CollectAddOperandsWithScales(DenseMap< const SCEV *, APInt > &M, SmallVectorImpl< const SCEV * > &NewOps, APInt &AccumulatedConstant, const SCEV *const *Ops, size_t NumOperands, const APInt &Scale, ScalarEvolution &SE)
Process the given Ops list, which is a list of operands to be added under the given scale...
AnalysisUsage & addRequiredTransitive()
const Loop * getLoop() const
bool isGuaranteedToExecuteForEveryIteration(const Instruction *I, const Loop *L)
Return true if this function can prove that the instruction I is executed for every iteration of the ...
void print(raw_ostream &OS) const
OutputIt transform(R &&Range, OutputIt d_first, UnaryPredicate P)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere...
Definition: STLExtras.h:807
static StringRef loopDispositionToStr(ScalarEvolution::LoopDisposition LD)
API to communicate dependencies between analyses during invalidation.
Definition: PassManager.h:525
iterator find_as(const LookupKeyT &Val)
Alternate version of find() which allows a different, and possibly less expensive, key type.
Definition: DenseMap.h:146
Analysis pass providing the TargetLibraryInfo.
iterator_range< df_iterator< T > > depth_first(const T &G)
ConstantRange getUnsignedRange(const SCEV *S)
Determine the unsigned range for a particular SCEV.
const SCEVPredicate * getEqualPredicate(const SCEVUnknown *LHS, const SCEVConstant *RHS)
Multiway switch.
const SCEV * getBackedgeTakenCount(const Loop *L)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
bool isUnsigned() const
Determine if this instruction is using an unsigned comparison.
Definition: InstrTypes.h:1033
const APInt & getUpper() const
Return the upper value for this range.
bool isSingleEdge() const
Definition: Dominators.cpp:40
static ConstantRange makeGuaranteedNoWrapRegion(Instruction::BinaryOps BinOp, const ConstantRange &Other, unsigned NoWrapKind)
Return the largest range containing all X such that "X BinOp Y" is guaranteed not to wrap (overflow)...
bool SimplifyICmpOperands(ICmpInst::Predicate &Pred, const SCEV *&LHS, const SCEV *&RHS, unsigned Depth=0)
Simplify LHS and RHS in a comparison with predicate Pred.
This templated class represents "all analyses that operate over <a particular IR unit>" (e...
Definition: PassManager.h:361
user_iterator user_begin()
Definition: Value.h:346
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:441
This class represents a composition of other SCEV predicates, and is the class that most clients will...
Constant * ConstantFoldInstOperands(Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands...
unsigned getSCEVType() const
const SCEV * getUMinExpr(const SCEV *LHS, const SCEV *RHS)
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:463
static Optional< BinaryOp > MatchBinaryOp(Value *V, DominatorTree &DT)
Try to map V into a BinaryOp, and return None on failure.
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
void print(raw_ostream &OS) const
Print out the internal representation of this scalar to the specified stream.
const SCEV * getNumIterationsInRange(const ConstantRange &Range, ScalarEvolution &SE) const
Return the number of iterations of this loop that produce values in the specified constant range...
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:537
LLVM Value Representation.
Definition: Value.h:71
succ_range successors(BasicBlock *BB)
Definition: IR/CFG.h:143
const SCEV * getExpr() const override
Returns the SCEV to which this predicate applies, or nullptr if this is a SCEVUnionPredicate.
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
A vector that has set insertion semantics.
Definition: SetVector.h:41
void eraseValueFromMap(Value *V)
Erase Value from ValueExprMap and ExprValueMap.
static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE, const Loop *L)
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:239
APInt shl(const APInt &LHS, unsigned shiftAmt)
Left-shift function.
Definition: APInt.h:1899
SCEVEqualPredicate(const FoldingSetNodeIDRef ID, const SCEVUnknown *LHS, const SCEVConstant *RHS)
ConstantRange zextOrTrunc(uint32_t BitWidth) const
Make this range have the bit width given by BitWidth.
This file provides utility classes that use RAII to save and restore values.
static cl::opt< unsigned > MaxValueCompareDepth("scalar-evolution-max-value-compare-depth", cl::Hidden, cl::desc("Maximum depth of recursive value complexity comparisons"), cl::init(2))
uint64_t getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:533
static bool containsConstantSomewhere(const SCEV *StartExpr)
Determine if any of the operands in this SCEV are a constant or if any of the add or multiply express...
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:44
bool implies(const SCEVPredicate *N) const override
Returns true if this predicate implies N.
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:125
#define DEBUG(X)
Definition: Debug.h:100
bool isBackedgeTakenCountMaxOrZero(const Loop *L)
Return true if the backedge taken count is either the value returned by getMaxBackedgeTakenCount or z...
const SCEV * getUDivExpr(const SCEV *LHS, const SCEV *RHS)
Get a canonical unsigned division expression, or something simpler if possible.
unsigned countLeadingZeros() const
The APInt version of the countLeadingZeros functions in MathExtras.h.
Definition: APInt.h:1343
const SCEV * getUnknown(Value *V)
FoldingSetNodeIDRef Intern(BumpPtrAllocator &Allocator) const
Intern - Copy this node's data to a memory region allocated from the given allocator and return a Fol...
Definition: FoldingSet.cpp:176
The SCEV varies predictably with the loop.
static const SCEV * rewrite(const SCEV *Scev, ScalarEvolution &SE, ValueToValueMap &Map, bool InterpretConsts=false)
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:831
static bool findArrayDimensionsRec(ScalarEvolution &SE, SmallVectorImpl< const SCEV * > &Terms, SmallVectorImpl< const SCEV * > &Sizes)
bool dominates(const SCEV *S, const BasicBlock *BB)
Return true if elements that makes up the given SCEV dominate the specified basic block...
op_iterator op_end() const
Value handle with callbacks on RAUW and destruction.
Definition: ValueHandle.h:333
unsigned greater than
Definition: InstrTypes.h:903
static int sizeOfSCEV(const SCEV *S)
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:47
inst_range instructions(Function *F)
Definition: InstIterator.h:132
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:980
A container for analyses that lazily runs them and caches their results.
Value * SimplifyInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const DominatorTree *DT=nullptr, AssumptionCache *AC=nullptr)
See if we can compute a simplified version of this instruction.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:217
BlockDisposition getBlockDisposition(const SCEV *S, const BasicBlock *BB)
Return the "disposition" of the given SCEV with respect to the given block.
const SCEV * getTruncateOrZeroExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
bool replacementPreservesLCSSAForm(Instruction *From, Value *To)
Returns true if replacing From with To everywhere is guaranteed to preserve LCSSA form...
Definition: LoopInfo.h:695
static bool IsMaxConsistingOf(const SCEV *MaybeMaxExpr, const SCEV *Candidate)
Is MaybeMaxExpr an SMax or UMax of Candidate and some other values?
bool isAlwaysTrue() const override
Returns true if the predicate is always true.
const SCEV * getElementSize(Instruction *Inst)
Return the size of an element read or written by Inst.
static bool CanConstantFold(const Instruction *I)
Return true if we can constant fold an instruction of the specified type, assuming that all operands ...
Constant * ConstantFoldLoadThroughGEPIndices(Constant *C, ArrayRef< Constant * > Indices)
ConstantFoldLoadThroughGEPIndices - Given a constant and getelementptr indices (with an implied zero pointer)...
const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty)
bool isLoopInvariantPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L, ICmpInst::Predicate &InvariantPred, const SCEV *&InvariantLHS, const SCEV *&InvariantRHS)
Return true if the result of the predicate LHS Pred RHS is loop invariant with respect to L.
APInt abs() const
Get the absolute value.
Definition: APInt.h:1559
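Illustrative example; note the usual two's-complement caveat that the minimum signed value is its own absolute value:
  APInt Neg(8, -42, /*isSigned=*/true);
  APInt Pos = Neg.abs(); // 42; but APInt(8, -128, true).abs() stays -128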
static bool containsUndefs(const SCEV *S)
bool hasComputableLoopEvolution(const SCEV *S, const Loop *L)
Return true if the given SCEV changes value in a known way in the specified loop.
This node is a base class providing common functionality for n'ary operators.
static Constant * getMul(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2154
APInt getUnsignedMax() const
Return the largest unsigned value contained in the ConstantRange.
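Illustrative example (it also exercises getUnsignedMin, documented below); ConstantRange is half-open:
  ConstantRange CR(APInt(8, 10), APInt(8, 20)); // the set [10, 20)
  APInt Hi = CR.getUnsignedMax();               // 19
  APInt Lo = CR.getUnsignedMin();               // 10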
const SCEV * getNoopOrAnyExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
This class represents an assumption made on an AddRec expression.
PredicatedScalarEvolution(ScalarEvolution &SE, Loop &L)
NoWrapFlags
NoWrapFlags are bitfield indices into SubclassData.
friend class SCEVCallbackVH
static bool IsKnownPredicateViaAddRecStart(ScalarEvolution &SE, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
static SCEV::NoWrapFlags StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type, const SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags)
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:479
const SCEV * getOperand() const
unsigned getLoopDepth() const
Return the nesting level of this loop.
Definition: LoopInfo.h:95
const SCEV * getNotSCEV(const SCEV *V)
Return the SCEV object corresponding to ~V.
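This matches the two's-complement identity ~V == -1 - V; a sketch assuming a ScalarEvolution &SE and a Value *V:
  const SCEV *NotV = SE.getNotSCEV(SE.getSCEV(V));
  // same value as subtracting SE.getSCEV(V) from the all-ones constant of V's width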
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Get a canonical multiply expression, or something simpler if possible.
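A sketch of building a product, assuming a ScalarEvolution &SE and Value *A, *B; the operands go in a SmallVector because getMulExpr takes them by SmallVectorImpl reference and may fold or reorder them:
  SmallVector<const SCEV *, 2> Ops;
  Ops.push_back(SE.getSCEV(A));
  Ops.push_back(SE.getSCEV(B));
  const SCEV *Prod = SE.getMulExpr(Ops, SCEV::FlagNSW); // constants fold up front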
bool hasLoopInvariantBackedgeTakenCount(const Loop *L)
Return true if the specified loop has an analyzable loop-invariant backedge-taken count.
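The usual guard before querying a trip count; a sketch assuming a ScalarEvolution &SE and a Loop *L:
  if (SE.hasLoopInvariantBackedgeTakenCount(L)) {
    const SCEV *BTC = SE.getBackedgeTakenCount(L);
    // when exact, the loop header runs BTC + 1 times
  }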
This class represents an assumption that two SCEV expressions are equal, and this can be checked at run-time.
A special type used by analysis passes to provide an address that identifies that particular analysis pass type.
Definition: PassManager.h:64
const BasicBlock * getParent() const
Definition: Instruction.h:62
static bool classof(const SCEV *S)
Methods for support type inquiry through isa, cast, and dyn_cast:
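classof is what the isa<>/cast<>/dyn_cast<> templates consult; a sketch over a const SCEV *S (handleAffine is hypothetical):
  if (const auto *AR = dyn_cast<SCEVAddRecExpr>(S)) {
    // dyn_cast invoked SCEVAddRecExpr::classof(S) behind the scenes
    if (AR->isAffine())
      handleAffine(AR->getStepRecurrence(SE)); // SE: ScalarEvolution&, assumed
  }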
bool invalidate(Function &F, const PreservedAnalyses &PA, FunctionAnalysisManager::Invalidator &Inv)
This visitor recursively visits a SCEV expression and re-writes it.
bool isQuadratic() const
Return true if this represents an expression A + B*x + C*x^2 where A, B and C are loop invariant values.
void print(raw_ostream &OS, unsigned Depth=0) const override
Prints a textual representation of this predicate with an indentation of Depth.
signed greater or equal
Definition: InstrTypes.h:908
This class represents a constant integer value.
const SCEV * getAnyExtendExpr(const SCEV *Op, Type *Ty)
getAnyExtendExpr - Return a SCEV for the given operand extended with unspecified bits out to the given type.
APInt getUnsignedMin() const
Return the smallest unsigned value contained in the ConstantRange.
user_iterator user_end()
Definition: Value.h:354
static bool IsKnownPredicateViaMinOrMax(ScalarEvolution &SE, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Is LHS Pred RHS true by virtue of LHS or RHS being a Min or Max expression?