LLVM  mainline
ScalarEvolution.cpp
Go to the documentation of this file.
00001 //===- ScalarEvolution.cpp - Scalar Evolution Analysis --------------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file contains the implementation of the scalar evolution analysis
00011 // engine, which is used primarily to analyze expressions involving induction
00012 // variables in loops.
00013 //
00014 // There are several aspects to this library.  First is the representation of
00015 // scalar expressions, which are represented as subclasses of the SCEV class.
00016 // These classes are used to represent certain types of subexpressions that we
00017 // can handle. We only create one SCEV of a particular shape, so
00018 // pointer-comparisons for equality are legal.
00019 //
00020 // One important aspect of the SCEV objects is that they are never cyclic, even
00021 // if there is a cycle in the dataflow for an expression (ie, a PHI node).  If
00022 // the PHI node is one of the idioms that we can represent (e.g., a polynomial
00023 // recurrence) then we represent it directly as a recurrence node, otherwise we
00024 // represent it as a SCEVUnknown node.
00025 //
00026 // In addition to being able to represent expressions of various types, we also
00027 // have folders that are used to build the *canonical* representation for a
00028 // particular expression.  These folders are capable of using a variety of
00029 // rewrite rules to simplify the expressions.
00030 //
00031 // Once the folders are defined, we can implement the more interesting
00032 // higher-level code, such as the code that recognizes PHI nodes of various
00033 // types, computes the execution count of a loop, etc.
00034 //
00035 // TODO: We should use these routines and value representations to implement
00036 // dependence analysis!
00037 //
00038 //===----------------------------------------------------------------------===//
00039 //
00040 // There are several good references for the techniques used in this analysis.
00041 //
00042 //  Chains of recurrences -- a method to expedite the evaluation
00043 //  of closed-form functions
00044 //  Olaf Bachmann, Paul S. Wang, Eugene V. Zima
00045 //
00046 //  On computational properties of chains of recurrences
00047 //  Eugene V. Zima
00048 //
00049 //  Symbolic Evaluation of Chains of Recurrences for Loop Optimization
00050 //  Robert A. van Engelen
00051 //
00052 //  Efficient Symbolic Analysis for Optimizing Compilers
00053 //  Robert A. van Engelen
00054 //
00055 //  Using the chains of recurrences algebra for data dependence testing and
00056 //  induction variable substitution
00057 //  MS Thesis, Johnie Birch
00058 //
00059 //===----------------------------------------------------------------------===//
00060 
00061 #include "llvm/Analysis/ScalarEvolution.h"
00062 #include "llvm/ADT/Optional.h"
00063 #include "llvm/ADT/STLExtras.h"
00064 #include "llvm/ADT/SmallPtrSet.h"
00065 #include "llvm/ADT/Statistic.h"
00066 #include "llvm/Analysis/AssumptionCache.h"
00067 #include "llvm/Analysis/ConstantFolding.h"
00068 #include "llvm/Analysis/InstructionSimplify.h"
00069 #include "llvm/Analysis/LoopInfo.h"
00070 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
00071 #include "llvm/Analysis/TargetLibraryInfo.h"
00072 #include "llvm/Analysis/ValueTracking.h"
00073 #include "llvm/IR/ConstantRange.h"
00074 #include "llvm/IR/Constants.h"
00075 #include "llvm/IR/DataLayout.h"
00076 #include "llvm/IR/DerivedTypes.h"
00077 #include "llvm/IR/Dominators.h"
00078 #include "llvm/IR/GetElementPtrTypeIterator.h"
00079 #include "llvm/IR/GlobalAlias.h"
00080 #include "llvm/IR/GlobalVariable.h"
00081 #include "llvm/IR/InstIterator.h"
00082 #include "llvm/IR/Instructions.h"
00083 #include "llvm/IR/LLVMContext.h"
00084 #include "llvm/IR/Metadata.h"
00085 #include "llvm/IR/Operator.h"
00086 #include "llvm/IR/PatternMatch.h"
00087 #include "llvm/Support/CommandLine.h"
00088 #include "llvm/Support/Debug.h"
00089 #include "llvm/Support/ErrorHandling.h"
00090 #include "llvm/Support/MathExtras.h"
00091 #include "llvm/Support/raw_ostream.h"
00092 #include "llvm/Support/SaveAndRestore.h"
00093 #include <algorithm>
00094 using namespace llvm;
00095 
00096 #define DEBUG_TYPE "scalar-evolution"
00097 
00098 STATISTIC(NumArrayLenItCounts,
00099           "Number of trip counts computed with array length");
00100 STATISTIC(NumTripCountsComputed,
00101           "Number of loops with predictable loop counts");
00102 STATISTIC(NumTripCountsNotComputed,
00103           "Number of loops without predictable loop counts");
00104 STATISTIC(NumBruteForceTripCountsComputed,
00105           "Number of loops with trip counts computed by force");
00106 
00107 static cl::opt<unsigned>
00108 MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden,
00109                         cl::desc("Maximum number of iterations SCEV will "
00110                                  "symbolically execute a constant "
00111                                  "derived loop"),
00112                         cl::init(100));
00113 
00114 // FIXME: Enable this with XDEBUG when the test suite is clean.
00115 static cl::opt<bool>
00116 VerifySCEV("verify-scev",
00117            cl::desc("Verify ScalarEvolution's backedge taken counts (slow)"));
00118 
00119 //===----------------------------------------------------------------------===//
00120 //                           SCEV class definitions
00121 //===----------------------------------------------------------------------===//
00122 
00123 //===----------------------------------------------------------------------===//
00124 // Implementation of the SCEV class.
00125 //
00126 
00127 LLVM_DUMP_METHOD
00128 void SCEV::dump() const {
00129   print(dbgs());
00130   dbgs() << '\n';
00131 }
00132 
00133 void SCEV::print(raw_ostream &OS) const {
00134   switch (static_cast<SCEVTypes>(getSCEVType())) {
00135   case scConstant:
00136     cast<SCEVConstant>(this)->getValue()->printAsOperand(OS, false);
00137     return;
00138   case scTruncate: {
00139     const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(this);
00140     const SCEV *Op = Trunc->getOperand();
00141     OS << "(trunc " << *Op->getType() << " " << *Op << " to "
00142        << *Trunc->getType() << ")";
00143     return;
00144   }
00145   case scZeroExtend: {
00146     const SCEVZeroExtendExpr *ZExt = cast<SCEVZeroExtendExpr>(this);
00147     const SCEV *Op = ZExt->getOperand();
00148     OS << "(zext " << *Op->getType() << " " << *Op << " to "
00149        << *ZExt->getType() << ")";
00150     return;
00151   }
00152   case scSignExtend: {
00153     const SCEVSignExtendExpr *SExt = cast<SCEVSignExtendExpr>(this);
00154     const SCEV *Op = SExt->getOperand();
00155     OS << "(sext " << *Op->getType() << " " << *Op << " to "
00156        << *SExt->getType() << ")";
00157     return;
00158   }
00159   case scAddRecExpr: {
00160     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(this);
00161     OS << "{" << *AR->getOperand(0);
00162     for (unsigned i = 1, e = AR->getNumOperands(); i != e; ++i)
00163       OS << ",+," << *AR->getOperand(i);
00164     OS << "}<";
00165     if (AR->getNoWrapFlags(FlagNUW))
00166       OS << "nuw><";
00167     if (AR->getNoWrapFlags(FlagNSW))
00168       OS << "nsw><";
00169     if (AR->getNoWrapFlags(FlagNW) &&
00170         !AR->getNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW)))
00171       OS << "nw><";
00172     AR->getLoop()->getHeader()->printAsOperand(OS, /*PrintType=*/false);
00173     OS << ">";
00174     return;
00175   }
00176   case scAddExpr:
00177   case scMulExpr:
00178   case scUMaxExpr:
00179   case scSMaxExpr: {
00180     const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(this);
00181     const char *OpStr = nullptr;
00182     switch (NAry->getSCEVType()) {
00183     case scAddExpr: OpStr = " + "; break;
00184     case scMulExpr: OpStr = " * "; break;
00185     case scUMaxExpr: OpStr = " umax "; break;
00186     case scSMaxExpr: OpStr = " smax "; break;
00187     }
00188     OS << "(";
00189     for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
00190          I != E; ++I) {
00191       OS << **I;
00192       if (std::next(I) != E)
00193         OS << OpStr;
00194     }
00195     OS << ")";
00196     switch (NAry->getSCEVType()) {
00197     case scAddExpr:
00198     case scMulExpr:
00199       if (NAry->getNoWrapFlags(FlagNUW))
00200         OS << "<nuw>";
00201       if (NAry->getNoWrapFlags(FlagNSW))
00202         OS << "<nsw>";
00203     }
00204     return;
00205   }
00206   case scUDivExpr: {
00207     const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(this);
00208     OS << "(" << *UDiv->getLHS() << " /u " << *UDiv->getRHS() << ")";
00209     return;
00210   }
00211   case scUnknown: {
00212     const SCEVUnknown *U = cast<SCEVUnknown>(this);
00213     Type *AllocTy;
00214     if (U->isSizeOf(AllocTy)) {
00215       OS << "sizeof(" << *AllocTy << ")";
00216       return;
00217     }
00218     if (U->isAlignOf(AllocTy)) {
00219       OS << "alignof(" << *AllocTy << ")";
00220       return;
00221     }
00222 
00223     Type *CTy;
00224     Constant *FieldNo;
00225     if (U->isOffsetOf(CTy, FieldNo)) {
00226       OS << "offsetof(" << *CTy << ", ";
00227       FieldNo->printAsOperand(OS, false);
00228       OS << ")";
00229       return;
00230     }
00231 
00232     // Otherwise just print it normally.
00233     U->getValue()->printAsOperand(OS, false);
00234     return;
00235   }
00236   case scCouldNotCompute:
00237     OS << "***COULDNOTCOMPUTE***";
00238     return;
00239   }
00240   llvm_unreachable("Unknown SCEV kind!");
00241 }
00242 
00243 Type *SCEV::getType() const {
00244   switch (static_cast<SCEVTypes>(getSCEVType())) {
00245   case scConstant:
00246     return cast<SCEVConstant>(this)->getType();
00247   case scTruncate:
00248   case scZeroExtend:
00249   case scSignExtend:
00250     return cast<SCEVCastExpr>(this)->getType();
00251   case scAddRecExpr:
00252   case scMulExpr:
00253   case scUMaxExpr:
00254   case scSMaxExpr:
00255     return cast<SCEVNAryExpr>(this)->getType();
00256   case scAddExpr:
00257     return cast<SCEVAddExpr>(this)->getType();
00258   case scUDivExpr:
00259     return cast<SCEVUDivExpr>(this)->getType();
00260   case scUnknown:
00261     return cast<SCEVUnknown>(this)->getType();
00262   case scCouldNotCompute:
00263     llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
00264   }
00265   llvm_unreachable("Unknown SCEV kind!");
00266 }
00267 
00268 bool SCEV::isZero() const {
00269   if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
00270     return SC->getValue()->isZero();
00271   return false;
00272 }
00273 
00274 bool SCEV::isOne() const {
00275   if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
00276     return SC->getValue()->isOne();
00277   return false;
00278 }
00279 
00280 bool SCEV::isAllOnesValue() const {
00281   if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
00282     return SC->getValue()->isAllOnesValue();
00283   return false;
00284 }
00285 
00286 /// isNonConstantNegative - Return true if the specified scev is negated, but
00287 /// not a constant.
00288 bool SCEV::isNonConstantNegative() const {
00289   const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(this);
00290   if (!Mul) return false;
00291 
00292   // If there is a constant factor, it will be first.
00293   const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
00294   if (!SC) return false;
00295 
00296   // Return true if the value is negative, this matches things like (-42 * V).
00297   return SC->getAPInt().isNegative();
00298 }
00299 
00300 SCEVCouldNotCompute::SCEVCouldNotCompute() :
00301   SCEV(FoldingSetNodeIDRef(), scCouldNotCompute) {}
00302 
00303 bool SCEVCouldNotCompute::classof(const SCEV *S) {
00304   return S->getSCEVType() == scCouldNotCompute;
00305 }
00306 
00307 const SCEV *ScalarEvolution::getConstant(ConstantInt *V) {
00308   FoldingSetNodeID ID;
00309   ID.AddInteger(scConstant);
00310   ID.AddPointer(V);
00311   void *IP = nullptr;
00312   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
00313   SCEV *S = new (SCEVAllocator) SCEVConstant(ID.Intern(SCEVAllocator), V);
00314   UniqueSCEVs.InsertNode(S, IP);
00315   return S;
00316 }
00317 
00318 const SCEV *ScalarEvolution::getConstant(const APInt &Val) {
00319   return getConstant(ConstantInt::get(getContext(), Val));
00320 }
00321 
00322 const SCEV *
00323 ScalarEvolution::getConstant(Type *Ty, uint64_t V, bool isSigned) {
00324   IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty));
00325   return getConstant(ConstantInt::get(ITy, V, isSigned));
00326 }
00327 
00328 SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID,
00329                            unsigned SCEVTy, const SCEV *op, Type *ty)
00330   : SCEV(ID, SCEVTy), Op(op), Ty(ty) {}
00331 
00332 SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID,
00333                                    const SCEV *op, Type *ty)
00334   : SCEVCastExpr(ID, scTruncate, op, ty) {
00335   assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
00336          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
00337          "Cannot truncate non-integer value!");
00338 }
00339 
00340 SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID,
00341                                        const SCEV *op, Type *ty)
00342   : SCEVCastExpr(ID, scZeroExtend, op, ty) {
00343   assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
00344          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
00345          "Cannot zero extend non-integer value!");
00346 }
00347 
00348 SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID,
00349                                        const SCEV *op, Type *ty)
00350   : SCEVCastExpr(ID, scSignExtend, op, ty) {
00351   assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
00352          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
00353          "Cannot sign extend non-integer value!");
00354 }
00355 
00356 void SCEVUnknown::deleted() {
00357   // Clear this SCEVUnknown from various maps.
00358   SE->forgetMemoizedResults(this);
00359 
00360   // Remove this SCEVUnknown from the uniquing map.
00361   SE->UniqueSCEVs.RemoveNode(this);
00362 
00363   // Release the value.
00364   setValPtr(nullptr);
00365 }
00366 
00367 void SCEVUnknown::allUsesReplacedWith(Value *New) {
00368   // Clear this SCEVUnknown from various maps.
00369   SE->forgetMemoizedResults(this);
00370 
00371   // Remove this SCEVUnknown from the uniquing map.
00372   SE->UniqueSCEVs.RemoveNode(this);
00373 
00374   // Update this SCEVUnknown to point to the new value. This is needed
00375   // because there may still be outstanding SCEVs which still point to
00376   // this SCEVUnknown.
00377   setValPtr(New);
00378 }
00379 
00380 bool SCEVUnknown::isSizeOf(Type *&AllocTy) const {
00381   if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
00382     if (VCE->getOpcode() == Instruction::PtrToInt)
00383       if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
00384         if (CE->getOpcode() == Instruction::GetElementPtr &&
00385             CE->getOperand(0)->isNullValue() &&
00386             CE->getNumOperands() == 2)
00387           if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(1)))
00388             if (CI->isOne()) {
00389               AllocTy = cast<PointerType>(CE->getOperand(0)->getType())
00390                                  ->getElementType();
00391               return true;
00392             }
00393 
00394   return false;
00395 }
00396 
00397 bool SCEVUnknown::isAlignOf(Type *&AllocTy) const {
00398   if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
00399     if (VCE->getOpcode() == Instruction::PtrToInt)
00400       if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
00401         if (CE->getOpcode() == Instruction::GetElementPtr &&
00402             CE->getOperand(0)->isNullValue()) {
00403           Type *Ty =
00404             cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
00405           if (StructType *STy = dyn_cast<StructType>(Ty))
00406             if (!STy->isPacked() &&
00407                 CE->getNumOperands() == 3 &&
00408                 CE->getOperand(1)->isNullValue()) {
00409               if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(2)))
00410                 if (CI->isOne() &&
00411                     STy->getNumElements() == 2 &&
00412                     STy->getElementType(0)->isIntegerTy(1)) {
00413                   AllocTy = STy->getElementType(1);
00414                   return true;
00415                 }
00416             }
00417         }
00418 
00419   return false;
00420 }
00421 
00422 bool SCEVUnknown::isOffsetOf(Type *&CTy, Constant *&FieldNo) const {
00423   if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
00424     if (VCE->getOpcode() == Instruction::PtrToInt)
00425       if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
00426         if (CE->getOpcode() == Instruction::GetElementPtr &&
00427             CE->getNumOperands() == 3 &&
00428             CE->getOperand(0)->isNullValue() &&
00429             CE->getOperand(1)->isNullValue()) {
00430           Type *Ty =
00431             cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
00432           // Ignore vector types here so that ScalarEvolutionExpander doesn't
00433           // emit getelementptrs that index into vectors.
00434           if (Ty->isStructTy() || Ty->isArrayTy()) {
00435             CTy = Ty;
00436             FieldNo = CE->getOperand(2);
00437             return true;
00438           }
00439         }
00440 
00441   return false;
00442 }
00443 
00444 //===----------------------------------------------------------------------===//
00445 //                               SCEV Utilities
00446 //===----------------------------------------------------------------------===//
00447 
00448 namespace {
00449 /// SCEVComplexityCompare - Return true if the complexity of the LHS is less
00450 /// than the complexity of the RHS.  This comparator is used to canonicalize
00451 /// expressions.
00452 class SCEVComplexityCompare {
00453   const LoopInfo *const LI;
00454 public:
00455   explicit SCEVComplexityCompare(const LoopInfo *li) : LI(li) {}
00456 
00457   // Return true or false if LHS is less than, or at least RHS, respectively.
00458   bool operator()(const SCEV *LHS, const SCEV *RHS) const {
00459     return compare(LHS, RHS) < 0;
00460   }
00461 
00462   // Return negative, zero, or positive, if LHS is less than, equal to, or
00463   // greater than RHS, respectively. A three-way result allows recursive
00464   // comparisons to be more efficient.
00465   int compare(const SCEV *LHS, const SCEV *RHS) const {
00466     // Fast-path: SCEVs are uniqued so we can do a quick equality check.
00467     if (LHS == RHS)
00468       return 0;
00469 
00470     // Primarily, sort the SCEVs by their getSCEVType().
00471     unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType();
00472     if (LType != RType)
00473       return (int)LType - (int)RType;
00474 
00475     // Aside from the getSCEVType() ordering, the particular ordering
00476     // isn't very important except that it's beneficial to be consistent,
00477     // so that (a + b) and (b + a) don't end up as different expressions.
00478     switch (static_cast<SCEVTypes>(LType)) {
00479     case scUnknown: {
00480       const SCEVUnknown *LU = cast<SCEVUnknown>(LHS);
00481       const SCEVUnknown *RU = cast<SCEVUnknown>(RHS);
00482 
00483       // Sort SCEVUnknown values with some loose heuristics. TODO: This is
00484       // not as complete as it could be.
00485       const Value *LV = LU->getValue(), *RV = RU->getValue();
00486 
00487       // Order pointer values after integer values. This helps SCEVExpander
00488       // form GEPs.
00489       bool LIsPointer = LV->getType()->isPointerTy(),
00490         RIsPointer = RV->getType()->isPointerTy();
00491       if (LIsPointer != RIsPointer)
00492         return (int)LIsPointer - (int)RIsPointer;
00493 
00494       // Compare getValueID values.
00495       unsigned LID = LV->getValueID(),
00496         RID = RV->getValueID();
00497       if (LID != RID)
00498         return (int)LID - (int)RID;
00499 
00500       // Sort arguments by their position.
00501       if (const Argument *LA = dyn_cast<Argument>(LV)) {
00502         const Argument *RA = cast<Argument>(RV);
00503         unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo();
00504         return (int)LArgNo - (int)RArgNo;
00505       }
00506 
00507       // For instructions, compare their loop depth, and their operand
00508       // count.  This is pretty loose.
00509       if (const Instruction *LInst = dyn_cast<Instruction>(LV)) {
00510         const Instruction *RInst = cast<Instruction>(RV);
00511 
00512         // Compare loop depths.
00513         const BasicBlock *LParent = LInst->getParent(),
00514           *RParent = RInst->getParent();
00515         if (LParent != RParent) {
00516           unsigned LDepth = LI->getLoopDepth(LParent),
00517             RDepth = LI->getLoopDepth(RParent);
00518           if (LDepth != RDepth)
00519             return (int)LDepth - (int)RDepth;
00520         }
00521 
00522         // Compare the number of operands.
00523         unsigned LNumOps = LInst->getNumOperands(),
00524           RNumOps = RInst->getNumOperands();
00525         return (int)LNumOps - (int)RNumOps;
00526       }
00527 
00528       return 0;
00529     }
00530 
00531     case scConstant: {
00532       const SCEVConstant *LC = cast<SCEVConstant>(LHS);
00533       const SCEVConstant *RC = cast<SCEVConstant>(RHS);
00534 
00535       // Compare constant values.
00536       const APInt &LA = LC->getAPInt();
00537       const APInt &RA = RC->getAPInt();
00538       unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth();
00539       if (LBitWidth != RBitWidth)
00540         return (int)LBitWidth - (int)RBitWidth;
00541       return LA.ult(RA) ? -1 : 1;
00542     }
00543 
00544     case scAddRecExpr: {
00545       const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS);
00546       const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS);
00547 
00548       // Compare addrec loop depths.
00549       const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop();
00550       if (LLoop != RLoop) {
00551         unsigned LDepth = LLoop->getLoopDepth(),
00552           RDepth = RLoop->getLoopDepth();
00553         if (LDepth != RDepth)
00554           return (int)LDepth - (int)RDepth;
00555       }
00556 
00557       // Addrec complexity grows with operand count.
00558       unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands();
00559       if (LNumOps != RNumOps)
00560         return (int)LNumOps - (int)RNumOps;
00561 
00562       // Lexicographically compare.
00563       for (unsigned i = 0; i != LNumOps; ++i) {
00564         long X = compare(LA->getOperand(i), RA->getOperand(i));
00565         if (X != 0)
00566           return X;
00567       }
00568 
00569       return 0;
00570     }
00571 
00572     case scAddExpr:
00573     case scMulExpr:
00574     case scSMaxExpr:
00575     case scUMaxExpr: {
00576       const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS);
00577       const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS);
00578 
00579       // Lexicographically compare n-ary expressions.
00580       unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands();
00581       if (LNumOps != RNumOps)
00582         return (int)LNumOps - (int)RNumOps;
00583 
00584       for (unsigned i = 0; i != LNumOps; ++i) {
00585         if (i >= RNumOps)
00586           return 1;
00587         long X = compare(LC->getOperand(i), RC->getOperand(i));
00588         if (X != 0)
00589           return X;
00590       }
00591       return (int)LNumOps - (int)RNumOps;
00592     }
00593 
00594     case scUDivExpr: {
00595       const SCEVUDivExpr *LC = cast<SCEVUDivExpr>(LHS);
00596       const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS);
00597 
00598       // Lexicographically compare udiv expressions.
00599       long X = compare(LC->getLHS(), RC->getLHS());
00600       if (X != 0)
00601         return X;
00602       return compare(LC->getRHS(), RC->getRHS());
00603     }
00604 
00605     case scTruncate:
00606     case scZeroExtend:
00607     case scSignExtend: {
00608       const SCEVCastExpr *LC = cast<SCEVCastExpr>(LHS);
00609       const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS);
00610 
00611       // Compare cast expressions by operand.
00612       return compare(LC->getOperand(), RC->getOperand());
00613     }
00614 
00615     case scCouldNotCompute:
00616       llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
00617     }
00618     llvm_unreachable("Unknown SCEV kind!");
00619   }
00620 };
00621 }  // end anonymous namespace
00622 
00623 /// GroupByComplexity - Given a list of SCEV objects, order them by their
00624 /// complexity, and group objects of the same complexity together by value.
00625 /// When this routine is finished, we know that any duplicates in the vector are
00626 /// consecutive and that complexity is monotonically increasing.
00627 ///
00628 /// Note that we go take special precautions to ensure that we get deterministic
00629 /// results from this routine.  In other words, we don't want the results of
00630 /// this to depend on where the addresses of various SCEV objects happened to
00631 /// land in memory.
00632 ///
00633 static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
00634                               LoopInfo *LI) {
00635   if (Ops.size() < 2) return;  // Noop
00636   if (Ops.size() == 2) {
00637     // This is the common case, which also happens to be trivially simple.
00638     // Special case it.
00639     const SCEV *&LHS = Ops[0], *&RHS = Ops[1];
00640     if (SCEVComplexityCompare(LI)(RHS, LHS))
00641       std::swap(LHS, RHS);
00642     return;
00643   }
00644 
00645   // Do the rough sort by complexity.
00646   std::stable_sort(Ops.begin(), Ops.end(), SCEVComplexityCompare(LI));
00647 
00648   // Now that we are sorted by complexity, group elements of the same
00649   // complexity.  Note that this is, at worst, N^2, but the vector is likely to
00650   // be extremely short in practice.  Note that we take this approach because we
00651   // do not want to depend on the addresses of the objects we are grouping.
00652   for (unsigned i = 0, e = Ops.size(); i != e-2; ++i) {
00653     const SCEV *S = Ops[i];
00654     unsigned Complexity = S->getSCEVType();
00655 
00656     // If there are any objects of the same complexity and same value as this
00657     // one, group them.
00658     for (unsigned j = i+1; j != e && Ops[j]->getSCEVType() == Complexity; ++j) {
00659       if (Ops[j] == S) { // Found a duplicate.
00660         // Move it to immediately after i'th element.
00661         std::swap(Ops[i+1], Ops[j]);
00662         ++i;   // no need to rescan it.
00663         if (i == e-2) return;  // Done!
00664       }
00665     }
00666   }
00667 }
00668 
00669 // Returns the size of the SCEV S.
00670 static inline int sizeOfSCEV(const SCEV *S) {
00671   struct FindSCEVSize {
00672     int Size;
00673     FindSCEVSize() : Size(0) {}
00674 
00675     bool follow(const SCEV *S) {
00676       ++Size;
00677       // Keep looking at all operands of S.
00678       return true;
00679     }
00680     bool isDone() const {
00681       return false;
00682     }
00683   };
00684 
00685   FindSCEVSize F;
00686   SCEVTraversal<FindSCEVSize> ST(F);
00687   ST.visitAll(S);
00688   return F.Size;
00689 }
00690 
00691 namespace {
00692 
00693 struct SCEVDivision : public SCEVVisitor<SCEVDivision, void> {
00694 public:
00695   // Computes the Quotient and Remainder of the division of Numerator by
00696   // Denominator.
00697   static void divide(ScalarEvolution &SE, const SCEV *Numerator,
00698                      const SCEV *Denominator, const SCEV **Quotient,
00699                      const SCEV **Remainder) {
00700     assert(Numerator && Denominator && "Uninitialized SCEV");
00701 
00702     SCEVDivision D(SE, Numerator, Denominator);
00703 
00704     // Check for the trivial case here to avoid having to check for it in the
00705     // rest of the code.
00706     if (Numerator == Denominator) {
00707       *Quotient = D.One;
00708       *Remainder = D.Zero;
00709       return;
00710     }
00711 
00712     if (Numerator->isZero()) {
00713       *Quotient = D.Zero;
00714       *Remainder = D.Zero;
00715       return;
00716     }
00717 
00718     // A simple case when N/1. The quotient is N.
00719     if (Denominator->isOne()) {
00720       *Quotient = Numerator;
00721       *Remainder = D.Zero;
00722       return;
00723     }
00724 
00725     // Split the Denominator when it is a product.
00726     if (const SCEVMulExpr *T = dyn_cast<const SCEVMulExpr>(Denominator)) {
00727       const SCEV *Q, *R;
00728       *Quotient = Numerator;
00729       for (const SCEV *Op : T->operands()) {
00730         divide(SE, *Quotient, Op, &Q, &R);
00731         *Quotient = Q;
00732 
00733         // Bail out when the Numerator is not divisible by one of the terms of
00734         // the Denominator.
00735         if (!R->isZero()) {
00736           *Quotient = D.Zero;
00737           *Remainder = Numerator;
00738           return;
00739         }
00740       }
00741       *Remainder = D.Zero;
00742       return;
00743     }
00744 
00745     D.visit(Numerator);
00746     *Quotient = D.Quotient;
00747     *Remainder = D.Remainder;
00748   }
00749 
00750   // Except in the trivial case described above, we do not know how to divide
00751   // Expr by Denominator for the following functions with empty implementation.
00752   void visitTruncateExpr(const SCEVTruncateExpr *Numerator) {}
00753   void visitZeroExtendExpr(const SCEVZeroExtendExpr *Numerator) {}
00754   void visitSignExtendExpr(const SCEVSignExtendExpr *Numerator) {}
00755   void visitUDivExpr(const SCEVUDivExpr *Numerator) {}
00756   void visitSMaxExpr(const SCEVSMaxExpr *Numerator) {}
00757   void visitUMaxExpr(const SCEVUMaxExpr *Numerator) {}
00758   void visitUnknown(const SCEVUnknown *Numerator) {}
00759   void visitCouldNotCompute(const SCEVCouldNotCompute *Numerator) {}
00760 
00761   void visitConstant(const SCEVConstant *Numerator) {
00762     if (const SCEVConstant *D = dyn_cast<SCEVConstant>(Denominator)) {
00763       APInt NumeratorVal = Numerator->getAPInt();
00764       APInt DenominatorVal = D->getAPInt();
00765       uint32_t NumeratorBW = NumeratorVal.getBitWidth();
00766       uint32_t DenominatorBW = DenominatorVal.getBitWidth();
00767 
00768       if (NumeratorBW > DenominatorBW)
00769         DenominatorVal = DenominatorVal.sext(NumeratorBW);
00770       else if (NumeratorBW < DenominatorBW)
00771         NumeratorVal = NumeratorVal.sext(DenominatorBW);
00772 
00773       APInt QuotientVal(NumeratorVal.getBitWidth(), 0);
00774       APInt RemainderVal(NumeratorVal.getBitWidth(), 0);
00775       APInt::sdivrem(NumeratorVal, DenominatorVal, QuotientVal, RemainderVal);
00776       Quotient = SE.getConstant(QuotientVal);
00777       Remainder = SE.getConstant(RemainderVal);
00778       return;
00779     }
00780   }
00781 
00782   void visitAddRecExpr(const SCEVAddRecExpr *Numerator) {
00783     const SCEV *StartQ, *StartR, *StepQ, *StepR;
00784     if (!Numerator->isAffine())
00785       return cannotDivide(Numerator);
00786     divide(SE, Numerator->getStart(), Denominator, &StartQ, &StartR);
00787     divide(SE, Numerator->getStepRecurrence(SE), Denominator, &StepQ, &StepR);
00788     // Bail out if the types do not match.
00789     Type *Ty = Denominator->getType();
00790     if (Ty != StartQ->getType() || Ty != StartR->getType() ||
00791         Ty != StepQ->getType() || Ty != StepR->getType())
00792       return cannotDivide(Numerator);
00793     Quotient = SE.getAddRecExpr(StartQ, StepQ, Numerator->getLoop(),
00794                                 Numerator->getNoWrapFlags());
00795     Remainder = SE.getAddRecExpr(StartR, StepR, Numerator->getLoop(),
00796                                  Numerator->getNoWrapFlags());
00797   }
00798 
00799   void visitAddExpr(const SCEVAddExpr *Numerator) {
00800     SmallVector<const SCEV *, 2> Qs, Rs;
00801     Type *Ty = Denominator->getType();
00802 
00803     for (const SCEV *Op : Numerator->operands()) {
00804       const SCEV *Q, *R;
00805       divide(SE, Op, Denominator, &Q, &R);
00806 
00807       // Bail out if types do not match.
00808       if (Ty != Q->getType() || Ty != R->getType())
00809         return cannotDivide(Numerator);
00810 
00811       Qs.push_back(Q);
00812       Rs.push_back(R);
00813     }
00814 
00815     if (Qs.size() == 1) {
00816       Quotient = Qs[0];
00817       Remainder = Rs[0];
00818       return;
00819     }
00820 
00821     Quotient = SE.getAddExpr(Qs);
00822     Remainder = SE.getAddExpr(Rs);
00823   }
00824 
00825   void visitMulExpr(const SCEVMulExpr *Numerator) {
00826     SmallVector<const SCEV *, 2> Qs;
00827     Type *Ty = Denominator->getType();
00828 
00829     bool FoundDenominatorTerm = false;
00830     for (const SCEV *Op : Numerator->operands()) {
00831       // Bail out if types do not match.
00832       if (Ty != Op->getType())
00833         return cannotDivide(Numerator);
00834 
00835       if (FoundDenominatorTerm) {
00836         Qs.push_back(Op);
00837         continue;
00838       }
00839 
00840       // Check whether Denominator divides one of the product operands.
00841       const SCEV *Q, *R;
00842       divide(SE, Op, Denominator, &Q, &R);
00843       if (!R->isZero()) {
00844         Qs.push_back(Op);
00845         continue;
00846       }
00847 
00848       // Bail out if types do not match.
00849       if (Ty != Q->getType())
00850         return cannotDivide(Numerator);
00851 
00852       FoundDenominatorTerm = true;
00853       Qs.push_back(Q);
00854     }
00855 
00856     if (FoundDenominatorTerm) {
00857       Remainder = Zero;
00858       if (Qs.size() == 1)
00859         Quotient = Qs[0];
00860       else
00861         Quotient = SE.getMulExpr(Qs);
00862       return;
00863     }
00864 
00865     if (!isa<SCEVUnknown>(Denominator))
00866       return cannotDivide(Numerator);
00867 
00868     // The Remainder is obtained by replacing Denominator by 0 in Numerator.
00869     ValueToValueMap RewriteMap;
00870     RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] =
00871         cast<SCEVConstant>(Zero)->getValue();
00872     Remainder = SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true);
00873 
00874     if (Remainder->isZero()) {
00875       // The Quotient is obtained by replacing Denominator by 1 in Numerator.
00876       RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] =
00877           cast<SCEVConstant>(One)->getValue();
00878       Quotient =
00879           SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true);
00880       return;
00881     }
00882 
00883     // Quotient is (Numerator - Remainder) divided by Denominator.
00884     const SCEV *Q, *R;
00885     const SCEV *Diff = SE.getMinusSCEV(Numerator, Remainder);
00886     // This SCEV does not seem to simplify: fail the division here.
00887     if (sizeOfSCEV(Diff) > sizeOfSCEV(Numerator))
00888       return cannotDivide(Numerator);
00889     divide(SE, Diff, Denominator, &Q, &R);
00890     if (R != Zero)
00891       return cannotDivide(Numerator);
00892     Quotient = Q;
00893   }
00894 
00895 private:
00896   SCEVDivision(ScalarEvolution &S, const SCEV *Numerator,
00897                const SCEV *Denominator)
00898       : SE(S), Denominator(Denominator) {
00899     Zero = SE.getZero(Denominator->getType());
00900     One = SE.getOne(Denominator->getType());
00901 
00902     // We generally do not know how to divide Expr by Denominator. We
00903     // initialize the division to a "cannot divide" state to simplify the rest
00904     // of the code.
00905     cannotDivide(Numerator);
00906   }
00907 
00908   // Convenience function for giving up on the division. We set the quotient to
00909   // be equal to zero and the remainder to be equal to the numerator.
00910   void cannotDivide(const SCEV *Numerator) {
00911     Quotient = Zero;
00912     Remainder = Numerator;
00913   }
00914 
00915   ScalarEvolution &SE;
00916   const SCEV *Denominator, *Quotient, *Remainder, *Zero, *One;
00917 };
00918 
00919 }
00920 
00921 //===----------------------------------------------------------------------===//
00922 //                      Simple SCEV method implementations
00923 //===----------------------------------------------------------------------===//
00924 
00925 /// BinomialCoefficient - Compute BC(It, K).  The result has width W.
00926 /// Assume, K > 0.
00927 static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K,
00928                                        ScalarEvolution &SE,
00929                                        Type *ResultTy) {
00930   // Handle the simplest case efficiently.
00931   if (K == 1)
00932     return SE.getTruncateOrZeroExtend(It, ResultTy);
00933 
00934   // We are using the following formula for BC(It, K):
00935   //
00936   //   BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / K!
00937   //
00938   // Suppose, W is the bitwidth of the return value.  We must be prepared for
00939   // overflow.  Hence, we must assure that the result of our computation is
00940   // equal to the accurate one modulo 2^W.  Unfortunately, division isn't
00941   // safe in modular arithmetic.
00942   //
00943   // However, this code doesn't use exactly that formula; the formula it uses
00944   // is something like the following, where T is the number of factors of 2 in
00945   // K! (i.e. trailing zeros in the binary representation of K!), and ^ is
00946   // exponentiation:
00947   //
00948   //   BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / 2^T / (K! / 2^T)
00949   //
00950   // This formula is trivially equivalent to the previous formula.  However,
00951   // this formula can be implemented much more efficiently.  The trick is that
00952   // K! / 2^T is odd, and exact division by an odd number *is* safe in modular
00953   // arithmetic.  To do exact division in modular arithmetic, all we have
00954   // to do is multiply by the inverse.  Therefore, this step can be done at
00955   // width W.
00956   //
00957   // The next issue is how to safely do the division by 2^T.  The way this
00958   // is done is by doing the multiplication step at a width of at least W + T
00959   // bits.  This way, the bottom W+T bits of the product are accurate. Then,
00960   // when we perform the division by 2^T (which is equivalent to a right shift
00961   // by T), the bottom W bits are accurate.  Extra bits are okay; they'll get
00962   // truncated out after the division by 2^T.
00963   //
00964   // In comparison to just directly using the first formula, this technique
00965   // is much more efficient; using the first formula requires W * K bits,
00966   // but this formula less than W + K bits. Also, the first formula requires
00967   // a division step, whereas this formula only requires multiplies and shifts.
00968   //
00969   // It doesn't matter whether the subtraction step is done in the calculation
00970   // width or the input iteration count's width; if the subtraction overflows,
00971   // the result must be zero anyway.  We prefer here to do it in the width of
00972   // the induction variable because it helps a lot for certain cases; CodeGen
00973   // isn't smart enough to ignore the overflow, which leads to much less
00974   // efficient code if the width of the subtraction is wider than the native
00975   // register width.
00976   //
00977   // (It's possible to not widen at all by pulling out factors of 2 before
00978   // the multiplication; for example, K=2 can be calculated as
00979   // It/2*(It+(It*INT_MIN/INT_MIN)+-1). However, it requires
00980   // extra arithmetic, so it's not an obvious win, and it gets
00981   // much more complicated for K > 3.)
00982 
00983   // Protection from insane SCEVs; this bound is conservative,
00984   // but it probably doesn't matter.
00985   if (K > 1000)
00986     return SE.getCouldNotCompute();
00987 
00988   unsigned W = SE.getTypeSizeInBits(ResultTy);
00989 
00990   // Calculate K! / 2^T and T; we divide out the factors of two before
00991   // multiplying for calculating K! / 2^T to avoid overflow.
00992   // Other overflow doesn't matter because we only care about the bottom
00993   // W bits of the result.
00994   APInt OddFactorial(W, 1);
00995   unsigned T = 1;
00996   for (unsigned i = 3; i <= K; ++i) {
00997     APInt Mult(W, i);
00998     unsigned TwoFactors = Mult.countTrailingZeros();
00999     T += TwoFactors;
01000     Mult = Mult.lshr(TwoFactors);
01001     OddFactorial *= Mult;
01002   }
01003 
01004   // We need at least W + T bits for the multiplication step
01005   unsigned CalculationBits = W + T;
01006 
01007   // Calculate 2^T, at width T+W.
01008   APInt DivFactor = APInt::getOneBitSet(CalculationBits, T);
01009 
01010   // Calculate the multiplicative inverse of K! / 2^T;
01011   // this multiplication factor will perform the exact division by
01012   // K! / 2^T.
01013   APInt Mod = APInt::getSignedMinValue(W+1);
01014   APInt MultiplyFactor = OddFactorial.zext(W+1);
01015   MultiplyFactor = MultiplyFactor.multiplicativeInverse(Mod);
01016   MultiplyFactor = MultiplyFactor.trunc(W);
01017 
01018   // Calculate the product, at width T+W
01019   IntegerType *CalculationTy = IntegerType::get(SE.getContext(),
01020                                                       CalculationBits);
01021   const SCEV *Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy);
01022   for (unsigned i = 1; i != K; ++i) {
01023     const SCEV *S = SE.getMinusSCEV(It, SE.getConstant(It->getType(), i));
01024     Dividend = SE.getMulExpr(Dividend,
01025                              SE.getTruncateOrZeroExtend(S, CalculationTy));
01026   }
01027 
01028   // Divide by 2^T
01029   const SCEV *DivResult = SE.getUDivExpr(Dividend, SE.getConstant(DivFactor));
01030 
01031   // Truncate the result, and divide by K! / 2^T.
01032 
01033   return SE.getMulExpr(SE.getConstant(MultiplyFactor),
01034                        SE.getTruncateOrZeroExtend(DivResult, ResultTy));
01035 }
01036 
01037 /// evaluateAtIteration - Return the value of this chain of recurrences at
01038 /// the specified iteration number.  We can evaluate this recurrence by
01039 /// multiplying each element in the chain by the binomial coefficient
01040 /// corresponding to it.  In other words, we can evaluate {A,+,B,+,C,+,D} as:
01041 ///
01042 ///   A*BC(It, 0) + B*BC(It, 1) + C*BC(It, 2) + D*BC(It, 3)
01043 ///
01044 /// where BC(It, k) stands for binomial coefficient.
01045 ///
01046 const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It,
01047                                                 ScalarEvolution &SE) const {
01048   const SCEV *Result = getStart();
01049   for (unsigned i = 1, e = getNumOperands(); i != e; ++i) {
01050     // The computation is correct in the face of overflow provided that the
01051     // multiplication is performed _after_ the evaluation of the binomial
01052     // coefficient.
01053     const SCEV *Coeff = BinomialCoefficient(It, i, SE, getType());
01054     if (isa<SCEVCouldNotCompute>(Coeff))
01055       return Coeff;
01056 
01057     Result = SE.getAddExpr(Result, SE.getMulExpr(getOperand(i), Coeff));
01058   }
01059   return Result;
01060 }
01061 
01062 //===----------------------------------------------------------------------===//
01063 //                    SCEV Expression folder implementations
01064 //===----------------------------------------------------------------------===//
01065 
01066 const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
01067                                              Type *Ty) {
01068   assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) &&
01069          "This is not a truncating conversion!");
01070   assert(isSCEVable(Ty) &&
01071          "This is not a conversion to a SCEVable type!");
01072   Ty = getEffectiveSCEVType(Ty);
01073 
01074   FoldingSetNodeID ID;
01075   ID.AddInteger(scTruncate);
01076   ID.AddPointer(Op);
01077   ID.AddPointer(Ty);
01078   void *IP = nullptr;
01079   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
01080 
01081   // Fold if the operand is constant.
01082   if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
01083     return getConstant(
01084       cast<ConstantInt>(ConstantExpr::getTrunc(SC->getValue(), Ty)));
01085 
01086   // trunc(trunc(x)) --> trunc(x)
01087   if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op))
01088     return getTruncateExpr(ST->getOperand(), Ty);
01089 
01090   // trunc(sext(x)) --> sext(x) if widening or trunc(x) if narrowing
01091   if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
01092     return getTruncateOrSignExtend(SS->getOperand(), Ty);
01093 
01094   // trunc(zext(x)) --> zext(x) if widening or trunc(x) if narrowing
01095   if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
01096     return getTruncateOrZeroExtend(SZ->getOperand(), Ty);
01097 
01098   // trunc(x1+x2+...+xN) --> trunc(x1)+trunc(x2)+...+trunc(xN) if we can
01099   // eliminate all the truncates, or we replace other casts with truncates.
01100   if (const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Op)) {
01101     SmallVector<const SCEV *, 4> Operands;
01102     bool hasTrunc = false;
01103     for (unsigned i = 0, e = SA->getNumOperands(); i != e && !hasTrunc; ++i) {
01104       const SCEV *S = getTruncateExpr(SA->getOperand(i), Ty);
01105       if (!isa<SCEVCastExpr>(SA->getOperand(i)))
01106         hasTrunc = isa<SCEVTruncateExpr>(S);
01107       Operands.push_back(S);
01108     }
01109     if (!hasTrunc)
01110       return getAddExpr(Operands);
01111     UniqueSCEVs.FindNodeOrInsertPos(ID, IP);  // Mutates IP, returns NULL.
01112   }
01113 
01114   // trunc(x1*x2*...*xN) --> trunc(x1)*trunc(x2)*...*trunc(xN) if we can
01115   // eliminate all the truncates, or we replace other casts with truncates.
01116   if (const SCEVMulExpr *SM = dyn_cast<SCEVMulExpr>(Op)) {
01117     SmallVector<const SCEV *, 4> Operands;
01118     bool hasTrunc = false;
01119     for (unsigned i = 0, e = SM->getNumOperands(); i != e && !hasTrunc; ++i) {
01120       const SCEV *S = getTruncateExpr(SM->getOperand(i), Ty);
01121       if (!isa<SCEVCastExpr>(SM->getOperand(i)))
01122         hasTrunc = isa<SCEVTruncateExpr>(S);
01123       Operands.push_back(S);
01124     }
01125     if (!hasTrunc)
01126       return getMulExpr(Operands);
01127     UniqueSCEVs.FindNodeOrInsertPos(ID, IP);  // Mutates IP, returns NULL.
01128   }
01129 
01130   // If the input value is a chrec scev, truncate the chrec's operands.
01131   if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
01132     SmallVector<const SCEV *, 4> Operands;
01133     for (const SCEV *Op : AddRec->operands())
01134       Operands.push_back(getTruncateExpr(Op, Ty));
01135     return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap);
01136   }
01137 
01138   // The cast wasn't folded; create an explicit cast node. We can reuse
01139   // the existing insert position since if we get here, we won't have
01140   // made any changes which would invalidate it.
01141   SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator),
01142                                                  Op, Ty);
01143   UniqueSCEVs.InsertNode(S, IP);
01144   return S;
01145 }
01146 
01147 // Get the limit of a recurrence such that incrementing by Step cannot cause
01148 // signed overflow as long as the value of the recurrence within the
01149 // loop does not exceed this limit before incrementing.
01150 static const SCEV *getSignedOverflowLimitForStep(const SCEV *Step,
01151                                                  ICmpInst::Predicate *Pred,
01152                                                  ScalarEvolution *SE) {
01153   unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
01154   if (SE->isKnownPositive(Step)) {
01155     *Pred = ICmpInst::ICMP_SLT;
01156     return SE->getConstant(APInt::getSignedMinValue(BitWidth) -
01157                            SE->getSignedRange(Step).getSignedMax());
01158   }
01159   if (SE->isKnownNegative(Step)) {
01160     *Pred = ICmpInst::ICMP_SGT;
01161     return SE->getConstant(APInt::getSignedMaxValue(BitWidth) -
01162                            SE->getSignedRange(Step).getSignedMin());
01163   }
01164   return nullptr;
01165 }
01166 
01167 // Get the limit of a recurrence such that incrementing by Step cannot cause
01168 // unsigned overflow as long as the value of the recurrence within the loop does
01169 // not exceed this limit before incrementing.
01170 static const SCEV *getUnsignedOverflowLimitForStep(const SCEV *Step,
01171                                                    ICmpInst::Predicate *Pred,
01172                                                    ScalarEvolution *SE) {
01173   unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
01174   *Pred = ICmpInst::ICMP_ULT;
01175 
01176   return SE->getConstant(APInt::getMinValue(BitWidth) -
01177                          SE->getUnsignedRange(Step).getUnsignedMax());
01178 }
01179 
01180 namespace {
01181 
01182 struct ExtendOpTraitsBase {
01183   typedef const SCEV *(ScalarEvolution::*GetExtendExprTy)(const SCEV *, Type *);
01184 };
01185 
01186 // Used to make code generic over signed and unsigned overflow.
01187 template <typename ExtendOp> struct ExtendOpTraits {
01188   // Members present:
01189   //
01190   // static const SCEV::NoWrapFlags WrapType;
01191   //
01192   // static const ExtendOpTraitsBase::GetExtendExprTy GetExtendExpr;
01193   //
01194   // static const SCEV *getOverflowLimitForStep(const SCEV *Step,
01195   //                                           ICmpInst::Predicate *Pred,
01196   //                                           ScalarEvolution *SE);
01197 };
01198 
01199 template <>
01200 struct ExtendOpTraits<SCEVSignExtendExpr> : public ExtendOpTraitsBase {
01201   static const SCEV::NoWrapFlags WrapType = SCEV::FlagNSW;
01202 
01203   static const GetExtendExprTy GetExtendExpr;
01204 
01205   static const SCEV *getOverflowLimitForStep(const SCEV *Step,
01206                                              ICmpInst::Predicate *Pred,
01207                                              ScalarEvolution *SE) {
01208     return getSignedOverflowLimitForStep(Step, Pred, SE);
01209   }
01210 };
01211 
01212 const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
01213     SCEVSignExtendExpr>::GetExtendExpr = &ScalarEvolution::getSignExtendExpr;
01214 
01215 template <>
01216 struct ExtendOpTraits<SCEVZeroExtendExpr> : public ExtendOpTraitsBase {
01217   static const SCEV::NoWrapFlags WrapType = SCEV::FlagNUW;
01218 
01219   static const GetExtendExprTy GetExtendExpr;
01220 
01221   static const SCEV *getOverflowLimitForStep(const SCEV *Step,
01222                                              ICmpInst::Predicate *Pred,
01223                                              ScalarEvolution *SE) {
01224     return getUnsignedOverflowLimitForStep(Step, Pred, SE);
01225   }
01226 };
01227 
01228 const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
01229     SCEVZeroExtendExpr>::GetExtendExpr = &ScalarEvolution::getZeroExtendExpr;
01230 }
01231 
01232 // The recurrence AR has been shown to have no signed/unsigned wrap or something
01233 // close to it. Typically, if we can prove NSW/NUW for AR, then we can just as
01234 // easily prove NSW/NUW for its preincrement or postincrement sibling. This
01235 // allows normalizing a sign/zero extended AddRec as such: {sext/zext(Step +
01236 // Start),+,Step} => {(Step + sext/zext(Start),+,Step} As a result, the
01237 // expression "Step + sext/zext(PreIncAR)" is congruent with
01238 // "sext/zext(PostIncAR)"
01239 template <typename ExtendOpTy>
01240 static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty,
01241                                         ScalarEvolution *SE) {
01242   auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType;
01243   auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;
01244 
01245   const Loop *L = AR->getLoop();
01246   const SCEV *Start = AR->getStart();
01247   const SCEV *Step = AR->getStepRecurrence(*SE);
01248 
01249   // Check for a simple looking step prior to loop entry.
01250   const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start);
01251   if (!SA)
01252     return nullptr;
01253 
01254   // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV
01255   // subtraction is expensive. For this purpose, perform a quick and dirty
01256   // difference, by checking for Step in the operand list.
01257   SmallVector<const SCEV *, 4> DiffOps;
01258   for (const SCEV *Op : SA->operands())
01259     if (Op != Step)
01260       DiffOps.push_back(Op);
01261 
01262   if (DiffOps.size() == SA->getNumOperands())
01263     return nullptr;
01264 
01265   // Try to prove `WrapType` (SCEV::FlagNSW or SCEV::FlagNUW) on `PreStart` +
01266   // `Step`:
01267 
01268   // 1. NSW/NUW flags on the step increment.
01269   auto PreStartFlags =
01270     ScalarEvolution::maskFlags(SA->getNoWrapFlags(), SCEV::FlagNUW);
01271   const SCEV *PreStart = SE->getAddExpr(DiffOps, PreStartFlags);
01272   const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>(
01273       SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap));
01274 
01275   // "{S,+,X} is <nsw>/<nuw>" and "the backedge is taken at least once" implies
01276   // "S+X does not sign/unsign-overflow".
01277   //
01278 
01279   const SCEV *BECount = SE->getBackedgeTakenCount(L);
01280   if (PreAR && PreAR->getNoWrapFlags(WrapType) &&
01281       !isa<SCEVCouldNotCompute>(BECount) && SE->isKnownPositive(BECount))
01282     return PreStart;
01283 
01284   // 2. Direct overflow check on the step operation's expression.
01285   unsigned BitWidth = SE->getTypeSizeInBits(AR->getType());
01286   Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2);
01287   const SCEV *OperandExtendedStart =
01288       SE->getAddExpr((SE->*GetExtendExpr)(PreStart, WideTy),
01289                      (SE->*GetExtendExpr)(Step, WideTy));
01290   if ((SE->*GetExtendExpr)(Start, WideTy) == OperandExtendedStart) {
01291     if (PreAR && AR->getNoWrapFlags(WrapType)) {
01292       // If we know `AR` == {`PreStart`+`Step`,+,`Step`} is `WrapType` (FlagNSW
01293       // or FlagNUW) and that `PreStart` + `Step` is `WrapType` too, then
01294       // `PreAR` == {`PreStart`,+,`Step`} is also `WrapType`.  Cache this fact.
01295       const_cast<SCEVAddRecExpr *>(PreAR)->setNoWrapFlags(WrapType);
01296     }
01297     return PreStart;
01298   }
01299 
01300   // 3. Loop precondition.
01301   ICmpInst::Predicate Pred;
01302   const SCEV *OverflowLimit =
01303       ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(Step, &Pred, SE);
01304 
01305   if (OverflowLimit &&
01306       SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit))
01307     return PreStart;
01308 
01309   return nullptr;
01310 }
01311 
01312 // Get the normalized zero or sign extended expression for this AddRec's Start.
01313 template <typename ExtendOpTy>
01314 static const SCEV *getExtendAddRecStart(const SCEVAddRecExpr *AR, Type *Ty,
01315                                         ScalarEvolution *SE) {
01316   auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;
01317 
01318   const SCEV *PreStart = getPreStartForExtend<ExtendOpTy>(AR, Ty, SE);
01319   if (!PreStart)
01320     return (SE->*GetExtendExpr)(AR->getStart(), Ty);
01321 
01322   return SE->getAddExpr((SE->*GetExtendExpr)(AR->getStepRecurrence(*SE), Ty),
01323                         (SE->*GetExtendExpr)(PreStart, Ty));
01324 }
01325 
01326 // Try to prove away overflow by looking at "nearby" add recurrences.  A
01327 // motivating example for this rule: if we know `{0,+,4}` is `ult` `-1` and it
01328 // does not itself wrap then we can conclude that `{1,+,4}` is `nuw`.
01329 //
01330 // Formally:
01331 //
01332 //     {S,+,X} == {S-T,+,X} + T
01333 //  => Ext({S,+,X}) == Ext({S-T,+,X} + T)
01334 //
01335 // If ({S-T,+,X} + T) does not overflow  ... (1)
01336 //
01337 //  RHS == Ext({S-T,+,X} + T) == Ext({S-T,+,X}) + Ext(T)
01338 //
01339 // If {S-T,+,X} does not overflow  ... (2)
01340 //
01341 //  RHS == Ext({S-T,+,X}) + Ext(T) == {Ext(S-T),+,Ext(X)} + Ext(T)
01342 //      == {Ext(S-T)+Ext(T),+,Ext(X)}
01343 //
01344 // If (S-T)+T does not overflow  ... (3)
01345 //
01346 //  RHS == {Ext(S-T)+Ext(T),+,Ext(X)} == {Ext(S-T+T),+,Ext(X)}
01347 //      == {Ext(S),+,Ext(X)} == LHS
01348 //
01349 // Thus, if (1), (2) and (3) are true for some T, then
01350 //   Ext({S,+,X}) == {Ext(S),+,Ext(X)}
01351 //
01352 // (3) is implied by (1) -- "(S-T)+T does not overflow" is simply "({S-T,+,X}+T)
01353 // does not overflow" restricted to the 0th iteration.  Therefore we only need
01354 // to check for (1) and (2).
01355 //
01356 // In the current context, S is `Start`, X is `Step`, Ext is `ExtendOpTy` and T
01357 // is `Delta` (defined below).
01358 //
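      // As a concrete instance of the derivation above (for illustration): with
      // S = 1, X = 4 and T = 1, conditions (1) and (2) ask that {0,+,4} + 1 and
      // {0,+,4} do not overflow; when both hold, Ext({1,+,4}) == {Ext(1),+,Ext(4)}.
      //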
01359 template <typename ExtendOpTy>
01360 bool ScalarEvolution::proveNoWrapByVaryingStart(const SCEV *Start,
01361                                                 const SCEV *Step,
01362                                                 const Loop *L) {
01363   auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType;
01364 
01365   // We restrict `Start` to a constant to prevent SCEV from spending too much
01366   // time here.  It is correct (but more expensive) to continue with a
01367   // non-constant `Start` and do a general SCEV subtraction to compute
01368   // `PreStart` below.
01369   //
01370   const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start);
01371   if (!StartC)
01372     return false;
01373 
01374   APInt StartAI = StartC->getAPInt();
01375 
01376   for (unsigned Delta : {-2, -1, 1, 2}) {
01377     const SCEV *PreStart = getConstant(StartAI - Delta);
01378 
01379     FoldingSetNodeID ID;
01380     ID.AddInteger(scAddRecExpr);
01381     ID.AddPointer(PreStart);
01382     ID.AddPointer(Step);
01383     ID.AddPointer(L);
01384     void *IP = nullptr;
01385     const auto *PreAR =
01386       static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
01387 
01388     // Give up if we don't already have the add recurrence we need because
01389     // actually constructing an add recurrence is relatively expensive.
01390     if (PreAR && PreAR->getNoWrapFlags(WrapType)) {  // proves (2)
01391       const SCEV *DeltaS = getConstant(StartC->getType(), Delta);
01392       ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
01393       const SCEV *Limit = ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(
01394           DeltaS, &Pred, this);
01395       if (Limit && isKnownPredicate(Pred, PreAR, Limit))  // proves (1)
01396         return true;
01397     }
01398   }
01399 
01400   return false;
01401 }
01402 
01403 const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
01404                                                Type *Ty) {
01405   assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
01406          "This is not an extending conversion!");
01407   assert(isSCEVable(Ty) &&
01408          "This is not a conversion to a SCEVable type!");
01409   Ty = getEffectiveSCEVType(Ty);
01410 
01411   // Fold if the operand is constant.
01412   if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
01413     return getConstant(
01414       cast<ConstantInt>(ConstantExpr::getZExt(SC->getValue(), Ty)));
01415 
01416   // zext(zext(x)) --> zext(x)
01417   if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
01418     return getZeroExtendExpr(SZ->getOperand(), Ty);
01419 
01420   // Before doing any expensive analysis, check to see if we've already
01421   // computed a SCEV for this Op and Ty.
01422   FoldingSetNodeID ID;
01423   ID.AddInteger(scZeroExtend);
01424   ID.AddPointer(Op);
01425   ID.AddPointer(Ty);
01426   void *IP = nullptr;
01427   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
01428 
01429   // zext(trunc(x)) --> zext(x) or x or trunc(x)
01430   if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
01431     // It's possible the bits taken off by the truncate were all zero bits. If
01432     // so, we should be able to simplify this further.
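          // For example (illustrative): if x is an i32 whose unsigned range is known
          // to be [0, 200), truncating to i8 drops only zero bits, so
          // zext(trunc(x to i8) to i64) simplifies to zext(x to i64).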
01433     const SCEV *X = ST->getOperand();
01434     ConstantRange CR = getUnsignedRange(X);
01435     unsigned TruncBits = getTypeSizeInBits(ST->getType());
01436     unsigned NewBits = getTypeSizeInBits(Ty);
01437     if (CR.truncate(TruncBits).zeroExtend(NewBits).contains(
01438             CR.zextOrTrunc(NewBits)))
01439       return getTruncateOrZeroExtend(X, Ty);
01440   }
01441 
01442   // If the input value is a chrec scev, and we can prove that the value
01443   // did not overflow the old, smaller, value, we can zero extend all of the
01444   // operands (often constants).  This allows analysis of something like
01445   // this:  for (unsigned char X = 0; X < 100; ++X) { int Y = X; }
01446   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
01447     if (AR->isAffine()) {
01448       const SCEV *Start = AR->getStart();
01449       const SCEV *Step = AR->getStepRecurrence(*this);
01450       unsigned BitWidth = getTypeSizeInBits(AR->getType());
01451       const Loop *L = AR->getLoop();
01452 
01453       // If we have special knowledge that this addrec won't overflow,
01454       // we don't need to do any further analysis.
01455       if (AR->getNoWrapFlags(SCEV::FlagNUW))
01456         return getAddRecExpr(
01457             getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
01458             getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
01459 
01460       // Check whether the backedge-taken count is SCEVCouldNotCompute.
01461       // Note that this serves two purposes: It filters out loops that are
01462       // simply not analyzable, and it covers the case where this code is
01463       // being called from within backedge-taken count analysis, such that
01464       // attempting to ask for the backedge-taken count would likely result
01465       // in infinite recursion. In the latter case, the analysis code will
01466       // cope with a conservative value, and it will take care to purge
01467       // that value once it has finished.
01468       const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
01469       if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
01470         // Manually compute the final value for AR, checking for
01471         // overflow.
01472 
01473         // Check whether the backedge-taken count can be losslessly cast to
01474         // the addrec's type. The count is always unsigned.
01475         const SCEV *CastedMaxBECount =
01476           getTruncateOrZeroExtend(MaxBECount, Start->getType());
01477         const SCEV *RecastedMaxBECount =
01478           getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
01479         if (MaxBECount == RecastedMaxBECount) {
01480           Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
01481           // Check whether Start+Step*MaxBECount has no unsigned overflow.
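                // For example (illustrative): for an i8 addrec {0,+,1} with
                // MaxBECount == 100, the narrow sum 0 + 1*100 == 100 zero-extends to
                // the same value as the sum computed in i16, so NUW can be concluded;
                // with step 3 the narrow sum wraps to 44 while the wide sum is 300,
                // and this check proves nothing.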
01482           const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step);
01483           const SCEV *ZAdd = getZeroExtendExpr(getAddExpr(Start, ZMul), WideTy);
01484           const SCEV *WideStart = getZeroExtendExpr(Start, WideTy);
01485           const SCEV *WideMaxBECount =
01486             getZeroExtendExpr(CastedMaxBECount, WideTy);
01487           const SCEV *OperandExtendedAdd =
01488             getAddExpr(WideStart,
01489                        getMulExpr(WideMaxBECount,
01490                                   getZeroExtendExpr(Step, WideTy)));
01491           if (ZAdd == OperandExtendedAdd) {
01492             // Cache knowledge of AR NUW, which is propagated to this AddRec.
01493             const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
01494             // Return the expression with the addrec on the outside.
01495             return getAddRecExpr(
01496                 getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
01497                 getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
01498           }
01499           // Similar to above, only this time treat the step value as signed.
01500           // This covers loops that count down.
01501           OperandExtendedAdd =
01502             getAddExpr(WideStart,
01503                        getMulExpr(WideMaxBECount,
01504                                   getSignExtendExpr(Step, WideTy)));
01505           if (ZAdd == OperandExtendedAdd) {
01506             // Cache knowledge of AR NW, which is propagated to this AddRec.
01507             // Negative step causes unsigned wrap, but it still can't self-wrap.
01508             const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
01509             // Return the expression with the addrec on the outside.
01510             return getAddRecExpr(
01511                 getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
01512                 getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
01513           }
01514         }
01515 
01516         // If the backedge is guarded by a comparison with the pre-inc value
01517         // the addrec is safe. Also, if the entry is guarded by a comparison
01518         // with the start value and the backedge is guarded by a comparison
01519         // with the post-inc value, the addrec is safe.
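              // For example (illustrative): with Step == 1, N is UINT_MAX, so a
              // backedge guard of the form AR u< UINT_MAX means the increment can
              // never wrap and the addrec is NUW.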
01520         if (isKnownPositive(Step)) {
01521           const SCEV *N = getConstant(APInt::getMinValue(BitWidth) -
01522                                       getUnsignedRange(Step).getUnsignedMax());
01523           if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) ||
01524               (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_ULT, Start, N) &&
01525                isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT,
01526                                            AR->getPostIncExpr(*this), N))) {
01527             // Cache knowledge of AR NUW, which is propagated to this AddRec.
01528             const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
01529             // Return the expression with the addrec on the outside.
01530             return getAddRecExpr(
01531                 getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
01532                 getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
01533           }
01534         } else if (isKnownNegative(Step)) {
01535           const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) -
01536                                       getSignedRange(Step).getSignedMin());
01537           if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) ||
01538               (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_UGT, Start, N) &&
01539                isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT,
01540                                            AR->getPostIncExpr(*this), N))) {
01541             // Cache knowledge of AR NW, which is propagated to this AddRec.
01542             // Negative step causes unsigned wrap, but it still can't self-wrap.
01543             const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
01544             // Return the expression with the addrec on the outside.
01545             return getAddRecExpr(
01546                 getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
01547                 getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
01548           }
01549         }
01550       }
01551 
01552       if (proveNoWrapByVaryingStart<SCEVZeroExtendExpr>(Start, Step, L)) {
01553         const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
01554         return getAddRecExpr(
01555             getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
01556             getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
01557       }
01558     }
01559 
01560   if (auto *SA = dyn_cast<SCEVAddExpr>(Op)) {
01561     // zext((A + B + ...)<nuw>) --> (zext(A) + zext(B) + ...)<nuw>
01562     if (SA->getNoWrapFlags(SCEV::FlagNUW)) {
01563       // If the addition does not unsign overflow then we can, by definition,
01564       // commute the zero extension with the addition operation.
01565       SmallVector<const SCEV *, 4> Ops;
01566       for (const auto *Op : SA->operands())
01567         Ops.push_back(getZeroExtendExpr(Op, Ty));
01568       return getAddExpr(Ops, SCEV::FlagNUW);
01569     }
01570   }
01571 
01572   // The cast wasn't folded; create an explicit cast node.
01573   // Recompute the insert position, as it may have been invalidated.
01574   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
01575   SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator),
01576                                                    Op, Ty);
01577   UniqueSCEVs.InsertNode(S, IP);
01578   return S;
01579 }
01580 
01581 const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
01582                                                Type *Ty) {
01583   assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
01584          "This is not an extending conversion!");
01585   assert(isSCEVable(Ty) &&
01586          "This is not a conversion to a SCEVable type!");
01587   Ty = getEffectiveSCEVType(Ty);
01588 
01589   // Fold if the operand is constant.
01590   if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
01591     return getConstant(
01592       cast<ConstantInt>(ConstantExpr::getSExt(SC->getValue(), Ty)));
01593 
01594   // sext(sext(x)) --> sext(x)
01595   if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
01596     return getSignExtendExpr(SS->getOperand(), Ty);
01597 
01598   // sext(zext(x)) --> zext(x)
01599   if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
01600     return getZeroExtendExpr(SZ->getOperand(), Ty);
01601 
01602   // Before doing any expensive analysis, check to see if we've already
01603   // computed a SCEV for this Op and Ty.
01604   FoldingSetNodeID ID;
01605   ID.AddInteger(scSignExtend);
01606   ID.AddPointer(Op);
01607   ID.AddPointer(Ty);
01608   void *IP = nullptr;
01609   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
01610 
01611   // If the input value is provably positive, build a zext instead.
01612   if (isKnownNonNegative(Op))
01613     return getZeroExtendExpr(Op, Ty);
01614 
01615   // sext(trunc(x)) --> sext(x) or x or trunc(x)
01616   if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
01617     // It's possible the bits taken off by the truncate were all sign bits. If
01618     // so, we should be able to simplify this further.
01619     const SCEV *X = ST->getOperand();
01620     ConstantRange CR = getSignedRange(X);
01621     unsigned TruncBits = getTypeSizeInBits(ST->getType());
01622     unsigned NewBits = getTypeSizeInBits(Ty);
01623     if (CR.truncate(TruncBits).signExtend(NewBits).contains(
01624             CR.sextOrTrunc(NewBits)))
01625       return getTruncateOrSignExtend(X, Ty);
01626   }
01627 
01628   // sext(C1 + (C2 * x)) --> C1 + sext(C2 * x) if C1 < C2
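        // For example (illustrative): with C1 == 1 and C2 == 4, the low two bits of
        // 4*x are zero, so adding 1 cannot carry into the sign bit and
        // sext(1 + 4*x) == 1 + sext(4*x).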
01629   if (auto *SA = dyn_cast<SCEVAddExpr>(Op)) {
01630     if (SA->getNumOperands() == 2) {
01631       auto *SC1 = dyn_cast<SCEVConstant>(SA->getOperand(0));
01632       auto *SMul = dyn_cast<SCEVMulExpr>(SA->getOperand(1));
01633       if (SMul && SC1) {
01634         if (auto *SC2 = dyn_cast<SCEVConstant>(SMul->getOperand(0))) {
01635           const APInt &C1 = SC1->getAPInt();
01636           const APInt &C2 = SC2->getAPInt();
01637           if (C1.isStrictlyPositive() && C2.isStrictlyPositive() &&
01638               C2.ugt(C1) && C2.isPowerOf2())
01639             return getAddExpr(getSignExtendExpr(SC1, Ty),
01640                               getSignExtendExpr(SMul, Ty));
01641         }
01642       }
01643     }
01644 
01645     // sext((A + B + ...)<nsw>) --> (sext(A) + sext(B) + ...)<nsw>
01646     if (SA->getNoWrapFlags(SCEV::FlagNSW)) {
01647       // If the addition does not sign overflow then we can, by definition,
01648       // commute the sign extension with the addition operation.
01649       SmallVector<const SCEV *, 4> Ops;
01650       for (const auto *Op : SA->operands())
01651         Ops.push_back(getSignExtendExpr(Op, Ty));
01652       return getAddExpr(Ops, SCEV::FlagNSW);
01653     }
01654   }
01655   // If the input value is a chrec scev, and we can prove that the value
01656   // did not overflow the old, smaller, value, we can sign extend all of the
01657   // operands (often constants).  This allows analysis of something like
01658   // this:  for (signed char X = 0; X < 100; ++X) { int Y = X; }
01659   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
01660     if (AR->isAffine()) {
01661       const SCEV *Start = AR->getStart();
01662       const SCEV *Step = AR->getStepRecurrence(*this);
01663       unsigned BitWidth = getTypeSizeInBits(AR->getType());
01664       const Loop *L = AR->getLoop();
01665 
01666       // If we have special knowledge that this addrec won't overflow,
01667       // we don't need to do any further analysis.
01668       if (AR->getNoWrapFlags(SCEV::FlagNSW))
01669         return getAddRecExpr(
01670             getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
01671             getSignExtendExpr(Step, Ty), L, SCEV::FlagNSW);
01672 
01673       // Check whether the backedge-taken count is SCEVCouldNotCompute.
01674       // Note that this serves two purposes: It filters out loops that are
01675       // simply not analyzable, and it covers the case where this code is
01676       // being called from within backedge-taken count analysis, such that
01677       // attempting to ask for the backedge-taken count would likely result
01678       // in infinite recursion. In the latter case, the analysis code will
01679       // cope with a conservative value, and it will take care to purge
01680       // that value once it has finished.
01681       const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
01682       if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
01683         // Manually compute the final value for AR, checking for
01684         // overflow.
01685 
01686         // Check whether the backedge-taken count can be losslessly cast to
01687         // the addrec's type. The count is always unsigned.
01688         const SCEV *CastedMaxBECount =
01689           getTruncateOrZeroExtend(MaxBECount, Start->getType());
01690         const SCEV *RecastedMaxBECount =
01691           getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
01692         if (MaxBECount == RecastedMaxBECount) {
01693           Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
01694           // Check whether Start+Step*MaxBECount has no signed overflow.
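                // For example (illustrative): for an i8 addrec {0,+,-1} with
                // MaxBECount == 100, the narrow sum is -100, which sign-extends to
                // the same value as the sum computed in i16, so NSW can be concluded.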
01695           const SCEV *SMul = getMulExpr(CastedMaxBECount, Step);
01696           const SCEV *SAdd = getSignExtendExpr(getAddExpr(Start, SMul), WideTy);
01697           const SCEV *WideStart = getSignExtendExpr(Start, WideTy);
01698           const SCEV *WideMaxBECount =
01699             getZeroExtendExpr(CastedMaxBECount, WideTy);
01700           const SCEV *OperandExtendedAdd =
01701             getAddExpr(WideStart,
01702                        getMulExpr(WideMaxBECount,
01703                                   getSignExtendExpr(Step, WideTy)));
01704           if (SAdd == OperandExtendedAdd) {
01705             // Cache knowledge of AR NSW, which is propagated to this AddRec.
01706             const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
01707             // Return the expression with the addrec on the outside.
01708             return getAddRecExpr(
01709                 getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
01710                 getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
01711           }
01712           // Similar to above, only this time treat the step value as unsigned.
01713           // This covers loops that count up with an unsigned step.
01714           OperandExtendedAdd =
01715             getAddExpr(WideStart,
01716                        getMulExpr(WideMaxBECount,
01717                                   getZeroExtendExpr(Step, WideTy)));
01718           if (SAdd == OperandExtendedAdd) {
01719             // If AR wraps around then
01720             //
01721             //    abs(Step) * MaxBECount > unsigned-max(AR->getType())
01722             // => SAdd != OperandExtendedAdd
01723             //
01724             // Thus (AR is not NW => SAdd != OperandExtendedAdd) <=>
01725             // (SAdd == OperandExtendedAdd => AR is NW)
01726 
01727             const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
01728 
01729             // Return the expression with the addrec on the outside.
01730             return getAddRecExpr(
01731                 getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
01732                 getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
01733           }
01734         }
01735 
01736         // If the backedge is guarded by a comparison with the pre-inc value
01737         // the addrec is safe. Also, if the entry is guarded by a comparison
01738         // with the start value and the backedge is guarded by a comparison
01739         // with the post-inc value, the addrec is safe.
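              // For example (illustrative): with Step == 1 the overflow limit works
              // out to SINT_MAX, so a backedge guard of the form AR s< SINT_MAX
              // means the increment can never sign-overflow.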
01740         ICmpInst::Predicate Pred;
01741         const SCEV *OverflowLimit =
01742             getSignedOverflowLimitForStep(Step, &Pred, this);
01743         if (OverflowLimit &&
01744             (isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) ||
01745              (isLoopEntryGuardedByCond(L, Pred, Start, OverflowLimit) &&
01746               isLoopBackedgeGuardedByCond(L, Pred, AR->getPostIncExpr(*this),
01747                                           OverflowLimit)))) {
01748           // Cache knowledge of AR NSW, then propagate NSW to the wide AddRec.
01749           const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
01750           return getAddRecExpr(
01751               getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
01752               getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
01753         }
01754       }
01755       // If Start and Step are constants, check if we can apply this
01756       // transformation:
01757       // sext{C1,+,C2} --> C1 + sext{0,+,C2} if C1 < C2
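            // For example (illustrative): sext{1,+,4} becomes sext(1) + sext{0,+,4},
            // exposing the zero-based recurrence to further folding.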
01758       auto *SC1 = dyn_cast<SCEVConstant>(Start);
01759       auto *SC2 = dyn_cast<SCEVConstant>(Step);
01760       if (SC1 && SC2) {
01761         const APInt &C1 = SC1->getAPInt();
01762         const APInt &C2 = SC2->getAPInt();
01763         if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && C2.ugt(C1) &&
01764             C2.isPowerOf2()) {
01765           Start = getSignExtendExpr(Start, Ty);
01766           const SCEV *NewAR = getAddRecExpr(getZero(AR->getType()), Step, L,
01767                                             AR->getNoWrapFlags());
01768           return getAddExpr(Start, getSignExtendExpr(NewAR, Ty));
01769         }
01770       }
01771 
01772       if (proveNoWrapByVaryingStart<SCEVSignExtendExpr>(Start, Step, L)) {
01773         const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
01774         return getAddRecExpr(
01775             getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
01776             getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
01777       }
01778     }
01779 
01780   // The cast wasn't folded; create an explicit cast node.
01781   // Recompute the insert position, as it may have been invalidated.
01782   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
01783   SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator),
01784                                                    Op, Ty);
01785   UniqueSCEVs.InsertNode(S, IP);
01786   return S;
01787 }
01788 
01789 /// getAnyExtendExpr - Return a SCEV for the given operand extended with
01790 /// unspecified bits out to the given type.
01791 ///
01792 const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
01793                                               Type *Ty) {
01794   assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
01795          "This is not an extending conversion!");
01796   assert(isSCEVable(Ty) &&
01797          "This is not a conversion to a SCEVable type!");
01798   Ty = getEffectiveSCEVType(Ty);
01799 
01800   // Sign-extend negative constants.
01801   if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
01802     if (SC->getAPInt().isNegative())
01803       return getSignExtendExpr(Op, Ty);
01804 
01805   // Peel off a truncate cast.
01806   if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Op)) {
01807     const SCEV *NewOp = T->getOperand();
01808     if (getTypeSizeInBits(NewOp->getType()) < getTypeSizeInBits(Ty))
01809       return getAnyExtendExpr(NewOp, Ty);
01810     return getTruncateOrNoop(NewOp, Ty);
01811   }
01812 
01813   // Next try a zext cast. If the cast is folded, use it.
01814   const SCEV *ZExt = getZeroExtendExpr(Op, Ty);
01815   if (!isa<SCEVZeroExtendExpr>(ZExt))
01816     return ZExt;
01817 
01818   // Next try a sext cast. If the cast is folded, use it.
01819   const SCEV *SExt = getSignExtendExpr(Op, Ty);
01820   if (!isa<SCEVSignExtendExpr>(SExt))
01821     return SExt;
01822 
01823   // Force the cast to be folded into the operands of an addrec.
01824   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) {
01825     SmallVector<const SCEV *, 4> Ops;
01826     for (const SCEV *Op : AR->operands())
01827       Ops.push_back(getAnyExtendExpr(Op, Ty));
01828     return getAddRecExpr(Ops, AR->getLoop(), SCEV::FlagNW);
01829   }
01830 
01831   // If the expression is obviously signed, use the sext cast value.
01832   if (isa<SCEVSMaxExpr>(Op))
01833     return SExt;
01834 
01835   // Absent any other information, use the zext cast value.
01836   return ZExt;
01837 }
01838 
01839 /// CollectAddOperandsWithScales - Process the given Ops list, which is
01840 /// a list of operands to be added under the given scale, and update the given
01841 /// map. This is a helper function for getAddExpr. As an example of
01842 /// what it does, given a sequence of operands that would form an add
01843 /// expression like this:
01844 ///
01845 ///    m + n + 13 + (A * (o + p + (B * (q + m + 29)))) + r + (-1 * r)
01846 ///
01847 /// where A and B are constants, update the map with these values:
01848 ///
01849 ///    (m, 1+A*B), (n, 1), (o, A), (p, A), (q, A*B), (r, 0)
01850 ///
01851 /// and add 13 + A*B*29 to AccumulatedConstant.
01852 /// This will allow getAddExpr to produce this:
01853 ///
01854 ///    13+A*B*29 + n + (m * (1+A*B)) + ((o + p) * A) + (q * A*B)
01855 ///
01856 /// This form often exposes folding opportunities that are hidden in
01857 /// the original operand list.
01858 ///
01859 /// Return true iff it appears that any interesting folding opportunities
01860 /// may be exposed. This helps getAddExpr short-circuit extra work in
01861 /// the common case where no interesting opportunities are present, and
01862 /// is also used as a check to avoid infinite recursion.
01863 ///
01864 static bool
01865 CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,
01866                              SmallVectorImpl<const SCEV *> &NewOps,
01867                              APInt &AccumulatedConstant,
01868                              const SCEV *const *Ops, size_t NumOperands,
01869                              const APInt &Scale,
01870                              ScalarEvolution &SE) {
01871   bool Interesting = false;
01872 
01873   // Iterate over the add operands. They are sorted, with constants first.
01874   unsigned i = 0;
01875   while (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
01876     ++i;
01877     // Pull a buried constant out to the outside.
01878     if (Scale != 1 || AccumulatedConstant != 0 || C->getValue()->isZero())
01879       Interesting = true;
01880     AccumulatedConstant += Scale * C->getAPInt();
01881   }
01882 
01883   // Next comes everything else. We're especially interested in multiplies
01884   // here, but they're in the middle, so just visit the rest with one loop.
01885   for (; i != NumOperands; ++i) {
01886     const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[i]);
01887     if (Mul && isa<SCEVConstant>(Mul->getOperand(0))) {
01888       APInt NewScale =
01889           Scale * cast<SCEVConstant>(Mul->getOperand(0))->getAPInt();
01890       if (Mul->getNumOperands() == 2 && isa<SCEVAddExpr>(Mul->getOperand(1))) {
01891         // A multiplication of a constant with another add; recurse.
01892         const SCEVAddExpr *Add = cast<SCEVAddExpr>(Mul->getOperand(1));
01893         Interesting |=
01894           CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
01895                                        Add->op_begin(), Add->getNumOperands(),
01896                                        NewScale, SE);
01897       } else {
01898         // A multiplication of a constant with some other value. Update
01899         // the map.
01900         SmallVector<const SCEV *, 4> MulOps(Mul->op_begin()+1, Mul->op_end());
01901         const SCEV *Key = SE.getMulExpr(MulOps);
01902         auto Pair = M.insert(std::make_pair(Key, NewScale));
01903         if (Pair.second) {
01904           NewOps.push_back(Pair.first->first);
01905         } else {
01906           Pair.first->second += NewScale;
01907           // The map already had an entry for this value, which may indicate
01908           // a folding opportunity.
01909           Interesting = true;
01910         }
01911       }
01912     } else {
01913       // An ordinary operand. Update the map.
01914       std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair =
01915         M.insert(std::make_pair(Ops[i], Scale));
01916       if (Pair.second) {
01917         NewOps.push_back(Pair.first->first);
01918       } else {
01919         Pair.first->second += Scale;
01920         // The map already had an entry for this value, which may indicate
01921         // a folding opportunity.
01922         Interesting = true;
01923       }
01924     }
01925   }
01926 
01927   return Interesting;
01928 }
01929 
01930 // We're trying to construct a SCEV of type `Type' with `Ops' as operands and
01931 // `OldFlags' as can't-wrap behavior.  Infer a more aggressive set of
01932 // can't-overflow flags for the operation if possible.
01933 static SCEV::NoWrapFlags
01934 StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type,
01935                       const SmallVectorImpl<const SCEV *> &Ops,
01936                       SCEV::NoWrapFlags Flags) {
01937   using namespace std::placeholders;
01938   typedef OverflowingBinaryOperator OBO;
01939 
01940   bool CanAnalyze =
01941       Type == scAddExpr || Type == scAddRecExpr || Type == scMulExpr;
01942   (void)CanAnalyze;
01943   assert(CanAnalyze && "don't call from other places!");
01944 
01945   int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
01946   SCEV::NoWrapFlags SignOrUnsignWrap =
01947       ScalarEvolution::maskFlags(Flags, SignOrUnsignMask);
01948 
01949   // If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
01950   auto IsKnownNonNegative = [&](const SCEV *S) {
01951     return SE->isKnownNonNegative(S);
01952   };
01953 
01954   if (SignOrUnsignWrap == SCEV::FlagNSW && all_of(Ops, IsKnownNonNegative))
01955     Flags =
01956         ScalarEvolution::setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask);
01957 
01958   SignOrUnsignWrap = ScalarEvolution::maskFlags(Flags, SignOrUnsignMask);
01959 
01960   if (SignOrUnsignWrap != SignOrUnsignMask && Type == scAddExpr &&
01961       Ops.size() == 2 && isa<SCEVConstant>(Ops[0])) {
01962 
01963     // (A + C) --> (A + C)<nsw> if the addition does not sign overflow
01964     // (A + C) --> (A + C)<nuw> if the addition does not unsign overflow
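          // For example (illustrative): with C == 1 the no-signed-wrap region is
          // every value except SINT_MAX, so if the signed range of the other operand
          // is known to exclude SINT_MAX, the add can be marked <nsw>.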
01965 
01966     const APInt &C = cast<SCEVConstant>(Ops[0])->getAPInt();
01967     if (!(SignOrUnsignWrap & SCEV::FlagNSW)) {
01968       auto NSWRegion =
01969         ConstantRange::makeNoWrapRegion(Instruction::Add, C, OBO::NoSignedWrap);
01970       if (NSWRegion.contains(SE->getSignedRange(Ops[1])))
01971         Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW);
01972     }
01973     if (!(SignOrUnsignWrap & SCEV::FlagNUW)) {
01974       auto NUWRegion =
01975         ConstantRange::makeNoWrapRegion(Instruction::Add, C,
01976                                         OBO::NoUnsignedWrap);
01977       if (NUWRegion.contains(SE->getUnsignedRange(Ops[1])))
01978         Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW);
01979     }
01980   }
01981 
01982   return Flags;
01983 }
01984 
01985 /// getAddExpr - Get a canonical add expression, or something simpler if
01986 /// possible.
01987 const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
01988                                         SCEV::NoWrapFlags Flags) {
01989   assert(!(Flags & ~(SCEV::FlagNUW | SCEV::FlagNSW)) &&
01990          "only nuw or nsw allowed");
01991   assert(!Ops.empty() && "Cannot get empty add!");
01992   if (Ops.size() == 1) return Ops[0];
01993 #ifndef NDEBUG
01994   Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
01995   for (unsigned i = 1, e = Ops.size(); i != e; ++i)
01996     assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
01997            "SCEVAddExpr operand types don't match!");
01998 #endif
01999 
02000   // Sort by complexity; this groups all similar expression types together.
02001   GroupByComplexity(Ops, &LI);
02002 
02003   Flags = StrengthenNoWrapFlags(this, scAddExpr, Ops, Flags);
02004 
02005   // If there are any constants, fold them together.
02006   unsigned Idx = 0;
02007   if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
02008     ++Idx;
02009     assert(Idx < Ops.size());
02010     while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
02011       // We found two constants, fold them together!
02012       Ops[0] = getConstant(LHSC->getAPInt() + RHSC->getAPInt());
02013       if (Ops.size() == 2) return Ops[0];
02014       Ops.erase(Ops.begin()+1);  // Erase the folded element
02015       LHSC = cast<SCEVConstant>(Ops[0]);
02016     }
02017 
02018     // If we are left with a constant zero being added, strip it off.
02019     if (LHSC->getValue()->isZero()) {
02020       Ops.erase(Ops.begin());
02021       --Idx;
02022     }
02023 
02024     if (Ops.size() == 1) return Ops[0];
02025   }
02026 
02027   // Okay, check to see if the same value occurs in the operand list more than
02028   // once.  If so, merge them together into a multiply expression.  Since we
02029   // sorted the list, these values are required to be adjacent.
02030   Type *Ty = Ops[0]->getType();
02031   bool FoundMatch = false;
02032   for (unsigned i = 0, e = Ops.size(); i != e-1; ++i)
02033     if (Ops[i] == Ops[i+1]) {      //  X + Y + Y  -->  X + Y*2
02034       // Scan ahead to count how many equal operands there are.
02035       unsigned Count = 2;
02036       while (i+Count != e && Ops[i+Count] == Ops[i])
02037         ++Count;
02038       // Merge the values into a multiply.
02039       const SCEV *Scale = getConstant(Ty, Count);
02040       const SCEV *Mul = getMulExpr(Scale, Ops[i]);
02041       if (Ops.size() == Count)
02042         return Mul;
02043       Ops[i] = Mul;
02044       Ops.erase(Ops.begin()+i+1, Ops.begin()+i+Count);
02045       --i; e -= Count - 1;
02046       FoundMatch = true;
02047     }
02048   if (FoundMatch)
02049     return getAddExpr(Ops, Flags);
02050 
02051   // Check for truncates. If all the operands are truncated from the same
02052   // type, see if factoring out the truncate would permit the result to be
02053   // folded. e.g., trunc(x) + m*trunc(n) --> trunc(x + trunc(m)*n)
02054   // if the contents of the resulting outer trunc fold to something simple.
02055   for (; Idx < Ops.size() && isa<SCEVTruncateExpr>(Ops[Idx]); ++Idx) {
02056     const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(Ops[Idx]);
02057     Type *DstType = Trunc->getType();
02058     Type *SrcType = Trunc->getOperand()->getType();
02059     SmallVector<const SCEV *, 8> LargeOps;
02060     bool Ok = true;
02061     // Check all the operands to see if they can be represented in the
02062     // source type of the truncate.
02063     for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
02064       if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Ops[i])) {
02065         if (T->getOperand()->getType() != SrcType) {
02066           Ok = false;
02067           break;
02068         }
02069         LargeOps.push_back(T->getOperand());
02070       } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
02071         LargeOps.push_back(getAnyExtendExpr(C, SrcType));
02072       } else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Ops[i])) {
02073         SmallVector<const SCEV *, 8> LargeMulOps;
02074         for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) {
02075           if (const SCEVTruncateExpr *T =
02076                 dyn_cast<SCEVTruncateExpr>(M->getOperand(j))) {
02077             if (T->getOperand()->getType() != SrcType) {
02078               Ok = false;
02079               break;
02080             }
02081             LargeMulOps.push_back(T->getOperand());
02082           } else if (const auto *C = dyn_cast<SCEVConstant>(M->getOperand(j))) {
02083             LargeMulOps.push_back(getAnyExtendExpr(C, SrcType));
02084           } else {
02085             Ok = false;
02086             break;
02087           }
02088         }
02089         if (Ok)
02090           LargeOps.push_back(getMulExpr(LargeMulOps));
02091       } else {
02092         Ok = false;
02093         break;
02094       }
02095     }
02096     if (Ok) {
02097       // Evaluate the expression in the larger type.
02098       const SCEV *Fold = getAddExpr(LargeOps, Flags);
02099       // If it folds to something simple, use it. Otherwise, don't.
02100       if (isa<SCEVConstant>(Fold) || isa<SCEVUnknown>(Fold))
02101         return getTruncateExpr(Fold, DstType);
02102     }
02103   }
02104 
02105   // Skip past any other cast SCEVs.
02106   while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddExpr)
02107     ++Idx;
02108 
02109   // If there are add operands they would be next.
02110   if (Idx < Ops.size()) {
02111     bool DeletedAdd = false;
02112     while (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[Idx])) {
02113       // If we have an add, expand the add operands onto the end of the operands
02114       // list.
02115       Ops.erase(Ops.begin()+Idx);
02116       Ops.append(Add->op_begin(), Add->op_end());
02117       DeletedAdd = true;
02118     }
02119 
02120     // If we deleted at least one add, we added operands to the end of the list,
02121     // and they are not necessarily sorted.  Recurse to resort and resimplify
02122     // any operands we just acquired.
02123     if (DeletedAdd)
02124       return getAddExpr(Ops);
02125   }
02126 
02127   // Skip over the add expression until we get to a multiply.
02128   while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
02129     ++Idx;
02130 
02131   // Check to see if there are any folding opportunities present with
02132   // operands multiplied by constant values.
02133   if (Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx])) {
02134     uint64_t BitWidth = getTypeSizeInBits(Ty);
02135     DenseMap<const SCEV *, APInt> M;
02136     SmallVector<const SCEV *, 8> NewOps;
02137     APInt AccumulatedConstant(BitWidth, 0);
02138     if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
02139                                      Ops.data(), Ops.size(),
02140                                      APInt(BitWidth, 1), *this)) {
02141       struct APIntCompare {
02142         bool operator()(const APInt &LHS, const APInt &RHS) const {
02143           return LHS.ult(RHS);
02144         }
02145       };
02146 
02147       // Some interesting folding opportunity is present, so it's worthwhile to
02148       // re-generate the operands list. Group the operands by constant scale,
02149       // to avoid multiplying by the same constant scale multiple times.
02150       std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare> MulOpLists;
02151       for (const SCEV *NewOp : NewOps)
02152         MulOpLists[M.find(NewOp)->second].push_back(NewOp);
02153       // Re-generate the operands list.
02154       Ops.clear();
02155       if (AccumulatedConstant != 0)
02156         Ops.push_back(getConstant(AccumulatedConstant));
02157       for (auto &MulOp : MulOpLists)
02158         if (MulOp.first != 0)
02159           Ops.push_back(getMulExpr(getConstant(MulOp.first),
02160                                    getAddExpr(MulOp.second)));
02161       if (Ops.empty())
02162         return getZero(Ty);
02163       if (Ops.size() == 1)
02164         return Ops[0];
02165       return getAddExpr(Ops);
02166     }
02167   }
02168 
02169   // If we are adding something to a multiply expression, make sure the
02170   // something is not already an operand of the multiply.  If so, merge it into
02171   // the multiply.
02172   for (; Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx]); ++Idx) {
02173     const SCEVMulExpr *Mul = cast<SCEVMulExpr>(Ops[Idx]);
02174     for (unsigned MulOp = 0, e = Mul->getNumOperands(); MulOp != e; ++MulOp) {
02175       const SCEV *MulOpSCEV = Mul->getOperand(MulOp);
02176       if (isa<SCEVConstant>(MulOpSCEV))
02177         continue;
02178       for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp)
02179         if (MulOpSCEV == Ops[AddOp]) {
02180           // Fold W + X + (X * Y * Z)  -->  W + (X * ((Y*Z)+1))
02181           const SCEV *InnerMul = Mul->getOperand(MulOp == 0);
02182           if (Mul->getNumOperands() != 2) {
02183             // If the multiply has more than two operands, we must get the
02184             // Y*Z term.
02185             SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
02186                                                 Mul->op_begin()+MulOp);
02187             MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
02188             InnerMul = getMulExpr(MulOps);
02189           }
02190           const SCEV *One = getOne(Ty);
02191           const SCEV *AddOne = getAddExpr(One, InnerMul);
02192           const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV);
02193           if (Ops.size() == 2) return OuterMul;
02194           if (AddOp < Idx) {
02195             Ops.erase(Ops.begin()+AddOp);
02196             Ops.erase(Ops.begin()+Idx-1);
02197           } else {
02198             Ops.erase(Ops.begin()+Idx);
02199             Ops.erase(Ops.begin()+AddOp-1);
02200           }
02201           Ops.push_back(OuterMul);
02202           return getAddExpr(Ops);
02203         }
02204 
02205       // Check this multiply against other multiplies being added together.
02206       for (unsigned OtherMulIdx = Idx+1;
02207            OtherMulIdx < Ops.size() && isa<SCEVMulExpr>(Ops[OtherMulIdx]);
02208            ++OtherMulIdx) {
02209         const SCEVMulExpr *OtherMul = cast<SCEVMulExpr>(Ops[OtherMulIdx]);
02210         // If MulOp occurs in OtherMul, we can fold the two multiplies
02211         // together.
02212         for (unsigned OMulOp = 0, e = OtherMul->getNumOperands();
02213              OMulOp != e; ++OMulOp)
02214           if (OtherMul->getOperand(OMulOp) == MulOpSCEV) {
02215             // Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E))
02216             const SCEV *InnerMul1 = Mul->getOperand(MulOp == 0);
02217             if (Mul->getNumOperands() != 2) {
02218               SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
02219                                                   Mul->op_begin()+MulOp);
02220               MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
02221               InnerMul1 = getMulExpr(MulOps);
02222             }
02223             const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0);
02224             if (OtherMul->getNumOperands() != 2) {
02225               SmallVector<const SCEV *, 4> MulOps(OtherMul->op_begin(),
02226                                                   OtherMul->op_begin()+OMulOp);
02227               MulOps.append(OtherMul->op_begin()+OMulOp+1, OtherMul->op_end());
02228               InnerMul2 = getMulExpr(MulOps);
02229             }
02230             const SCEV *InnerMulSum = getAddExpr(InnerMul1,InnerMul2);
02231             const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum);
02232             if (Ops.size() == 2) return OuterMul;
02233             Ops.erase(Ops.begin()+Idx);
02234             Ops.erase(Ops.begin()+OtherMulIdx-1);
02235             Ops.push_back(OuterMul);
02236             return getAddExpr(Ops);
02237           }
02238       }
02239     }
02240   }
02241 
02242   // If there are any add recurrences in the operands list, see if any other
02243   // added values are loop invariant.  If so, we can fold them into the
02244   // recurrence.
02245   while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
02246     ++Idx;
02247 
02248   // Scan over all recurrences, trying to fold loop invariants into them.
02249   for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
02250     // Scan all of the other operands to this add and add them to the vector if
02251     // they are loop invariant w.r.t. the recurrence.
02252     SmallVector<const SCEV *, 8> LIOps;
02253     const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
02254     const Loop *AddRecLoop = AddRec->getLoop();
02255     for (unsigned i = 0, e = Ops.size(); i != e; ++i)
02256       if (isLoopInvariant(Ops[i], AddRecLoop)) {
02257         LIOps.push_back(Ops[i]);
02258         Ops.erase(Ops.begin()+i);
02259         --i; --e;
02260       }
02261 
02262     // If we found some loop invariants, fold them into the recurrence.
02263     if (!LIOps.empty()) {
02264       //  NLI + LI + {Start,+,Step}  -->  NLI + {LI+Start,+,Step}
02265       LIOps.push_back(AddRec->getStart());
02266 
02267       SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
02268                                              AddRec->op_end());
02269       AddRecOps[0] = getAddExpr(LIOps);
02270 
02271       // Build the new addrec. Propagate the NUW and NSW flags if both the
02272       // outer add and the inner addrec are guaranteed to have no overflow.
02273       // Always propagate NW.
02274       Flags = AddRec->getNoWrapFlags(setFlags(Flags, SCEV::FlagNW));
02275       const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop, Flags);
02276 
02277       // If all of the other operands were loop invariant, we are done.
02278       if (Ops.size() == 1) return NewRec;
02279 
02280       // Otherwise, add the folded AddRec by the non-invariant parts.
02281       for (unsigned i = 0;; ++i)
02282         if (Ops[i] == AddRec) {
02283           Ops[i] = NewRec;
02284           break;
02285         }
02286       return getAddExpr(Ops);
02287     }
02288 
02289     // Okay, if there weren't any loop invariants to be folded, check to see if
02290     // there are multiple AddRec's with the same loop induction variable being
02291     // added together.  If so, we can fold them.
02292     for (unsigned OtherIdx = Idx+1;
02293          OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
02294          ++OtherIdx)
02295       if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
02296         // Other + {A,+,B}<L> + {C,+,D}<L>  -->  Other + {A+C,+,B+D}<L>
02297         SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
02298                                                AddRec->op_end());
02299         for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
02300              ++OtherIdx)
02301           if (const auto *OtherAddRec = dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]))
02302             if (OtherAddRec->getLoop() == AddRecLoop) {
02303               for (unsigned i = 0, e = OtherAddRec->getNumOperands();
02304                    i != e; ++i) {
02305                 if (i >= AddRecOps.size()) {
02306                   AddRecOps.append(OtherAddRec->op_begin()+i,
02307                                    OtherAddRec->op_end());
02308                   break;
02309                 }
02310                 AddRecOps[i] = getAddExpr(AddRecOps[i],
02311                                           OtherAddRec->getOperand(i));
02312               }
02313               Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
02314             }
02315         // Step size has changed, so we cannot guarantee no self-wraparound.
02316         Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap);
02317         return getAddExpr(Ops);
02318       }
02319 
02320     // Otherwise couldn't fold anything into this recurrence.  Move onto the
02321     // next one.
02322   }
02323 
02324   // Okay, it looks like we really DO need an add expr.  Check to see if we
02325   // already have one, otherwise create a new one.
02326   FoldingSetNodeID ID;
02327   ID.AddInteger(scAddExpr);
02328   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
02329     ID.AddPointer(Ops[i]);
02330   void *IP = nullptr;
02331   SCEVAddExpr *S =
02332     static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
02333   if (!S) {
02334     const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
02335     std::uninitialized_copy(Ops.begin(), Ops.end(), O);
02336     S = new (SCEVAllocator) SCEVAddExpr(ID.Intern(SCEVAllocator),
02337                                         O, Ops.size());
02338     UniqueSCEVs.InsertNode(S, IP);
02339   }
02340   S->setNoWrapFlags(Flags);
02341   return S;
02342 }
02343 
02344 static uint64_t umul_ov(uint64_t i, uint64_t j, bool &Overflow) {
02345   uint64_t k = i*j;
02346   if (j > 1 && k / j != i) Overflow = true;
02347   return k;
02348 }
02349 
02350 /// Compute the result of "n choose k", the binomial coefficient.  If an
02351 /// intermediate computation overflows, Overflow will be set and the return will
02352 /// be garbage. Overflow is not cleared on absence of overflow.
02353 static uint64_t Choose(uint64_t n, uint64_t k, bool &Overflow) {
02354   // We use the multiplicative formula:
02355   //     n(n-1)(n-2)...(n-(k-1)) / k(k-1)(k-2)...1 .
02356   // At each iteration, we take the next factor of the numerator and divide by
02357   // the next factor of the denominator.  This division will always produce an
02358   // integral result, and helps reduce the chance of overflow in the
02359   // intermediate computations. However, we can still overflow even when the
02360   // final result would fit.
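        // For example (illustrative): Choose(5, 2) computes r = 1*5/1 == 5, then
        // r = 5*4/2 == 10.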
02361 
02362   if (n == 0 || n == k) return 1;
02363   if (k > n) return 0;
02364 
02365   if (k > n/2)
02366     k = n-k;
02367 
02368   uint64_t r = 1;
02369   for (uint64_t i = 1; i <= k; ++i) {
02370     r = umul_ov(r, n-(i-1), Overflow);
02371     r /= i;
02372   }
02373   return r;
02374 }
02375 
02376 /// Determine if any of the operands in this SCEV are a constant or if
02377 /// any of the add or multiply expressions in this SCEV contain a constant.
02378 static bool containsConstantSomewhere(const SCEV *StartExpr) {
02379   SmallVector<const SCEV *, 4> Ops;
02380   Ops.push_back(StartExpr);
02381   while (!Ops.empty()) {
02382     const SCEV *CurrentExpr = Ops.pop_back_val();
02383     if (isa<SCEVConstant>(*CurrentExpr))
02384       return true;
02385 
02386     if (isa<SCEVAddExpr>(*CurrentExpr) || isa<SCEVMulExpr>(*CurrentExpr)) {
02387       const auto *CurrentNAry = cast<SCEVNAryExpr>(CurrentExpr);
02388       Ops.append(CurrentNAry->op_begin(), CurrentNAry->op_end());
02389     }
02390   }
02391   return false;
02392 }
02393 
02394 /// getMulExpr - Get a canonical multiply expression, or something simpler if
02395 /// possible.
02396 const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
02397                                         SCEV::NoWrapFlags Flags) {
02398   assert(Flags == maskFlags(Flags, SCEV::FlagNUW | SCEV::FlagNSW) &&
02399          "only nuw or nsw allowed");
02400   assert(!Ops.empty() && "Cannot get empty mul!");
02401   if (Ops.size() == 1) return Ops[0];
02402 #ifndef NDEBUG
02403   Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
02404   for (unsigned i = 1, e = Ops.size(); i != e; ++i)
02405     assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
02406            "SCEVMulExpr operand types don't match!");
02407 #endif
02408 
02409   // Sort by complexity; this groups all similar expression types together.
02410   GroupByComplexity(Ops, &LI);
02411 
02412   Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags);
02413 
02414   // If there are any constants, fold them together.
02415   unsigned Idx = 0;
02416   if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
02417 
02418     // C1*(C2+V) -> C1*C2 + C1*V
02419     if (Ops.size() == 2)
02420         if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1]))
02421           // If any of Add's ops are Adds or Muls with a constant,
02422           // apply this transformation as well.
02423           if (Add->getNumOperands() == 2)
02424             if (containsConstantSomewhere(Add))
02425               return getAddExpr(getMulExpr(LHSC, Add->getOperand(0)),
02426                                 getMulExpr(LHSC, Add->getOperand(1)));
02427 
02428     ++Idx;
02429     while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
02430       // We found two constants, fold them together!
02431       ConstantInt *Fold =
02432           ConstantInt::get(getContext(), LHSC->getAPInt() * RHSC->getAPInt());
02433       Ops[0] = getConstant(Fold);
02434       Ops.erase(Ops.begin()+1);  // Erase the folded element
02435       if (Ops.size() == 1) return Ops[0];
02436       LHSC = cast<SCEVConstant>(Ops[0]);
02437     }
02438 
02439     // If we are left with a constant one being multiplied, strip it off.
02440     if (cast<SCEVConstant>(Ops[0])->getValue()->equalsInt(1)) {
02441       Ops.erase(Ops.begin());
02442       --Idx;
02443     } else if (cast<SCEVConstant>(Ops[0])->getValue()->isZero()) {
02444       // If we have a multiply of zero, it will always be zero.
02445       return Ops[0];
02446     } else if (Ops[0]->isAllOnesValue()) {
02447       // If we have a mul by -1 of an add, try distributing the -1 among the
02448       // add operands.
02449       if (Ops.size() == 2) {
02450         if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) {
02451           SmallVector<const SCEV *, 4> NewOps;
02452           bool AnyFolded = false;
02453           for (const SCEV *AddOp : Add->operands()) {
02454             const SCEV *Mul = getMulExpr(Ops[0], AddOp);
02455             if (!isa<SCEVMulExpr>(Mul)) AnyFolded = true;
02456             NewOps.push_back(Mul);
02457           }
02458           if (AnyFolded)
02459             return getAddExpr(NewOps);
02460         } else if (const auto *AddRec = dyn_cast<SCEVAddRecExpr>(Ops[1])) {
02461           // Negation preserves a recurrence's no self-wrap property.
02462           SmallVector<const SCEV *, 4> Operands;
02463           for (const SCEV *AddRecOp : AddRec->operands())
02464             Operands.push_back(getMulExpr(Ops[0], AddRecOp));
02465 
02466           return getAddRecExpr(Operands, AddRec->getLoop(),
02467                                AddRec->getNoWrapFlags(SCEV::FlagNW));
02468         }
02469       }
02470     }
02471 
02472     if (Ops.size() == 1)
02473       return Ops[0];
02474   }
02475 
02476   // Skip over the add expression until we get to a multiply.
02477   while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
02478     ++Idx;
02479 
02480   // If there are mul operands inline them all into this expression.
02481   if (Idx < Ops.size()) {
02482     bool DeletedMul = false;
02483     while (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) {
02484       // If we have a mul, expand the mul operands onto the end of the operands
02485       // list.
02486       Ops.erase(Ops.begin()+Idx);
02487       Ops.append(Mul->op_begin(), Mul->op_end());
02488       DeletedMul = true;
02489     }
02490 
02491     // If we deleted at least one mul, we added operands to the end of the list,
02492     // and they are not necessarily sorted.  Recurse to resort and resimplify
02493     // any operands we just acquired.
02494     if (DeletedMul)
02495       return getMulExpr(Ops);
02496   }
02497 
02498   // If there are any add recurrences in the operands list, see if any other
02499   // added values are loop invariant.  If so, we can fold them into the
02500   // recurrence.
02501   while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
02502     ++Idx;
02503 
02504   // Scan over all recurrences, trying to fold loop invariants into them.
02505   for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
02506     // Scan all of the other operands to this mul and add them to the vector if
02507     // they are loop invariant w.r.t. the recurrence.
02508     SmallVector<const SCEV *, 8> LIOps;
02509     const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
02510     const Loop *AddRecLoop = AddRec->getLoop();
02511     for (unsigned i = 0, e = Ops.size(); i != e; ++i)
02512       if (isLoopInvariant(Ops[i], AddRecLoop)) {
02513         LIOps.push_back(Ops[i]);
02514         Ops.erase(Ops.begin()+i);
02515         --i; --e;
02516       }
02517 
02518     // If we found some loop invariants, fold them into the recurrence.
02519     if (!LIOps.empty()) {
02520       //  NLI * LI * {Start,+,Step}  -->  NLI * {LI*Start,+,LI*Step}
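            // A concrete (illustrative) instance with loop-invariant 4 and
            // non-invariant %x:  %x * 4 * {0,+,2}  -->  %x * {0,+,8}.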
02521       SmallVector<const SCEV *, 4> NewOps;
02522       NewOps.reserve(AddRec->getNumOperands());
02523       const SCEV *Scale = getMulExpr(LIOps);
02524       for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
02525         NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i)));
02526 
02527       // Build the new addrec. Propagate the NUW and NSW flags if both the
02528       // outer mul and the inner addrec are guaranteed to have no overflow.
02529       //
02530       // The no-self-wrap property cannot be guaranteed after changing the step
02531       // size, but it will be inferred if either NUW or NSW is true.
02532       Flags = AddRec->getNoWrapFlags(clearFlags(Flags, SCEV::FlagNW));
02533       const SCEV *NewRec = getAddRecExpr(NewOps, AddRecLoop, Flags);
02534 
02535       // If all of the other operands were loop invariant, we are done.
02536       if (Ops.size() == 1) return NewRec;
02537 
02538       // Otherwise, multiply the folded AddRec by the non-invariant parts.
02539       for (unsigned i = 0;; ++i)
02540         if (Ops[i] == AddRec) {
02541           Ops[i] = NewRec;
02542           break;
02543         }
02544       return getMulExpr(Ops);
02545     }
02546 
02547     // Okay, if there weren't any loop invariants to be folded, check to see if
02548     // there are multiple AddRec's with the same loop induction variable being
02549     // multiplied together.  If so, we can fold them.
02550 
02551     // {A1,+,A2,+,...,+,An}<L> * {B1,+,B2,+,...,+,Bn}<L>
02552     // = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [
02553     //       choose(x, 2x)*choose(2x-y, x-z)*A_{y-z}*B_z
02554     //   ]]],+,...up to x=2n}.
02555     // Note that the arguments to choose() are always integers with values
02556     // known at compile time, never SCEV objects.
02557     //
02558     // The implementation avoids pointless extra computations when the two
02559     // addrec's are of different length (mathematically, it's equivalent to
02560     // an infinite stream of zeros on the right).
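          // A minimal worked example (affine times affine): {1,+,1}<L> * {1,+,1}<L>
          // describes (n+1)*(n+1) = n^2 + 2n + 1, which as a chain of recurrences
          // is {1,+,3,+,2}<L> (values 1, 4, 9, 16, ... on successive iterations).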
02561     bool OpsModified = false;
02562     for (unsigned OtherIdx = Idx+1;
02563          OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
02564          ++OtherIdx) {
02565       const SCEVAddRecExpr *OtherAddRec =
02566         dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]);
02567       if (!OtherAddRec || OtherAddRec->getLoop() != AddRecLoop)
02568         continue;
02569 
02570       bool Overflow = false;
02571       Type *Ty = AddRec->getType();
02572       bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64;
02573       SmallVector<const SCEV*, 7> AddRecOps;
02574       for (int x = 0, xe = AddRec->getNumOperands() +
02575              OtherAddRec->getNumOperands() - 1; x != xe && !Overflow; ++x) {
02576         const SCEV *Term = getZero(Ty);
02577         for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) {
02578           uint64_t Coeff1 = Choose(x, 2*x - y, Overflow);
02579           for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1),
02580                  ze = std::min(x+1, (int)OtherAddRec->getNumOperands());
02581                z < ze && !Overflow; ++z) {
02582             uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow);
02583             uint64_t Coeff;
02584             if (LargerThan64Bits)
02585               Coeff = umul_ov(Coeff1, Coeff2, Overflow);
02586             else
02587               Coeff = Coeff1*Coeff2;
02588             const SCEV *CoeffTerm = getConstant(Ty, Coeff);
02589             const SCEV *Term1 = AddRec->getOperand(y-z);
02590             const SCEV *Term2 = OtherAddRec->getOperand(z);
02591             Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1,Term2));
02592           }
02593         }
02594         AddRecOps.push_back(Term);
02595       }
02596       if (!Overflow) {
02597         const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRec->getLoop(),
02598                                               SCEV::FlagAnyWrap);
02599         if (Ops.size() == 2) return NewAddRec;
02600         Ops[Idx] = NewAddRec;
02601         Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
02602         OpsModified = true;
02603         AddRec = dyn_cast<SCEVAddRecExpr>(NewAddRec);
02604         if (!AddRec)
02605           break;
02606       }
02607     }
02608     if (OpsModified)
02609       return getMulExpr(Ops);
02610 
02611     // Otherwise couldn't fold anything into this recurrence.  Move onto the
02612     // next one.
02613   }
02614 
02615   // Okay, it looks like we really DO need a mul expr.  Check to see if we
02616   // already have one, otherwise create a new one.
02617   FoldingSetNodeID ID;
02618   ID.AddInteger(scMulExpr);
02619   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
02620     ID.AddPointer(Ops[i]);
02621   void *IP = nullptr;
02622   SCEVMulExpr *S =
02623     static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
02624   if (!S) {
02625     const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
02626     std::uninitialized_copy(Ops.begin(), Ops.end(), O);
02627     S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator),
02628                                         O, Ops.size());
02629     UniqueSCEVs.InsertNode(S, IP);
02630   }
02631   S->setNoWrapFlags(Flags);
02632   return S;
02633 }
02634 
02635 /// getUDivExpr - Get a canonical unsigned division expression, or something
02636 /// simpler if possible.
02637 const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
02638                                          const SCEV *RHS) {
02639   assert(getEffectiveSCEVType(LHS->getType()) ==
02640          getEffectiveSCEVType(RHS->getType()) &&
02641          "SCEVUDivExpr operand types don't match!");
02642 
02643   if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
02644     if (RHSC->getValue()->equalsInt(1))
02645       return LHS;                               // X udiv 1 --> X
02646     // If the denominator is zero, the result of the udiv is undefined. Don't
02647     // try to analyze it, because the resolution chosen here may differ from
02648     // the resolution chosen in other parts of the compiler.
02649     if (!RHSC->getValue()->isZero()) {
02650       // Determine if the division can be folded into the operands of
02651       // LHS.
02652       // TODO: Generalize this to non-constants by using known-bits information.
02653       Type *Ty = LHS->getType();
02654       unsigned LZ = RHSC->getAPInt().countLeadingZeros();
02655       unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ - 1;
02656       // For non-power-of-two values, effectively round the value up to the
02657       // nearest power of two.
02658       if (!RHSC->getAPInt().isPowerOf2())
02659         ++MaxShiftAmt;
02660       IntegerType *ExtTy =
02661         IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt);
02662       if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS))
02663         if (const SCEVConstant *Step =
02664             dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this))) {
02665           // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded.
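                // For example (illustrative): {0,+,8}<L> /u 4  -->  {0,+,2}<L>,
                // provided zero-extending the whole recurrence matches
                // zero-extending its start and step, which is what the check
                // below verifies.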
02666           const APInt &StepInt = Step->getAPInt();
02667           const APInt &DivInt = RHSC->getAPInt();
02668           if (!StepInt.urem(DivInt) &&
02669               getZeroExtendExpr(AR, ExtTy) ==
02670               getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
02671                             getZeroExtendExpr(Step, ExtTy),
02672                             AR->getLoop(), SCEV::FlagAnyWrap)) {
02673             SmallVector<const SCEV *, 4> Operands;
02674             for (const SCEV *Op : AR->operands())
02675               Operands.push_back(getUDivExpr(Op, RHS));
02676             return getAddRecExpr(Operands, AR->getLoop(), SCEV::FlagNW);
02677           }
02678           // Get a canonical UDivExpr for a recurrence.
02679           // {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0.
02680           // We can currently only fold X%N if X is constant.
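                // For example (illustrative): {5,+,4}<L> /u 8 can be rewritten as
                // {4,+,4}<L> /u 8, since 8 % 4 == 0 and lowering the start from
                // 5 to 5 - (5 % 4) = 4 does not change any of the quotients.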
02681           const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart());
02682           if (StartC && !DivInt.urem(StepInt) &&
02683               getZeroExtendExpr(AR, ExtTy) ==
02684               getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
02685                             getZeroExtendExpr(Step, ExtTy),
02686                             AR->getLoop(), SCEV::FlagAnyWrap)) {
02687             const APInt &StartInt = StartC->getAPInt();
02688             const APInt &StartRem = StartInt.urem(StepInt);
02689             if (StartRem != 0)
02690               LHS = getAddRecExpr(getConstant(StartInt - StartRem), Step,
02691                                   AR->getLoop(), SCEV::FlagNW);
02692           }
02693         }
02694       // (A*B)/C --> A*(B/C) if safe and B/C can be folded.
02695       if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) {
02696         SmallVector<const SCEV *, 4> Operands;
02697         for (const SCEV *Op : M->operands())
02698           Operands.push_back(getZeroExtendExpr(Op, ExtTy));
02699         if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands))
02700           // Find an operand that's safely divisible.
02701           for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
02702             const SCEV *Op = M->getOperand(i);
02703             const SCEV *Div = getUDivExpr(Op, RHSC);
02704             if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) {
02705               Operands = SmallVector<const SCEV *, 4>(M->op_begin(),
02706                                                       M->op_end());
02707               Operands[i] = Div;
02708               return getMulExpr(Operands);
02709             }
02710           }
02711       }
02712       // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded.
02713       if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(LHS)) {
02714         SmallVector<const SCEV *, 4> Operands;
02715         for (const SCEV *Op : A->operands())
02716           Operands.push_back(getZeroExtendExpr(Op, ExtTy));
02717         if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) {
02718           Operands.clear();
02719           for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) {
02720             const SCEV *Op = getUDivExpr(A->getOperand(i), RHS);
02721             if (isa<SCEVUDivExpr>(Op) ||
02722                 getMulExpr(Op, RHS) != A->getOperand(i))
02723               break;
02724             Operands.push_back(Op);
02725           }
02726           if (Operands.size() == A->getNumOperands())
02727             return getAddExpr(Operands);
02728         }
02729       }
02730 
02731       // Fold if both operands are constant.
02732       if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
02733         Constant *LHSCV = LHSC->getValue();
02734         Constant *RHSCV = RHSC->getValue();
02735         return getConstant(cast<ConstantInt>(ConstantExpr::getUDiv(LHSCV,
02736                                                                    RHSCV)));
02737       }
02738     }
02739   }
02740 
02741   FoldingSetNodeID ID;
02742   ID.AddInteger(scUDivExpr);
02743   ID.AddPointer(LHS);
02744   ID.AddPointer(RHS);
02745   void *IP = nullptr;
02746   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
02747   SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator),
02748                                              LHS, RHS);
02749   UniqueSCEVs.InsertNode(S, IP);
02750   return S;
02751 }
02752 
02753 static const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) {
02754   APInt A = C1->getAPInt().abs();
02755   APInt B = C2->getAPInt().abs();
02756   uint32_t ABW = A.getBitWidth();
02757   uint32_t BBW = B.getBitWidth();
02758 
02759   if (ABW > BBW)
02760     B = B.zext(ABW);
02761   else if (ABW < BBW)
02762     A = A.zext(BBW);
02763 
02764   return APIntOps::GreatestCommonDivisor(A, B);
02765 }
02766 
02767 /// getUDivExactExpr - Get a canonical unsigned division expression, or
02768 /// something simpler if possible. There is no representation for an exact udiv
02769 /// in SCEV IR, but we can attempt to remove factors from the LHS and RHS.
02770 /// We can't do this when it's not exact because the udiv may be clearing bits.
02771 const SCEV *ScalarEvolution::getUDivExactExpr(const SCEV *LHS,
02772                                               const SCEV *RHS) {
02773   // TODO: we could try to find factors in all sorts of things, but for now we
02774   // just deal with u/exact (multiply, constant). See SCEVDivision towards the
02775   // end of this file for inspiration.
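        // Small illustrative cases of what follows: an exact udiv of (4 * %x) by 4
        // drops the matching constant and yields %x, while an exact udiv of
        // (6 * %x) by 4 first cancels the common factor 2, leaving an exact
        // division of (3 * %x) by 2.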
02776 
02777   const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS);
02778   if (!Mul)
02779     return getUDivExpr(LHS, RHS);
02780 
02781   if (const SCEVConstant *RHSCst = dyn_cast<SCEVConstant>(RHS)) {
02782     // If the mulexpr multiplies by a constant, then that constant must be the
02783     // first element of the mulexpr.
02784     if (const auto *LHSCst = dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
02785       if (LHSCst == RHSCst) {
02786         SmallVector<const SCEV *, 2> Operands;
02787         Operands.append(Mul->op_begin() + 1, Mul->op_end());
02788         return getMulExpr(Operands);
02789       }
02790 
02791       // We can't just assume that LHSCst divides RHSCst cleanly; it could be
02792       // that there's a factor provided by one of the other terms. We need to
02793       // check.
02794       APInt Factor = gcd(LHSCst, RHSCst);
02795       if (!Factor.isIntN(1)) {
02796         LHSCst =
02797             cast<SCEVConstant>(getConstant(LHSCst->getAPInt().udiv(Factor)));
02798         RHSCst =
02799             cast<SCEVConstant>(getConstant(RHSCst->getAPInt().udiv(Factor)));
02800         SmallVector<const SCEV *, 2> Operands;
02801         Operands.push_back(LHSCst);
02802         Operands.append(Mul->op_begin() + 1, Mul->op_end());
02803         LHS = getMulExpr(Operands);
02804         RHS = RHSCst;
02805         Mul = dyn_cast<SCEVMulExpr>(LHS);
02806         if (!Mul)
02807           return getUDivExactExpr(LHS, RHS);
02808       }
02809     }
02810   }
02811 
02812   for (int i = 0, e = Mul->getNumOperands(); i != e; ++i) {
02813     if (Mul->getOperand(i) == RHS) {
02814       SmallVector<const SCEV *, 2> Operands;
02815       Operands.append(Mul->op_begin(), Mul->op_begin() + i);
02816       Operands.append(Mul->op_begin() + i + 1, Mul->op_end());
02817       return getMulExpr(Operands);
02818     }
02819   }
02820 
02821   return getUDivExpr(LHS, RHS);
02822 }
02823 
02824 /// getAddRecExpr - Get an add recurrence expression for the specified loop.
02825 /// Simplify the expression as much as possible.
02826 const SCEV *ScalarEvolution::getAddRecExpr(const SCEV *Start, const SCEV *Step,
02827                                            const Loop *L,
02828                                            SCEV::NoWrapFlags Flags) {
02829   SmallVector<const SCEV *, 4> Operands;
02830   Operands.push_back(Start);
02831   if (const SCEVAddRecExpr *StepChrec = dyn_cast<SCEVAddRecExpr>(Step))
02832     if (StepChrec->getLoop() == L) {
02833       Operands.append(StepChrec->op_begin(), StepChrec->op_end());
02834       return getAddRecExpr(Operands, L, maskFlags(Flags, SCEV::FlagNW));
02835     }
02836 
02837   Operands.push_back(Step);
02838   return getAddRecExpr(Operands, L, Flags);
02839 }
02840 
02841 /// getAddRecExpr - Get an add recurrence expression for the specified loop.
02842 /// Simplify the expression as much as possible.
02843 const SCEV *
02844 ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
02845                                const Loop *L, SCEV::NoWrapFlags Flags) {
02846   if (Operands.size() == 1) return Operands[0];
02847 #ifndef NDEBUG
02848   Type *ETy = getEffectiveSCEVType(Operands[0]->getType());
02849   for (unsigned i = 1, e = Operands.size(); i != e; ++i)
02850     assert(getEffectiveSCEVType(Operands[i]->getType()) == ETy &&
02851            "SCEVAddRecExpr operand types don't match!");
02852   for (unsigned i = 0, e = Operands.size(); i != e; ++i)
02853     assert(isLoopInvariant(Operands[i], L) &&
02854            "SCEVAddRecExpr operand is not loop-invariant!");
02855 #endif
02856 
02857   if (Operands.back()->isZero()) {
02858     Operands.pop_back();
02859     return getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); // {X,+,0}  -->  X
02860   }
02861 
02862   // It's tempting to call getMaxBackedgeTakenCount here and
02863   // use that information to infer NUW and NSW flags. However, computing a
02864   // BE count requires calling getAddRecExpr, so we may not yet have a
02865   // meaningful BE count at this point (and if we don't, we'd be stuck
02866   // with a SCEVCouldNotCompute as the cached BE count).
02867 
02868   Flags = StrengthenNoWrapFlags(this, scAddRecExpr, Operands, Flags);
02869 
02870   // Canonicalize nested AddRecs by nesting them in order of loop depth.
02871   if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) {
02872     const Loop *NestedLoop = NestedAR->getLoop();
02873     if (L->contains(NestedLoop)
02874             ? (L->getLoopDepth() < NestedLoop->getLoopDepth())
02875             : (!NestedLoop->contains(L) &&
02876                DT.dominates(L->getHeader(), NestedLoop->getHeader()))) {
02877       SmallVector<const SCEV *, 4> NestedOperands(NestedAR->op_begin(),
02878                                                   NestedAR->op_end());
02879       Operands[0] = NestedAR->getStart();
02880       // AddRecs require their operands be loop-invariant with respect to their
02881       // loops. Don't perform this transformation if it would break this
02882       // requirement.
02883       bool AllInvariant = all_of(
02884           Operands, [&](const SCEV *Op) { return isLoopInvariant(Op, L); });
02885 
02886       if (AllInvariant) {
02887         // Create a recurrence for the outer loop with the same step size.
02888         //
02889         // The outer recurrence keeps its NW flag but only keeps NUW/NSW if the
02890         // inner recurrence has the same property.
02891         SCEV::NoWrapFlags OuterFlags =
02892           maskFlags(Flags, SCEV::FlagNW | NestedAR->getNoWrapFlags());
02893 
02894         NestedOperands[0] = getAddRecExpr(Operands, L, OuterFlags);
02895         AllInvariant = all_of(NestedOperands, [&](const SCEV *Op) {
02896           return isLoopInvariant(Op, NestedLoop);
02897         });
02898 
02899         if (AllInvariant) {
02900           // Ok, both add recurrences are valid after the transformation.
02901           //
02902           // The inner recurrence keeps its NW flag but only keeps NUW/NSW if
02903           // the outer recurrence has the same property.
02904           SCEV::NoWrapFlags InnerFlags =
02905             maskFlags(NestedAR->getNoWrapFlags(), SCEV::FlagNW | Flags);
02906           return getAddRecExpr(NestedOperands, NestedLoop, InnerFlags);
02907         }
02908       }
02909       // Reset Operands to its original state.
02910       Operands[0] = NestedAR;
02911     }
02912   }
02913 
02914   // Okay, it looks like we really DO need an addrec expr.  Check to see if we
02915   // already have one, otherwise create a new one.
02916   FoldingSetNodeID ID;
02917   ID.AddInteger(scAddRecExpr);
02918   for (unsigned i = 0, e = Operands.size(); i != e; ++i)
02919     ID.AddPointer(Operands[i]);
02920   ID.AddPointer(L);
02921   void *IP = nullptr;
02922   SCEVAddRecExpr *S =
02923     static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
02924   if (!S) {
02925     const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Operands.size());
02926     std::uninitialized_copy(Operands.begin(), Operands.end(), O);
02927     S = new (SCEVAllocator) SCEVAddRecExpr(ID.Intern(SCEVAllocator),
02928                                            O, Operands.size(), L);
02929     UniqueSCEVs.InsertNode(S, IP);
02930   }
02931   S->setNoWrapFlags(Flags);
02932   return S;
02933 }
02934 
02935 const SCEV *
02936 ScalarEvolution::getGEPExpr(Type *PointeeType, const SCEV *BaseExpr,
02937                             const SmallVectorImpl<const SCEV *> &IndexExprs,
02938                             bool InBounds) {
02939   // getSCEV(Base)->getType() has the same address space as Base->getType()
02940   // because SCEV::getType() preserves the address space.
02941   Type *IntPtrTy = getEffectiveSCEVType(BaseExpr->getType());
02942   // FIXME(PR23527): Don't blindly transfer the inbounds flag from the GEP
02943   // instruction to its SCEV, because the Instruction may be guarded by control
02944   // flow and the no-overflow bits may not be valid for the expression in any
02945   // context. This can be fixed similarly to how these flags are handled for
02946   // adds.
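        // Illustrative sketch of what the loop below computes: for a GEP such as
        //   getelementptr inbounds [10 x i32], [10 x i32]* %p, i64 0, i64 %i
        // TotalOffset becomes 0 * sizeof([10 x i32]) + %i * sizeof(i32), with %i
        // truncated or sign-extended to the pointer-sized type, and the result
        // is BaseExpr plus that offset.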
02947   SCEV::NoWrapFlags Wrap = InBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
02948 
02949   const SCEV *TotalOffset = getZero(IntPtrTy);
02950   // The address space is unimportant; the first thing we do with CurTy is get
02951   // its element type.
02952   Type *CurTy = PointerType::getUnqual(PointeeType);
02953   for (const SCEV *IndexExpr : IndexExprs) {
02954     // Compute the (potentially symbolic) offset in bytes for this index.
02955     if (StructType *STy = dyn_cast<StructType>(CurTy)) {
02956       // For a struct, add the member offset.
02957       ConstantInt *Index = cast<SCEVConstant>(IndexExpr)->getValue();
02958       unsigned FieldNo = Index->getZExtValue();
02959       const SCEV *FieldOffset = getOffsetOfExpr(IntPtrTy, STy, FieldNo);
02960 
02961       // Add the field offset to the running total offset.
02962       TotalOffset = getAddExpr(TotalOffset, FieldOffset);
02963 
02964       // Update CurTy to the type of the field at Index.
02965       CurTy = STy->getTypeAtIndex(Index);
02966     } else {
02967       // Update CurTy to its element type.
02968       CurTy = cast<SequentialType>(CurTy)->getElementType();
02969       // For an array, add the element offset, explicitly scaled.
02970       const SCEV *ElementSize = getSizeOfExpr(IntPtrTy, CurTy);
02971       // Getelementptr indices are signed.
02972       IndexExpr = getTruncateOrSignExtend(IndexExpr, IntPtrTy);
02973 
02974       // Multiply the index by the element size to compute the element offset.
02975       const SCEV *LocalOffset = getMulExpr(IndexExpr, ElementSize, Wrap);
02976 
02977       // Add the element offset to the running total offset.
02978       TotalOffset = getAddExpr(TotalOffset, LocalOffset);
02979     }
02980   }
02981 
02982   // Add the total offset from all the GEP indices to the base.
02983   return getAddExpr(BaseExpr, TotalOffset, Wrap);
02984 }
02985 
02986 const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS,
02987                                          const SCEV *RHS) {
02988   SmallVector<const SCEV *, 2> Ops;
02989   Ops.push_back(LHS);
02990   Ops.push_back(RHS);
02991   return getSMaxExpr(Ops);
02992 }
02993 
02994 const SCEV *
02995 ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
02996   assert(!Ops.empty() && "Cannot get empty smax!");
02997   if (Ops.size() == 1) return Ops[0];
02998 #ifndef NDEBUG
02999   Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
03000   for (unsigned i = 1, e = Ops.size(); i != e; ++i)
03001     assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
03002            "SCEVSMaxExpr operand types don't match!");
03003 #endif
03004 
03005   // Sort by complexity, this groups all similar expression types together.
03006   GroupByComplexity(Ops, &LI);
03007 
03008   // If there are any constants, fold them together.
03009   unsigned Idx = 0;
03010   if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
03011     ++Idx;
03012     assert(Idx < Ops.size());
03013     while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
03014       // We found two constants, fold them together!
03015       ConstantInt *Fold = ConstantInt::get(
03016           getContext(), APIntOps::smax(LHSC->getAPInt(), RHSC->getAPInt()));
03017       Ops[0] = getConstant(Fold);
03018       Ops.erase(Ops.begin()+1);  // Erase the folded element
03019       if (Ops.size() == 1) return Ops[0];
03020       LHSC = cast<SCEVConstant>(Ops[0]);
03021     }
03022 
03023     // If we are left with a constant minimum-int, strip it off.
03024     if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(true)) {
03025       Ops.erase(Ops.begin());
03026       --Idx;
03027     } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(true)) {
03028       // If we have an smax with a constant maximum-int, it will always be
03029       // maximum-int.
03030       return Ops[0];
03031     }
03032 
03033     if (Ops.size() == 1) return Ops[0];
03034   }
03035 
03036   // Find the first SMax
03037   while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scSMaxExpr)
03038     ++Idx;
03039 
03040   // Check to see if one of the operands is an SMax. If so, expand its operands
03041   // onto our operand list, and recurse to simplify.
03042   if (Idx < Ops.size()) {
03043     bool DeletedSMax = false;
03044     while (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(Ops[Idx])) {
03045       Ops.erase(Ops.begin()+Idx);
03046       Ops.append(SMax->op_begin(), SMax->op_end());
03047       DeletedSMax = true;
03048     }
03049 
03050     if (DeletedSMax)
03051       return getSMaxExpr(Ops);
03052   }
03053 
03054   // Okay, check to see if the same value occurs in the operand list twice.  If
03055   // so, delete one.  Since we sorted the list, these values are required to
03056   // be adjacent.
03057   for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
03058     //  X smax Y smax Y  -->  X smax Y
03059     //  X smax Y         -->  X, if X is known to be greater than or equal to Y
03060     if (Ops[i] == Ops[i+1] ||
03061         isKnownPredicate(ICmpInst::ICMP_SGE, Ops[i], Ops[i+1])) {
03062       Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2);
03063       --i; --e;
03064     } else if (isKnownPredicate(ICmpInst::ICMP_SLE, Ops[i], Ops[i+1])) {
03065       Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
03066       --i; --e;
03067     }
03068 
03069   if (Ops.size() == 1) return Ops[0];
03070 
03071   assert(!Ops.empty() && "Reduced smax down to nothing!");
03072 
03073   // Okay, it looks like we really DO need an smax expr.  Check to see if we
03074   // already have one, otherwise create a new one.
03075   FoldingSetNodeID ID;
03076   ID.AddInteger(scSMaxExpr);
03077   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
03078     ID.AddPointer(Ops[i]);
03079   void *IP = nullptr;
03080   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
03081   const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
03082   std::uninitialized_copy(Ops.begin(), Ops.end(), O);
03083   SCEV *S = new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator),
03084                                              O, Ops.size());
03085   UniqueSCEVs.InsertNode(S, IP);
03086   return S;
03087 }
03088 
03089 const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS,
03090                                          const SCEV *RHS) {
03091   SmallVector<const SCEV *, 2> Ops;
03092   Ops.push_back(LHS);
03093   Ops.push_back(RHS);
03094   return getUMaxExpr(Ops);
03095 }
03096 
03097 const SCEV *
03098 ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
03099   assert(!Ops.empty() && "Cannot get empty umax!");
03100   if (Ops.size() == 1) return Ops[0];
03101 #ifndef NDEBUG
03102   Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
03103   for (unsigned i = 1, e = Ops.size(); i != e; ++i)
03104     assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
03105            "SCEVUMaxExpr operand types don't match!");
03106 #endif
03107 
03108   // Sort by complexity, this groups all similar expression types together.
03109   GroupByComplexity(Ops, &LI);
03110 
03111   // If there are any constants, fold them together.
03112   unsigned Idx = 0;
03113   if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
03114     ++Idx;
03115     assert(Idx < Ops.size());
03116     while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
03117       // We found two constants, fold them together!
03118       ConstantInt *Fold = ConstantInt::get(
03119           getContext(), APIntOps::umax(LHSC->getAPInt(), RHSC->getAPInt()));
03120       Ops[0] = getConstant(Fold);
03121       Ops.erase(Ops.begin()+1);  // Erase the folded element
03122       if (Ops.size() == 1) return Ops[0];
03123       LHSC = cast<SCEVConstant>(Ops[0]);
03124     }
03125 
03126     // If we are left with a constant minimum-int, strip it off.
03127     if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(false)) {
03128       Ops.erase(Ops.begin());
03129       --Idx;
03130     } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(false)) {
03131       // If we have an umax with a constant maximum-int, it will always be
03132       // maximum-int.
03133       return Ops[0];
03134     }
03135 
03136     if (Ops.size() == 1) return Ops[0];
03137   }
03138 
03139   // Find the first UMax
03140   while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scUMaxExpr)
03141     ++Idx;
03142 
03143   // Check to see if one of the operands is a UMax. If so, expand its operands
03144   // onto our operand list, and recurse to simplify.
03145   if (Idx < Ops.size()) {
03146     bool DeletedUMax = false;
03147     while (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(Ops[Idx])) {
03148       Ops.erase(Ops.begin()+Idx);
03149       Ops.append(UMax->op_begin(), UMax->op_end());
03150       DeletedUMax = true;
03151     }
03152 
03153     if (DeletedUMax)
03154       return getUMaxExpr(Ops);
03155   }
03156 
03157   // Okay, check to see if the same value occurs in the operand list twice.  If
03158   // so, delete one.  Since we sorted the list, these values are required to
03159   // be adjacent.
03160   for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
03161     //  X umax Y umax Y  -->  X umax Y
03162     //  X umax Y         -->  X, if X is known to be greater than or equal to Y
03163     if (Ops[i] == Ops[i+1] ||
03164         isKnownPredicate(ICmpInst::ICMP_UGE, Ops[i], Ops[i+1])) {
03165       Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2);
03166       --i; --e;
03167     } else if (isKnownPredicate(ICmpInst::ICMP_ULE, Ops[i], Ops[i+1])) {
03168       Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
03169       --i; --e;
03170     }
03171 
03172   if (Ops.size() == 1) return Ops[0];
03173 
03174   assert(!Ops.empty() && "Reduced umax down to nothing!");
03175 
03176   // Okay, it looks like we really DO need a umax expr.  Check to see if we
03177   // already have one, otherwise create a new one.
03178   FoldingSetNodeID ID;
03179   ID.AddInteger(scUMaxExpr);
03180   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
03181     ID.AddPointer(Ops[i]);
03182   void *IP = nullptr;
03183   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
03184   const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
03185   std::uninitialized_copy(Ops.begin(), Ops.end(), O);
03186   SCEV *S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator),
03187                                              O, Ops.size());
03188   UniqueSCEVs.InsertNode(S, IP);
03189   return S;
03190 }
03191 
03192 const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS,
03193                                          const SCEV *RHS) {
03194   // ~smax(~x, ~y) == smin(x, y).
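        // A short derivation, for reference: with ~z = -1 - z, the map z -> -1 - z
        // reverses the signed order, so smax(-1 - x, -1 - y) = -1 - smin(x, y),
        // and applying ~ once more gives back smin(x, y).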
03195   return getNotSCEV(getSMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
03196 }
03197 
03198 const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
03199                                          const SCEV *RHS) {
03200   // ~umax(~x, ~y) == umin(x, y)
03201   return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
03202 }
03203 
03204 const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) {
03205   // We can bypass creating a target-independent
03206   // constant expression and then folding it back into a ConstantInt.
03207   // This is just a compile-time optimization.
03208   return getConstant(IntTy, getDataLayout().getTypeAllocSize(AllocTy));
03209 }
03210 
03211 const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy,
03212                                              StructType *STy,
03213                                              unsigned FieldNo) {
03214   // We can bypass creating a target-independent
03215   // constant expression and then folding it back into a ConstantInt.
03216   // This is just a compile-time optimization.
03217   return getConstant(
03218       IntTy, getDataLayout().getStructLayout(STy)->getElementOffset(FieldNo));
03219 }
03220 
03221 const SCEV *ScalarEvolution::getUnknown(Value *V) {
03222   // Don't attempt to do anything other than create a SCEVUnknown object
03223   // here.  createSCEV only calls getUnknown after checking for all other
03224   // interesting possibilities, and any other code that calls getUnknown
03225   // is doing so in order to hide a value from SCEV canonicalization.
03226 
03227   FoldingSetNodeID ID;
03228   ID.AddInteger(scUnknown);
03229   ID.AddPointer(V);
03230   void *IP = nullptr;
03231   if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) {
03232     assert(cast<SCEVUnknown>(S)->getValue() == V &&
03233            "Stale SCEVUnknown in uniquing map!");
03234     return S;
03235   }
03236   SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V, this,
03237                                             FirstUnknown);
03238   FirstUnknown = cast<SCEVUnknown>(S);
03239   UniqueSCEVs.InsertNode(S, IP);
03240   return S;
03241 }
03242 
03243 //===----------------------------------------------------------------------===//
03244 //            Basic SCEV Analysis and PHI Idiom Recognition Code
03245 //
03246 
03247 /// isSCEVable - Test if values of the given type are analyzable within
03248 /// the SCEV framework. This primarily includes integer types, and it
03249 /// can optionally include pointer types if the ScalarEvolution class
03250 /// has access to target-specific information.
03251 bool ScalarEvolution::isSCEVable(Type *Ty) const {
03252   // Integers and pointers are always SCEVable.
03253   return Ty->isIntegerTy() || Ty->isPointerTy();
03254 }
03255 
03256 /// getTypeSizeInBits - Return the size in bits of the specified type,
03257 /// for which isSCEVable must return true.
03258 uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const {
03259   assert(isSCEVable(Ty) && "Type is not SCEVable!");
03260   return getDataLayout().getTypeSizeInBits(Ty);
03261 }
03262 
03263 /// getEffectiveSCEVType - Return a type with the same bitwidth as
03264 /// the given type and which represents how SCEV will treat the given
03265 /// type, for which isSCEVable must return true. For pointer types,
03266 /// this is the pointer-sized integer type.
03267 Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const {
03268   assert(isSCEVable(Ty) && "Type is not SCEVable!");
03269 
03270   if (Ty->isIntegerTy())
03271     return Ty;
03272 
03273   // The only other supported type is pointer.
03274   assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!");
03275   return getDataLayout().getIntPtrType(Ty);
03276 }
03277 
03278 const SCEV *ScalarEvolution::getCouldNotCompute() {
03279   return CouldNotCompute.get();
03280 }
03281 
03282 
03283 bool ScalarEvolution::checkValidity(const SCEV *S) const {
03284   // Helper class working with SCEVTraversal to figure out if a SCEV contains
03285   // a SCEVUnknown with null value-pointer. FindInvalidSCEVUnknown::FindOne
03286   // is set iff we find such a SCEVUnknown.
03287   //
03288   struct FindInvalidSCEVUnknown {
03289     bool FindOne;
03290     FindInvalidSCEVUnknown() { FindOne = false; }
03291     bool follow(const SCEV *S) {
03292       switch (static_cast<SCEVTypes>(S->getSCEVType())) {
03293       case scConstant:
03294         return false;
03295       case scUnknown:
03296         if (!cast<SCEVUnknown>(S)->getValue())
03297           FindOne = true;
03298         return false;
03299       default:
03300         return true;
03301       }
03302     }
03303     bool isDone() const { return FindOne; }
03304   };
03305 
03306   FindInvalidSCEVUnknown F;
03307   SCEVTraversal<FindInvalidSCEVUnknown> ST(F);
03308   ST.visitAll(S);
03309 
03310   return !F.FindOne;
03311 }
03312 
03313 /// getSCEV - Return an existing SCEV if it exists, otherwise analyze the
03314 /// expression and create a new one.
03315 const SCEV *ScalarEvolution::getSCEV(Value *V) {
03316   assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
03317 
03318   const SCEV *S = getExistingSCEV(V);
03319   if (S == nullptr) {
03320     S = createSCEV(V);
03321     ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S));
03322   }
03323   return S;
03324 }
03325 
03326 const SCEV *ScalarEvolution::getExistingSCEV(Value *V) {
03327   assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
03328 
03329   ValueExprMapType::iterator I = ValueExprMap.find_as(V);
03330   if (I != ValueExprMap.end()) {
03331     const SCEV *S = I->second;
03332     if (checkValidity(S))
03333       return S;
03334     ValueExprMap.erase(I);
03335   }
03336   return nullptr;
03337 }
03338 
03339 /// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V
03340 ///
03341 const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V,
03342                                              SCEV::NoWrapFlags Flags) {
03343   if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
03344     return getConstant(
03345                cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue())));
03346 
03347   Type *Ty = V->getType();
03348   Ty = getEffectiveSCEVType(Ty);
03349   return getMulExpr(
03350       V, getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))), Flags);
03351 }
03352 
03353 /// getNotSCEV - Return a SCEV corresponding to ~V = -1-V
03354 const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
03355   if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
03356     return getConstant(
03357                 cast<ConstantInt>(ConstantExpr::getNot(VC->getValue())));
03358 
03359   Type *Ty = V->getType();
03360   Ty = getEffectiveSCEVType(Ty);
03361   const SCEV *AllOnes =
03362                    getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty)));
03363   return getMinusSCEV(AllOnes, V);
03364 }
03365 
03366 /// getMinusSCEV - Return LHS-RHS.  Minus is represented in SCEV as A+B*-1.
03367 const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
03368                                           SCEV::NoWrapFlags Flags) {
03369   // Fast path: X - X --> 0.
03370   if (LHS == RHS)
03371     return getZero(LHS->getType());
03372 
03373   // We represent LHS - RHS as LHS + (-1)*RHS. This transformation
03374   // makes it so that we cannot make much use of NUW.
03375   auto AddFlags = SCEV::FlagAnyWrap;
03376   const bool RHSIsNotMinSigned =
03377       !getSignedRange(RHS).getSignedMin().isMinSignedValue();
03378   if (maskFlags(Flags, SCEV::FlagNSW) == SCEV::FlagNSW) {
03379     // Let M be the minimum representable signed value. Then (-1)*RHS
03380     // signed-wraps if and only if RHS is M. That can happen even for
03381     // a NSW subtraction because e.g. (-1)*M signed-wraps even though
03382     // -1 - M does not. So to transfer NSW from LHS - RHS to LHS +
03383     // (-1)*RHS, we need to prove that RHS != M.
03384     //
03385     // If LHS is non-negative and we know that LHS - RHS does not
03386     // signed-wrap, then RHS cannot be M. So we can rule out signed-wrap
03387     // either by proving that RHS > M or that LHS >= 0.
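          // Concretely (an i8 illustration): M = -128, and (-1) * (-128)
          // signed-wraps back to -128, while -1 - (-128) = 127 does not wrap.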
03388     if (RHSIsNotMinSigned || isKnownNonNegative(LHS)) {
03389       AddFlags = SCEV::FlagNSW;
03390     }
03391   }
03392 
03393   // FIXME: Find a correct way to transfer NSW to (-1)*M when LHS -
03394   // RHS is NSW and LHS >= 0.
03395   //
03396   // The difficulty here is that the NSW flag may have been proven
03397   // relative to a loop that is to be found in a recurrence in LHS and
03398   // not in RHS. Applying NSW to (-1)*M may then let the NSW have a
03399   // larger scope than intended.
03400   auto NegFlags = RHSIsNotMinSigned ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
03401 
03402   return getAddExpr(LHS, getNegativeSCEV(RHS, NegFlags), AddFlags);
03403 }
03404 
03405 /// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the
03406 /// input value to the specified type.  If the type must be extended, it is zero
03407 /// extended.
03408 const SCEV *
03409 ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, Type *Ty) {
03410   Type *SrcTy = V->getType();
03411   assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
03412          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
03413          "Cannot truncate or zero extend with non-integer arguments!");
03414   if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
03415     return V;  // No conversion
03416   if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
03417     return getTruncateExpr(V, Ty);
03418   return getZeroExtendExpr(V, Ty);
03419 }
03420 
03421 /// getTruncateOrSignExtend - Return a SCEV corresponding to a conversion of the
03422 /// input value to the specified type.  If the type must be extended, it is sign
03423 /// extended.
03424 const SCEV *
03425 ScalarEvolution::getTruncateOrSignExtend(const SCEV *V,
03426                                          Type *Ty) {
03427   Type *SrcTy = V->getType();
03428   assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
03429          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
03430          "Cannot truncate or sign extend with non-integer arguments!");
03431   if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
03432     return V;  // No conversion
03433   if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
03434     return getTruncateExpr(V, Ty);
03435   return getSignExtendExpr(V, Ty);
03436 }
03437 
03438 /// getNoopOrZeroExtend - Return a SCEV corresponding to a conversion of the
03439 /// input value to the specified type.  If the type must be extended, it is zero
03440 /// extended.  The conversion must not be narrowing.
03441 const SCEV *
03442 ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, Type *Ty) {
03443   Type *SrcTy = V->getType();
03444   assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
03445          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
03446          "Cannot noop or zero extend with non-integer arguments!");
03447   assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
03448          "getNoopOrZeroExtend cannot truncate!");
03449   if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
03450     return V;  // No conversion
03451   return getZeroExtendExpr(V, Ty);
03452 }
03453 
03454 /// getNoopOrSignExtend - Return a SCEV corresponding to a conversion of the
03455 /// input value to the specified type.  If the type must be extended, it is sign
03456 /// extended.  The conversion must not be narrowing.
03457 const SCEV *
03458 ScalarEvolution::getNoopOrSignExtend(const SCEV *V, Type *Ty) {
03459   Type *SrcTy = V->getType();
03460   assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
03461          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
03462          "Cannot noop or sign extend with non-integer arguments!");
03463   assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
03464          "getNoopOrSignExtend cannot truncate!");
03465   if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
03466     return V;  // No conversion
03467   return getSignExtendExpr(V, Ty);
03468 }
03469 
03470 /// getNoopOrAnyExtend - Return a SCEV corresponding to a conversion of
03471 /// the input value to the specified type. If the type must be extended,
03472 /// it is extended with unspecified bits. The conversion must not be
03473 /// narrowing.
03474 const SCEV *
03475 ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, Type *Ty) {
03476   Type *SrcTy = V->getType();
03477   assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
03478          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
03479          "Cannot noop or any extend with non-integer arguments!");
03480   assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
03481          "getNoopOrAnyExtend cannot truncate!");
03482   if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
03483     return V;  // No conversion
03484   return getAnyExtendExpr(V, Ty);
03485 }
03486 
03487 /// getTruncateOrNoop - Return a SCEV corresponding to a conversion of the
03488 /// input value to the specified type.  The conversion must not be widening.
03489 const SCEV *
03490 ScalarEvolution::getTruncateOrNoop(const SCEV *V, Type *Ty) {
03491   Type *SrcTy = V->getType();
03492   assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
03493          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
03494          "Cannot truncate or noop with non-integer arguments!");
03495   assert(getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) &&
03496          "getTruncateOrNoop cannot extend!");
03497   if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
03498     return V;  // No conversion
03499   return getTruncateExpr(V, Ty);
03500 }
03501 
03502 /// getUMaxFromMismatchedTypes - Promote the operands to the wider of
03503 /// the types using zero-extension, and then perform a umax operation
03504 /// with them.
03505 const SCEV *ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV *LHS,
03506                                                         const SCEV *RHS) {
03507   const SCEV *PromotedLHS = LHS;
03508   const SCEV *PromotedRHS = RHS;
03509 
03510   if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
03511     PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
03512   else
03513     PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType());
03514 
03515   return getUMaxExpr(PromotedLHS, PromotedRHS);
03516 }
03517 
03518 /// getUMinFromMismatchedTypes - Promote the operands to the wider of
03519 /// the types using zero-extension, and then perform a umin operation
03520 /// with them.
03521 const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS,
03522                                                         const SCEV *RHS) {
03523   const SCEV *PromotedLHS = LHS;
03524   const SCEV *PromotedRHS = RHS;
03525 
03526   if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
03527     PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
03528   else
03529     PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType());
03530 
03531   return getUMinExpr(PromotedLHS, PromotedRHS);
03532 }
03533 
03534 /// getPointerBase - Transitively follow the chain of pointer-type operands
03535 /// until reaching a SCEV that does not have a single pointer operand. This
03536 /// returns a SCEVUnknown pointer for well-formed pointer-type expressions,
03537 /// but corner cases do exist.
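      /// For example (illustrative), for the pointer-typed expression
      /// ((4 * %i) + %base) this walks through the add and returns the
      /// SCEVUnknown for %base, its only pointer-typed operand.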
03538 const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) {
03539   // A pointer operand may evaluate to a nonpointer expression, such as null.
03540   if (!V->getType()->isPointerTy())
03541     return V;
03542 
03543   if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(V)) {
03544     return getPointerBase(Cast->getOperand());
03545   } else if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(V)) {
03546     const SCEV *PtrOp = nullptr;
03547     for (const SCEV *NAryOp : NAry->operands()) {
03548       if (NAryOp->getType()->isPointerTy()) {
03549         // Cannot find the base of an expression with multiple pointer operands.
03550         if (PtrOp)
03551           return V;
03552         PtrOp = NAryOp;
03553       }
03554     }
03555     if (!PtrOp)
03556       return V;
03557     return getPointerBase(PtrOp);
03558   }
03559   return V;
03560 }
03561 
03562 /// PushDefUseChildren - Push users of the given Instruction
03563 /// onto the given Worklist.
03564 static void
03565 PushDefUseChildren(Instruction *I,
03566                    SmallVectorImpl<Instruction *> &Worklist) {
03567   // Push the def-use children onto the Worklist stack.
03568   for (User *U : I->users())
03569     Worklist.push_back(cast<Instruction>(U));
03570 }
03571 
03572 /// ForgetSymbolicName - This looks up computed SCEV values for all
03573 /// instructions that depend on the given instruction and removes them from
03574 /// the ValueExprMap if they reference SymName. This is used during PHI
03575 /// resolution.
03576 void
03577 ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) {
03578   SmallVector<Instruction *, 16> Worklist;
03579   PushDefUseChildren(PN, Worklist);
03580 
03581   SmallPtrSet<Instruction *, 8> Visited;
03582   Visited.insert(PN);
03583   while (!Worklist.empty()) {
03584     Instruction *I = Worklist.pop_back_val();
03585     if (!Visited.insert(I).second)
03586       continue;
03587 
03588     auto It = ValueExprMap.find_as(static_cast<Value *>(I));
03589     if (It != ValueExprMap.end()) {
03590       const SCEV *Old = It->second;
03591 
03592       // Short-circuit the def-use traversal if the symbolic name
03593       // ceases to appear in expressions.
03594       if (Old != SymName && !hasOperand(Old, SymName))
03595         continue;
03596 
03597       // SCEVUnknown for a PHI either means that it has an unrecognized
03598       // structure, it's a PHI that's in the process of being computed by
03599       // by createNodeForPHI, or it's a single-value PHI. In the first case,
03600       // additional loop trip count information isn't going to change anything.
03601       // In the second case, createNodeForPHI will perform the necessary
03602       // updates on its own when it gets to that point. In the third, we do
03603       // want to forget the SCEVUnknown.
03604       if (!isa<PHINode>(I) ||
03605           !isa<SCEVUnknown>(Old) ||
03606           (I != PN && Old == SymName)) {
03607         forgetMemoizedResults(Old);
03608         ValueExprMap.erase(It);
03609       }
03610     }
03611 
03612     PushDefUseChildren(I, Worklist);
03613   }
03614 }
03615 
03616 namespace {
03617 class SCEVInitRewriter : public SCEVRewriteVisitor<SCEVInitRewriter> {
03618 public:
03619   static const SCEV *rewrite(const SCEV *Scev, const Loop *L,
03620                              ScalarEvolution &SE) {
03621     SCEVInitRewriter Rewriter(L, SE);
03622     const SCEV *Result = Rewriter.visit(Scev);
03623     return Rewriter.isValid() ? Result : SE.getCouldNotCompute();
03624   }
03625 
03626   SCEVInitRewriter(const Loop *L, ScalarEvolution &SE)
03627       : SCEVRewriteVisitor(SE), L(L), Valid(true) {}
03628 
03629   const SCEV *visitUnknown(const SCEVUnknown *Expr) {
03630     if (!(SE.getLoopDisposition(Expr, L) == ScalarEvolution::LoopInvariant))
03631       Valid = false;
03632     return Expr;
03633   }
03634 
03635   const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
03636     // Only allow AddRecExprs for this loop.
03637     if (Expr->getLoop() == L)
03638       return Expr->getStart();
03639     Valid = false;
03640     return Expr;
03641   }
03642 
03643   bool isValid() { return Valid; }
03644 
03645 private:
03646   const Loop *L;
03647   bool Valid;
03648 };
03649 
03650 class SCEVShiftRewriter : public SCEVRewriteVisitor<SCEVShiftRewriter> {
03651 public:
03652   static const SCEV *rewrite(const SCEV *Scev, const Loop *L,
03653                              ScalarEvolution &SE) {
03654     SCEVShiftRewriter Rewriter(L, SE);
03655     const SCEV *Result = Rewriter.visit(Scev);
03656     return Rewriter.isValid() ? Result : SE.getCouldNotCompute();
03657   }
03658 
03659   SCEVShiftRewriter(const Loop *L, ScalarEvolution &SE)
03660       : SCEVRewriteVisitor(SE), L(L), Valid(true) {}
03661 
03662   const SCEV *visitUnknown(const SCEVUnknown *Expr) {
03663     // Only allow AddRecExprs for this loop.
03664     if (!(SE.getLoopDisposition(Expr, L) == ScalarEvolution::LoopInvariant))
03665       Valid = false;
03666     return Expr;
03667   }
03668 
03669   const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
03670     if (Expr->getLoop() == L && Expr->isAffine())
03671       return SE.getMinusSCEV(Expr, Expr->getStepRecurrence(SE));
03672     Valid = false;
03673     return Expr;
03674   }
03675   bool isValid() { return Valid; }
03676 
03677 private:
03678   const Loop *L;
03679   bool Valid;
03680 };
03681 } // end anonymous namespace
03682 
03683 const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) {
03684   const Loop *L = LI.getLoopFor(PN->getParent());
03685   if (!L || L->getHeader() != PN->getParent())
03686     return nullptr;
03687 
03688   // The loop may have multiple entrances or multiple exits; we can analyze
03689   // this phi as an addrec if it has a unique entry value and a unique
03690   // backedge value.
03691   Value *BEValueV = nullptr, *StartValueV = nullptr;
03692   for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
03693     Value *V = PN->getIncomingValue(i);
03694     if (L->contains(PN->getIncomingBlock(i))) {
03695       if (!BEValueV) {
03696         BEValueV = V;
03697       } else if (BEValueV != V) {
03698         BEValueV = nullptr;
03699         break;
03700       }
03701     } else if (!StartValueV) {
03702       StartValueV = V;
03703     } else if (StartValueV != V) {
03704       StartValueV = nullptr;
03705       break;
03706     }
03707   }
03708   if (BEValueV && StartValueV) {
03709     // While we are analyzing this PHI node, handle its value symbolically.
03710     const SCEV *SymbolicName = getUnknown(PN);
03711     assert(ValueExprMap.find_as(PN) == ValueExprMap.end() &&
03712            "PHI node already processed?");
03713     ValueExprMap.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName));
03714 
03715     // Using this symbolic name for the PHI, analyze the value coming around
03716     // the back-edge.
03717     const SCEV *BEValue = getSCEV(BEValueV);
03718 
03719     // NOTE: If BEValue is loop invariant, we know that the PHI node just
03720     // has a special value for the first iteration of the loop.
03721 
03722     // If the value coming around the backedge is an add with the symbolic
03723     // value we just inserted, then we found a simple induction variable!
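          // For instance (illustrative), if the backedge value is
          //   %i.next = add i32 %i, 4
          // then BEValue is the symbolic name plus 4, and the code below
          // extracts the step 4 so the PHI becomes {start,+,4}.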
03724     if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BEValue)) {
03725       // If there is a single occurrence of the symbolic value, replace it
03726       // with a recurrence.
03727       unsigned FoundIndex = Add->getNumOperands();
03728       for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
03729         if (Add->getOperand(i) == SymbolicName)
03730           if (FoundIndex == e) {
03731             FoundIndex = i;
03732             break;
03733           }
03734 
03735       if (FoundIndex != Add->getNumOperands()) {
03736         // Create an add with everything but the specified operand.
03737         SmallVector<const SCEV *, 8> Ops;
03738         for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
03739           if (i != FoundIndex)
03740             Ops.push_back(Add->getOperand(i));
03741         const SCEV *Accum = getAddExpr(Ops);
03742 
03743         // This is not a valid addrec if the step amount varies on each loop
03744         // iteration but is not itself an addrec in this loop.
03745         if (isLoopInvariant(Accum, L) ||
03746             (isa<SCEVAddRecExpr>(Accum) &&
03747              cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
03748           SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
03749 
03750           // If the increment doesn't overflow, then neither the addrec nor
03751           // the post-increment will overflow.
03752           if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV)) {
03753             if (OBO->getOperand(0) == PN) {
03754               if (OBO->hasNoUnsignedWrap())
03755                 Flags = setFlags(Flags, SCEV::FlagNUW);
03756               if (OBO->hasNoSignedWrap())
03757                 Flags = setFlags(Flags, SCEV::FlagNSW);
03758             }
03759           } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(BEValueV)) {
03760             // If the increment is an inbounds GEP, then we know the address
03761             // space cannot be wrapped around. We cannot make any guarantee
03762             // about signed or unsigned overflow because pointers are
03763             // unsigned but we may have a negative index from the base
03764             // pointer. We can guarantee that no unsigned wrap occurs if the
03765             // indices form a positive value.
03766             if (GEP->isInBounds() && GEP->getOperand(0) == PN) {
03767               Flags = setFlags(Flags, SCEV::FlagNW);
03768 
03769               const SCEV *Ptr = getSCEV(GEP->getPointerOperand());
03770               if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr)))
03771                 Flags = setFlags(Flags, SCEV::FlagNUW);
03772             }
03773 
03774             // We cannot transfer nuw and nsw flags from subtraction
03775             // operations -- sub nuw X, Y is not the same as add nuw X, -Y
03776             // for instance.
03777           }
03778 
03779           const SCEV *StartVal = getSCEV(StartValueV);
03780           const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);
03781 
03782           // Since the no-wrap flags are on the increment, they apply to the
03783           // post-incremented value as well.
03784           if (isLoopInvariant(Accum, L))
03785             (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags);
03786 
03787           // Okay, for the entire analysis of this edge we assumed the PHI
03788           // to be symbolic.  We now need to go back and purge all of the
03789           // entries for the scalars that use the symbolic expression.
03790           ForgetSymbolicName(PN, SymbolicName);
03791           ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
03792           return PHISCEV;
03793         }
03794       }
03795     } else {
03796       // Otherwise, this could be a loop like this:
03797       //     i = 0;  for (j = 1; ..; ++j) { ....  i = j; }
03798       // In this case, j = {1,+,1}  and BEValue is j.
03800       // Because the other in-value of i (0) fits the evolution of BEValue,
03801       // i really is an addrec evolution.
03801       //
03803       // We can generalize this by saying that i is the value of BEValue shifted
03804       // by one iteration:
03804       //   PHI(f(0), f({1,+,1})) --> f({0,+,1})
03805       const SCEV *Shifted = SCEVShiftRewriter::rewrite(BEValue, L, *this);
03806       const SCEV *Start = SCEVInitRewriter::rewrite(Shifted, L, *this);
03807       if (Shifted != getCouldNotCompute() &&
03808           Start != getCouldNotCompute()) {
03809         const SCEV *StartVal = getSCEV(StartValueV);
03810         if (Start == StartVal) {
03811           // Okay, for the entire analysis of this edge we assumed the PHI
03812           // to be symbolic.  We now need to go back and purge all of the
03813           // entries for the scalars that use the symbolic expression.
03814           ForgetSymbolicName(PN, SymbolicName);
03815           ValueExprMap[SCEVCallbackVH(PN, this)] = Shifted;
03816           return Shifted;
03817         }
03818       }
03819     }
03820   }
03821 
03822   return nullptr;
03823 }
03824 
03825 // Checks if the SCEV S is available at BB.  S is considered available at BB
03826 // if S can be materialized at BB without introducing a fault.
03827 static bool IsAvailableOnEntry(const Loop *L, DominatorTree &DT, const SCEV *S,
03828                                BasicBlock *BB) {
03829   struct CheckAvailable {
03830     bool TraversalDone = false;
03831     bool Available = true;
03832 
03833     const Loop *L = nullptr;  // The loop BB is in (can be nullptr)
03834     BasicBlock *BB = nullptr;
03835     DominatorTree &DT;
03836 
03837     CheckAvailable(const Loop *L, BasicBlock *BB, DominatorTree &DT)
03838       : L(L), BB(BB), DT(DT) {}
03839 
03840     bool setUnavailable() {
03841       TraversalDone = true;
03842       Available = false;
03843       return false;
03844     }
03845 
03846     bool follow(const SCEV *S) {
03847       switch (S->getSCEVType()) {
03848       case scConstant: case scTruncate: case scZeroExtend: case scSignExtend:
03849       case scAddExpr: case scMulExpr: case scUMaxExpr: case scSMaxExpr:
03850         // These expressions are available if their operand(s) is/are.
03851         return true;
03852 
03853       case scAddRecExpr: {
03855         // We allow add recurrences whose loop is the loop BB is in, or some
03855         // outer loop.  This guarantees availability because the value of the
03856         // add recurrence at BB is simply the "current" value of the induction
03857         // variable.  We can relax this in the future; for instance an add
03858         // recurrence on a sibling dominating loop is also available at BB.
03859         const auto *ARLoop = cast<SCEVAddRecExpr>(S)->getLoop();
03860         if (L && (ARLoop == L || ARLoop->contains(L)))
03861           return true;
03862 
03863         return setUnavailable();
03864       }
03865 
03866       case scUnknown: {
03867         // For SCEVUnknown, we check for simple dominance.
03868         const auto *SU = cast<SCEVUnknown>(S);
03869         Value *V = SU->getValue();
03870 
03871         if (isa<Argument>(V))
03872           return false;
03873 
03874         if (isa<Instruction>(V) && DT.dominates(cast<Instruction>(V), BB))
03875           return false;
03876 
03877         return setUnavailable();
03878       }
03879 
03880       case scUDivExpr:
03881       case scCouldNotCompute:
03883         // We do not try to be smart about these at all.
03883         return setUnavailable();
03884       }
03885       llvm_unreachable("switch should be fully covered!");
03886     }
03887 
03888     bool isDone() { return TraversalDone; }
03889   };
03890 
03891   CheckAvailable CA(L, BB, DT);
03892   SCEVTraversal<CheckAvailable> ST(CA);
03893 
03894   ST.visitAll(S);
03895   return CA.Available;
03896 }
03897 
03898 // Try to match a control flow sequence that branches out at BI and merges back
03899 // at Merge into a "C ? LHS : RHS" select pattern.  Return true on a successful
03900 // match.
03901 static bool BrPHIToSelect(DominatorTree &DT, BranchInst *BI, PHINode *Merge,
03902                           Value *&C, Value *&LHS, Value *&RHS) {
03903   C = BI->getCondition();
03904 
03905   BasicBlockEdge LeftEdge(BI->getParent(), BI->getSuccessor(0));
03906   BasicBlockEdge RightEdge(BI->getParent(), BI->getSuccessor(1));
03907 
03908   if (!LeftEdge.isSingleEdge())
03909     return false;
03910 
03911   assert(RightEdge.isSingleEdge() && "Follows from LeftEdge.isSingleEdge()");
03912 
03913   Use &LeftUse = Merge->getOperandUse(0);
03914   Use &RightUse = Merge->getOperandUse(1);
03915 
03916   if (DT.dominates(LeftEdge, LeftUse) && DT.dominates(RightEdge, RightUse)) {
03917     LHS = LeftUse;
03918     RHS = RightUse;
03919     return true;
03920   }
03921 
03922   if (DT.dominates(LeftEdge, RightUse) && DT.dominates(RightEdge, LeftUse)) {
03923     LHS = RightUse;
03924     RHS = LeftUse;
03925     return true;
03926   }
03927 
03928   return false;
03929 }
03930 
03931 const SCEV *ScalarEvolution::createNodeFromSelectLikePHI(PHINode *PN) {
03932   if (PN->getNumIncomingValues() == 2) {
03933     const Loop *L = LI.getLoopFor(PN->getParent());
03934 
03935     // We don't want to break LCSSA, even in a SCEV expression tree.
03936     for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
03937       if (LI.getLoopFor(PN->getIncomingBlock(i)) != L)
03938         return nullptr;
03939 
03940     // Try to match
03941     //
03942     //  br %cond, label %left, label %right
03943     // left:
03944     //  br label %merge
03945     // right:
03946     //  br label %merge
03947     // merge:
03948     //  V = phi [ %x, %left ], [ %y, %right ]
03949     //
03950     // as "select %cond, %x, %y"
03951 
03952     BasicBlock *IDom = DT[PN->getParent()]->getIDom()->getBlock();
03953     assert(IDom && "At least the entry block should dominate PN");
03954 
03955     auto *BI = dyn_cast<BranchInst>(IDom->getTerminator());
03956     Value *Cond = nullptr, *LHS = nullptr, *RHS = nullptr;
03957 
03958     if (BI && BI->isConditional() &&
03959         BrPHIToSelect(DT, BI, PN, Cond, LHS, RHS) &&
03960         IsAvailableOnEntry(L, DT, getSCEV(LHS), PN->getParent()) &&
03961         IsAvailableOnEntry(L, DT, getSCEV(RHS), PN->getParent()))
03962       return createNodeForSelectOrPHI(PN, Cond, LHS, RHS);
03963   }
03964 
03965   return nullptr;
03966 }
03967 
03968 const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
03969   if (const SCEV *S = createAddRecFromPHI(PN))
03970     return S;
03971 
03972   if (const SCEV *S = createNodeFromSelectLikePHI(PN))
03973     return S;
03974 
03975   // If the PHI has a single incoming value, follow that value, unless the
03976   // PHI's incoming blocks are in a different loop, in which case doing so
03977   // risks breaking LCSSA form. Instcombine would normally zap these, but
03978   // it doesn't have DominatorTree information, so it may miss cases.
03979   if (Value *V = SimplifyInstruction(PN, getDataLayout(), &TLI, &DT, &AC))
03980     if (LI.replacementPreservesLCSSAForm(PN, V))
03981       return getSCEV(V);
03982 
03983   // If it's not a loop phi, we can't handle it yet.
03984   return getUnknown(PN);
03985 }
03986 
03987 const SCEV *ScalarEvolution::createNodeForSelectOrPHI(Instruction *I,
03988                                                       Value *Cond,
03989                                                       Value *TrueVal,
03990                                                       Value *FalseVal) {
03991   // Handle "constant" branch or select. This can occur for instance when a
03992   // loop pass transforms an inner loop and moves on to process the outer loop.
03993   if (auto *CI = dyn_cast<ConstantInt>(Cond))
03994     return getSCEV(CI->isOne() ? TrueVal : FalseVal);
03995 
03996   // Try to match some simple smax or umax patterns.
03997   auto *ICI = dyn_cast<ICmpInst>(Cond);
03998   if (!ICI)
03999     return getUnknown(I);
04000 
04001   Value *LHS = ICI->getOperand(0);
04002   Value *RHS = ICI->getOperand(1);
04003 
04004   switch (ICI->getPredicate()) {
04005   case ICmpInst::ICMP_SLT:
04006   case ICmpInst::ICMP_SLE:
04007     std::swap(LHS, RHS);
04008   // fall through
04009   case ICmpInst::ICMP_SGT:
04010   case ICmpInst::ICMP_SGE:
04011     // a >s b ? a+x : b+x  ->  smax(a, b)+x
04012     // a >s b ? b+x : a+x  ->  smin(a, b)+x
04013     if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType())) {
04014       const SCEV *LS = getNoopOrSignExtend(getSCEV(LHS), I->getType());
04015       const SCEV *RS = getNoopOrSignExtend(getSCEV(RHS), I->getType());
04016       const SCEV *LA = getSCEV(TrueVal);
04017       const SCEV *RA = getSCEV(FalseVal);
04018       const SCEV *LDiff = getMinusSCEV(LA, LS);
04019       const SCEV *RDiff = getMinusSCEV(RA, RS);
04020       if (LDiff == RDiff)
04021         return getAddExpr(getSMaxExpr(LS, RS), LDiff);
04022       LDiff = getMinusSCEV(LA, RS);
04023       RDiff = getMinusSCEV(RA, LS);
04024       if (LDiff == RDiff)
04025         return getAddExpr(getSMinExpr(LS, RS), LDiff);
04026     }
04027     break;
04028   case ICmpInst::ICMP_ULT:
04029   case ICmpInst::ICMP_ULE:
04030     std::swap(LHS, RHS);
04031   // fall through
04032   case ICmpInst::ICMP_UGT:
04033   case ICmpInst::ICMP_UGE:
04034     // a >u b ? a+x : b+x  ->  umax(a, b)+x
04035     // a >u b ? b+x : a+x  ->  umin(a, b)+x
04036     if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType())) {
04037       const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType());
04038       const SCEV *RS = getNoopOrZeroExtend(getSCEV(RHS), I->getType());
04039       const SCEV *LA = getSCEV(TrueVal);
04040       const SCEV *RA = getSCEV(FalseVal);
04041       const SCEV *LDiff = getMinusSCEV(LA, LS);
04042       const SCEV *RDiff = getMinusSCEV(RA, RS);
04043       if (LDiff == RDiff)
04044         return getAddExpr(getUMaxExpr(LS, RS), LDiff);
04045       LDiff = getMinusSCEV(LA, RS);
04046       RDiff = getMinusSCEV(RA, LS);
04047       if (LDiff == RDiff)
04048         return getAddExpr(getUMinExpr(LS, RS), LDiff);
04049     }
04050     break;
04051   case ICmpInst::ICMP_NE:
04052     // n != 0 ? n+x : 1+x  ->  umax(n, 1)+x
04053     if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType()) &&
04054         isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) {
04055       const SCEV *One = getOne(I->getType());
04056       const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType());
04057       const SCEV *LA = getSCEV(TrueVal);
04058       const SCEV *RA = getSCEV(FalseVal);
04059       const SCEV *LDiff = getMinusSCEV(LA, LS);
04060       const SCEV *RDiff = getMinusSCEV(RA, One);
04061       if (LDiff == RDiff)
04062         return getAddExpr(getUMaxExpr(One, LS), LDiff);
04063     }
04064     break;
04065   case ICmpInst::ICMP_EQ:
04066     // n == 0 ? 1+x : n+x  ->  umax(n, 1)+x
04067     if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType()) &&
04068         isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) {
04069       const SCEV *One = getOne(I->getType());
04070       const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType());
04071       const SCEV *LA = getSCEV(TrueVal);
04072       const SCEV *RA = getSCEV(FalseVal);
04073       const SCEV *LDiff = getMinusSCEV(LA, One);
04074       const SCEV *RDiff = getMinusSCEV(RA, LS);
04075       if (LDiff == RDiff)
04076         return getAddExpr(getUMaxExpr(One, LS), LDiff);
04077     }
04078     break;
04079   default:
04080     break;
04081   }
04082 
04083   return getUnknown(I);
04084 }
04085 
04086 /// createNodeForGEP - Expand GEP instructions into add and multiply
04087 /// operations. This allows them to be analyzed by regular SCEV code.
04088 ///
04089 const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
04090   // Don't attempt to analyze GEPs over unsized objects.
04091   if (!GEP->getSourceElementType()->isSized())
04092     return getUnknown(GEP);
04093 
04094   SmallVector<const SCEV *, 4> IndexExprs;
04095   for (auto Index = GEP->idx_begin(); Index != GEP->idx_end(); ++Index)
04096     IndexExprs.push_back(getSCEV(*Index));
04097   return getGEPExpr(GEP->getSourceElementType(),
04098                     getSCEV(GEP->getPointerOperand()),
04099                     IndexExprs, GEP->isInBounds());
04100 }
04101 
04102 /// GetMinTrailingZeros - Determine the minimum number of zero bits that S is
04103 /// guaranteed to end in (at every loop iteration).  It is, at the same time,
04104 /// the minimum number of times S is divisible by 2.  For example, given {4,+,8}
04105 /// it returns 2.  If S is guaranteed to be 0, it returns the bitwidth of S.
04106 uint32_t
04107 ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
04108   if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
04109     return C->getAPInt().countTrailingZeros();
04110 
04111   if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(S))
04112     return std::min(GetMinTrailingZeros(T->getOperand()),
04113                     (uint32_t)getTypeSizeInBits(T->getType()));
04114 
04115   if (const SCEVZeroExtendExpr *E = dyn_cast<SCEVZeroExtendExpr>(S)) {
04116     uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
04117     return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ?
04118              getTypeSizeInBits(E->getType()) : OpRes;
04119   }
04120 
04121   if (const SCEVSignExtendExpr *E = dyn_cast<SCEVSignExtendExpr>(S)) {
04122     uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
04123     return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ?
04124              getTypeSizeInBits(E->getType()) : OpRes;
04125   }
04126 
04127   if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
04129     // The result is the min of all operands' results.
04129     uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
04130     for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
04131       MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
04132     return MinOpRes;
04133   }
04134 
04135   if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
04137     // The result is the sum of all operands' results.
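          // (For instance, a multiple of 4 times a multiple of 8 is a multiple of
          // 32: TZ(4) + TZ(8) = 2 + 3 = 5, capped at the bit width below.)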
04137     uint32_t SumOpRes = GetMinTrailingZeros(M->getOperand(0));
04138     uint32_t BitWidth = getTypeSizeInBits(M->getType());
04139     for (unsigned i = 1, e = M->getNumOperands();
04140          SumOpRes != BitWidth && i != e; ++i)
04141       SumOpRes = std::min(SumOpRes + GetMinTrailingZeros(M->getOperand(i)),
04142                           BitWidth);
04143     return SumOpRes;
04144   }
04145 
04146   if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
04148     // The result is the min of all operands' results.
04148     uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
04149     for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
04150       MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
04151     return MinOpRes;
04152   }
04153 
04154   if (const SCEVSMaxExpr *M = dyn_cast<SCEVSMaxExpr>(S)) {
04156     // The result is the min of all operands' results.
04156     uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
04157     for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
04158       MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
04159     return MinOpRes;
04160   }
04161 
04162   if (const SCEVUMaxExpr *M = dyn_cast<SCEVUMaxExpr>(S)) {
04164     // The result is the min of all operands' results.
04164     uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
04165     for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
04166       MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
04167     return MinOpRes;
04168   }
04169 
04170   if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
04171     // For a SCEVUnknown, ask ValueTracking.
04172     unsigned BitWidth = getTypeSizeInBits(U->getType());
04173     APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
04174     computeKnownBits(U->getValue(), Zeros, Ones, getDataLayout(), 0, &AC,
04175                      nullptr, &DT);
04176     return Zeros.countTrailingOnes();
04177   }
04178 
04179   // SCEVUDivExpr
04180   return 0;
04181 }
04182 
04183 /// GetRangeFromMetadata - Helper method to assign a range to V from
04184 /// metadata present in the IR.
04185 static Optional<ConstantRange> GetRangeFromMetadata(Value *V) {
04186   if (Instruction *I = dyn_cast<Instruction>(V))
04187     if (MDNode *MD = I->getMetadata(LLVMContext::MD_range))
04188       return getConstantRangeFromMetadata(*MD);
04189 
04190   return None;
04191 }
04192 
04193 /// getRange - Determine the range for a particular SCEV.  If SignHint is
04194 /// HINT_RANGE_UNSIGNED (resp. HINT_RANGE_SIGNED) then getRange prefers ranges
04195 /// with a "cleaner" unsigned (resp. signed) representation.
04196 ///
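      /// (For instance, for the affine addrec {0,+,1} in a loop whose max
      /// backedge-taken count is the constant 9, the unsigned-hinted range works
      /// out to [0, 10).)
      ///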
04197 ConstantRange
04198 ScalarEvolution::getRange(const SCEV *S,
04199                           ScalarEvolution::RangeSignHint SignHint) {
04200   DenseMap<const SCEV *, ConstantRange> &Cache =
04201       SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED ? UnsignedRanges
04202                                                        : SignedRanges;
04203 
04204   // See if we've computed this range already.
04205   DenseMap<const SCEV *, ConstantRange>::iterator I = Cache.find(S);
04206   if (I != Cache.end())
04207     return I->second;
04208 
04209   if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
04210     return setRange(C, SignHint, ConstantRange(C->getAPInt()));
04211 
04212   unsigned BitWidth = getTypeSizeInBits(S->getType());
04213   ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);
04214 
04216   // If the value is known to have trailing zero bits, the maximum value of the
04217   // range will have those trailing zeros as well.
04217   uint32_t TZ = GetMinTrailingZeros(S);
04218   if (TZ != 0) {
04219     if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED)
04220       ConservativeResult =
04221           ConstantRange(APInt::getMinValue(BitWidth),
04222                         APInt::getMaxValue(BitWidth).lshr(TZ).shl(TZ) + 1);
04223     else
04224       ConservativeResult = ConstantRange(
04225           APInt::getSignedMinValue(BitWidth),
04226           APInt::getSignedMaxValue(BitWidth).ashr(TZ).shl(TZ) + 1);
04227   }
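        // (For example, with BitWidth = 8 and TZ = 2, the unsigned hint above
        // yields the range [0, 253), i.e. a maximum value of 252, which keeps
        // the two low bits clear.)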
04228 
04229   if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
04230     ConstantRange X = getRange(Add->getOperand(0), SignHint);
04231     for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
04232       X = X.add(getRange(Add->getOperand(i), SignHint));
04233     return setRange(Add, SignHint, ConservativeResult.intersectWith(X));
04234   }
04235 
04236   if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
04237     ConstantRange X = getRange(Mul->getOperand(0), SignHint);
04238     for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
04239       X = X.multiply(getRange(Mul->getOperand(i), SignHint));
04240     return setRange(Mul, SignHint, ConservativeResult.intersectWith(X));
04241   }
04242 
04243   if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
04244     ConstantRange X = getRange(SMax->getOperand(0), SignHint);
04245     for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
04246       X = X.smax(getRange(SMax->getOperand(i), SignHint));
04247     return setRange(SMax, SignHint, ConservativeResult.intersectWith(X));
04248   }
04249 
04250   if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
04251     ConstantRange X = getRange(UMax->getOperand(0), SignHint);
04252     for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
04253       X = X.umax(getRange(UMax->getOperand(i), SignHint));
04254     return setRange(UMax, SignHint, ConservativeResult.intersectWith(X));
04255   }
04256 
04257   if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
04258     ConstantRange X = getRange(UDiv->getLHS(), SignHint);
04259     ConstantRange Y = getRange(UDiv->getRHS(), SignHint);
04260     return setRange(UDiv, SignHint,
04261                     ConservativeResult.intersectWith(X.udiv(Y)));
04262   }
04263 
04264   if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
04265     ConstantRange X = getRange(ZExt->getOperand(), SignHint);
04266     return setRange(ZExt, SignHint,
04267                     ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
04268   }
04269 
04270   if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
04271     ConstantRange X = getRange(SExt->getOperand(), SignHint);
04272     return setRange(SExt, SignHint,
04273                     ConservativeResult.intersectWith(X.signExtend(BitWidth)));
04274   }
04275 
04276   if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
04277     ConstantRange X = getRange(Trunc->getOperand(), SignHint);
04278     return setRange(Trunc, SignHint,
04279                     ConservativeResult.intersectWith(X.truncate(BitWidth)));
04280   }
04281 
04282   if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
04283     // If there's no unsigned wrap, the value will never be less than its
04284     // initial value.
04285     if (AddRec->getNoWrapFlags(SCEV::FlagNUW))
04286       if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart()))
04287         if (!C->getValue()->isZero())
04288           ConservativeResult = ConservativeResult.intersectWith(
04289               ConstantRange(C->getAPInt(), APInt(BitWidth, 0)));
04290 
04291     // If there's no signed wrap, and all the operands have the same sign or
04292     // zero, the value won't ever change sign.
04293     if (AddRec->getNoWrapFlags(SCEV::FlagNSW)) {
04294       bool AllNonNeg = true;
04295       bool AllNonPos = true;
04296       for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
04297         if (!isKnownNonNegative(AddRec->getOperand(i))) AllNonNeg = false;
04298         if (!isKnownNonPositive(AddRec->getOperand(i))) AllNonPos = false;
04299       }
04300       if (AllNonNeg)
04301         ConservativeResult = ConservativeResult.intersectWith(
04302           ConstantRange(APInt(BitWidth, 0),
04303                         APInt::getSignedMinValue(BitWidth)));
04304       else if (AllNonPos)
04305         ConservativeResult = ConservativeResult.intersectWith(
04306           ConstantRange(APInt::getSignedMinValue(BitWidth),
04307                         APInt(BitWidth, 1)));
04308     }
04309 
04310     // TODO: non-affine addrec
04311     if (AddRec->isAffine()) {
04312       Type *Ty = AddRec->getType();
04313       const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
04314       if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
04315           getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
04316 
04317         // Check for overflow.  This must be done with ConstantRange arithmetic
04318         // because we could be called from within the ScalarEvolution overflow
04319         // checking code.
04320 
04321         MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);
04322         ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
04323         ConstantRange ZExtMaxBECountRange =
04324             MaxBECountRange.zextOrTrunc(BitWidth * 2 + 1);
04325 
04326         const SCEV *Start = AddRec->getStart();
04327         const SCEV *Step = AddRec->getStepRecurrence(*this);
04328         ConstantRange StepSRange = getSignedRange(Step);
04329         ConstantRange SExtStepSRange = StepSRange.sextOrTrunc(BitWidth * 2 + 1);
04330 
04331         ConstantRange StartURange = getUnsignedRange(Start);
04332         ConstantRange EndURange =
04333             StartURange.add(MaxBECountRange.multiply(StepSRange));
04334 
04335         // Check for unsigned overflow.
04336         ConstantRange ZExtStartURange =
04337             StartURange.zextOrTrunc(BitWidth * 2 + 1);
04338         ConstantRange ZExtEndURange = EndURange.zextOrTrunc(BitWidth * 2 + 1);
04339         if (ZExtStartURange.add(ZExtMaxBECountRange.multiply(SExtStepSRange)) ==
04340             ZExtEndURange) {
04341           APInt Min = APIntOps::umin(StartURange.getUnsignedMin(),
04342                                      EndURange.getUnsignedMin());
04343           APInt Max = APIntOps::umax(StartURange.getUnsignedMax(),
04344                                      EndURange.getUnsignedMax());
04345           bool IsFullRange = Min.isMinValue() && Max.isMaxValue();
04346           if (!IsFullRange)
04347             ConservativeResult =
04348                 ConservativeResult.intersectWith(ConstantRange(Min, Max + 1));
04349         }
04350 
04351         ConstantRange StartSRange = getSignedRange(Start);
04352         ConstantRange EndSRange =
04353             StartSRange.add(MaxBECountRange.multiply(StepSRange));
04354 
04355         // Check for signed overflow. This must be done with ConstantRange
04356         // arithmetic because we could be called from within the ScalarEvolution
04357         // overflow checking code.
04358         ConstantRange SExtStartSRange =
04359             StartSRange.sextOrTrunc(BitWidth * 2 + 1);
04360         ConstantRange SExtEndSRange = EndSRange.sextOrTrunc(BitWidth * 2 + 1);
04361         if (SExtStartSRange.add(ZExtMaxBECountRange.multiply(SExtStepSRange)) ==
04362             SExtEndSRange) {
04363           APInt Min = APIntOps::smin(StartSRange.getSignedMin(),
04364                                      EndSRange.getSignedMin());
04365           APInt Max = APIntOps::smax(StartSRange.getSignedMax(),
04366                                      EndSRange.getSignedMax());
04367           bool IsFullRange = Min.isMinSignedValue() && Max.isMaxSignedValue();
04368           if (!IsFullRange)
04369             ConservativeResult =
04370                 ConservativeResult.intersectWith(ConstantRange(Min, Max + 1));
04371         }
04372       }
04373     }
04374 
04375     return setRange(AddRec, SignHint, ConservativeResult);
04376   }
04377 
04378   if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
04379     // Check if the IR explicitly contains !range metadata.
04380     Optional<ConstantRange> MDRange = GetRangeFromMetadata(U->getValue());
04381     if (MDRange.hasValue())
04382       ConservativeResult = ConservativeResult.intersectWith(MDRange.getValue());
04383 
04384     // Split here to avoid paying the compile-time cost of calling both
04385     // computeKnownBits and ComputeNumSignBits.  This restriction can be lifted
04386     // if needed.
04387     const DataLayout &DL = getDataLayout();
04388     if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED) {
04389       // For a SCEVUnknown, ask ValueTracking.
04390       APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
04391       computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, &AC, nullptr, &DT);
04392       if (Ones != ~Zeros + 1)
04393         ConservativeResult =
04394             ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1));
04395     } else {
04396       assert(SignHint == ScalarEvolution::HINT_RANGE_SIGNED &&
04397              "generalize as needed!");
04398       unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, &AC, nullptr, &DT);
04399       if (NS > 1)
04400         ConservativeResult = ConservativeResult.intersectWith(
04401             ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
04402                           APInt::getSignedMaxValue(BitWidth).ashr(NS - 1) + 1));
04403     }
04404 
04405     return setRange(U, SignHint, ConservativeResult);
04406   }
04407 
04408   return setRange(S, SignHint, ConservativeResult);
04409 }
04410 
04411 SCEV::NoWrapFlags ScalarEvolution::getNoWrapFlagsFromUB(const Value *V) {
04412   if (isa<ConstantExpr>(V)) return SCEV::FlagAnyWrap;
04413   const BinaryOperator *BinOp = cast<BinaryOperator>(V);
04414 
04415   // Return early if there are no flags to propagate to the SCEV.
04416   SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
04417   if (BinOp->hasNoUnsignedWrap())
04418     Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW);
04419   if (BinOp->hasNoSignedWrap())
04420     Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW);
04421   if (Flags == SCEV::FlagAnyWrap) {
04422     return SCEV::FlagAnyWrap;
04423   }
04424 
04425   // Here we check that BinOp is in the header of the innermost loop
04426   // containing BinOp, since we only deal with instructions in the loop
04427   // header. The actual loop we need to check later will come from an add
04428   // recurrence, but getting that requires computing the SCEV of the operands,
04429   // which can be expensive. This check we can do cheaply to rule out some
04430   // cases early.
04431   Loop *innermostContainingLoop = LI.getLoopFor(BinOp->getParent());
04432   if (innermostContainingLoop == nullptr ||
04433       innermostContainingLoop->getHeader() != BinOp->getParent())
04434     return SCEV::FlagAnyWrap;
04435 
04436   // Only proceed if we can prove that BinOp does not yield poison.
04437   if (!isKnownNotFullPoison(BinOp)) return SCEV::FlagAnyWrap;
04438 
04439   // At this point we know that if V is executed, then it does not wrap
04440   // according to at least one of NSW or NUW. If V is not executed, then we do
04441   // not know if the calculation that V represents would wrap. Multiple
04442   // instructions can map to the same SCEV. If we apply NSW or NUW from V to
04443   // the SCEV, we must guarantee no wrapping for that SCEV also when it is
04444   // derived from other instructions that map to the same SCEV. We cannot make
04445   // that guarantee for cases where V is not executed. So we need to find the
04446   // loop that V is considered in relation to and prove that V is executed for
04447   // every iteration of that loop. That implies that the value that V
04448   // calculates does not wrap anywhere in the loop, so then we can apply the
04449   // flags to the SCEV.
04450   //
04451   // We check isLoopInvariant to disambiguate in case we are adding two
04452   // recurrences from different loops, so that we know which loop to prove
04453   // that V is executed in.
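        // (As a sketch, with purely illustrative IR names: for
        // "%a = add nsw i32 %iv, %inv" in a loop header, where %iv yields an add
        // recurrence in loop L and %inv is invariant in L, proving that %a executes
        // on every iteration of L lets the nsw flag carry over to the resulting
        // SCEV, together with the checks above.)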
04454   for (int OpIndex = 0; OpIndex < 2; ++OpIndex) {
04455     const SCEV *Op = getSCEV(BinOp->getOperand(OpIndex));
04456     if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
04457       const int OtherOpIndex = 1 - OpIndex;
04458       const SCEV *OtherOp = getSCEV(BinOp->getOperand(OtherOpIndex));
04459       if (isLoopInvariant(OtherOp, AddRec->getLoop()) &&
04460           isGuaranteedToExecuteForEveryIteration(BinOp, AddRec->getLoop()))
04461         return Flags;
04462     }
04463   }
04464   return SCEV::FlagAnyWrap;
04465 }
04466 
04467 /// createSCEV - We know that there is no SCEV for the specified value.  Analyze
04468 /// the expression.
04469 ///
04470 const SCEV *ScalarEvolution::createSCEV(Value *V) {
04471   if (!isSCEVable(V->getType()))
04472     return getUnknown(V);
04473 
04474   unsigned Opcode = Instruction::UserOp1;
04475   if (Instruction *I = dyn_cast<Instruction>(V)) {
04476     Opcode = I->getOpcode();
04477 
04478     // Don't attempt to analyze instructions in blocks that aren't
04479     // reachable. Such instructions don't matter, and they aren't required
04480     // to obey basic rules for definitions dominating uses which this
04481     // analysis depends on.
04482     if (!DT.isReachableFromEntry(I->getParent()))
04483       return getUnknown(V);
04484   } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
04485     Opcode = CE->getOpcode();
04486   else if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
04487     return getConstant(CI);
04488   else if (isa<ConstantPointerNull>(V))
04489     return getZero(V->getType());
04490   else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
04491     return GA->mayBeOverridden() ? getUnknown(V) : getSCEV(GA->getAliasee());
04492   else
04493     return getUnknown(V);
04494 
04495   Operator *U = cast<Operator>(V);
04496   switch (Opcode) {
04497   case Instruction::Add: {
04498     // The simple thing to do would be to just call getSCEV on both operands
04499     // and call getAddExpr with the result. However if we're looking at a
04500     // bunch of things all added together, this can be quite inefficient,
04501     // because it leads to N-1 getAddExpr calls for N ultimate operands.
04502     // Instead, gather up all the operands and make a single getAddExpr call.
04503     // LLVM IR canonical form means we need only traverse the left operands.
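          // (For example, for ((a + b) - c) + d the walk below collects d, -c and
          // b from the right-hand operands and finally a, so a single
          // getAddExpr({a, b, -c, d}) call replaces one call per nested addition.)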
04504     SmallVector<const SCEV *, 4> AddOps;
04505     for (Value *Op = U;; Op = U->getOperand(0)) {
04506       U = dyn_cast<Operator>(Op);
04507       unsigned Opcode = U ? U->getOpcode() : 0;
04508       if (!U || (Opcode != Instruction::Add && Opcode != Instruction::Sub)) {
04509         assert(Op != V && "V should be an add");
04510         AddOps.push_back(getSCEV(Op));
04511         break;
04512       }
04513 
04514       if (auto *OpSCEV = getExistingSCEV(U)) {
04515         AddOps.push_back(OpSCEV);
04516         break;
04517       }
04518 
04519       // If a NUW or NSW flag can be applied to the SCEV for this
04520       // addition, then compute the SCEV for this addition by itself
04521       // with a separate call to getAddExpr. We need to do that
04522       // instead of pushing the operands of the addition onto AddOps,
04523       // since the flags are only known to apply to this particular
04524       // addition - they may not apply to other additions that can be
04525       // formed with operands from AddOps.
04526       const SCEV *RHS = getSCEV(U->getOperand(1));
04527       SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(U);
04528       if (Flags != SCEV::FlagAnyWrap) {
04529         const SCEV *LHS = getSCEV(U->getOperand(0));
04530         if (Opcode == Instruction::Sub)
04531           AddOps.push_back(getMinusSCEV(LHS, RHS, Flags));
04532         else
04533           AddOps.push_back(getAddExpr(LHS, RHS, Flags));
04534         break;
04535       }
04536 
04537       if (Opcode == Instruction::Sub)
04538         AddOps.push_back(getNegativeSCEV(RHS));
04539       else
04540         AddOps.push_back(RHS);
04541     }
04542     return getAddExpr(AddOps);
04543   }
04544 
04545   case Instruction::Mul: {
04546     SmallVector<const SCEV *, 4> MulOps;
04547     for (Value *Op = U;; Op = U->getOperand(0)) {
04548       U = dyn_cast<Operator>(Op);
04549       if (!U || U->getOpcode() != Instruction::Mul) {
04550         assert(Op != V && "V should be a mul");
04551         MulOps.push_back(getSCEV(Op));
04552         break;
04553       }
04554 
04555       if (auto *OpSCEV = getExistingSCEV(U)) {
04556         MulOps.push_back(OpSCEV);
04557         break;
04558       }
04559 
04560       SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(U);
04561       if (Flags != SCEV::FlagAnyWrap) {
04562         MulOps.push_back(getMulExpr(getSCEV(U->getOperand(0)),
04563                                     getSCEV(U->getOperand(1)), Flags));
04564         break;
04565       }
04566 
04567       MulOps.push_back(getSCEV(U->getOperand(1)));
04568     }
04569     return getMulExpr(MulOps);
04570   }
04571   case Instruction::UDiv:
04572     return getUDivExpr(getSCEV(U->getOperand(0)),
04573                        getSCEV(U->getOperand(1)));
04574   case Instruction::Sub:
04575     return getMinusSCEV(getSCEV(U->getOperand(0)), getSCEV(U->getOperand(1)),
04576                         getNoWrapFlagsFromUB(U));
04577   case Instruction::And:
04578     // For an expression like x&255 that merely masks off the high bits,
04579     // use zext(trunc(x)) as the SCEV expression.
04580     if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
04581       if (CI->isNullValue())
04582         return getSCEV(U->getOperand(1));
04583       if (CI->isAllOnesValue())
04584         return getSCEV(U->getOperand(0));
04585       const APInt &A = CI->getValue();
04586 
04587       // Instcombine's ShrinkDemandedConstant may strip bits out of
04588       // constants, obscuring what would otherwise be a low-bits mask.
04589       // Use computeKnownBits to compute what ShrinkDemandedConstant
04590       // knew about to reconstruct a low-bits mask value.
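            // (Worked example: with A = 0xF0 on an i8 value, LZ = 0 and TZ = 4
            // below, so x & 0xF0 is modeled as
            //   (zext (trunc (x /u 16) to i4) to i8) * 16,
            // which recovers exactly the high nibble of x.)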
04591       unsigned LZ = A.countLeadingZeros();
04592       unsigned TZ = A.countTrailingZeros();
04593       unsigned BitWidth = A.getBitWidth();
04594       APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
04595       computeKnownBits(U->getOperand(0), KnownZero, KnownOne, getDataLayout(),
04596                        0, &AC, nullptr, &DT);
04597 
04598       APInt EffectiveMask =
04599           APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ);
04600       if ((LZ != 0 || TZ != 0) && !((~A & ~KnownZero) & EffectiveMask)) {
04601         const SCEV *MulCount = getConstant(
04602             ConstantInt::get(getContext(), APInt::getOneBitSet(BitWidth, TZ)));
04603         return getMulExpr(
04604             getZeroExtendExpr(
04605                 getTruncateExpr(
04606                     getUDivExactExpr(getSCEV(U->getOperand(0)), MulCount),
04607                     IntegerType::get(getContext(), BitWidth - LZ - TZ)),
04608                 U->getType()),
04609             MulCount);
04610       }
04611     }
04612     break;
04613 
04614   case Instruction::Or:
04615     // If the RHS of the Or is a constant, we may have something like:
04616     // X*4+1 which got turned into X*4|1.  Handle this as an Add so loop
04617     // optimizations will transparently handle this case.
04618     //
04619     // In order for this transformation to be safe, the LHS must be of the
04620     // form X*(2^n) and the Or constant must be less than 2^n.
04621     if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
04622       const SCEV *LHS = getSCEV(U->getOperand(0));
04623       const APInt &CIVal = CI->getValue();
04624       if (GetMinTrailingZeros(LHS) >=
04625           (CIVal.getBitWidth() - CIVal.countLeadingZeros())) {
04626         // Build a plain add SCEV.
04627         const SCEV *S = getAddExpr(LHS, getSCEV(CI));
04628         // If the LHS of the add was an addrec and it has no-wrap flags,
04629         // transfer the no-wrap flags, since an or won't introduce a wrap.
04630         if (const SCEVAddRecExpr *NewAR = dyn_cast<SCEVAddRecExpr>(S)) {
04631           const SCEVAddRecExpr *OldAR = cast<SCEVAddRecExpr>(LHS);
04632           const_cast<SCEVAddRecExpr *>(NewAR)->setNoWrapFlags(
04633             OldAR->getNoWrapFlags());
04634         }
04635         return S;
04636       }
04637     }
04638     break;
04639   case Instruction::Xor:
04640     if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
04641       // If the RHS of the xor is a signbit, then this is just an add.
04642       // Instcombine turns add of signbit into xor as a strength reduction step.
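            // (Adding the sign bit can only flip the top bit, since any carry out
            // of it is discarded, so x ^ SignBit == x + SignBit.)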
04643       if (CI->getValue().isSignBit())
04644         return getAddExpr(getSCEV(U->getOperand(0)),
04645                           getSCEV(U->getOperand(1)));
04646 
04647       // If the RHS of xor is -1, then this is a not operation.
04648       if (CI->isAllOnesValue())
04649         return getNotSCEV(getSCEV(U->getOperand(0)));
04650 
04651       // Model xor(and(x, C), C) as and(~x, C), if C is a low-bits mask.
04652       // This is a variant of the check for xor with -1, and it handles
04653       // the case where instcombine has trimmed non-demanded bits out
04654       // of an xor with -1.
04655       if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U->getOperand(0)))
04656         if (ConstantInt *LCI = dyn_cast<ConstantInt>(BO->getOperand(1)))
04657           if (BO->getOpcode() == Instruction::And &&
04658               LCI->getValue() == CI->getValue())
04659             if (const SCEVZeroExtendExpr *Z =
04660                   dyn_cast<SCEVZeroExtendExpr>(getSCEV(U->getOperand(0)))) {
04661               Type *UTy = U->getType();
04662               const SCEV *Z0 = Z->getOperand();
04663               Type *Z0Ty = Z0->getType();
04664               unsigned Z0TySize = getTypeSizeInBits(Z0Ty);
04665 
04666               // If C is a low-bits mask, the zero extend is serving to
04667               // mask off the high bits. Complement the operand and
04668               // re-apply the zext.
04669               if (APIntOps::isMask(Z0TySize, CI->getValue()))
04670                 return getZeroExtendExpr(getNotSCEV(Z0), UTy);
04671 
04672               // If C is a single bit, it may be in the sign-bit position
04673               // before the zero-extend. In this case, represent the xor
04674               // using an add, which is equivalent, and re-apply the zext.
04675               APInt Trunc = CI->getValue().trunc(Z0TySize);
04676               if (Trunc.zext(getTypeSizeInBits(UTy)) == CI->getValue() &&
04677                   Trunc.isSignBit())
04678                 return getZeroExtendExpr(getAddExpr(Z0, getConstant(Trunc)),
04679                                          UTy);
04680             }
04681     }
04682     break;
04683 
04684   case Instruction::Shl:
04685     // Turn shift left of a constant amount into a multiply.
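          // (For example, "shl i32 %x, 3" is modeled as %x * 8, provided the shift
          // amount is within the bit width.)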
04686     if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) {
04687       uint32_t BitWidth = cast<IntegerType>(U->getType())->getBitWidth();
04688 
04689       // If the shift count is not less than the bitwidth, the result of
04690       // the shift is undefined. Don't try to analyze it, because the
04691       // resolution chosen here may differ from the resolution chosen in
04692       // other parts of the compiler.
04693       if (SA->getValue().uge(BitWidth))
04694         break;
04695 
04696       // It is currently not resolved how to interpret NSW for left
04697       // shift by BitWidth - 1, so we avoid applying flags in that
04698       // case. Remove this check (or this comment) once the situation
04699       // is resolved. See
04700       // http://lists.llvm.org/pipermail/llvm-dev/2015-April/084195.html
04701       // and http://reviews.llvm.org/D8890 .
04702       auto Flags = SCEV::FlagAnyWrap;
04703       if (SA->getValue().ult(BitWidth - 1)) Flags = getNoWrapFlagsFromUB(U);
04704 
04705       Constant *X = ConstantInt::get(getContext(),
04706         APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
04707       return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X), Flags);
04708     }
04709     break;
04710 
04711   case Instruction::LShr:
04713     // Turn a logical shift right by a constant amount into an unsigned divide.
04713     if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) {
04714       uint32_t BitWidth = cast<IntegerType>(U->getType())->getBitWidth();
04715 
04716       // If the shift count is not less than the bitwidth, the result of
04717       // the shift is undefined. Don't try to analyze it, because the
04718       // resolution chosen here may differ from the resolution chosen in
04719       // other parts of the compiler.
04720       if (SA->getValue().uge(BitWidth))
04721         break;
04722 
04723       Constant *X = ConstantInt::get(getContext(),
04724         APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
04725       return getUDivExpr(getSCEV(U->getOperand(0)), getSCEV(X));
04726     }
04727     break;
04728 
04729   case Instruction::AShr:
04730     // For a two-shift sext-inreg, use sext(trunc(x)) as the SCEV expression.
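          // (For example, on i32, "ashr (shl %x, 24), 24" becomes
          // sext(trunc(%x to i8) to i32).)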
04731     if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1)))
04732       if (Operator *L = dyn_cast<Operator>(U->getOperand(0)))
04733         if (L->getOpcode() == Instruction::Shl &&
04734             L->getOperand(1) == U->getOperand(1)) {
04735           uint64_t BitWidth = getTypeSizeInBits(U->getType());
04736 
04737           // If the shift count is not less than the bitwidth, the result of
04738           // the shift is undefined. Don't try to analyze it, because the
04739           // resolution chosen here may differ from the resolution chosen in
04740           // other parts of the compiler.
04741           if (CI->getValue().uge(BitWidth))
04742             break;
04743 
04744           uint64_t Amt = BitWidth - CI->getZExtValue();
04745           if (Amt == BitWidth)
04746             return getSCEV(L->getOperand(0));       // shift by zero --> noop
04747           return
04748             getSignExtendExpr(getTruncateExpr(getSCEV(L->getOperand(0)),
04749                                               IntegerType::get(getContext(),
04750                                                                Amt)),
04751                               U->getType());
04752         }
04753     break;
04754 
04755   case Instruction::Trunc:
04756     return getTruncateExpr(getSCEV(U->getOperand(0)), U->getType());
04757 
04758   case Instruction::ZExt:
04759     return getZeroExtendExpr(getSCEV(U->getOperand(0)), U->getType());
04760 
04761   case Instruction::SExt:
04762     return getSignExtendExpr(getSCEV(U->getOperand(0)), U->getType());
04763 
04764   case Instruction::BitCast:
04765     // BitCasts are no-op casts so we just eliminate the cast.
04766     if (isSCEVable(U->getType()) && isSCEVable(U->getOperand(0)->getType()))
04767       return getSCEV(U->getOperand(0));
04768     break;
04769 
04770   // It's tempting to handle inttoptr and ptrtoint as no-ops, however this can
04771   // lead to pointer expressions which cannot safely be expanded to GEPs,
04772   // because ScalarEvolution doesn't respect the GEP aliasing rules when
04773   // simplifying integer expressions.
04774 
04775   case Instruction::GetElementPtr:
04776     return createNodeForGEP(cast<GEPOperator>(U));
04777 
04778   case Instruction::PHI:
04779     return createNodeForPHI(cast<PHINode>(U));
04780 
04781   case Instruction::Select:
04783     // U can also be a select constant expr, which we let fall through.  Since
04784     // createNodeForSelectOrPHI only works for a condition that is an `ICmpInst`,
04785     // and constant expressions cannot have instructions as operands, we'd have
04786     // returned getUnknown for a select constant expression anyway.
04786     if (isa<Instruction>(U))
04787       return createNodeForSelectOrPHI(cast<Instruction>(U), U->getOperand(0),
04788                                       U->getOperand(1), U->getOperand(2));
04789 
04790   default: // We cannot analyze this expression.
04791     break;
04792   }
04793 
04794   return getUnknown(V);
04795 }
04796 
04797 
04798 
04799 //===----------------------------------------------------------------------===//
04800 //                   Iteration Count Computation Code
04801 //
04802 
04803 unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L) {
04804   if (BasicBlock *ExitingBB = L->getExitingBlock())
04805     return getSmallConstantTripCount(L, ExitingBB);
04806 
04807   // No trip count information for multiple exits.
04808   return 0;
04809 }
04810 
04811 /// getSmallConstantTripCount - Returns the maximum trip count of this loop as a
04812 /// normal unsigned value. Returns 0 if the trip count is unknown or not
04813 /// constant. Will also return 0 if the maximum trip count is very large (>=
04814 /// 2^32).
04815 ///
04816 /// This "trip count" assumes that control exits via ExitingBlock. More
04817 /// precisely, it is the number of times that control may reach ExitingBlock
04818 /// before taking the branch. For loops with multiple exits, it may not be the
04819 /// number of times that the loop header executes because the loop may exit
04820 /// prematurely via another branch.
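      /// (For example, a constant exit count of 9 means ExitingBlock is reached 10
      /// times before the exit is taken, so 10 is returned.)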
04821 unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L,
04822                                                     BasicBlock *ExitingBlock) {
04823   assert(ExitingBlock && "Must pass a non-null exiting block!");
04824   assert(L->isLoopExiting(ExitingBlock) &&
04825          "Exiting block must actually branch out of the loop!");
04826   const SCEVConstant *ExitCount =
04827       dyn_cast<SCEVConstant>(getExitCount(L, ExitingBlock));
04828   if (!ExitCount)
04829     return 0;
04830 
04831   ConstantInt *ExitConst = ExitCount->getValue();
04832 
04833   // Guard against huge trip counts.
04834   if (ExitConst->getValue().getActiveBits() > 32)
04835     return 0;
04836 
04837   // In case of integer overflow, this returns 0, which is correct.
04838   return ((unsigned)ExitConst->getZExtValue()) + 1;
04839 }
04840 
04841 unsigned ScalarEvolution::getSmallConstantTripMultiple(Loop *L) {
04842   if (BasicBlock *ExitingBB = L->getExitingBlock())
04843     return getSmallConstantTripMultiple(L, ExitingBB);
04844 
04845   // No trip multiple information for multiple exits.
04846   return 0;
04847 }
04848 
04849 /// getSmallConstantTripMultiple - Returns the largest constant divisor of the
04850 /// trip count of this loop as a normal unsigned value, if possible. This
04851 /// means that the actual trip count is always a multiple of the returned
04852 /// value (don't forget the trip count could very well be zero as well!).
04853 ///
04854 /// Returns 1 if the trip count is unknown or not guaranteed to be a
04855 /// multiple of a constant (which is also the case if the trip count is simply
04856 /// a constant; use getSmallConstantTripCount for that case). Will also return 1
04857 /// if the trip count is very large (>= 2^32).
04858 ///
04859 /// As explained in the comments for getSmallConstantTripCount, this assumes
04860 /// that control exits the loop via ExitingBlock.
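      /// (For example, if the backedge-taken count is 4*%n - 1, the trip count
      /// folds to 4*%n and the leading constant factor 4 would be returned; %n is
      /// a purely illustrative name here.)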
04861 unsigned
04862 ScalarEvolution::getSmallConstantTripMultiple(Loop *L,
04863                                               BasicBlock *ExitingBlock) {
04864   assert(ExitingBlock && "Must pass a non-null exiting block!");
04865   assert(L->isLoopExiting(ExitingBlock) &&
04866          "Exiting block must actually branch out of the loop!");
04867   const SCEV *ExitCount = getExitCount(L, ExitingBlock);
04868   if (ExitCount == getCouldNotCompute())
04869     return 1;
04870 
04871   // Get the trip count from the BE count by adding 1.
04872   const SCEV *TCMul = getAddExpr(ExitCount, getOne(ExitCount->getType()));
04873   // FIXME: SCEV distributes multiplication as V1*C1 + V2*C1. We could attempt
04874   // to factor simple cases.
04875   if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(TCMul))
04876     TCMul = Mul->getOperand(0);
04877 
04878   const SCEVConstant *MulC = dyn_cast<SCEVConstant>(TCMul);
04879   if (!MulC)
04880     return 1;
04881 
04882   ConstantInt *Result = MulC->getValue();
04883 
04884   // Guard against huge trip counts (this requires checking
04885   // for zero to handle the case where the trip count == -1 and the
04886   // addition wraps).
04887   if (!Result || Result->getValue().getActiveBits() > 32 ||
04888       Result->getValue().getActiveBits() == 0)
04889     return 1;
04890 
04891   return (unsigned)Result->getZExtValue();
04892 }
04893 
04894 // getExitCount - Get the expression for the number of loop iterations for which
04895 // this loop is guaranteed not to exit via ExitingBlock. Otherwise return
04896 // SCEVCouldNotCompute.
04897 const SCEV *ScalarEvolution::getExitCount(Loop *L, BasicBlock *ExitingBlock) {
04898   return getBackedgeTakenInfo(L).getExact(ExitingBlock, this);
04899 }
04900 
04901 /// getBackedgeTakenCount - If the specified loop has a predictable
04902 /// backedge-taken count, return it, otherwise return a SCEVCouldNotCompute
04903 /// object. The backedge-taken count is the number of times the loop header
04904 /// will be branched to from within the loop. This is one less than the
04905 /// trip count of the loop, since it doesn't count the first iteration,
04906 /// when the header is branched to from outside the loop.
04907 ///
04908 /// Note that it is not valid to call this method on a loop without a
04909 /// loop-invariant backedge-taken count (see
04910 /// hasLoopInvariantBackedgeTakenCount).
04911 ///
04912 const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L) {
04913   return getBackedgeTakenInfo(L).getExact(this);
04914 }
04915 
04916 /// getMaxBackedgeTakenCount - Similar to getBackedgeTakenCount, except
04917 /// return the least SCEV value that is known never to be less than the
04918 /// actual backedge taken count.
04919 const SCEV *ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) {
04920   return getBackedgeTakenInfo(L).getMax(this);
04921 }
04922 
04923 /// PushLoopPHIs - Push PHI nodes in the header of the given loop
04924 /// onto the given Worklist.
04925 static void
04926 PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) {
04927   BasicBlock *Header = L->getHeader();
04928 
04929   // Push all Loop-header PHIs onto the Worklist stack.
04930   for (BasicBlock::iterator I = Header->begin();
04931        PHINode *PN = dyn_cast<PHINode>(I); ++I)
04932     Worklist.push_back(PN);
04933 }
04934 
04935 const ScalarEvolution::BackedgeTakenInfo &
04936 ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
04937   // Initially insert an invalid entry for this loop. If the insertion
04938   // succeeds, proceed to actually compute a backedge-taken count and
04939   // update the value. The temporary CouldNotCompute value tells SCEV
04940   // code elsewhere that it shouldn't attempt to request a new
04941   // backedge-taken count, which could result in infinite recursion.
04942   std::pair<DenseMap<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair =
04943     BackedgeTakenCounts.insert(std::make_pair(L, BackedgeTakenInfo()));
04944   if (!Pair.second)
04945     return Pair.first->second;
04946 
04947   // computeBackedgeTakenCount may allocate memory for its result. Inserting it
04948   // into the BackedgeTakenCounts map transfers ownership. Otherwise, the result
04949   // must be cleared in this scope.
04950   BackedgeTakenInfo Result = computeBackedgeTakenCount(L);
04951 
04952   if (Result.getExact(this) != getCouldNotCompute()) {
04953     assert(isLoopInvariant(Result.getExact(this), L) &&
04954            isLoopInvariant(Result.getMax(this), L) &&
04955            "Computed backedge-taken count isn't loop invariant for loop!");
04956     ++NumTripCountsComputed;
04957   }
04958   else if (Result.getMax(this) == getCouldNotCompute() &&
04959            isa<PHINode>(L->getHeader()->begin())) {
04960     // Only count loops that have phi nodes as not being computable.
04961     ++NumTripCountsNotComputed;
04962   }
04963 
04964   // Now that we know more about the trip count for this loop, forget any
04965   // existing SCEV values for PHI nodes in this loop since they are only
04966   // conservative estimates made without the benefit of trip count
04967   // information. This is similar to the code in forgetLoop, except that
04968   // it handles SCEVUnknown PHI nodes specially.
04969   if (Result.hasAnyInfo()) {
04970     SmallVector<Instruction *, 16> Worklist;
04971     PushLoopPHIs(L, Worklist);
04972 
04973     SmallPtrSet<Instruction *, 8> Visited;
04974     while (!Worklist.empty()) {
04975       Instruction *I = Worklist.pop_back_val();
04976       if (!Visited.insert(I).second)
04977         continue;
04978 
04979       ValueExprMapType::iterator It =
04980         ValueExprMap.find_as(static_cast<Value *>(I));
04981       if (It != ValueExprMap.end()) {
04982         const SCEV *Old = It->second;
04983 
04984         // SCEVUnknown for a PHI either means that it has an unrecognized
04985         // structure, or it's a PHI that's in the process of being computed
04986         // by createNodeForPHI.  In the former case, additional loop trip
04987         // count information isn't going to change anything. In the latter
04988         // case, createNodeForPHI will perform the necessary updates on its
04989         // own when it gets to that point.
04990         if (!isa<PHINode>(I) || !isa<SCEVUnknown>(Old)) {
04991           forgetMemoizedResults(Old);
04992           ValueExprMap.erase(It);
04993         }
04994         if (PHINode *PN = dyn_cast<PHINode>(I))
04995           ConstantEvolutionLoopExitValue.erase(PN);
04996       }
04997 
04998       PushDefUseChildren(I, Worklist);
04999     }
05000   }
05001 
05002   // Re-lookup the insert position, since the call to
05003   // computeBackedgeTakenCount above could result in a
05004   // recursive call to getBackedgeTakenInfo (on a different
05005   // loop), which would invalidate the iterator computed
05006   // earlier.
05007   return BackedgeTakenCounts.find(L)->second = Result;
05008 }
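
// The caching idiom used above, shown in isolation (editorial sketch with a
// hypothetical map and key; not code from this file):
//
//   auto Ins = Cache.insert({Key, Placeholder});   // reserve an entry first
//   if (!Ins.second)
//     return Ins.first->second;                    // cached or in flight
//   Result R = compute(Key);                       // may recurse into Cache
//   return Cache.find(Key)->second = R;            // re-find: the earlier
//                                                  // iterator may be stale
//
// The placeholder breaks infinite recursion, and the final re-lookup guards
// against iterator invalidation caused by nested insertions.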
05009 
05010 /// forgetLoop - This method should be called by the client when it has
05011 /// changed a loop in a way that may affect ScalarEvolution's ability to
05012 /// compute a trip count, or if the loop is deleted.
05013 void ScalarEvolution::forgetLoop(const Loop *L) {
05014   // Drop any stored trip count value.
05015   DenseMap<const Loop*, BackedgeTakenInfo>::iterator BTCPos =
05016     BackedgeTakenCounts.find(L);
05017   if (BTCPos != BackedgeTakenCounts.end()) {
05018     BTCPos->second.clear();
05019     BackedgeTakenCounts.erase(BTCPos);
05020   }
05021 
05022   // Drop information about expressions based on loop-header PHIs.
05023   SmallVector<Instruction *, 16> Worklist;
05024   PushLoopPHIs(L, Worklist);
05025 
05026   SmallPtrSet<Instruction *, 8> Visited;
05027   while (!Worklist.empty()) {
05028     Instruction *I = Worklist.pop_back_val();
05029     if (!Visited.insert(I).second)
05030       continue;
05031 
05032     ValueExprMapType::iterator It =
05033       ValueExprMap.find_as(static_cast<Value *>(I));
05034     if (It != ValueExprMap.end()) {
05035       forgetMemoizedResults(It->second);
05036       ValueExprMap.erase(It);
05037       if (PHINode *PN = dyn_cast<PHINode>(I))
05038         ConstantEvolutionLoopExitValue.erase(PN);
05039     }
05040 
05041     PushDefUseChildren(I, Worklist);
05042   }
05043 
05044   // Forget all contained loops too, to avoid dangling entries in the
05045   // ValuesAtScopes map.
05046   for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
05047     forgetLoop(*I);
05048 }
05049 
05050 /// forgetValue - This method should be called by the client when it has
05051 /// changed a value in a way that may affect its value, or which may
05052 /// disconnect it from a def-use chain linking it to a loop.
05053 void ScalarEvolution::forgetValue(Value *V) {
05054   Instruction *I = dyn_cast<Instruction>(V);
05055   if (!I) return;
05056 
05057   // Drop information about expressions based on loop-header PHIs.
05058   SmallVector<Instruction *, 16> Worklist;
05059   Worklist.push_back(I);
05060 
05061   SmallPtrSet<Instruction *, 8> Visited;
05062   while (!Worklist.empty()) {
05063     I = Worklist.pop_back_val();
05064     if (!Visited.insert(I).second)
05065       continue;
05066 
05067     ValueExprMapType::iterator It =
05068       ValueExprMap.find_as(static_cast<Value *>(I));
05069     if (It != ValueExprMap.end()) {
05070       forgetMemoizedResults(It->second);
05071       ValueExprMap.erase(It);
05072       if (PHINode *PN = dyn_cast<PHINode>(I))
05073         ConstantEvolutionLoopExitValue.erase(PN);
05074     }
05075 
05076     PushDefUseChildren(I, Worklist);
05077   }
05078 }
05079 
05080 /// getExact - Get the exact loop backedge taken count considering all loop
05081 /// exits. A computable result can only be returned for loops with a single
05082 /// exit.  Returning the minimum taken count among all exits is incorrect
05083 /// because one of the loop's exit limits may have been skipped. HowFarToZero
05084 /// assumes that the limit of each loop test is never skipped. This is a valid
05085 /// assumption as long as the loop exits via that test. For precise results, it
05086 /// is the caller's responsibility to specify the relevant loop exit using
05087 /// getExact(ExitingBlock, SE).
05088 const SCEV *
05089 ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const {
05090   // If any exits were not computable, the loop is not computable.
05091   if (!ExitNotTaken.isCompleteList()) return SE->getCouldNotCompute();
05092 
05093   // We need exactly one computable exit.
05094   if (!ExitNotTaken.ExitingBlock) return SE->getCouldNotCompute();
05095   assert(ExitNotTaken.ExactNotTaken && "uninitialized not-taken info");
05096 
05097   const SCEV *BECount = nullptr;
05098   for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
05099        ENT != nullptr; ENT = ENT->getNextExit()) {
05100 
05101     assert(ENT->ExactNotTaken != SE->getCouldNotCompute() && "bad exit SCEV");
05102 
05103     if (!BECount)
05104       BECount = ENT->ExactNotTaken;
05105     else if (BECount != ENT->ExactNotTaken)
05106       return SE->getCouldNotCompute();
05107   }
05108   assert(BECount && "Invalid not taken count for loop exit");
05109   return BECount;
05110 }
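
// Illustrative example (editorial note): in a hypothetical loop with two
// computable exits, e.g.
//
//   for (i = 0; i != n; ++i) {
//     if (i == m)
//       break;        // exit #1
//     ...             // exit #2 is the i != n test itself
//   }
//
// the per-exit not-taken counts generally differ, so this method returns
// SCEVCouldNotCompute unless every computable exit reports the same count.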
05111 
05112 /// getExact - Get the exact not taken count for this loop exit.
05113 const SCEV *
05114 ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock,
05115                                              ScalarEvolution *SE) const {
05116   for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
05117        ENT != nullptr; ENT = ENT->getNextExit()) {
05118 
05119     if (ENT->ExitingBlock == ExitingBlock)
05120       return ENT->ExactNotTaken;
05121   }
05122   return SE->getCouldNotCompute();
05123 }
05124 
05125 /// getMax - Get the max backedge taken count for the loop.
05126 const SCEV *
05127 ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const {
05128   return Max ? Max : SE->getCouldNotCompute();
05129 }
05130 
05131 bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S,
05132                                                     ScalarEvolution *SE) const {
05133   if (Max && Max != SE->getCouldNotCompute() && SE->hasOperand(Max, S))
05134     return true;
05135 
05136   if (!ExitNotTaken.ExitingBlock)
05137     return false;
05138 
05139   for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
05140        ENT != nullptr; ENT = ENT->getNextExit()) {
05141 
05142     if (ENT->ExactNotTaken != SE->getCouldNotCompute()
05143         && SE->hasOperand(ENT->ExactNotTaken, S)) {
05144       return true;
05145     }
05146   }
05147   return false;
05148 }
05149 
05150 /// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each
05151 /// computable exit into a persistent ExitNotTakenInfo array.
05152 ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
05153   SmallVectorImpl< std::pair<BasicBlock *, const SCEV *> > &ExitCounts,
05154   bool Complete, const SCEV *MaxCount) : Max(MaxCount) {
05155 
05156   if (!Complete)
05157     ExitNotTaken.setIncomplete();
05158 
05159   unsigned NumExits = ExitCounts.size();
05160   if (NumExits == 0) return;
05161 
05162   ExitNotTaken.ExitingBlock = ExitCounts[0].first;
05163   ExitNotTaken.ExactNotTaken = ExitCounts[0].second;
05164   if (NumExits == 1) return;
05165 
05166   // Handle the rare case of multiple computable exits.
05167   ExitNotTakenInfo *ENT = new ExitNotTakenInfo[NumExits-1];
05168 
05169   ExitNotTakenInfo *PrevENT = &ExitNotTaken;
05170   for (unsigned i = 1; i < NumExits; ++i, PrevENT = ENT, ++ENT) {
05171     PrevENT->setNextExit(ENT);
05172     ENT->ExitingBlock = ExitCounts[i].first;
05173     ENT->ExactNotTaken = ExitCounts[i].second;
05174   }
05175 }
05176 
05177 /// clear - Invalidate this result and free the ExitNotTakenInfo array.
05178 void ScalarEvolution::BackedgeTakenInfo::clear() {
05179   ExitNotTaken.ExitingBlock = nullptr;
05180   ExitNotTaken.ExactNotTaken = nullptr;
05181   delete[] ExitNotTaken.getNextExit();
05182 }
05183 
05184 /// computeBackedgeTakenCount - Compute the number of times the backedge
05185 /// of the specified loop will execute.
05186 ScalarEvolution::BackedgeTakenInfo
05187 ScalarEvolution::computeBackedgeTakenCount(const Loop *L) {
05188   SmallVector<BasicBlock *, 8> ExitingBlocks;
05189   L->getExitingBlocks(ExitingBlocks);
05190 
05191   SmallVector<std::pair<BasicBlock *, const SCEV *>, 4> ExitCounts;
05192   bool CouldComputeBECount = true;
05193   BasicBlock *Latch = L->getLoopLatch(); // may be NULL.
05194   const SCEV *MustExitMaxBECount = nullptr;
05195   const SCEV *MayExitMaxBECount = nullptr;
05196 
05197   // Compute the ExitLimit for each loop exit. Use this to populate ExitCounts
05198   // and compute maxBECount.
05199   for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
05200     BasicBlock *ExitBB = ExitingBlocks[i];
05201     ExitLimit EL = computeExitLimit(L, ExitBB);
05202 
05203     // 1. For each exit that can be computed, add an entry to ExitCounts.
05204     // CouldComputeBECount is true only if all exits can be computed.
05205     if (EL.Exact == getCouldNotCompute())
05206       // We couldn't compute an exact value for this exit, so
05207       // we won't be able to compute an exact value for the loop.
05208       CouldComputeBECount = false;
05209     else
05210       ExitCounts.push_back(std::make_pair(ExitBB, EL.Exact));
05211 
05212     // 2. Derive the loop's MaxBECount from each exit's max number of
05213     // non-exiting iterations. Partition the loop exits into two kinds:
05214     // LoopMustExits and LoopMayExits.
05215     //
05216     // If the exit dominates the loop latch, it is a LoopMustExit; otherwise it
05217     // is a LoopMayExit.  If any computable LoopMustExit is found, then
05218     // MaxBECount is the minimum EL.Max of computable LoopMustExits. Otherwise,
05219     // MaxBECount is conservatively the maximum EL.Max, where CouldNotCompute is
05220     // considered greater than any computable EL.Max.
05221     if (EL.Max != getCouldNotCompute() && Latch &&
05222         DT.dominates(ExitBB, Latch)) {
05223       if (!MustExitMaxBECount)
05224         MustExitMaxBECount = EL.Max;
05225       else {
05226         MustExitMaxBECount =
05227           getUMinFromMismatchedTypes(MustExitMaxBECount, EL.Max);
05228       }
05229     } else if (MayExitMaxBECount != getCouldNotCompute()) {
05230       if (!MayExitMaxBECount || EL.Max == getCouldNotCompute())
05231         MayExitMaxBECount = EL.Max;
05232       else {
05233         MayExitMaxBECount =
05234           getUMaxFromMismatchedTypes(MayExitMaxBECount, EL.Max);
05235       }
05236     }
05237   }
05238   const SCEV *MaxBECount = MustExitMaxBECount ? MustExitMaxBECount :
05239     (MayExitMaxBECount ? MayExitMaxBECount : getCouldNotCompute());
05240   return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount);
05241 }
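
// Illustrative example (editorial note): in a hypothetical loop of the form
//
//   for (i = 0; i != n; ++i) {
//     if (p[i])
//       if (q[i])
//         break;      // exiting block does not dominate the latch: a may-exit
//     ...
//   }                 // the i != n test dominates the latch: a must-exit
//
// MaxBECount is derived from the computable must-exit (roughly n), since any
// may-exit can only make the loop terminate sooner.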
05242 
05243 ScalarEvolution::ExitLimit
05244 ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {
05245 
05246   // Okay, we've chosen an exiting block.  See what condition causes us to exit
05247   // at this block and remember the exit block and whether all other targets
05248   // lead to the loop header.
05249   bool MustExecuteLoopHeader = true;
05250   BasicBlock *Exit = nullptr;
05251   for (auto *SBB : successors(ExitingBlock))
05252     if (!L->contains(SBB)) {
05253       if (Exit) // Multiple exit successors.
05254         return getCouldNotCompute();
05255       Exit = SBB;
05256     } else if (SBB != L->getHeader()) {
05257       MustExecuteLoopHeader = false;
05258     }
05259 
05260   // At this point, we know we have a conditional branch that determines whether
05261   // the loop is exited.  However, we don't know if the branch is executed each
05262   // time through the loop.  If not, then the execution count of the branch will
05263   // not be equal to the trip count of the loop.
05264   //
05265   // Currently we check for this by checking to see if the Exit branch goes to
05266   // the loop header.  If so, we know it will always execute the same number of
05267   // times as the loop.  We also handle the case where the exit block *is* the
05268   // loop header.  This is common for un-rotated loops.
05269   //
05270   // If both of those tests fail, walk up the unique predecessor chain to the
05271   // header, stopping if there is an edge that doesn't exit the loop. If the
05272   // header is reached, the execution count of the branch will be equal to the
05273   // trip count of the loop.
05274   //
05275   //  More extensive analysis could be done to handle more cases here.
05276   //
05277   if (!MustExecuteLoopHeader && ExitingBlock != L->getHeader()) {
05278     // The simple checks failed, try climbing the unique predecessor chain
05279     // up to the header.
05280     bool Ok = false;
05281     for (BasicBlock *BB = ExitingBlock; BB; ) {
05282       BasicBlock *Pred = BB->getUniquePredecessor();
05283       if (!Pred)
05284         return getCouldNotCompute();
05285       TerminatorInst *PredTerm = Pred->getTerminator();
05286       for (const BasicBlock *PredSucc : PredTerm->successors()) {
05287         if (PredSucc == BB)
05288           continue;
05289         // If the predecessor has a successor that isn't BB and isn't
05290         // outside the loop, assume the worst.
05291         if (L->contains(PredSucc))
05292           return getCouldNotCompute();
05293       }
05294       if (Pred == L->getHeader()) {
05295         Ok = true;
05296         break;
05297       }
05298       BB = Pred;
05299     }
05300     if (!Ok)
05301       return getCouldNotCompute();
05302   }
05303 
05304   bool IsOnlyExit = (L->getExitingBlock() != nullptr);
05305   TerminatorInst *Term = ExitingBlock->getTerminator();
05306   if (BranchInst *BI = dyn_cast<BranchInst>(Term)) {
05307     assert(BI->isConditional() && "If unconditional, it can't be in loop!");
05308     // Proceed to the next level to examine the exit condition expression.
05309     return computeExitLimitFromCond(L, BI->getCondition(), BI->getSuccessor(0),
05310                                     BI->getSuccessor(1),
05311                                     /*ControlsExit=*/IsOnlyExit);
05312   }
05313 
05314   if (SwitchInst *SI = dyn_cast<SwitchInst>(Term))
05315     return computeExitLimitFromSingleExitSwitch(L, SI, Exit,
05316                                                 /*ControlsExit=*/IsOnlyExit);
05317 
05318   return getCouldNotCompute();
05319 }
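
// Illustrative example (editorial note): the predecessor walk above accepts a
// hypothetical CFG shaped like
//
//   header:
//     br i1 %c0, label %bb, label %early.exit   ; other successor leaves loop
//   bb:                                          ; unique predecessor: header
//     br i1 %c1, label %latch, label %exit       ; the exiting block analyzed
//
// because every block between the header and the exiting block either falls
// through or leaves the loop, so the exit condition in %bb is evaluated on
// each iteration that does not exit earlier, which is what the limit
// computation above requires.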
05320 
05321 /// computeExitLimitFromCond - Compute the number of times the
05322 /// backedge of the specified loop will execute if its exit condition
05323 /// were a conditional branch of ExitCond, TBB, and FBB.
05324 ///
05325 /// @param ControlsExit is true if ExitCond directly controls the exit
05326 /// branch. In this case, we can assume that the loop exits only if the
05327 /// condition is true and can infer that failing to meet the condition prior to
05328 /// integer wraparound results in undefined behavior.
05329 ScalarEvolution::ExitLimit
05330 ScalarEvolution::computeExitLimitFromCond(const Loop *L,
05331                                           Value *ExitCond,
05332                                           BasicBlock *TBB,
05333                                           BasicBlock *FBB,
05334                                           bool ControlsExit) {
05335   // Check if the controlling expression for this loop is an And or Or.
05336   if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) {
05337     if (BO->getOpcode() == Instruction::And) {
05338       // Recurse on the operands of the and.
05339       bool EitherMayExit = L->contains(TBB);
05340       ExitLimit EL0 = computeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
05341                                                ControlsExit && !EitherMayExit);
05342       ExitLimit EL1 = computeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
05343                                                ControlsExit && !EitherMayExit);
05344       const SCEV *BECount = getCouldNotCompute();
05345       const SCEV *MaxBECount = getCouldNotCompute();
05346       if (EitherMayExit) {
05347         // Both conditions must be true for the loop to continue executing.
05348         // Choose the less conservative count.
05349         if (EL0.Exact == getCouldNotCompute() ||
05350             EL1.Exact == getCouldNotCompute())
05351           BECount = getCouldNotCompute();
05352         else
05353           BECount = getUMinFromMismatchedTypes(EL0.Exact, EL1.Exact);
05354         if (EL0.Max == getCouldNotCompute())
05355           MaxBECount = EL1.Max;
05356         else if (EL1.Max == getCouldNotCompute())
05357           MaxBECount = EL0.Max;
05358         else
05359           MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max);
05360       } else {
05361         // Both conditions must be true at the same time for the loop to exit.
05362         // For now, be conservative.
05363         assert(L->contains(FBB) && "Loop block has no successor in loop!");
05364         if (EL0.Max == EL1.Max)
05365           MaxBECount = EL0.Max;
05366         if (EL0.Exact == EL1.Exact)
05367           BECount = EL0.Exact;
05368       }
05369 
05370       // There are cases (e.g. PR26207) where computeExitLimitFromCond is able
05371       // to be more aggressive when computing BECount than when computing
05372       // MaxBECount.  In these cases it is possible for EL0.Exact and EL1.Exact
05373       // to match, but for EL0.Max and EL1.Max to not.
05374       if (isa<SCEVCouldNotCompute>(MaxBECount) &&
05375           !isa<SCEVCouldNotCompute>(BECount))
05376         MaxBECount = BECount;
05377 
05378       return ExitLimit(BECount, MaxBECount);
05379     }
05380     if (BO->getOpcode() == Instruction::Or) {
05381       // Recurse on the operands of the or.
05382       bool EitherMayExit = L->contains(FBB);
05383       ExitLimit EL0 = computeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
05384                                                ControlsExit && !EitherMayExit);
05385       ExitLimit EL1 = computeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
05386                                                ControlsExit && !EitherMayExit);
05387       const SCEV *BECount = getCouldNotCompute();
05388       const SCEV *MaxBECount = getCouldNotCompute();
05389       if (EitherMayExit) {
05390         // Both conditions must be false for the loop to continue executing.
05391         // Choose the less conservative count.
05392         if (EL0.Exact == getCouldNotCompute() ||
05393             EL1.Exact == getCouldNotCompute())
05394           BECount = getCouldNotCompute();
05395         else
05396           BECount = getUMinFromMismatchedTypes(EL0.Exact, EL1.Exact);
05397         if (EL0.Max == getCouldNotCompute())
05398           MaxBECount = EL1.Max;
05399         else if (EL1.Max == getCouldNotCompute())
05400           MaxBECount = EL0.Max;
05401         else
05402           MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max);
05403       } else {
05404         // Both conditions must be false at the same time for the loop to exit.
05405         // For now, be conservative.
05406         assert(L->contains(TBB) && "Loop block has no successor in loop!");
05407         if (EL0.Max == EL1.Max)
05408           MaxBECount = EL0.Max;
05409         if (EL0.Exact == EL1.Exact)
05410           BECount = EL0.Exact;
05411       }
05412 
05413       return ExitLimit(BECount, MaxBECount);
05414     }
05415   }
05416 
05417   // With an icmp, it may be feasible to compute an exact backedge-taken count.
05418   // Proceed to the next level to examine the icmp.
05419   if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond))
05420     return computeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit);
05421 
05422   // Check for a constant condition. These are normally stripped out by
05423   // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to
05424   // preserve the CFG and is temporarily leaving constant conditions
05425   // in place.
05426   if (ConstantInt *CI = dyn_cast<ConstantInt>(ExitCond)) {
05427     if (L->contains(FBB) == !CI->getZExtValue())
05428       // The backedge is always taken.
05429       return getCouldNotCompute();
05430     else
05431       // The backedge is never taken.
05432       return getZero(CI->getType());
05433   }
05434 
05435   // If it's not an integer or pointer comparison then compute it the hard way.
05436   return computeExitCountExhaustively(L, ExitCond, !L->contains(TBB));
05437 }
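
// Illustrative example (editorial note): for a hypothetical exit condition
//
//   %cond = and i1 %c0, %c1
//   br i1 %cond, label %loop, label %exit
//
// the loop keeps running only while both %c0 and %c1 hold, so when either
// operand may cause an exit the exact backedge-taken count is the unsigned
// minimum of the counts computed for %c0 and %c1 individually (when both are
// computable), as the And case above does.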
05438 
05439 ScalarEvolution::ExitLimit
05440 ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
05441                                           ICmpInst *ExitCond,
05442                                           BasicBlock *TBB,
05443                                           BasicBlock *FBB,
05444                                           bool ControlsExit) {
05445 
05446   // If the condition was exit on true, convert the condition to exit on false
05447   ICmpInst::Predicate Cond;
05448   if (!L->contains(FBB))
05449     Cond = ExitCond->getPredicate();
05450   else
05451     Cond = ExitCond->getInversePredicate();
05452 
05453   // Handle common loops like: for (X = "string"; *X; ++X)
05454   if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0)))
05455     if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) {
05456       ExitLimit ItCnt =
05457         computeLoadConstantCompareExitLimit(LI, RHS, L, Cond);
05458       if (ItCnt.hasAnyInfo())
05459         return ItCnt;
05460     }
05461 
05462   ExitLimit ShiftEL = computeShiftCompareExitLimit(
05463       ExitCond->getOperand(0), ExitCond->getOperand(1), L, Cond);
05464   if (ShiftEL.hasAnyInfo())
05465     return ShiftEL;
05466 
05467   const SCEV *LHS = getSCEV(ExitCond->getOperand(0));
05468   const SCEV *RHS = getSCEV(ExitCond->getOperand(1));
05469 
05470   // Try to evaluate any dependencies out of the loop.
05471   LHS = getSCEVAtScope(LHS, L);
05472   RHS = getSCEVAtScope(RHS, L);
05473 
05474   // At this point, we would like to compute how many iterations of the
05475   // loop the predicate will return true for these inputs.
05476   if (isLoopInvariant(LHS, L) && !isLoopInvariant(RHS, L)) {
05477     // If there is a loop-invariant, force it into the RHS.
05478     std::swap(LHS, RHS);
05479     Cond = ICmpInst::getSwappedPredicate(Cond);
05480   }
05481 
05482   // Simplify the operands before analyzing them.
05483   (void)SimplifyICmpOperands(Cond, LHS, RHS);
05484 
05485   // If we have a comparison of a chrec against a constant, try to use value
05486   // ranges to answer this query.
05487   if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS))
05488     if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS))
05489       if (AddRec->getLoop() == L) {
05490         // Form the constant range.
05491         ConstantRange CompRange(
05492             ICmpInst::makeConstantRange(Cond, RHSC->getAPInt()));
05493 
05494         const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this);
05495         if (!isa<SCEVCouldNotCompute>(Ret)) return Ret;
05496       }
05497 
05498   switch (Cond) {
05499   case ICmpInst::ICMP_NE: {                     // while (X != Y)
05500     // Convert to: while (X-Y != 0)
05501     ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit);
05502     if (EL.hasAnyInfo()) return EL;
05503     break;
05504   }
05505   case ICmpInst::ICMP_EQ: {                     // while (X == Y)
05506     // Convert to: while (X-Y == 0)
05507     ExitLimit EL = HowFarToNonZero(getMinusSCEV(LHS, RHS), L);
05508     if (EL.hasAnyInfo()) return EL;
05509     break;
05510   }
05511   case ICmpInst::ICMP_SLT:
05512   case ICmpInst::ICMP_ULT: {                    // while (X < Y)
05513     bool IsSigned = Cond == ICmpInst::ICMP_SLT;
05514     ExitLimit EL = HowManyLessThans(LHS, RHS, L, IsSigned, ControlsExit);
05515     if (EL.hasAnyInfo()) return EL;
05516     break;
05517   }
05518   case ICmpInst::ICMP_SGT:
05519   case ICmpInst::ICMP_UGT: {                    // while (X > Y)
05520     bool IsSigned = Cond == ICmpInst::ICMP_SGT;
05521     ExitLimit EL = HowManyGreaterThans(LHS, RHS, L, IsSigned, ControlsExit);
05522     if (EL.hasAnyInfo()) return EL;
05523     break;
05524   }
05525   default:
05526     break;
05527   }
05528   return computeExitCountExhaustively(L, ExitCond, !L->contains(TBB));
05529 }
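
// Illustrative example (editorial note): for a hypothetical counted loop
//
//   for (i = 0; i != n; ++i)
//     ...
//
// the continue condition is "i != n", so the ICMP_NE case above rewrites it
// as asking how many iterations pass before i - n (the AddRec {0,+,1} minus
// n) reaches zero, which HowFarToZero answers for affine recurrences.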
05530 
05531 ScalarEvolution::ExitLimit
05532 ScalarEvolution::computeExitLimitFromSingleExitSwitch(const Loop *L,
05533                                                       SwitchInst *Switch,
05534                                                       BasicBlock *ExitingBlock,
05535                                                       bool ControlsExit) {
05536   assert(!L->contains(ExitingBlock) && "Not an exiting block!");
05537 
05538   // Give up if the exit is the default dest of a switch.
05539   if (Switch->getDefaultDest() == ExitingBlock)
05540     return getCouldNotCompute();
05541 
05542   assert(L->contains(Switch->getDefaultDest()) &&
05543          "Default case must not exit the loop!");
05544   const SCEV *LHS = getSCEVAtScope(Switch->getCondition(), L);
05545   const SCEV *RHS = getConstant(Switch->findCaseDest(ExitingBlock));
05546 
05547   // while (X != Y) --> while (X-Y != 0)
05548   ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit);
05549   if (EL.hasAnyInfo())
05550     return EL;
05551 
05552   return getCouldNotCompute();
05553 }
05554 
05555 static ConstantInt *
05556 EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C,
05557                                 ScalarEvolution &SE) {
05558   const SCEV *InVal = SE.getConstant(C);
05559   const SCEV *Val = AddRec->evaluateAtIteration(InVal, SE);
05560   assert(isa<SCEVConstant>(Val) &&
05561          "Evaluation of SCEV at constant didn't fold correctly?");
05562   return cast<SCEVConstant>(Val)->getValue();
05563 }
05564 
05565 /// computeLoadConstantCompareExitLimit - Given an exit condition of
05566 /// 'icmp op load X, cst', try to see if we can compute the backedge
05567 /// execution count.
05568 ScalarEvolution::ExitLimit
05569 ScalarEvolution::computeLoadConstantCompareExitLimit(
05570   LoadInst *LI,
05571   Constant *RHS,
05572   const Loop *L,
05573   ICmpInst::Predicate predicate) {
05574 
05575   if (LI->isVolatile()) return getCouldNotCompute();
05576 
05577   // Check to see if the loaded pointer is a getelementptr of a global.
05578   // TODO: Use SCEV instead of manually grubbing with GEPs.
05579   GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0));
05580   if (!GEP) return getCouldNotCompute();
05581 
05582   // Make sure that it is really a constant global we are gepping, with an
05583   // initializer, and make sure the first IDX is really 0.
05584   GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0));
05585   if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() ||
05586       GEP->getNumOperands() < 3 || !isa<Constant>(GEP->getOperand(1)) ||
05587       !cast<Constant>(GEP->getOperand(1))->isNullValue())
05588     return getCouldNotCompute();
05589 
05590   // Okay, we allow one non-constant index into the GEP instruction.
05591   Value *VarIdx = nullptr;
05592   std::vector<Constant*> Indexes;
05593   unsigned VarIdxNum = 0;
05594   for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i)
05595     if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
05596       Indexes.push_back(CI);
05597     } else if (!isa<ConstantInt>(GEP->getOperand(i))) {
05598       if (VarIdx) return getCouldNotCompute();  // Multiple non-constant idx's.
05599       VarIdx = GEP->getOperand(i);
05600       VarIdxNum = i-2;
05601       Indexes.push_back(nullptr);
05602     }
05603 
05604   // Loop-invariant loads may be a byproduct of loop optimization. Skip them.
05605   if (!VarIdx)
05606     return getCouldNotCompute();
05607 
05608   // Okay, we know we have a (load (gep GV, 0, X)) comparison with a constant.
05609   // Check to see if X is a loop variant variable value now.
05610   const SCEV *Idx = getSCEV(VarIdx);
05611   Idx = getSCEVAtScope(Idx, L);
05612 
05613   // We can only recognize very limited forms of loop index expressions, in
05614   // particular, only affine AddRec's like {C1,+,C2}.
05615   const SCEVAddRecExpr *IdxExpr = dyn_cast<SCEVAddRecExpr>(Idx);
05616   if (!IdxExpr || !IdxExpr->isAffine() || isLoopInvariant(IdxExpr, L) ||
05617       !isa<SCEVConstant>(IdxExpr->getOperand(0)) ||
05618       !isa<SCEVConstant>(IdxExpr->getOperand(1)))
05619     return getCouldNotCompute();
05620 
05621   unsigned MaxSteps = MaxBruteForceIterations;
05622   for (unsigned IterationNum = 0; IterationNum != MaxSteps; ++IterationNum) {
05623     ConstantInt *ItCst = ConstantInt::get(
05624                            cast<IntegerType>(IdxExpr->getType()), IterationNum);
05625     ConstantInt *Val = EvaluateConstantChrecAtConstant(IdxExpr, ItCst, *this);
05626 
05627     // Form the GEP offset.
05628     Indexes[VarIdxNum] = Val;
05629 
05630     Constant *Result = ConstantFoldLoadThroughGEPIndices(GV->getInitializer(),
05631                                                          Indexes);
05632     if (!Result) break;  // Cannot compute!
05633 
05634     // Evaluate the condition for this iteration.
05635     Result = ConstantExpr::getICmp(predicate, Result, RHS);
05636     if (!isa<ConstantInt>(Result)) break;  // Couldn't decide for sure
05637     if (cast<ConstantInt>(Result)->getValue().isMinValue()) {
05638       ++NumArrayLenItCounts;
05639       return getConstant(ItCst);   // Found terminating iteration!
05640     }
05641   }
05642   return getCouldNotCompute();
05643 }
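
// Illustrative example (editorial note): this recognizes loops such as
//
//   static const int A[] = {3, 1, 4, 1, 5, 0};   // hypothetical constant global
//   for (i = 0; A[i] != 0; ++i)
//     ...
//
// where the load goes through a GEP of a constant global indexed by an affine
// AddRec.  The continue condition is evaluated against the initializer one
// candidate iteration at a time, and the first iteration on which it becomes
// false is returned as the exit count.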
05644 
05645 ScalarEvolution::ExitLimit ScalarEvolution::computeShiftCompareExitLimit(
05646     Value *LHS, Value *RHSV, const Loop *L, ICmpInst::Predicate Pred) {
05647   ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV);
05648   if (!RHS)
05649     return getCouldNotCompute();
05650 
05651   const BasicBlock *Latch = L->getLoopLatch();
05652   if (!Latch)
05653     return getCouldNotCompute();
05654 
05655   const BasicBlock *Predecessor = L->getLoopPredecessor();
05656   if (!Predecessor)
05657     return getCouldNotCompute();
05658 
05659   // Return true if V is of the form "LHS `shift_op` <positive constant>".
05660   // Return LHS in OutLHS and shift_op in OutOpCode.
05661   auto MatchPositiveShift =
05662       [](Value *V, Value *&OutLHS, Instruction::BinaryOps &OutOpCode) {
05663 
05664     using namespace PatternMatch;
05665 
05666     ConstantInt *ShiftAmt;
05667     if (match(V, m_LShr(m_Value(OutLHS), m_ConstantInt(ShiftAmt))))
05668       OutOpCode = Instruction::LShr;
05669     else if (match(V, m_AShr(m_Value(OutLHS), m_ConstantInt(ShiftAmt))))
05670       OutOpCode = Instruction::AShr;
05671     else if (match(V, m_Shl(m_Value(OutLHS), m_ConstantInt(ShiftAmt))))
05672       OutOpCode = Instruction::Shl;
05673     else
05674       return false;
05675 
05676     return ShiftAmt->getValue().isStrictlyPositive();
05677   };
05678 
05679   // Recognize a "shift recurrence" either of the form %iv or of %iv.shifted in
05680   //
05681   // loop:
05682   //   %iv = phi i32 [ %iv.shifted, %loop ], [ %val, %preheader ]
05683   //   %iv.shifted = lshr i32 %iv, <positive constant>
05684   //
05685   // Return true on a successful match.  Return the corresponding PHI node (%iv
05686   // above) in PNOut and the opcode of the shift operation in OpCodeOut.
05687   auto MatchShiftRecurrence =
05688       [&](Value *V, PHINode *&PNOut, Instruction::BinaryOps &OpCodeOut) {
05689     Optional<Instruction::BinaryOps> PostShiftOpCode;
05690 
05691     {
05692       Instruction::BinaryOps OpC;
05693       Value *V;
05694 
05695       // If we encounter a shift instruction, "peel off" the shift operation,
05696       // and remember that we did so.  Later when we inspect %iv's backedge
05697       // value, we will make sure that the backedge value uses the same
05698       // operation.
05699       //
05700       // Note: the peeled shift operation does not have to be the same
05701       // instruction as the one feeding into the PHI's backedge value.  We only
05702       // really care about it being the same *kind* of shift instruction --
05703       // that's all that is required for our later inferences to hold.
05704       if (MatchPositiveShift(LHS, V, OpC)) {
05705         PostShiftOpCode = OpC;
05706         LHS = V;
05707       }
05708     }
05709 
05710     PNOut = dyn_cast<PHINode>(LHS);
05711     if (!PNOut || PNOut->getParent() != L->getHeader())
05712       return false;
05713 
05714     Value *BEValue = PNOut->getIncomingValueForBlock(Latch);
05715     Value *OpLHS;
05716 
05717     return
05718         // The backedge value for the PHI node must be a shift by a positive
05719         // amount
05720         MatchPositiveShift(BEValue, OpLHS, OpCodeOut) &&
05721 
05722         // of the PHI node itself
05723         OpLHS == PNOut &&
05724 
05725         // and the kind of shift should match the kind of shift we peeled
05726         // off, if any.
05727         (!PostShiftOpCode.hasValue() || *PostShiftOpCode == OpCodeOut);
05728   };
05729 
05730   PHINode *PN;
05731   Instruction::BinaryOps OpCode;
05732   if (!MatchShiftRecurrence(LHS, PN, OpCode))
05733     return getCouldNotCompute();
05734 
05735   const DataLayout &DL = getDataLayout();
05736 
05737   // The key rationale for this optimization is that for some kinds of shift
05738   // recurrences, the value of the recurrence "stabilizes" to either 0 or -1
05739   // within a finite number of iterations.  If the condition guarding the
05740   // backedge (in the sense that the backedge is taken if the condition is true)
05741   // is false for the value the shift recurrence stabilizes to, then we know
05742   // that the backedge is taken only a finite number of times.
05743 
05744   ConstantInt *StableValue = nullptr;
05745   switch (OpCode) {
05746   default:
05747     llvm_unreachable("Impossible case!");
05748 
05749   case Instruction::AShr: {
05750     // {K,ashr,<positive-constant>} stabilizes to signum(K) in at most
05751     // bitwidth(K) iterations.
05752     Value *FirstValue = PN->getIncomingValueForBlock(Predecessor);
05753     bool KnownZero, KnownOne;
05754     ComputeSignBit(FirstValue, KnownZero, KnownOne, DL, 0, nullptr,
05755                    Predecessor->getTerminator(), &DT);
05756     auto *Ty = cast<IntegerType>(RHS->getType());
05757     if (KnownZero)
05758       StableValue = ConstantInt::get(Ty, 0);
05759     else if (KnownOne)
05760       StableValue = ConstantInt::get(Ty, -1, true);
05761     else
05762       return getCouldNotCompute();
05763 
05764     break;
05765   }
05766   case Instruction::LShr:
05767   case Instruction::Shl:
05768     // Both {K,lshr,<positive-constant>} and {K,shl,<positive-constant>}
05769     // stabilize to 0 in at most bitwidth(K) iterations.
05770     StableValue = ConstantInt::get(cast<IntegerType>(RHS->getType()), 0);
05771     break;
05772   }
05773 
05774   auto *Result =
05775       ConstantFoldCompareInstOperands(Pred, StableValue, RHS, DL, &TLI);
05776   assert(Result->getType()->isIntegerTy(1) &&
05777          "Otherwise cannot be an operand to a branch instruction");
05778 
05779   if (Result->isZeroValue()) {
05780     unsigned BitWidth = getTypeSizeInBits(RHS->getType());
05781     const SCEV *UpperBound =
05782         getConstant(getEffectiveSCEVType(RHS->getType()), BitWidth);
05783     return ExitLimit(getCouldNotCompute(), UpperBound);
05784   }
05785 
05786   return getCouldNotCompute();
05787 }
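
// Illustrative example (editorial note): a hypothetical loop such as
//
//   while (x != 0)
//     x = x >> 1;     // logical shift right by a positive constant
//
// has a shift recurrence that stabilizes to 0 within bitwidth(x) iterations.
// The continue condition (x != 0) is false for that stable value, so the code
// above reports bitwidth(x) as a max backedge-taken count even though the
// exact count stays unknown.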
05788 
05789 /// CanConstantFold - Return true if we can constant fold an instruction of the
05790 /// specified type, assuming that all operands were constants.
05791 static bool CanConstantFold(const Instruction *I) {
05792   if (isa<BinaryOperator>(I) || isa<CmpInst>(I) ||
05793       isa<SelectInst>(I) || isa<CastInst>(I) || isa<GetElementPtrInst>(I) ||
05794       isa<LoadInst>(I))
05795     return true;
05796 
05797   if (const CallInst *CI = dyn_cast<CallInst>(I))
05798     if (const Function *F = CI->getCalledFunction())
05799       return canConstantFoldCallTo(F);
05800   return false;
05801 }
05802 
05803 /// Determine whether this instruction can constant evolve within this loop
05804 /// assuming its operands can all constant evolve.
05805 static bool canConstantEvolve(Instruction *I, const Loop *L) {
05806   // An instruction outside of the loop can't be derived from a loop PHI.
05807   if (!L->contains(I)) return false;
05808 
05809   if (isa<PHINode>(I)) {
05810     // We don't currently keep track of the control flow needed to evaluate
05811     // PHIs, so we cannot handle PHIs inside of loops.
05812     return L->getHeader() == I->getParent();
05813   }
05814 
05815   // If we won't be able to constant fold this expression even if the operands
05816   // are constants, bail early.
05817   return CanConstantFold(I);
05818 }
05819 
05820 /// getConstantEvolvingPHIOperands - Implement getConstantEvolvingPHI by
05821 /// recursing through each instruction operand until reaching a loop header phi.
05822 static PHINode *
05823 getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L,
05824                                DenseMap<Instruction *, PHINode *> &PHIMap) {
05825 
05826   // We can evaluate this instruction if all of its operands are
05827   // constant or derived from a PHI node themselves.
05828   PHINode *PHI = nullptr;
05829   for (Value *Op : UseInst->operands()) {
05830     if (isa<Constant>(Op)) continue;
05831 
05832     Instruction *OpInst = dyn_cast<Instruction>(Op);
05833     if (!OpInst || !canConstantEvolve(OpInst, L)) return nullptr;
05834 
05835     PHINode *P = dyn_cast<PHINode>(OpInst);
05836     if (!P)
05837       // If this operand is already visited, reuse the prior result.
05838       // We may have P != PHI if this is the deepest point at which the
05839       // inconsistent paths meet.
05840       P = PHIMap.lookup(OpInst);
05841     if (!P) {
05842       // Recurse and memoize the results, whether a phi is found or not.
05843       // This recursive call invalidates pointers into PHIMap.
05844       P = getConstantEvolvingPHIOperands(OpInst, L, PHIMap);
05845       PHIMap[OpInst] = P;
05846     }
05847     if (!P)
05848       return nullptr;  // Not evolving from PHI
05849     if (PHI && PHI != P)
05850       return nullptr;  // Evolving from multiple different PHIs.
05851     PHI = P;
05852   }
05853   // This is an expression evolving from a constant PHI!
05854   return PHI;
05855 }
05856 
05857 /// getConstantEvolvingPHI - Given an LLVM value and a loop, return a PHI node
05858 /// in the loop that V is derived from.  We allow arbitrary operations along the
05859 /// way, but the operands of an operation must either be constants or a value
05860 /// derived from a constant PHI.  If this expression does not fit with these
05861 /// constraints, return null.
05862 static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
05863   Instruction *I = dyn_cast<Instruction>(V);
05864   if (!I || !canConstantEvolve(I, L)) return nullptr;
05865 
05866   if (PHINode *PN = dyn_cast<PHINode>(I))
05867     return PN;
05868 
05869   // Record non-constant instructions contained by the loop.
05870   DenseMap<Instruction *, PHINode *> PHIMap;
05871   return getConstantEvolvingPHIOperands(I, L, PHIMap);
05872 }
05873 
05874 /// EvaluateExpression - Given an expression that passes the
05875 /// getConstantEvolvingPHI predicate, evaluate its value assuming the PHI node
05876 /// in the loop has the value PHIVal.  If we can't fold this expression for some
05877 /// reason, return null.
05878 static Constant *EvaluateExpression(Value *V, const Loop *L,
05879                                     DenseMap<Instruction *, Constant *> &Vals,
05880                                     const DataLayout &DL,
05881                                     const TargetLibraryInfo *TLI) {
05882   // Convenient constant check, but redundant for recursive calls.
05883   if (Constant *C = dyn_cast<Constant>(V)) return C;
05884   Instruction *I = dyn_cast<Instruction>(V);
05885   if (!I) return nullptr;
05886 
05887   if (Constant *C = Vals.lookup(I)) return C;
05888 
05889   // An instruction inside the loop depends on a value outside the loop that we
05890   // weren't given a mapping for, or a value such as a call inside the loop.
05891   if (!canConstantEvolve(I, L)) return nullptr;
05892 
05893   // An unmapped PHI can be due to a branch or another loop inside this loop,
05894   // or due to this not being the initial iteration through a loop where we
05895   // couldn't compute the evolution of this particular PHI last time.
05896   if (isa<PHINode>(I)) return nullptr;
05897 
05898   std::vector<Constant*> Operands(I->getNumOperands());
05899 
05900   for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
05901     Instruction *Operand = dyn_cast<Instruction>(I->getOperand(i));
05902     if (!Operand) {
05903       Operands[i] = dyn_cast<Constant>(I->getOperand(i));
05904       if (!Operands[i]) return nullptr;
05905       continue;
05906     }
05907     Constant *C = EvaluateExpression(Operand, L, Vals, DL, TLI);
05908     Vals[Operand] = C;
05909     if (!C) return nullptr;
05910     Operands[i] = C;
05911   }
05912 
05913   if (CmpInst *CI = dyn_cast<CmpInst>(I))
05914     return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
05915                                            Operands[1], DL, TLI);
05916   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
05917     if (!LI->isVolatile())
05918       return ConstantFoldLoadFromConstPtr(Operands[0], LI->getType(), DL);
05919   }
05920   return ConstantFoldInstOperands(I, Operands, DL, TLI);
05921 }
05922 
05923 
05924 // If every incoming value to PN except the one for BB is a specific Constant,
05925 // return that, else return nullptr.
05926 static Constant *getOtherIncomingValue(PHINode *PN, BasicBlock *BB) {
05927   Constant *IncomingVal = nullptr;
05928 
05929   for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
05930     if (PN->getIncomingBlock(i) == BB)
05931       continue;
05932 
05933     auto *CurrentVal = dyn_cast<Constant>(PN->getIncomingValue(i));
05934     if (!CurrentVal)
05935       return nullptr;
05936 
05937     if (IncomingVal != CurrentVal) {
05938       if (IncomingVal)
05939         return nullptr;
05940       IncomingVal = CurrentVal;
05941     }
05942   }
05943 
05944   return IncomingVal;
05945 }
05946 
05947 /// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
05948 /// in the header of its containing loop, that the loop executes a constant
05949 /// number of times, and that the PHI node is just a recurrence involving
05950 /// constants, fold it.
05951 Constant *
05952 ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
05953                                                    const APInt &BEs,
05954                                                    const Loop *L) {
05955   auto I = ConstantEvolutionLoopExitValue.find(PN);
05956   if (I != ConstantEvolutionLoopExitValue.end())
05957     return I->second;
05958 
05959   if (BEs.ugt(MaxBruteForceIterations))
05960     return ConstantEvolutionLoopExitValue[PN] = nullptr;  // Not going to evaluate it.
05961 
05962   Constant *&RetVal = ConstantEvolutionLoopExitValue[PN];
05963 
05964   DenseMap<Instruction *, Constant *> CurrentIterVals;
05965   BasicBlock *Header = L->getHeader();
05966   assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!");
05967 
05968   BasicBlock *Latch = L->getLoopLatch();
05969   if (!Latch)
05970     return nullptr;
05971 
05972   for (auto &I : *Header) {
05973     PHINode *PHI = dyn_cast<PHINode>(&I);
05974     if (!PHI) break;
05975     auto *StartCST = getOtherIncomingValue(PHI, Latch);
05976     if (!StartCST) continue;
05977     CurrentIterVals[PHI] = StartCST;
05978   }
05979   if (!CurrentIterVals.count(PN))
05980     return RetVal = nullptr;
05981 
05982   Value *BEValue = PN->getIncomingValueForBlock(Latch);
05983 
05984   // Execute the loop symbolically to determine the exit value.
05985   if (BEs.getActiveBits() >= 32)
05986     return RetVal = nullptr; // More than 2^32-1 iterations?? Not doing it!
05987 
05988   unsigned NumIterations = BEs.getZExtValue(); // must be in range
05989   unsigned IterationNum = 0;
05990   const DataLayout &DL = getDataLayout();
05991   for (; ; ++IterationNum) {
05992     if (IterationNum == NumIterations)
05993       return RetVal = CurrentIterVals[PN];  // Got exit value!
05994 
05995     // Compute the value of the PHIs for the next iteration.
05996     // EvaluateExpression adds non-phi values to the CurrentIterVals map.
05997     DenseMap<Instruction *, Constant *> NextIterVals;
05998     Constant *NextPHI =
05999         EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);
06000     if (!NextPHI)
06001       return nullptr;        // Couldn't evaluate!
06002     NextIterVals[PN] = NextPHI;
06003 
06004     bool StoppedEvolving = NextPHI == CurrentIterVals[PN];
06005 
06006     // Also evaluate the other PHI nodes.  However, we don't get to stop if we
06007     // cease to be able to evaluate one of them or if they stop evolving,
06008     // because that doesn't necessarily prevent us from computing PN.
06009     SmallVector<std::pair<PHINode *, Constant *>, 8> PHIsToCompute;
06010     for (const auto &I : CurrentIterVals) {
06011       PHINode *PHI = dyn_cast<PHINode>(I.first);
06012       if (!PHI || PHI == PN || PHI->getParent() != Header) continue;
06013       PHIsToCompute.emplace_back(PHI, I.second);
06014     }
06015     // We use two distinct loops because EvaluateExpression may invalidate any
06016     // iterators into CurrentIterVals.
06017     for (const auto &I : PHIsToCompute) {
06018       PHINode *PHI = I.first;
06019       Constant *&NextPHI = NextIterVals[PHI];
06020       if (!NextPHI) {   // Not already computed.
06021         Value *BEValue = PHI->getIncomingValueForBlock(Latch);
06022         NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);
06023       }
06024       if (NextPHI != I.second)
06025         StoppedEvolving = false;
06026     }
06027 
06028     // If all entries in CurrentIterVals == NextIterVals then we can stop
06029     // iterating, the loop can't continue to change.
06030     if (StoppedEvolving)
06031       return RetVal = CurrentIterVals[PN];
06032 
06033     CurrentIterVals.swap(NextIterVals);
06034   }
06035 }
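
// Illustrative example (editorial note): given a header PHI that is a
// recurrence over constants, e.g.
//
//   %x      = phi i32 [ 1, %preheader ], [ %x.next, %latch ]   ; hypothetical
//   %x.next = mul i32 %x, 3
//
// and a known backedge-taken count of 4, the code above simply runs the
// recurrence for 4 iterations and returns the folded exit value, here
// 1 * 3^4 = 81.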
06036 
06037 const SCEV *ScalarEvolution::computeExitCountExhaustively(const Loop *L,
06038                                                           Value *Cond,
06039                                                           bool ExitWhen) {
06040   PHINode *PN = getConstantEvolvingPHI(Cond, L);
06041   if (!PN) return getCouldNotCompute();
06042 
06043   // If the loop is canonicalized, the PHI will have exactly two entries.
06044   // That's the only form we support here.
06045   if (PN->getNumIncomingValues() != 2) return getCouldNotCompute();
06046 
06047   DenseMap<Instruction *, Constant *> CurrentIterVals;
06048   BasicBlock *Header = L->getHeader();
06049   assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!");
06050 
06051   BasicBlock *Latch = L->getLoopLatch();
06052   assert(Latch && "Should follow from NumIncomingValues == 2!");
06053 
06054   for (auto &I : *Header) {
06055     PHINode *PHI = dyn_cast<PHINode>(&I);
06056     if (!PHI)
06057       break;
06058     auto *StartCST = getOtherIncomingValue(PHI, Latch);
06059     if (!StartCST) continue;
06060     CurrentIterVals[PHI] = StartCST;
06061   }
06062   if (!CurrentIterVals.count(PN))
06063     return getCouldNotCompute();
06064 
06065   // Okay, we found a PHI node that defines the trip count of this loop.  Execute
06066   // the loop symbolically to determine when the condition gets a value of
06067   // "ExitWhen".
06068   unsigned MaxIterations = MaxBruteForceIterations;   // Limit analysis.
06069   const DataLayout &DL = getDataLayout();
06070   for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){
06071     auto *CondVal = dyn_cast_or_null<ConstantInt>(
06072         EvaluateExpression(Cond, L, CurrentIterVals, DL, &TLI));
06073 
06074     // Couldn't symbolically evaluate.
06075     if (!CondVal) return getCouldNotCompute();
06076 
06077     if (CondVal->getValue() == uint64_t(ExitWhen)) {
06078       ++NumBruteForceTripCountsComputed;
06079       return getConstant(Type::getInt32Ty(getContext()), IterationNum);
06080     }
06081 
06082     // Update all the PHI nodes for the next iteration.
06083     DenseMap<Instruction *, Constant *> NextIterVals;
06084 
06085     // Create a list of which PHIs we need to compute. We want to do this before
06086     // calling EvaluateExpression on them because that may invalidate iterators
06087     // into CurrentIterVals.
06088     SmallVector<PHINode *, 8> PHIsToCompute;
06089     for (const auto &I : CurrentIterVals) {
06090       PHINode *PHI = dyn_cast<PHINode>(I.first);
06091       if (!PHI || PHI->getParent() != Header) continue;
06092       PHIsToCompute.push_back(PHI);
06093     }
06094     for (PHINode *PHI : PHIsToCompute) {
06095       Constant *&NextPHI = NextIterVals[PHI];
06096       if (NextPHI) continue;    // Already computed!
06097 
06098       Value *BEValue = PHI->getIncomingValueForBlock(Latch);
06099       NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);
06100     }
06101     CurrentIterVals.swap(NextIterVals);
06102   }
06103 
06104   // Too many iterations were needed to evaluate.
06105   return getCouldNotCompute();
06106 }
06107 
06108 /// getSCEVAtScope - Return a SCEV expression for the specified value
06109 /// at the specified scope in the program.  The L value specifies a loop
06110 /// nest to evaluate the expression at: null means the top level, and a
06111 /// specified loop means the point immediately inside of that loop.
06112 ///
06113 /// This method can be used to compute the exit value for a variable defined
06114 /// in a loop by querying what the value will hold in the parent loop.
06115 ///
06116 /// In the case that a relevant loop exit value cannot be computed, the
06117 /// original value V is returned.
06118 const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
06119   SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values =
06120       ValuesAtScopes[V];
06121   // Check to see if we've folded this expression at this loop before.
06122   for (auto &LS : Values)
06123     if (LS.first == L)
06124       return LS.second ? LS.second : V;
06125 
06126   Values.emplace_back(L, nullptr);
06127 
06128   // Otherwise compute it.
06129   const SCEV *C = computeSCEVAtScope(V, L);
06130   for (auto &LS : reverse(ValuesAtScopes[V]))
06131     if (LS.first == L) {
06132       LS.second = C;
06133       break;
06134     }
06135   return C;
06136 }
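
// Usage sketch (editorial note, hypothetical client code):
//
//   // Value of V once the inner loop L has finished, i.e. evaluated in the
//   // scope of L's parent; falls back to the original expression when no
//   // loop-exit value can be computed.
//   const SCEV *AtParent =
//       SE.getSCEVAtScope(SE.getSCEV(V), L->getParentLoop());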
06137 
06138 /// This builds up a Constant using the ConstantExpr interface.  That way, we
06139 /// will return Constants for objects which aren't represented by a
06140 /// SCEVConstant, because SCEVConstant is restricted to ConstantInt.
06141 /// Returns NULL if the SCEV isn't representable as a Constant.
06142 static Constant *BuildConstantFromSCEV(const SCEV *V) {
06143   switch (static_cast<SCEVTypes>(V->getSCEVType())) {
06144     case scCouldNotCompute:
06145     case scAddRecExpr:
06146       break;
06147     case scConstant:
06148       return cast<SCEVConstant>(V)->getValue();
06149     case scUnknown:
06150       return dyn_cast<Constant>(cast<SCEVUnknown>(V)->getValue());
06151     case scSignExtend: {
06152       const SCEVSignExtendExpr *SS = cast<SCEVSignExtendExpr>(V);
06153       if (Constant *CastOp = BuildConstantFromSCEV(SS->getOperand()))
06154         return ConstantExpr::getSExt(CastOp, SS->getType());
06155       break;
06156     }
06157     case scZeroExtend: {
06158       const SCEVZeroExtendExpr *SZ = cast<SCEVZeroExtendExpr>(V);
06159       if (Constant *CastOp = BuildConstantFromSCEV(SZ->getOperand()))
06160         return ConstantExpr::getZExt(CastOp, SZ->getType());
06161       break;
06162     }
06163     case scTruncate: {
06164       const SCEVTruncateExpr *ST = cast<SCEVTruncateExpr>(V);
06165       if (Constant *CastOp = BuildConstantFromSCEV(ST->getOperand()))
06166         return ConstantExpr::getTrunc(CastOp, ST->