//===- ScalarEvolution.cpp - Scalar Evolution Analysis --------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of the scalar evolution analysis
// engine, which is used primarily to analyze expressions involving induction
// variables in loops.
//
// There are several aspects to this library.  First is the representation of
// scalar expressions, which are represented as subclasses of the SCEV class.
// These classes are used to represent certain types of subexpressions that we
// can handle. We only create one SCEV of a particular shape, so
// pointer-comparisons for equality are legal.
//
// One important aspect of the SCEV objects is that they are never cyclic, even
// if there is a cycle in the dataflow for an expression (i.e., a PHI node).  If
// the PHI node is one of the idioms that we can represent (e.g., a polynomial
// recurrence) then we represent it directly as a recurrence node, otherwise we
// represent it as a SCEVUnknown node.
//
// In addition to being able to represent expressions of various types, we also
// have folders that are used to build the *canonical* representation for a
// particular expression.  These folders are capable of using a variety of
// rewrite rules to simplify the expressions.
//
// Once the folders are defined, we can implement the more interesting
// higher-level code, such as the code that recognizes PHI nodes of various
// types, computes the execution count of a loop, etc.
//
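// As a concrete illustration of the representation (an example added for
// exposition, not exhaustive): in a loop whose induction variable %i starts
// at 0 and increments by 1, SCEV models %i as the add recurrence
// {0,+,1}<%loop> and folds an expression such as "4 * %i" into
// {0,+,4}<%loop>.  Because expressions are uniqued, every occurrence of
// {0,+,4}<%loop> in the function is the same object.
//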
// TODO: We should use these routines and value representations to implement
// dependence analysis!
//
//===----------------------------------------------------------------------===//
//
// There are several good references for the techniques used in this analysis.
//
//  Chains of recurrences -- a method to expedite the evaluation
//  of closed-form functions
//  Olaf Bachmann, Paul S. Wang, Eugene V. Zima
//
//  On computational properties of chains of recurrences
//  Eugene V. Zima
//
//  Symbolic Evaluation of Chains of Recurrences for Loop Optimization
//  Robert A. van Engelen
//
//  Efficient Symbolic Analysis for Optimizing Compilers
//  Robert A. van Engelen
//
//  Using the chains of recurrences algebra for data dependence testing and
//  induction variable substitution
//  MS Thesis, Johnie Birch
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
using namespace llvm;

#define DEBUG_TYPE "scalar-evolution"

STATISTIC(NumArrayLenItCounts,
          "Number of trip counts computed with array length");
STATISTIC(NumTripCountsComputed,
          "Number of loops with predictable loop counts");
STATISTIC(NumTripCountsNotComputed,
          "Number of loops without predictable loop counts");
STATISTIC(NumBruteForceTripCountsComputed,
          "Number of loops with trip counts computed by force");

static cl::opt<unsigned>
MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden,
                        cl::desc("Maximum number of iterations SCEV will "
                                 "symbolically execute a constant "
                                 "derived loop"),
                        cl::init(100));

// FIXME: Enable this with XDEBUG when the test suite is clean.
static cl::opt<bool>
VerifySCEV("verify-scev",
           cl::desc("Verify ScalarEvolution's backedge taken counts (slow)"));

INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution",
                "Scalar Evolution Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(ScalarEvolution, "scalar-evolution",
                "Scalar Evolution Analysis", false, true)
char ScalarEvolution::ID = 0;

//===----------------------------------------------------------------------===//
//                           SCEV class definitions
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Implementation of the SCEV class.
//

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void SCEV::dump() const {
  print(dbgs());
  dbgs() << '\n';
}
#endif

void SCEV::print(raw_ostream &OS) const {
  switch (static_cast<SCEVTypes>(getSCEVType())) {
  case scConstant:
    cast<SCEVConstant>(this)->getValue()->printAsOperand(OS, false);
    return;
  case scTruncate: {
    const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(this);
    const SCEV *Op = Trunc->getOperand();
    OS << "(trunc " << *Op->getType() << " " << *Op << " to "
       << *Trunc->getType() << ")";
    return;
  }
  case scZeroExtend: {
    const SCEVZeroExtendExpr *ZExt = cast<SCEVZeroExtendExpr>(this);
    const SCEV *Op = ZExt->getOperand();
    OS << "(zext " << *Op->getType() << " " << *Op << " to "
       << *ZExt->getType() << ")";
    return;
  }
  case scSignExtend: {
    const SCEVSignExtendExpr *SExt = cast<SCEVSignExtendExpr>(this);
    const SCEV *Op = SExt->getOperand();
    OS << "(sext " << *Op->getType() << " " << *Op << " to "
       << *SExt->getType() << ")";
    return;
  }
  case scAddRecExpr: {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(this);
    OS << "{" << *AR->getOperand(0);
    for (unsigned i = 1, e = AR->getNumOperands(); i != e; ++i)
      OS << ",+," << *AR->getOperand(i);
    OS << "}<";
    if (AR->getNoWrapFlags(FlagNUW))
      OS << "nuw><";
    if (AR->getNoWrapFlags(FlagNSW))
      OS << "nsw><";
    if (AR->getNoWrapFlags(FlagNW) &&
        !AR->getNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW)))
      OS << "nw><";
    AR->getLoop()->getHeader()->printAsOperand(OS, /*PrintType=*/false);
    OS << ">";
    return;
  }
  case scAddExpr:
  case scMulExpr:
  case scUMaxExpr:
  case scSMaxExpr: {
    const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(this);
    const char *OpStr = nullptr;
    switch (NAry->getSCEVType()) {
    case scAddExpr: OpStr = " + "; break;
    case scMulExpr: OpStr = " * "; break;
    case scUMaxExpr: OpStr = " umax "; break;
    case scSMaxExpr: OpStr = " smax "; break;
    }
    OS << "(";
    for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
         I != E; ++I) {
      OS << **I;
      if (std::next(I) != E)
        OS << OpStr;
    }
    OS << ")";
    switch (NAry->getSCEVType()) {
    case scAddExpr:
    case scMulExpr:
      if (NAry->getNoWrapFlags(FlagNUW))
        OS << "<nuw>";
      if (NAry->getNoWrapFlags(FlagNSW))
        OS << "<nsw>";
    }
    return;
  }
  case scUDivExpr: {
    const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(this);
    OS << "(" << *UDiv->getLHS() << " /u " << *UDiv->getRHS() << ")";
    return;
  }
  case scUnknown: {
    const SCEVUnknown *U = cast<SCEVUnknown>(this);
    Type *AllocTy;
    if (U->isSizeOf(AllocTy)) {
      OS << "sizeof(" << *AllocTy << ")";
      return;
    }
    if (U->isAlignOf(AllocTy)) {
      OS << "alignof(" << *AllocTy << ")";
      return;
    }

    Type *CTy;
    Constant *FieldNo;
    if (U->isOffsetOf(CTy, FieldNo)) {
      OS << "offsetof(" << *CTy << ", ";
      FieldNo->printAsOperand(OS, false);
      OS << ")";
      return;
    }

    // Otherwise just print it normally.
    U->getValue()->printAsOperand(OS, false);
    return;
  }
  case scCouldNotCompute:
    OS << "***COULDNOTCOMPUTE***";
    return;
  }
  llvm_unreachable("Unknown SCEV kind!");
}
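// For reference, some sample renderings produced by SCEV::print above
// (illustrative): a constant prints as "42", a cast as
// "(zext i32 %n to i64)", and an affine add recurrence that is known not to
// wrap in either sense, over a loop headed by %for.body, prints as
// "{0,+,4}<nuw><nsw><%for.body>".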

Type *SCEV::getType() const {
  switch (static_cast<SCEVTypes>(getSCEVType())) {
  case scConstant:
    return cast<SCEVConstant>(this)->getType();
  case scTruncate:
  case scZeroExtend:
  case scSignExtend:
    return cast<SCEVCastExpr>(this)->getType();
  case scAddRecExpr:
  case scMulExpr:
  case scUMaxExpr:
  case scSMaxExpr:
    return cast<SCEVNAryExpr>(this)->getType();
  case scAddExpr:
    return cast<SCEVAddExpr>(this)->getType();
  case scUDivExpr:
    return cast<SCEVUDivExpr>(this)->getType();
  case scUnknown:
    return cast<SCEVUnknown>(this)->getType();
  case scCouldNotCompute:
    llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
  }
  llvm_unreachable("Unknown SCEV kind!");
}

bool SCEV::isZero() const {
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
    return SC->getValue()->isZero();
  return false;
}

bool SCEV::isOne() const {
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
    return SC->getValue()->isOne();
  return false;
}

bool SCEV::isAllOnesValue() const {
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
    return SC->getValue()->isAllOnesValue();
  return false;
}

/// isNonConstantNegative - Return true if the specified SCEV is a negative
/// value (a multiply with a leading negative constant factor), but is not
/// itself a constant.
bool SCEV::isNonConstantNegative() const {
  const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(this);
  if (!Mul) return false;

  // If there is a constant factor, it will be first.
  const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
  if (!SC) return false;

  // Return true if the value is negative, this matches things like (-42 * V).
  return SC->getValue()->getValue().isNegative();
}

SCEVCouldNotCompute::SCEVCouldNotCompute() :
  SCEV(FoldingSetNodeIDRef(), scCouldNotCompute) {}

bool SCEVCouldNotCompute::classof(const SCEV *S) {
  return S->getSCEVType() == scCouldNotCompute;
}

const SCEV *ScalarEvolution::getConstant(ConstantInt *V) {
  FoldingSetNodeID ID;
  ID.AddInteger(scConstant);
  ID.AddPointer(V);
  void *IP = nullptr;
  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  SCEV *S = new (SCEVAllocator) SCEVConstant(ID.Intern(SCEVAllocator), V);
  UniqueSCEVs.InsertNode(S, IP);
  return S;
}

const SCEV *ScalarEvolution::getConstant(const APInt &Val) {
  return getConstant(ConstantInt::get(getContext(), Val));
}

const SCEV *
ScalarEvolution::getConstant(Type *Ty, uint64_t V, bool isSigned) {
  IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty));
  return getConstant(ConstantInt::get(ITy, V, isSigned));
}

SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID,
                           unsigned SCEVTy, const SCEV *op, Type *ty)
  : SCEV(ID, SCEVTy), Op(op), Ty(ty) {}

SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID,
                                   const SCEV *op, Type *ty)
  : SCEVCastExpr(ID, scTruncate, op, ty) {
  assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
         (Ty->isIntegerTy() || Ty->isPointerTy()) &&
         "Cannot truncate non-integer value!");
}

SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID,
                                       const SCEV *op, Type *ty)
  : SCEVCastExpr(ID, scZeroExtend, op, ty) {
  assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
         (Ty->isIntegerTy() || Ty->isPointerTy()) &&
         "Cannot zero extend non-integer value!");
}

SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID,
                                       const SCEV *op, Type *ty)
  : SCEVCastExpr(ID, scSignExtend, op, ty) {
  assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
         (Ty->isIntegerTy() || Ty->isPointerTy()) &&
         "Cannot sign extend non-integer value!");
}

void SCEVUnknown::deleted() {
  // Clear this SCEVUnknown from various maps.
  SE->forgetMemoizedResults(this);

  // Remove this SCEVUnknown from the uniquing map.
  SE->UniqueSCEVs.RemoveNode(this);

  // Release the value.
  setValPtr(nullptr);
}

void SCEVUnknown::allUsesReplacedWith(Value *New) {
  // Clear this SCEVUnknown from various maps.
  SE->forgetMemoizedResults(this);

  // Remove this SCEVUnknown from the uniquing map.
  SE->UniqueSCEVs.RemoveNode(this);

  // Update this SCEVUnknown to point to the new value. This is needed
  // because there may still be outstanding SCEVs which still point to
  // this SCEVUnknown.
  setValPtr(New);
}

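// The following three predicates recognize the target-independent "sizeof",
// "alignof", and "offsetof" idioms, which frontends encode as constant
// expressions computing an offset from a null pointer.  For example
// (illustrative), sizeof(T) appears as the byte offset of element 1 of a
// T array based at address zero:
//
//   ptrtoint (T* getelementptr (T* null, i32 1) to iN)
//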
bool SCEVUnknown::isSizeOf(Type *&AllocTy) const {
  if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
    if (VCE->getOpcode() == Instruction::PtrToInt)
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
        if (CE->getOpcode() == Instruction::GetElementPtr &&
            CE->getOperand(0)->isNullValue() &&
            CE->getNumOperands() == 2)
          if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(1)))
            if (CI->isOne()) {
              AllocTy = cast<PointerType>(CE->getOperand(0)->getType())
                                 ->getElementType();
              return true;
            }

  return false;
}

bool SCEVUnknown::isAlignOf(Type *&AllocTy) const {
  if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
    if (VCE->getOpcode() == Instruction::PtrToInt)
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
        if (CE->getOpcode() == Instruction::GetElementPtr &&
            CE->getOperand(0)->isNullValue()) {
          Type *Ty =
            cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
          if (StructType *STy = dyn_cast<StructType>(Ty))
            if (!STy->isPacked() &&
                CE->getNumOperands() == 3 &&
                CE->getOperand(1)->isNullValue()) {
              if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(2)))
                if (CI->isOne() &&
                    STy->getNumElements() == 2 &&
                    STy->getElementType(0)->isIntegerTy(1)) {
                  AllocTy = STy->getElementType(1);
                  return true;
                }
            }
        }

  return false;
}

bool SCEVUnknown::isOffsetOf(Type *&CTy, Constant *&FieldNo) const {
  if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
    if (VCE->getOpcode() == Instruction::PtrToInt)
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
        if (CE->getOpcode() == Instruction::GetElementPtr &&
            CE->getNumOperands() == 3 &&
            CE->getOperand(0)->isNullValue() &&
            CE->getOperand(1)->isNullValue()) {
          Type *Ty =
            cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
          // Ignore vector types here so that ScalarEvolutionExpander doesn't
          // emit getelementptrs that index into vectors.
          if (Ty->isStructTy() || Ty->isArrayTy()) {
            CTy = Ty;
            FieldNo = CE->getOperand(2);
            return true;
          }
        }

  return false;
}

//===----------------------------------------------------------------------===//
//                               SCEV Utilities
//===----------------------------------------------------------------------===//

namespace {
  /// SCEVComplexityCompare - Return true if the complexity of the LHS is less
  /// than the complexity of the RHS.  This comparator is used to canonicalize
  /// expressions.
  class SCEVComplexityCompare {
    const LoopInfo *const LI;
  public:
    explicit SCEVComplexityCompare(const LoopInfo *li) : LI(li) {}

    // Return true if LHS is less than RHS; otherwise, false.
    bool operator()(const SCEV *LHS, const SCEV *RHS) const {
      return compare(LHS, RHS) < 0;
    }

    // Return negative, zero, or positive, if LHS is less than, equal to, or
    // greater than RHS, respectively. A three-way result allows recursive
    // comparisons to be more efficient.
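    // For example (illustrative): the SCEV for the constant 5 (scConstant)
    // sorts before the SCEV for the IR value %a (scUnknown), because
    // scConstant precedes scUnknown in the SCEVTypes enumeration; the
    // per-kind tie-breakers below apply only when both operands have the
    // same kind.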
    int compare(const SCEV *LHS, const SCEV *RHS) const {
      // Fast-path: SCEVs are uniqued so we can do a quick equality check.
      if (LHS == RHS)
        return 0;

      // Primarily, sort the SCEVs by their getSCEVType().
      unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType();
      if (LType != RType)
        return (int)LType - (int)RType;

      // Aside from the getSCEVType() ordering, the particular ordering
      // isn't very important except that it's beneficial to be consistent,
      // so that (a + b) and (b + a) don't end up as different expressions.
      switch (static_cast<SCEVTypes>(LType)) {
      case scUnknown: {
        const SCEVUnknown *LU = cast<SCEVUnknown>(LHS);
        const SCEVUnknown *RU = cast<SCEVUnknown>(RHS);

        // Sort SCEVUnknown values with some loose heuristics. TODO: This is
        // not as complete as it could be.
        const Value *LV = LU->getValue(), *RV = RU->getValue();

        // Order pointer values after integer values. This helps SCEVExpander
        // form GEPs.
        bool LIsPointer = LV->getType()->isPointerTy(),
             RIsPointer = RV->getType()->isPointerTy();
        if (LIsPointer != RIsPointer)
          return (int)LIsPointer - (int)RIsPointer;

        // Compare getValueID values.
        unsigned LID = LV->getValueID(),
                 RID = RV->getValueID();
        if (LID != RID)
          return (int)LID - (int)RID;

        // Sort arguments by their position.
        if (const Argument *LA = dyn_cast<Argument>(LV)) {
          const Argument *RA = cast<Argument>(RV);
          unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo();
          return (int)LArgNo - (int)RArgNo;
        }

        // For instructions, compare their loop depth, and their operand
        // count.  This is pretty loose.
        if (const Instruction *LInst = dyn_cast<Instruction>(LV)) {
          const Instruction *RInst = cast<Instruction>(RV);

          // Compare loop depths.
          const BasicBlock *LParent = LInst->getParent(),
                           *RParent = RInst->getParent();
          if (LParent != RParent) {
            unsigned LDepth = LI->getLoopDepth(LParent),
                     RDepth = LI->getLoopDepth(RParent);
            if (LDepth != RDepth)
              return (int)LDepth - (int)RDepth;
          }

          // Compare the number of operands.
          unsigned LNumOps = LInst->getNumOperands(),
                   RNumOps = RInst->getNumOperands();
          return (int)LNumOps - (int)RNumOps;
        }

        return 0;
      }

      case scConstant: {
        const SCEVConstant *LC = cast<SCEVConstant>(LHS);
        const SCEVConstant *RC = cast<SCEVConstant>(RHS);

        // Compare constant values.
        const APInt &LA = LC->getValue()->getValue();
        const APInt &RA = RC->getValue()->getValue();
        unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth();
        if (LBitWidth != RBitWidth)
          return (int)LBitWidth - (int)RBitWidth;
        return LA.ult(RA) ? -1 : 1;
      }

      case scAddRecExpr: {
        const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS);
        const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS);

        // Compare addrec loop depths.
        const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop();
        if (LLoop != RLoop) {
          unsigned LDepth = LLoop->getLoopDepth(),
                   RDepth = RLoop->getLoopDepth();
          if (LDepth != RDepth)
            return (int)LDepth - (int)RDepth;
        }

        // Addrec complexity grows with operand count.
        unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands();
        if (LNumOps != RNumOps)
          return (int)LNumOps - (int)RNumOps;

        // Lexicographically compare.
        for (unsigned i = 0; i != LNumOps; ++i) {
          long X = compare(LA->getOperand(i), RA->getOperand(i));
          if (X != 0)
            return X;
        }

        return 0;
      }

      case scAddExpr:
      case scMulExpr:
      case scSMaxExpr:
      case scUMaxExpr: {
        const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS);
        const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS);

        // Lexicographically compare n-ary expressions.
        unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands();
        if (LNumOps != RNumOps)
          return (int)LNumOps - (int)RNumOps;

        // Operand counts are equal at this point, so compare element-wise.
        for (unsigned i = 0; i != LNumOps; ++i) {
          long X = compare(LC->getOperand(i), RC->getOperand(i));
          if (X != 0)
            return X;
        }
        return 0;
      }

      case scUDivExpr: {
        const SCEVUDivExpr *LC = cast<SCEVUDivExpr>(LHS);
        const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS);

        // Lexicographically compare udiv expressions.
        long X = compare(LC->getLHS(), RC->getLHS());
        if (X != 0)
          return X;
        return compare(LC->getRHS(), RC->getRHS());
      }

      case scTruncate:
      case scZeroExtend:
      case scSignExtend: {
        const SCEVCastExpr *LC = cast<SCEVCastExpr>(LHS);
        const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS);

        // Compare cast expressions by operand.
        return compare(LC->getOperand(), RC->getOperand());
      }

      case scCouldNotCompute:
        llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
      }
      llvm_unreachable("Unknown SCEV kind!");
    }
  };
}

/// GroupByComplexity - Given a list of SCEV objects, order them by their
/// complexity, and group objects of the same complexity together by value.
/// When this routine is finished, we know that any duplicates in the vector are
/// consecutive and that complexity is monotonically increasing.
///
/// Note that we take special precautions to ensure that we get deterministic
/// results from this routine.  In other words, we don't want the results of
/// this to depend on where the addresses of various SCEV objects happened to
/// land in memory.
///
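/// For example (illustrative): given the operand list [%a, 7, {0,+,1}<%L>,
/// %a], this routine produces [7, {0,+,1}<%L>, %a, %a]: the constant sorts
/// first and the duplicate %a values become adjacent, so callers can fold
/// duplicates with a single linear scan.
///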
static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
                              LoopInfo *LI) {
  if (Ops.size() < 2) return;  // Noop
  if (Ops.size() == 2) {
    // This is the common case, which also happens to be trivially simple.
    // Special case it.
    const SCEV *&LHS = Ops[0], *&RHS = Ops[1];
    if (SCEVComplexityCompare(LI)(RHS, LHS))
      std::swap(LHS, RHS);
    return;
  }

  // Do the rough sort by complexity.
  std::stable_sort(Ops.begin(), Ops.end(), SCEVComplexityCompare(LI));

  // Now that we are sorted by complexity, group elements of the same
  // complexity.  Note that this is, at worst, N^2, but the vector is likely to
  // be extremely short in practice.  Note that we take this approach because we
  // do not want to depend on the addresses of the objects we are grouping.
  for (unsigned i = 0, e = Ops.size(); i != e-2; ++i) {
    const SCEV *S = Ops[i];
    unsigned Complexity = S->getSCEVType();

    // If there are any objects of the same complexity and same value as this
    // one, group them.
    for (unsigned j = i+1; j != e && Ops[j]->getSCEVType() == Complexity; ++j) {
      if (Ops[j] == S) { // Found a duplicate.
        // Move it to immediately after i'th element.
        std::swap(Ops[i+1], Ops[j]);
        ++i;   // no need to rescan it.
        if (i == e-2) return;  // Done!
      }
    }
  }
}

namespace {
struct FindSCEVSize {
  int Size;
  FindSCEVSize() : Size(0) {}

  bool follow(const SCEV *S) {
    ++Size;
    // Keep looking at all operands of S.
    return true;
  }
  bool isDone() const {
    return false;
  }
};
}

// Returns the size of the SCEV S.
static inline int sizeOfSCEV(const SCEV *S) {
  FindSCEVSize F;
  SCEVTraversal<FindSCEVSize> ST(F);
  ST.visitAll(S);
  return F.Size;
}

namespace {

struct SCEVDivision : public SCEVVisitor<SCEVDivision, void> {
public:
  // Computes the Quotient and Remainder of the division of Numerator by
  // Denominator.
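  //
  // For example (illustrative): dividing (8 * %a * %b) by (4 * %a) first
  // splits the denominator into its factors, divides by the constant 4 to
  // obtain (2 * %a * %b), and then cancels %a, yielding the quotient
  // (2 * %b) with remainder zero.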
  static void divide(ScalarEvolution &SE, const SCEV *Numerator,
                     const SCEV *Denominator, const SCEV **Quotient,
                     const SCEV **Remainder) {
    assert(Numerator && Denominator && "Uninitialized SCEV");

    SCEVDivision D(SE, Numerator, Denominator);

    // Check for the trivial case here to avoid having to check for it in the
    // rest of the code.
    if (Numerator == Denominator) {
      *Quotient = D.One;
      *Remainder = D.Zero;
      return;
    }

    if (Numerator->isZero()) {
      *Quotient = D.Zero;
      *Remainder = D.Zero;
      return;
    }

    // A simple case: when dividing by one (N/1), the quotient is N.
    if (Denominator->isOne()) {
      *Quotient = Numerator;
      *Remainder = D.Zero;
      return;
    }

    // Split the Denominator when it is a product.
    if (const SCEVMulExpr *T = dyn_cast<const SCEVMulExpr>(Denominator)) {
      const SCEV *Q, *R;
      *Quotient = Numerator;
      for (const SCEV *Op : T->operands()) {
        divide(SE, *Quotient, Op, &Q, &R);
        *Quotient = Q;

        // Bail out when the Numerator is not divisible by one of the terms of
        // the Denominator.
        if (!R->isZero()) {
          *Quotient = D.Zero;
          *Remainder = Numerator;
          return;
        }
      }
      *Remainder = D.Zero;
      return;
    }

    D.visit(Numerator);
    *Quotient = D.Quotient;
    *Remainder = D.Remainder;
  }

  // Except in the trivial case described above, we do not know how to divide
  // Expr by Denominator for the following functions with empty implementation.
  void visitTruncateExpr(const SCEVTruncateExpr *Numerator) {}
  void visitZeroExtendExpr(const SCEVZeroExtendExpr *Numerator) {}
  void visitSignExtendExpr(const SCEVSignExtendExpr *Numerator) {}
  void visitUDivExpr(const SCEVUDivExpr *Numerator) {}
  void visitSMaxExpr(const SCEVSMaxExpr *Numerator) {}
  void visitUMaxExpr(const SCEVUMaxExpr *Numerator) {}
  void visitUnknown(const SCEVUnknown *Numerator) {}
  void visitCouldNotCompute(const SCEVCouldNotCompute *Numerator) {}

  void visitConstant(const SCEVConstant *Numerator) {
    if (const SCEVConstant *D = dyn_cast<SCEVConstant>(Denominator)) {
      APInt NumeratorVal = Numerator->getValue()->getValue();
      APInt DenominatorVal = D->getValue()->getValue();
      uint32_t NumeratorBW = NumeratorVal.getBitWidth();
      uint32_t DenominatorBW = DenominatorVal.getBitWidth();

      if (NumeratorBW > DenominatorBW)
        DenominatorVal = DenominatorVal.sext(NumeratorBW);
      else if (NumeratorBW < DenominatorBW)
        NumeratorVal = NumeratorVal.sext(DenominatorBW);

      APInt QuotientVal(NumeratorVal.getBitWidth(), 0);
      APInt RemainderVal(NumeratorVal.getBitWidth(), 0);
      APInt::sdivrem(NumeratorVal, DenominatorVal, QuotientVal, RemainderVal);
      Quotient = SE.getConstant(QuotientVal);
      Remainder = SE.getConstant(RemainderVal);
      return;
    }
  }

  void visitAddRecExpr(const SCEVAddRecExpr *Numerator) {
    const SCEV *StartQ, *StartR, *StepQ, *StepR;
    assert(Numerator->isAffine() && "Numerator should be affine");
    divide(SE, Numerator->getStart(), Denominator, &StartQ, &StartR);
    divide(SE, Numerator->getStepRecurrence(SE), Denominator, &StepQ, &StepR);
    // Bail out if the types do not match.
    Type *Ty = Denominator->getType();
    if (Ty != StartQ->getType() || Ty != StartR->getType() ||
        Ty != StepQ->getType() || Ty != StepR->getType()) {
      Quotient = Zero;
      Remainder = Numerator;
      return;
    }
    Quotient = SE.getAddRecExpr(StartQ, StepQ, Numerator->getLoop(),
                                Numerator->getNoWrapFlags());
    Remainder = SE.getAddRecExpr(StartR, StepR, Numerator->getLoop(),
                                 Numerator->getNoWrapFlags());
  }

  void visitAddExpr(const SCEVAddExpr *Numerator) {
    SmallVector<const SCEV *, 2> Qs, Rs;
    Type *Ty = Denominator->getType();

    for (const SCEV *Op : Numerator->operands()) {
      const SCEV *Q, *R;
      divide(SE, Op, Denominator, &Q, &R);

      // Bail out if types do not match.
      if (Ty != Q->getType() || Ty != R->getType()) {
        Quotient = Zero;
        Remainder = Numerator;
        return;
      }

      Qs.push_back(Q);
      Rs.push_back(R);
    }

    if (Qs.size() == 1) {
      Quotient = Qs[0];
      Remainder = Rs[0];
      return;
    }

    Quotient = SE.getAddExpr(Qs);
    Remainder = SE.getAddExpr(Rs);
  }

  void visitMulExpr(const SCEVMulExpr *Numerator) {
    SmallVector<const SCEV *, 2> Qs;
    Type *Ty = Denominator->getType();

    bool FoundDenominatorTerm = false;
    for (const SCEV *Op : Numerator->operands()) {
      // Bail out if types do not match.
      if (Ty != Op->getType()) {
        Quotient = Zero;
        Remainder = Numerator;
        return;
      }

      if (FoundDenominatorTerm) {
        Qs.push_back(Op);
        continue;
      }

      // Check whether Denominator divides one of the product operands.
      const SCEV *Q, *R;
      divide(SE, Op, Denominator, &Q, &R);
      if (!R->isZero()) {
        Qs.push_back(Op);
        continue;
      }

      // Bail out if types do not match.
      if (Ty != Q->getType()) {
        Quotient = Zero;
        Remainder = Numerator;
        return;
      }

      FoundDenominatorTerm = true;
      Qs.push_back(Q);
    }

    if (FoundDenominatorTerm) {
      Remainder = Zero;
      if (Qs.size() == 1)
        Quotient = Qs[0];
      else
        Quotient = SE.getMulExpr(Qs);
      return;
    }

    if (!isa<SCEVUnknown>(Denominator)) {
      Quotient = Zero;
      Remainder = Numerator;
      return;
    }

    // The Remainder is obtained by replacing Denominator by 0 in Numerator.
    ValueToValueMap RewriteMap;
    RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] =
        cast<SCEVConstant>(Zero)->getValue();
    Remainder = SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true);

    if (Remainder->isZero()) {
      // The Quotient is obtained by replacing Denominator by 1 in Numerator.
      RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] =
          cast<SCEVConstant>(One)->getValue();
      Quotient =
          SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true);
      return;
    }

    // Quotient is (Numerator - Remainder) divided by Denominator.
    const SCEV *Q, *R;
    const SCEV *Diff = SE.getMinusSCEV(Numerator, Remainder);
    if (sizeOfSCEV(Diff) > sizeOfSCEV(Numerator)) {
      // This SCEV does not seem to simplify: fail the division here.
      Quotient = Zero;
      Remainder = Numerator;
      return;
    }
    divide(SE, Diff, Denominator, &Q, &R);
    assert(R == Zero &&
           "Denominator should evenly divide (Numerator - Remainder)");
    Quotient = Q;
  }

private:
  SCEVDivision(ScalarEvolution &S, const SCEV *Numerator,
               const SCEV *Denominator)
      : SE(S), Denominator(Denominator) {
    Zero = SE.getConstant(Denominator->getType(), 0);
    One = SE.getConstant(Denominator->getType(), 1);

    // By default, we don't know how to divide Expr by Denominator.
    // Providing the default here simplifies the rest of the code.
    Quotient = Zero;
    Remainder = Numerator;
  }

  ScalarEvolution &SE;
  const SCEV *Denominator, *Quotient, *Remainder, *Zero, *One;
};

}

//===----------------------------------------------------------------------===//
//                      Simple SCEV method implementations
//===----------------------------------------------------------------------===//

/// BinomialCoefficient - Compute BC(It, K).  The result has width W.
/// Assumes K > 0.
static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K,
                                       ScalarEvolution &SE,
                                       Type *ResultTy) {
  // Handle the simplest case efficiently.
  if (K == 1)
    return SE.getTruncateOrZeroExtend(It, ResultTy);

  // We are using the following formula for BC(It, K):
  //
  //   BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / K!
  //
  // Suppose W is the bitwidth of the return value.  We must be prepared for
  // overflow.  Hence, we must ensure that the result of our computation is
  // equal to the accurate one modulo 2^W.  Unfortunately, division isn't
  // safe in modular arithmetic.
  //
  // However, this code doesn't use exactly that formula; the formula it uses
  // is something like the following, where T is the number of factors of 2 in
  // K! (i.e. trailing zeros in the binary representation of K!), and ^ is
  // exponentiation:
  //
  //   BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / 2^T / (K! / 2^T)
  //
  // This formula is trivially equivalent to the previous formula.  However,
  // this formula can be implemented much more efficiently.  The trick is that
  // K! / 2^T is odd, and exact division by an odd number *is* safe in modular
  // arithmetic.  To do exact division in modular arithmetic, all we have
  // to do is multiply by the inverse.  Therefore, this step can be done at
  // width W.
  //
  // The next issue is how to safely do the division by 2^T.  The way this
  // is done is by doing the multiplication step at a width of at least W + T
  // bits.  This way, the bottom W+T bits of the product are accurate. Then,
  // when we perform the division by 2^T (which is equivalent to a right shift
  // by T), the bottom W bits are accurate.  Extra bits are okay; they'll get
  // truncated out after the division by 2^T.
  //
  // In comparison to just directly using the first formula, this technique
  // is much more efficient; using the first formula requires W * K bits,
  // but this formula requires less than W + K bits. Also, the first formula
  // requires a division step, whereas this formula only requires multiplies
  // and shifts.
  //
  // It doesn't matter whether the subtraction step is done in the calculation
  // width or the input iteration count's width; if the subtraction overflows,
  // the result must be zero anyway.  We prefer here to do it in the width of
  // the induction variable because it helps a lot for certain cases; CodeGen
  // isn't smart enough to ignore the overflow, which leads to much less
  // efficient code if the width of the subtraction is wider than the native
  // register width.
  //
  // (It's possible to not widen at all by pulling out factors of 2 before
  // the multiplication; for example, K=2 can be calculated as
  // It/2*(It+(It*INT_MIN/INT_MIN)+-1). However, it requires
  // extra arithmetic, so it's not an obvious win, and it gets
  // much more complicated for K > 3.)
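  //
  // Worked example (added for illustration): for K = 3 and W = 32, we have
  // K! = 6 = 2^1 * 3, so T = 1 and K!/2^T = 3.  The product It*(It-1)*(It-2)
  // is computed at W + T = 33 bits and shifted right by T = 1; the result is
  // truncated to 32 bits and multiplied by the multiplicative inverse of 3
  // modulo 2^32, which is 0xAAAAAAAB (since 3 * 0xAAAAAAAB = 2^33 + 1).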

  // Protection from insane SCEVs; this bound is conservative,
  // but it probably doesn't matter.
  if (K > 1000)
    return SE.getCouldNotCompute();

  unsigned W = SE.getTypeSizeInBits(ResultTy);

  // Calculate K! / 2^T and T; we divide out the factors of two before
  // multiplying for calculating K! / 2^T to avoid overflow.
  // Other overflow doesn't matter because we only care about the bottom
  // W bits of the result.
  APInt OddFactorial(W, 1);
  unsigned T = 1;
  for (unsigned i = 3; i <= K; ++i) {
    APInt Mult(W, i);
    unsigned TwoFactors = Mult.countTrailingZeros();
    T += TwoFactors;
    Mult = Mult.lshr(TwoFactors);
    OddFactorial *= Mult;
  }

  // We need at least W + T bits for the multiplication step
  unsigned CalculationBits = W + T;

  // Calculate 2^T, at width T+W.
  APInt DivFactor = APInt::getOneBitSet(CalculationBits, T);

  // Calculate the multiplicative inverse of K! / 2^T;
  // this multiplication factor will perform the exact division by
  // K! / 2^T.
  APInt Mod = APInt::getSignedMinValue(W+1);
  APInt MultiplyFactor = OddFactorial.zext(W+1);
  MultiplyFactor = MultiplyFactor.multiplicativeInverse(Mod);
  MultiplyFactor = MultiplyFactor.trunc(W);

  // Calculate the product, at width T+W
  IntegerType *CalculationTy = IntegerType::get(SE.getContext(),
                                                      CalculationBits);
  const SCEV *Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy);
  for (unsigned i = 1; i != K; ++i) {
    const SCEV *S = SE.getMinusSCEV(It, SE.getConstant(It->getType(), i));
    Dividend = SE.getMulExpr(Dividend,
                             SE.getTruncateOrZeroExtend(S, CalculationTy));
  }

  // Divide by 2^T
  const SCEV *DivResult = SE.getUDivExpr(Dividend, SE.getConstant(DivFactor));

  // Truncate the result, and divide by K! / 2^T.

  return SE.getMulExpr(SE.getConstant(MultiplyFactor),
                       SE.getTruncateOrZeroExtend(DivResult, ResultTy));
}

/// evaluateAtIteration - Return the value of this chain of recurrences at
/// the specified iteration number.  We can evaluate this recurrence by
/// multiplying each element in the chain by the binomial coefficient
/// corresponding to it.  In other words, we can evaluate {A,+,B,+,C,+,D} as:
///
///   A*BC(It, 0) + B*BC(It, 1) + C*BC(It, 2) + D*BC(It, 3)
///
/// where BC(It, k) stands for binomial coefficient.
///
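/// For example (illustrative): evaluating {5,+,3,+,2} at iteration It gives
/// 5*BC(It,0) + 3*BC(It,1) + 2*BC(It,2) = 5 + 3*It + It*(It-1); at It = 2
/// this is 5 + 6 + 2 = 13, matching the values 5, 8, 13, ... produced by
/// stepping the recurrence directly.
///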
const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It,
                                                ScalarEvolution &SE) const {
  const SCEV *Result = getStart();
  for (unsigned i = 1, e = getNumOperands(); i != e; ++i) {
    // The computation is correct in the face of overflow provided that the
    // multiplication is performed _after_ the evaluation of the binomial
    // coefficient.
    const SCEV *Coeff = BinomialCoefficient(It, i, SE, getType());
    if (isa<SCEVCouldNotCompute>(Coeff))
      return Coeff;

    Result = SE.getAddExpr(Result, SE.getMulExpr(getOperand(i), Coeff));
  }
  return Result;
}

//===----------------------------------------------------------------------===//
//                    SCEV Expression folder implementations
//===----------------------------------------------------------------------===//

const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
                                             Type *Ty) {
  assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) &&
         "This is not a truncating conversion!");
  assert(isSCEVable(Ty) &&
         "This is not a conversion to a SCEVable type!");
  Ty = getEffectiveSCEVType(Ty);

  FoldingSetNodeID ID;
  ID.AddInteger(scTruncate);
  ID.AddPointer(Op);
  ID.AddPointer(Ty);
  void *IP = nullptr;
  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;

  // Fold if the operand is constant.
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
    return getConstant(
      cast<ConstantInt>(ConstantExpr::getTrunc(SC->getValue(), Ty)));

  // trunc(trunc(x)) --> trunc(x)
  if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op))
    return getTruncateExpr(ST->getOperand(), Ty);

  // trunc(sext(x)) --> sext(x) if widening or trunc(x) if narrowing
  if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
    return getTruncateOrSignExtend(SS->getOperand(), Ty);

  // trunc(zext(x)) --> zext(x) if widening or trunc(x) if narrowing
  if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
    return getTruncateOrZeroExtend(SZ->getOperand(), Ty);

  // trunc(x1+x2+...+xN) --> trunc(x1)+trunc(x2)+...+trunc(xN) if we can
  // eliminate all the truncates, or we replace other casts with truncates.
  if (const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Op)) {
    SmallVector<const SCEV *, 4> Operands;
    bool hasTrunc = false;
    for (unsigned i = 0, e = SA->getNumOperands(); i != e && !hasTrunc; ++i) {
      const SCEV *S = getTruncateExpr(SA->getOperand(i), Ty);
      if (!isa<SCEVCastExpr>(SA->getOperand(i)))
        hasTrunc = isa<SCEVTruncateExpr>(S);
      Operands.push_back(S);
    }
    if (!hasTrunc)
      return getAddExpr(Operands);
    UniqueSCEVs.FindNodeOrInsertPos(ID, IP);  // Mutates IP, returns NULL.
  }

  // trunc(x1*x2*...*xN) --> trunc(x1)*trunc(x2)*...*trunc(xN) if we can
  // eliminate all the truncates, or we replace other casts with truncates.
  if (const SCEVMulExpr *SM = dyn_cast<SCEVMulExpr>(Op)) {
    SmallVector<const SCEV *, 4> Operands;
    bool hasTrunc = false;
    for (unsigned i = 0, e = SM->getNumOperands(); i != e && !hasTrunc; ++i) {
      const SCEV *S = getTruncateExpr(SM->getOperand(i), Ty);
      if (!isa<SCEVCastExpr>(SM->getOperand(i)))
        hasTrunc = isa<SCEVTruncateExpr>(S);
      Operands.push_back(S);
    }
    if (!hasTrunc)
      return getMulExpr(Operands);
    UniqueSCEVs.FindNodeOrInsertPos(ID, IP);  // Mutates IP, returns NULL.
  }

  // If the input value is a chrec scev, truncate the chrec's operands.
  if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
    SmallVector<const SCEV *, 4> Operands;
    for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
      Operands.push_back(getTruncateExpr(AddRec->getOperand(i), Ty));
    return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap);
  }

  // The cast wasn't folded; create an explicit cast node. We can reuse
  // the existing insert position since if we get here, we won't have
  // made any changes which would invalidate it.
  SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator),
                                                 Op, Ty);
  UniqueSCEVs.InsertNode(S, IP);
  return S;
}

// Get the limit of a recurrence such that incrementing by Step cannot cause
// signed overflow as long as the value of the recurrence within the
// loop does not exceed this limit before incrementing.
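// For example (illustrative): for an i8 recurrence with Step = 1, the limit
// is SIGNED_MIN - 1, which wraps to 127, with predicate ICMP_SLT: as long as
// the current value is signed-less-than 127, adding the step cannot
// sign-overflow.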
static const SCEV *getSignedOverflowLimitForStep(const SCEV *Step,
                                                 ICmpInst::Predicate *Pred,
                                                 ScalarEvolution *SE) {
  unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
  if (SE->isKnownPositive(Step)) {
    *Pred = ICmpInst::ICMP_SLT;
    return SE->getConstant(APInt::getSignedMinValue(BitWidth) -
                           SE->getSignedRange(Step).getSignedMax());
  }
  if (SE->isKnownNegative(Step)) {
    *Pred = ICmpInst::ICMP_SGT;
    return SE->getConstant(APInt::getSignedMaxValue(BitWidth) -
                           SE->getSignedRange(Step).getSignedMin());
  }
  return nullptr;
}

// Get the limit of a recurrence such that incrementing by Step cannot cause
// unsigned overflow as long as the value of the recurrence within the loop does
// not exceed this limit before incrementing.
static const SCEV *getUnsignedOverflowLimitForStep(const SCEV *Step,
                                                   ICmpInst::Predicate *Pred,
                                                   ScalarEvolution *SE) {
  unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
  *Pred = ICmpInst::ICMP_ULT;

  return SE->getConstant(APInt::getMinValue(BitWidth) -
                         SE->getUnsignedRange(Step).getUnsignedMax());
}

namespace {

struct ExtendOpTraitsBase {
  typedef const SCEV *(ScalarEvolution::*GetExtendExprTy)(const SCEV *, Type *);
};

// Used to make code generic over signed and unsigned overflow.
template <typename ExtendOp> struct ExtendOpTraits {
  // Members present:
  //
  // static const SCEV::NoWrapFlags WrapType;
  //
  // static const ExtendOpTraitsBase::GetExtendExprTy GetExtendExpr;
  //
  // static const SCEV *getOverflowLimitForStep(const SCEV *Step,
  //                                           ICmpInst::Predicate *Pred,
  //                                           ScalarEvolution *SE);
};

template <>
struct ExtendOpTraits<SCEVSignExtendExpr> : public ExtendOpTraitsBase {
  static const SCEV::NoWrapFlags WrapType = SCEV::FlagNSW;

  static const GetExtendExprTy GetExtendExpr;

  static const SCEV *getOverflowLimitForStep(const SCEV *Step,
                                             ICmpInst::Predicate *Pred,
                                             ScalarEvolution *SE) {
    return getSignedOverflowLimitForStep(Step, Pred, SE);
  }
};

const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
    SCEVSignExtendExpr>::GetExtendExpr = &ScalarEvolution::getSignExtendExpr;

template <>
struct ExtendOpTraits<SCEVZeroExtendExpr> : public ExtendOpTraitsBase {
  static const SCEV::NoWrapFlags WrapType = SCEV::FlagNUW;

  static const GetExtendExprTy GetExtendExpr;

  static const SCEV *getOverflowLimitForStep(const SCEV *Step,
                                             ICmpInst::Predicate *Pred,
                                             ScalarEvolution *SE) {
    return getUnsignedOverflowLimitForStep(Step, Pred, SE);
  }
};

const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
    SCEVZeroExtendExpr>::GetExtendExpr = &ScalarEvolution::getZeroExtendExpr;
}

// The recurrence AR has been shown to have no signed/unsigned wrap or something
// close to it. Typically, if we can prove NSW/NUW for AR, then we can just as
// easily prove NSW/NUW for its preincrement or postincrement sibling. This
// allows normalizing a sign/zero extended AddRec as such:
// {sext/zext(Step + Start),+,Step} => {Step + sext/zext(Start),+,Step}.
// As a result, the expression "Step + sext/zext(PreIncAR)" is congruent with
// "sext/zext(PostIncAR)".
01260 template <typename ExtendOpTy>
01261 static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty,
01262                                         ScalarEvolution *SE) {
01263   auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType;
01264   auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;
01265 
01266   const Loop *L = AR->getLoop();
01267   const SCEV *Start = AR->getStart();
01268   const SCEV *Step = AR->getStepRecurrence(*SE);
01269 
01270   // Check for a simple looking step prior to loop entry.
01271   const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start);
01272   if (!SA)
01273     return nullptr;
01274 
01275   // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV
01276   // subtraction is expensive. For this purpose, perform a quick and dirty
01277   // difference, by checking for Step in the operand list.
01278   SmallVector<const SCEV *, 4> DiffOps;
01279   for (const SCEV *Op : SA->operands())
01280     if (Op != Step)
01281       DiffOps.push_back(Op);
01282 
01283   if (DiffOps.size() == SA->getNumOperands())
01284     return nullptr;
01285 
01286   // Try to prove `WrapType` (SCEV::FlagNSW or SCEV::FlagNUW) on `PreStart` +
01287   // `Step`:
01288 
01289   // 1. NSW/NUW flags on the step increment.
01290   const SCEV *PreStart = SE->getAddExpr(DiffOps, SA->getNoWrapFlags());
01291   const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>(
01292       SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap));
01293 
01294   // "{S,+,X} is <nsw>/<nuw>" and "the backedge is taken at least once" implies
01295   // "S+X does not sign/unsign-overflow".
01296   //
01297 
01298   const SCEV *BECount = SE->getBackedgeTakenCount(L);
01299   if (PreAR && PreAR->getNoWrapFlags(WrapType) &&
01300       !isa<SCEVCouldNotCompute>(BECount) && SE->isKnownPositive(BECount))
01301     return PreStart;
01302 
01303   // 2. Direct overflow check on the step operation's expression.
01304   unsigned BitWidth = SE->getTypeSizeInBits(AR->getType());
01305   Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2);
01306   const SCEV *OperandExtendedStart =
01307       SE->getAddExpr((SE->*GetExtendExpr)(PreStart, WideTy),
01308                      (SE->*GetExtendExpr)(Step, WideTy));
01309   if ((SE->*GetExtendExpr)(Start, WideTy) == OperandExtendedStart) {
01310     if (PreAR && AR->getNoWrapFlags(WrapType)) {
01311       // If we know `AR` == {`PreStart`+`Step`,+,`Step`} is `WrapType` (FlagNSW
01312       // or FlagNUW) and that `PreStart` + `Step` is `WrapType` too, then
01313       // `PreAR` == {`PreStart`,+,`Step`} is also `WrapType`.  Cache this fact.
01314       const_cast<SCEVAddRecExpr *>(PreAR)->setNoWrapFlags(WrapType);
01315     }
01316     return PreStart;
01317   }
01318 
01319   // 3. Loop precondition.
01320   ICmpInst::Predicate Pred;
01321   const SCEV *OverflowLimit =
01322       ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(Step, &Pred, SE);
01323 
01324   if (OverflowLimit &&
01325       SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) {
01326     return PreStart;
01327   }
01328   return nullptr;
01329 }
01330 
01331 // Get the normalized zero or sign extended expression for this AddRec's Start.
01332 template <typename ExtendOpTy>
01333 static const SCEV *getExtendAddRecStart(const SCEVAddRecExpr *AR, Type *Ty,
01334                                         ScalarEvolution *SE) {
01335   auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;
01336 
01337   const SCEV *PreStart = getPreStartForExtend<ExtendOpTy>(AR, Ty, SE);
01338   if (!PreStart)
01339     return (SE->*GetExtendExpr)(AR->getStart(), Ty);
01340 
01341   return SE->getAddExpr((SE->*GetExtendExpr)(AR->getStepRecurrence(*SE), Ty),
01342                         (SE->*GetExtendExpr)(PreStart, Ty));
01343 }
01344 
01345 // Try to prove away overflow by looking at "nearby" add recurrences.  A
01346 // motivating example for this rule: if we know `{0,+,4}` is `ult` `-1` and it
01347 // does not itself wrap then we can conclude that `{1,+,4}` is `nuw`.
01348 //
01349 // Formally:
01350 //
01351 //     {S,+,X} == {S-T,+,X} + T
01352 //  => Ext({S,+,X}) == Ext({S-T,+,X} + T)
01353 //
01354 // If ({S-T,+,X} + T) does not overflow  ... (1)
01355 //
01356 //  RHS == Ext({S-T,+,X} + T) == Ext({S-T,+,X}) + Ext(T)
01357 //
01358 // If {S-T,+,X} does not overflow  ... (2)
01359 //
01360 //  RHS == Ext({S-T,+,X}) + Ext(T) == {Ext(S-T),+,Ext(X)} + Ext(T)
01361 //      == {Ext(S-T)+Ext(T),+,Ext(X)}
01362 //
01363 // If (S-T)+T does not overflow  ... (3)
01364 //
01365 //  RHS == {Ext(S-T)+Ext(T),+,Ext(X)} == {Ext(S-T+T),+,Ext(X)}
01366 //      == {Ext(S),+,Ext(X)} == LHS
01367 //
01368 // Thus, if (1), (2) and (3) are true for some T, then
01369 //   Ext({S,+,X}) == {Ext(S),+,Ext(X)}
01370 //
01371 // (3) is implied by (1) -- "(S-T)+T does not overflow" is simply "({S-T,+,X}+T)
01372 // does not overflow" restricted to the 0th iteration.  Therefore we only need
01373 // to check for (1) and (2).
01374 //
01375 // In the current context, S is `Start`, X is `Step`, Ext is `ExtendOpTy` and T
01376 // is `Delta` (defined below).
01377 //
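      // Plugging the motivating example into this scheme, with Ext = zext:
      // take S = 1, X = 4, T = 1.  Condition (2) is "{0,+,4} is <nuw>", which
      // was assumed; condition (1) holds because "{0,+,4} ult -1" bounds every
      // value of {0,+,4} by UINT_MAX - 1, so adding T = 1 cannot wrap.  The
      // chain of equalities then yields zext({1,+,4}) == {zext(1),+,zext(4)},
      // i.e. {1,+,4} is <nuw>.
      //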
01378 template <typename ExtendOpTy>
01379 bool ScalarEvolution::proveNoWrapByVaryingStart(const SCEV *Start,
01380                                                 const SCEV *Step,
01381                                                 const Loop *L) {
01382   auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType;
01383 
01384   // We restrict `Start` to a constant to prevent SCEV from spending too much
01385   // time here.  It is correct (but more expensive) to continue with a
01386   // non-constant `Start` and do a general SCEV subtraction to compute
01387   // `PreStart` below.
01388   //
01389   const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start);
01390   if (!StartC)
01391     return false;
01392 
01393   APInt StartAI = StartC->getValue()->getValue();
01394 
01395   for (unsigned Delta : {-2, -1, 1, 2}) {
01396     const SCEV *PreStart = getConstant(StartAI - Delta);
01397 
01398     // Give up if we don't already have the add recurrence we need because
01399     // actually constructing an add recurrence is relatively expensive.
01400     const SCEVAddRecExpr *PreAR = [&]() {
01401       FoldingSetNodeID ID;
01402       ID.AddInteger(scAddRecExpr);
01403       ID.AddPointer(PreStart);
01404       ID.AddPointer(Step);
01405       ID.AddPointer(L);
01406       void *IP = nullptr;
01407       return static_cast<SCEVAddRecExpr *>(
01408           this->UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
01409     }();
01410 
01411     if (PreAR && PreAR->getNoWrapFlags(WrapType)) {  // proves (2)
01412       const SCEV *DeltaS = getConstant(StartC->getType(), Delta);
01413       ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
01414       const SCEV *Limit = ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(
01415           DeltaS, &Pred, this);
01416       if (Limit && isKnownPredicate(Pred, PreAR, Limit))  // proves (1)
01417         return true;
01418     }
01419   }
01420 
01421   return false;
01422 }
01423 
01424 const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
01425                                                Type *Ty) {
01426   assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
01427          "This is not an extending conversion!");
01428   assert(isSCEVable(Ty) &&
01429          "This is not a conversion to a SCEVable type!");
01430   Ty = getEffectiveSCEVType(Ty);
01431 
01432   // Fold if the operand is constant.
01433   if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
01434     return getConstant(
01435       cast<ConstantInt>(ConstantExpr::getZExt(SC->getValue(), Ty)));
01436 
01437   // zext(zext(x)) --> zext(x)
01438   if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
01439     return getZeroExtendExpr(SZ->getOperand(), Ty);
01440 
01441   // Before doing any expensive analysis, check to see if we've already
01442   // computed a SCEV for this Op and Ty.
01443   FoldingSetNodeID ID;
01444   ID.AddInteger(scZeroExtend);
01445   ID.AddPointer(Op);
01446   ID.AddPointer(Ty);
01447   void *IP = nullptr;
01448   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
01449 
01450   // zext(trunc(x)) --> zext(x) or x or trunc(x)
01451   if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
01452     // It's possible the bits taken off by the truncate were all zero bits. If
01453     // so, we should be able to simplify this further.
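          // For instance, if %x is an i16 known to lie in [0, 200), the
          // truncate to i8 discards only zero bits; the range check below then
          // succeeds and zext(trunc(%x)) becomes a direct zext of %x.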
01454     const SCEV *X = ST->getOperand();
01455     ConstantRange CR = getUnsignedRange(X);
01456     unsigned TruncBits = getTypeSizeInBits(ST->getType());
01457     unsigned NewBits = getTypeSizeInBits(Ty);
01458     if (CR.truncate(TruncBits).zeroExtend(NewBits).contains(
01459             CR.zextOrTrunc(NewBits)))
01460       return getTruncateOrZeroExtend(X, Ty);
01461   }
01462 
01463   // If the input value is a chrec scev, and we can prove that the value
01464   // did not overflow the old, smaller, value, we can zero extend all of the
01465   // operands (often constants).  This allows analysis of something like
01466   // this:  for (unsigned char X = 0; X < 100; ++X) { int Y = X; }
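        // In that example X is the i8 recurrence {0,+,1}<%loop> with a maximum
        // backedge-taken count of 99; since 0 + 1*99 == 99 fits in i8 with no
        // unsigned wrap, the logic below rewrites zext({0,+,1}) as the wider
        // recurrence {0,+,1}<nuw> rather than keeping an opaque zext.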
01467   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
01468     if (AR->isAffine()) {
01469       const SCEV *Start = AR->getStart();
01470       const SCEV *Step = AR->getStepRecurrence(*this);
01471       unsigned BitWidth = getTypeSizeInBits(AR->getType());
01472       const Loop *L = AR->getLoop();
01473 
01474       // If we have special knowledge that this addrec won't overflow,
01475       // we don't need to do any further analysis.
01476       if (AR->getNoWrapFlags(SCEV::FlagNUW))
01477         return getAddRecExpr(
01478             getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
01479             getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
01480 
01481       // Check whether the backedge-taken count is SCEVCouldNotCompute.
01482       // Note that this serves two purposes: It filters out loops that are
01483       // simply not analyzable, and it covers the case where this code is
01484       // being called from within backedge-taken count analysis, such that
01485       // attempting to ask for the backedge-taken count would likely result
01486       // in infinite recursion. In the latter case, the analysis code will
01487       // cope with a conservative value, and it will take care to purge
01488       // that value once it has finished.
01489       const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
01490       if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
01491         // Manually compute the final value for AR, checking for
01492         // overflow.
01493 
01494         // Check whether the backedge-taken count can be losslessly cast to
01495         // the addrec's type. The count is always unsigned.
01496         const SCEV *CastedMaxBECount =
01497           getTruncateOrZeroExtend(MaxBECount, Start->getType());
01498         const SCEV *RecastedMaxBECount =
01499           getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
01500         if (MaxBECount == RecastedMaxBECount) {
01501           Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
01502           // Check whether Start+Step*MaxBECount has no unsigned overflow.
01503           const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step);
01504           const SCEV *ZAdd = getZeroExtendExpr(getAddExpr(Start, ZMul), WideTy);
01505           const SCEV *WideStart = getZeroExtendExpr(Start, WideTy);
01506           const SCEV *WideMaxBECount =
01507             getZeroExtendExpr(CastedMaxBECount, WideTy);
01508           const SCEV *OperandExtendedAdd =
01509             getAddExpr(WideStart,
01510                        getMulExpr(WideMaxBECount,
01511                                   getZeroExtendExpr(Step, WideTy)));
01512           if (ZAdd == OperandExtendedAdd) {
01513             // Cache knowledge of AR NUW, which is propagated to this AddRec.
01514             const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
01515             // Return the expression with the addrec on the outside.
01516             return getAddRecExpr(
01517                 getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
01518                 getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
01519           }
01520           // Similar to above, only this time treat the step value as signed.
01521           // This covers loops that count down.
01522           OperandExtendedAdd =
01523             getAddExpr(WideStart,
01524                        getMulExpr(WideMaxBECount,
01525                                   getSignExtendExpr(Step, WideTy)));
01526           if (ZAdd == OperandExtendedAdd) {
01527             // Cache knowledge of AR NW, which is propagated to this AddRec.
01528             // Negative step causes unsigned wrap, but it still can't self-wrap.
01529             const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
01530             // Return the expression with the addrec on the outside.
01531             return getAddRecExpr(
01532                 getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
01533                 getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
01534           }
01535         }
01536 
01537         // If the backedge is guarded by a comparison with the pre-inc value
01538         // the addrec is safe. Also, if the entry is guarded by a comparison
01539         // with the start value and the backedge is guarded by a comparison
01540         // with the post-inc value, the addrec is safe.
01541         if (isKnownPositive(Step)) {
01542           const SCEV *N = getConstant(APInt::getMinValue(BitWidth) -
01543                                       getUnsignedRange(Step).getUnsignedMax());
01544           if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) ||
01545               (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_ULT, Start, N) &&
01546                isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT,
01547                                            AR->getPostIncExpr(*this), N))) {
01548             // Cache knowledge of AR NUW, which is propagated to this AddRec.
01549             const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
01550             // Return the expression with the addrec on the outside.
01551             return getAddRecExpr(
01552                 getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
01553                 getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
01554           }
01555         } else if (isKnownNegative(Step)) {
01556           const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) -
01557                                       getSignedRange(Step).getSignedMin());
01558           if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) ||
01559               (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_UGT, Start, N) &&
01560                isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT,
01561                                            AR->getPostIncExpr(*this), N))) {
01562             // Cache knowledge of AR NW, which is propagated to this AddRec.
01563             // Negative step causes unsigned wrap, but it still can't self-wrap.
01564             const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
01565             // Return the expression with the addrec on the outside.
01566             return getAddRecExpr(
01567                 getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
01568                 getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
01569           }
01570         }
01571       }
01572 
01573       if (proveNoWrapByVaryingStart<SCEVZeroExtendExpr>(Start, Step, L)) {
01574         const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
01575         return getAddRecExpr(
01576             getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
01577             getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
01578       }
01579     }
01580 
01581   // The cast wasn't folded; create an explicit cast node.
01582   // Recompute the insert position, as it may have been invalidated.
01583   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
01584   SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator),
01585                                                    Op, Ty);
01586   UniqueSCEVs.InsertNode(S, IP);
01587   return S;
01588 }
01589 
01590 const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
01591                                                Type *Ty) {
01592   assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
01593          "This is not an extending conversion!");
01594   assert(isSCEVable(Ty) &&
01595          "This is not a conversion to a SCEVable type!");
01596   Ty = getEffectiveSCEVType(Ty);
01597 
01598   // Fold if the operand is constant.
01599   if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
01600     return getConstant(
01601       cast<ConstantInt>(ConstantExpr::getSExt(SC->getValue(), Ty)));
01602 
01603   // sext(sext(x)) --> sext(x)
01604   if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
01605     return getSignExtendExpr(SS->getOperand(), Ty);
01606 
01607   // sext(zext(x)) --> zext(x)
01608   if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
01609     return getZeroExtendExpr(SZ->getOperand(), Ty);
01610 
01611   // Before doing any expensive analysis, check to see if we've already
01612   // computed a SCEV for this Op and Ty.
01613   FoldingSetNodeID ID;
01614   ID.AddInteger(scSignExtend);
01615   ID.AddPointer(Op);
01616   ID.AddPointer(Ty);
01617   void *IP = nullptr;
01618   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
01619 
01620   // If the input value is provably positive, build a zext instead.
01621   if (isKnownNonNegative(Op))
01622     return getZeroExtendExpr(Op, Ty);
01623 
01624   // sext(trunc(x)) --> sext(x) or x or trunc(x)
01625   if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
01626     // It's possible the bits taken off by the truncate were all sign bits. If
01627     // so, we should be able to simplify this further.
01628     const SCEV *X = ST->getOperand();
01629     ConstantRange CR = getSignedRange(X);
01630     unsigned TruncBits = getTypeSizeInBits(ST->getType());
01631     unsigned NewBits = getTypeSizeInBits(Ty);
01632     if (CR.truncate(TruncBits).signExtend(NewBits).contains(
01633             CR.sextOrTrunc(NewBits)))
01634       return getTruncateOrSignExtend(X, Ty);
01635   }
01636 
01637   // sext(C1 + (C2 * x)) --> C1 + sext(C2 * x) if C1 < C2
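        //   e.g., sext i8 (1 + 4 * %x): "4 * %x" has its two low bits clear, and
        //   0 < 1 < 4 with 4 a power of two, so adding 1 only fills those zero
        //   bits and can never carry into the sign bit; the split is lossless.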
01638   if (auto SA = dyn_cast<SCEVAddExpr>(Op)) {
01639     if (SA->getNumOperands() == 2) {
01640       auto SC1 = dyn_cast<SCEVConstant>(SA->getOperand(0));
01641       auto SMul = dyn_cast<SCEVMulExpr>(SA->getOperand(1));
01642       if (SMul && SC1) {
01643         if (auto SC2 = dyn_cast<SCEVConstant>(SMul->getOperand(0))) {
01644           const APInt &C1 = SC1->getValue()->getValue();
01645           const APInt &C2 = SC2->getValue()->getValue();
01646           if (C1.isStrictlyPositive() && C2.isStrictlyPositive() &&
01647               C2.ugt(C1) && C2.isPowerOf2())
01648             return getAddExpr(getSignExtendExpr(SC1, Ty),
01649                               getSignExtendExpr(SMul, Ty));
01650         }
01651       }
01652     }
01653   }
01654   // If the input value is a chrec scev, and we can prove that the value
01655   // did not overflow the old, smaller, value, we can sign extend all of the
01656   // operands (often constants).  This allows analysis of something like
01657   // this:  for (signed char X = 0; X < 100; ++X) { int Y = X; }
01658   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
01659     if (AR->isAffine()) {
01660       const SCEV *Start = AR->getStart();
01661       const SCEV *Step = AR->getStepRecurrence(*this);
01662       unsigned BitWidth = getTypeSizeInBits(AR->getType());
01663       const Loop *L = AR->getLoop();
01664 
01665       // If we have special knowledge that this addrec won't overflow,
01666       // we don't need to do any further analysis.
01667       if (AR->getNoWrapFlags(SCEV::FlagNSW))
01668         return getAddRecExpr(
01669             getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
01670             getSignExtendExpr(Step, Ty), L, SCEV::FlagNSW);
01671 
01672       // Check whether the backedge-taken count is SCEVCouldNotCompute.
01673       // Note that this serves two purposes: It filters out loops that are
01674       // simply not analyzable, and it covers the case where this code is
01675       // being called from within backedge-taken count analysis, such that
01676       // attempting to ask for the backedge-taken count would likely result
01677       // in infinite recursion. In the latter case, the analysis code will
01678       // cope with a conservative value, and it will take care to purge
01679       // that value once it has finished.
01680       const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
01681       if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
01682         // Manually compute the final value for AR, checking for
01683         // overflow.
01684 
01685         // Check whether the backedge-taken count can be losslessly cast to
01686         // the addrec's type. The count is always unsigned.
01687         const SCEV *CastedMaxBECount =
01688           getTruncateOrZeroExtend(MaxBECount, Start->getType());
01689         const SCEV *RecastedMaxBECount =
01690           getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
01691         if (MaxBECount == RecastedMaxBECount) {
01692           Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
01693           // Check whether Start+Step*MaxBECount has no signed overflow.
01694           const SCEV *SMul = getMulExpr(CastedMaxBECount, Step);
01695           const SCEV *SAdd = getSignExtendExpr(getAddExpr(Start, SMul), WideTy);
01696           const SCEV *WideStart = getSignExtendExpr(Start, WideTy);
01697           const SCEV *WideMaxBECount =
01698             getZeroExtendExpr(CastedMaxBECount, WideTy);
01699           const SCEV *OperandExtendedAdd =
01700             getAddExpr(WideStart,
01701                        getMulExpr(WideMaxBECount,
01702                                   getSignExtendExpr(Step, WideTy)));
01703           if (SAdd == OperandExtendedAdd) {
01704             // Cache knowledge of AR NSW, which is propagated to this AddRec.
01705             const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
01706             // Return the expression with the addrec on the outside.
01707             return getAddRecExpr(
01708                 getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
01709                 getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
01710           }
01711           // Similar to above, only this time treat the step value as unsigned.
01712           // This covers loops that count up with an unsigned step.
01713           OperandExtendedAdd =
01714             getAddExpr(WideStart,
01715                        getMulExpr(WideMaxBECount,
01716                                   getZeroExtendExpr(Step, WideTy)));
01717           if (SAdd == OperandExtendedAdd) {
01718             // If AR wraps around then
01719             //
01720             //    abs(Step) * MaxBECount > unsigned-max(AR->getType())
01721             // => SAdd != OperandExtendedAdd
01722             //
01723             // Thus (AR is not NW => SAdd != OperandExtendedAdd) <=>
01724             // (SAdd == OperandExtendedAdd => AR is NW)
01725 
01726             const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
01727 
01728             // Return the expression with the addrec on the outside.
01729             return getAddRecExpr(
01730                 getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
01731                 getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
01732           }
01733         }
01734 
01735         // If the backedge is guarded by a comparison with the pre-inc value
01736         // the addrec is safe. Also, if the entry is guarded by a comparison
01737         // with the start value and the backedge is guarded by a comparison
01738         // with the post-inc value, the addrec is safe.
01739         ICmpInst::Predicate Pred;
01740         const SCEV *OverflowLimit =
01741             getSignedOverflowLimitForStep(Step, &Pred, this);
01742         if (OverflowLimit &&
01743             (isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) ||
01744              (isLoopEntryGuardedByCond(L, Pred, Start, OverflowLimit) &&
01745               isLoopBackedgeGuardedByCond(L, Pred, AR->getPostIncExpr(*this),
01746                                           OverflowLimit)))) {
01747           // Cache knowledge of AR NSW, then propagate NSW to the wide AddRec.
01748           const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
01749           return getAddRecExpr(
01750               getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
01751               getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
01752         }
01753       }
01754       // If Start and Step are constants, check if we can apply this
01755       // transformation:
01756       // sext{C1,+,C2} --> C1 + sext{0,+,C2} if C1 < C2
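            //   e.g., sext i8 {3,+,8}  -->  3 + sext {0,+,8}  (3 < 8, 8 a power of 2)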
01757       auto SC1 = dyn_cast<SCEVConstant>(Start);
01758       auto SC2 = dyn_cast<SCEVConstant>(Step);
01759       if (SC1 && SC2) {
01760         const APInt &C1 = SC1->getValue()->getValue();
01761         const APInt &C2 = SC2->getValue()->getValue();
01762         if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && C2.ugt(C1) &&
01763             C2.isPowerOf2()) {
01764           Start = getSignExtendExpr(Start, Ty);
01765           const SCEV *NewAR = getAddRecExpr(getConstant(AR->getType(), 0), Step,
01766                                             L, AR->getNoWrapFlags());
01767           return getAddExpr(Start, getSignExtendExpr(NewAR, Ty));
01768         }
01769       }
01770 
01771       if (proveNoWrapByVaryingStart<SCEVSignExtendExpr>(Start, Step, L)) {
01772         const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
01773         return getAddRecExpr(
01774             getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
01775             getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
01776       }
01777     }
01778 
01779   // The cast wasn't folded; create an explicit cast node.
01780   // Recompute the insert position, as it may have been invalidated.
01781   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
01782   SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator),
01783                                                    Op, Ty);
01784   UniqueSCEVs.InsertNode(S, IP);
01785   return S;
01786 }
01787 
01788 /// getAnyExtendExpr - Return a SCEV for the given operand extended with
01789 /// unspecified bits out to the given type.
01790 ///
01791 const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
01792                                               Type *Ty) {
01793   assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
01794          "This is not an extending conversion!");
01795   assert(isSCEVable(Ty) &&
01796          "This is not a conversion to a SCEVable type!");
01797   Ty = getEffectiveSCEVType(Ty);
01798 
01799   // Sign-extend negative constants.
01800   if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
01801     if (SC->getValue()->getValue().isNegative())
01802       return getSignExtendExpr(Op, Ty);
01803 
01804   // Peel off a truncate cast.
01805   if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Op)) {
01806     const SCEV *NewOp = T->getOperand();
01807     if (getTypeSizeInBits(NewOp->getType()) < getTypeSizeInBits(Ty))
01808       return getAnyExtendExpr(NewOp, Ty);
01809     return getTruncateOrNoop(NewOp, Ty);
01810   }
01811 
01812   // Next try a zext cast. If the cast is folded, use it.
01813   const SCEV *ZExt = getZeroExtendExpr(Op, Ty);
01814   if (!isa<SCEVZeroExtendExpr>(ZExt))
01815     return ZExt;
01816 
01817   // Next try a sext cast. If the cast is folded, use it.
01818   const SCEV *SExt = getSignExtendExpr(Op, Ty);
01819   if (!isa<SCEVSignExtendExpr>(SExt))
01820     return SExt;
01821 
01822   // Force the cast to be folded into the operands of an addrec.
01823   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) {
01824     SmallVector<const SCEV *, 4> Ops;
01825     for (const SCEV *Op : AR->operands())
01826       Ops.push_back(getAnyExtendExpr(Op, Ty));
01827     return getAddRecExpr(Ops, AR->getLoop(), SCEV::FlagNW);
01828   }
01829 
01830   // If the expression is obviously signed, use the sext cast value.
01831   if (isa<SCEVSMaxExpr>(Op))
01832     return SExt;
01833 
01834   // Absent any other information, use the zext cast value.
01835   return ZExt;
01836 }
01837 
01838 /// CollectAddOperandsWithScales - Process the given Ops list, which is
01839 /// a list of operands to be added under the given scale, and update the
01840 /// given map. This is a helper function for getAddExpr. As an example of
01841 /// what it does, given a sequence of operands that would form an add
01842 /// expression like this:
01843 ///
01844 ///    m + n + 13 + (A * (o + p + (B * (q + m + 29)))) + r + (-1 * r)
01845 ///
01846 /// where A and B are constants, update the map with these values:
01847 ///
01848 ///    (m, 1+A*B), (n, 1), (o, A), (p, A), (q, A*B), (r, 0)
01849 ///
01850 /// and add 13 + A*B*29 to AccumulatedConstant.
01851 /// This will allow getAddExpr to produce this:
01852 ///
01853 ///    13+A*B*29 + n + (m * (1+A*B)) + ((o + p) * A) + (q * A*B)
01854 ///
01855 /// This form often exposes folding opportunities that are hidden in
01856 /// the original operand list.
01857 ///
01858 /// Return true iff it appears that any interesting folding opportunities
01859 /// may be exposed. This helps getAddExpr short-circuit extra work in
01860 /// the common case where no interesting opportunities are present, and
01861 /// is also used as a check to avoid infinite recursion.
01862 ///
01863 static bool
01864 CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,
01865                              SmallVectorImpl<const SCEV *> &NewOps,
01866                              APInt &AccumulatedConstant,
01867                              const SCEV *const *Ops, size_t NumOperands,
01868                              const APInt &Scale,
01869                              ScalarEvolution &SE) {
01870   bool Interesting = false;
01871 
01872   // Iterate over the add operands. They are sorted, with constants first.
01873   unsigned i = 0;
01874   while (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
01875     ++i;
01876     // Pull a buried constant out to the outside.
01877     if (Scale != 1 || AccumulatedConstant != 0 || C->getValue()->isZero())
01878       Interesting = true;
01879     AccumulatedConstant += Scale * C->getValue()->getValue();
01880   }
01881 
01882   // Next comes everything else. We're especially interested in multiplies
01883   // here, but they're in the middle, so just visit the rest with one loop.
01884   for (; i != NumOperands; ++i) {
01885     const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[i]);
01886     if (Mul && isa<SCEVConstant>(Mul->getOperand(0))) {
01887       APInt NewScale =
01888         Scale * cast<SCEVConstant>(Mul->getOperand(0))->getValue()->getValue();
01889       if (Mul->getNumOperands() == 2 && isa<SCEVAddExpr>(Mul->getOperand(1))) {
01890         // A multiplication of a constant with another add; recurse.
01891         const SCEVAddExpr *Add = cast<SCEVAddExpr>(Mul->getOperand(1));
01892         Interesting |=
01893           CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
01894                                        Add->op_begin(), Add->getNumOperands(),
01895                                        NewScale, SE);
01896       } else {
01897         // A multiplication of a constant with some other value. Update
01898         // the map.
01899         SmallVector<const SCEV *, 4> MulOps(Mul->op_begin()+1, Mul->op_end());
01900         const SCEV *Key = SE.getMulExpr(MulOps);
01901         std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair =
01902           M.insert(std::make_pair(Key, NewScale));
01903         if (Pair.second) {
01904           NewOps.push_back(Pair.first->first);
01905         } else {
01906           Pair.first->second += NewScale;
01907           // The map already had an entry for this value, which may indicate
01908           // a folding opportunity.
01909           Interesting = true;
01910         }
01911       }
01912     } else {
01913       // An ordinary operand. Update the map.
01914       std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair =
01915         M.insert(std::make_pair(Ops[i], Scale));
01916       if (Pair.second) {
01917         NewOps.push_back(Pair.first->first);
01918       } else {
01919         Pair.first->second += Scale;
01920         // The map already had an entry for this value, which may indicate
01921         // a folding opportunity.
01922         Interesting = true;
01923       }
01924     }
01925   }
01926 
01927   return Interesting;
01928 }
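      //
      // For example, feeding (2 + 3 + %n + 2 * (%n + %q)) through this routine
      // at Scale = 1 accumulates the constant 5 and maps %n to 1; recursing
      // into the multiply with NewScale = 2 then maps %q to 2 and bumps %n to
      // 1 + 2 = 3, so the caller can rebuild the sum as 5 + 3*%n + 2*%q.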
01929 
01930 namespace {
01931   struct APIntCompare {
01932     bool operator()(const APInt &LHS, const APInt &RHS) const {
01933       return LHS.ult(RHS);
01934     }
01935   };
01936 }
01937 
01938 // We're trying to construct a SCEV of type `Type' with `Ops' as operands and
01939 // `OldFlags' as can't-wrap behavior.  Infer a more aggressive set of
01940 // can't-overflow flags for the operation if possible.
01941 static SCEV::NoWrapFlags
01942 StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type,
01943                       const SmallVectorImpl<const SCEV *> &Ops,
01944                       SCEV::NoWrapFlags OldFlags) {
01945   using namespace std::placeholders;
01946 
01947   bool CanAnalyze =
01948       Type == scAddExpr || Type == scAddRecExpr || Type == scMulExpr;
01949   (void)CanAnalyze;
01950   assert(CanAnalyze && "don't call from other places!");
01951 
01952   int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
01953   SCEV::NoWrapFlags SignOrUnsignWrap =
01954       ScalarEvolution::maskFlags(OldFlags, SignOrUnsignMask);
01955 
01956   // If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
01957   auto IsKnownNonNegative =
01958     std::bind(std::mem_fn(&ScalarEvolution::isKnownNonNegative), SE, _1);
01959 
01960   if (SignOrUnsignWrap == SCEV::FlagNSW &&
01961       std::all_of(Ops.begin(), Ops.end(), IsKnownNonNegative))
01962     return ScalarEvolution::setFlags(OldFlags,
01963                                      (SCEV::NoWrapFlags)SignOrUnsignMask);
01964 
01965   return OldFlags;
01966 }
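      //
      // For example, if %a and %b are i32 values both known to be non-negative
      // (say, zero-extended from i16), an add requested with only <nsw> is
      // strengthened here to <nuw><nsw>: a sum of non-negative values that
      // cannot overflow the signed range cannot wrap the unsigned range either.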
01967 
01968 /// getAddExpr - Get a canonical add expression, or something simpler if
01969 /// possible.
01970 const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
01971                                         SCEV::NoWrapFlags Flags) {
01972   assert(!(Flags & ~(SCEV::FlagNUW | SCEV::FlagNSW)) &&
01973          "only nuw or nsw allowed");
01974   assert(!Ops.empty() && "Cannot get empty add!");
01975   if (Ops.size() == 1) return Ops[0];
01976 #ifndef NDEBUG
01977   Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
01978   for (unsigned i = 1, e = Ops.size(); i != e; ++i)
01979     assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
01980            "SCEVAddExpr operand types don't match!");
01981 #endif
01982 
01983   Flags = StrengthenNoWrapFlags(this, scAddExpr, Ops, Flags);
01984 
01985   // Sort by complexity; this groups all similar expression types together.
01986   GroupByComplexity(Ops, LI);
01987 
01988   // If there are any constants, fold them together.
01989   unsigned Idx = 0;
01990   if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
01991     ++Idx;
01992     assert(Idx < Ops.size());
01993     while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
01994       // We found two constants, fold them together!
01995       Ops[0] = getConstant(LHSC->getValue()->getValue() +
01996                            RHSC->getValue()->getValue());
01997       if (Ops.size() == 2) return Ops[0];
01998       Ops.erase(Ops.begin()+1);  // Erase the folded element
01999       LHSC = cast<SCEVConstant>(Ops[0]);
02000     }
02001 
02002     // If we are left with a constant zero being added, strip it off.
02003     if (LHSC->getValue()->isZero()) {
02004       Ops.erase(Ops.begin());
02005       --Idx;
02006     }
02007 
02008     if (Ops.size() == 1) return Ops[0];
02009   }
02010 
02011   // Okay, check to see if the same value occurs in the operand list more than
02012   // once.  If so, merge them together into a multiply expression.  Since we
02013   // sorted the list, these values are required to be adjacent.
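        //   e.g., (%x + %y + %y + %y)  -->  (%x + 3 * %y)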
02014   Type *Ty = Ops[0]->getType();
02015   bool FoundMatch = false;
02016   for (unsigned i = 0, e = Ops.size(); i != e-1; ++i)
02017     if (Ops[i] == Ops[i+1]) {      //  X + Y + Y  -->  X + Y*2
02018       // Scan ahead to count how many equal operands there are.
02019       unsigned Count = 2;
02020       while (i+Count != e && Ops[i+Count] == Ops[i])
02021         ++Count;
02022       // Merge the values into a multiply.
02023       const SCEV *Scale = getConstant(Ty, Count);
02024       const SCEV *Mul = getMulExpr(Scale, Ops[i]);
02025       if (Ops.size() == Count)
02026         return Mul;
02027       Ops[i] = Mul;
02028       Ops.erase(Ops.begin()+i+1, Ops.begin()+i+Count);
02029       --i; e -= Count - 1;
02030       FoundMatch = true;
02031     }
02032   if (FoundMatch)
02033     return getAddExpr(Ops, Flags);
02034 
02035   // Check for truncates. If all the operands are truncated from the same
02036   // type, see if factoring out the truncate would permit the result to be
02037   // folded; e.g., trunc(x) + m*trunc(n) --> trunc(x + trunc(m)*n)
02038   // if the contents of the resulting outer trunc fold to something simple.
02039   for (; Idx < Ops.size() && isa<SCEVTruncateExpr>(Ops[Idx]); ++Idx) {
02040     const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(Ops[Idx]);
02041     Type *DstType = Trunc->getType();
02042     Type *SrcType = Trunc->getOperand()->getType();
02043     SmallVector<const SCEV *, 8> LargeOps;
02044     bool Ok = true;
02045     // Check all the operands to see if they can be represented in the
02046     // source type of the truncate.
02047     for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
02048       if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Ops[i])) {
02049         if (T->getOperand()->getType() != SrcType) {
02050           Ok = false;
02051           break;
02052         }
02053         LargeOps.push_back(T->getOperand());
02054       } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
02055         LargeOps.push_back(getAnyExtendExpr(C, SrcType));
02056       } else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Ops[i])) {
02057         SmallVector<const SCEV *, 8> LargeMulOps;
02058         for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) {
02059           if (const SCEVTruncateExpr *T =
02060                 dyn_cast<SCEVTruncateExpr>(M->getOperand(j))) {
02061             if (T->getOperand()->getType() != SrcType) {
02062               Ok = false;
02063               break;
02064             }
02065             LargeMulOps.push_back(T->getOperand());
02066           } else if (const SCEVConstant *C =
02067                        dyn_cast<SCEVConstant>(M->getOperand(j))) {
02068             LargeMulOps.push_back(getAnyExtendExpr(C, SrcType));
02069           } else {
02070             Ok = false;
02071             break;
02072           }
02073         }
02074         if (Ok)
02075           LargeOps.push_back(getMulExpr(LargeMulOps));
02076       } else {
02077         Ok = false;
02078         break;
02079       }
02080     }
02081     if (Ok) {
02082       // Evaluate the expression in the larger type.
02083       const SCEV *Fold = getAddExpr(LargeOps, Flags);
02084       // If it folds to something simple, use it. Otherwise, don't.
02085       if (isa<SCEVConstant>(Fold) || isa<SCEVUnknown>(Fold))
02086         return getTruncateExpr(Fold, DstType);
02087     }
02088   }
02089 
02090   // Skip past any other cast SCEVs.
02091   while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddExpr)
02092     ++Idx;
02093 
02094   // If there are add operands they would be next.
02095   if (Idx < Ops.size()) {
02096     bool DeletedAdd = false;
02097     while (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[Idx])) {
02098       // If we have an add, expand the add operands onto the end of the operands
02099       // list.
02100       Ops.erase(Ops.begin()+Idx);
02101       Ops.append(Add->op_begin(), Add->op_end());
02102       DeletedAdd = true;
02103     }
02104 
02105     // If we deleted at least one add, we added operands to the end of the list,
02106     // and they are not necessarily sorted.  Recurse to re-sort and re-simplify
02107     // any operands we just acquired.
02108     if (DeletedAdd)
02109       return getAddExpr(Ops);
02110   }
02111 
02112   // Skip over the add expression until we get to a multiply.
02113   while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
02114     ++Idx;
02115 
02116   // Check to see if there are any folding opportunities present with
02117   // operands multiplied by constant values.
02118   if (Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx])) {
02119     uint64_t BitWidth = getTypeSizeInBits(Ty);
02120     DenseMap<const SCEV *, APInt> M;
02121     SmallVector<const SCEV *, 8> NewOps;
02122     APInt AccumulatedConstant(BitWidth, 0);
02123     if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
02124                                      Ops.data(), Ops.size(),
02125                                      APInt(BitWidth, 1), *this)) {
02126       // Some interesting folding opportunity is present, so it's worthwhile to
02127       // re-generate the operands list. Group the operands by constant scale,
02128       // to avoid multiplying by the same constant scale multiple times.
02129       std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare> MulOpLists;
02130       for (SmallVectorImpl<const SCEV *>::const_iterator I = NewOps.begin(),
02131            E = NewOps.end(); I != E; ++I)
02132         MulOpLists[M.find(*I)->second].push_back(*I);
02133       // Re-generate the operands list.
02134       Ops.clear();
02135       if (AccumulatedConstant != 0)
02136         Ops.push_back(getConstant(AccumulatedConstant));
02137       for (std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare>::iterator
02138            I = MulOpLists.begin(), E = MulOpLists.end(); I != E; ++I)
02139         if (I->first != 0)
02140           Ops.push_back(getMulExpr(getConstant(I->first),
02141                                    getAddExpr(I->second)));
02142       if (Ops.empty())
02143         return getConstant(Ty, 0);
02144       if (Ops.size() == 1)
02145         return Ops[0];
02146       return getAddExpr(Ops);
02147     }
02148   }
02149 
02150   // If we are adding something to a multiply expression, check whether the
02151   // something is already an operand of the multiply.  If so, merge it into
02152   // the multiply.
02153   for (; Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx]); ++Idx) {
02154     const SCEVMulExpr *Mul = cast<SCEVMulExpr>(Ops[Idx]);
02155     for (unsigned MulOp = 0, e = Mul->getNumOperands(); MulOp != e; ++MulOp) {
02156       const SCEV *MulOpSCEV = Mul->getOperand(MulOp);
02157       if (isa<SCEVConstant>(MulOpSCEV))
02158         continue;
02159       for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp)
02160         if (MulOpSCEV == Ops[AddOp]) {
02161           // Fold W + X + (X * Y * Z)  -->  W + (X * ((Y*Z)+1))
02162           const SCEV *InnerMul = Mul->getOperand(MulOp == 0);
02163           if (Mul->getNumOperands() != 2) {
02164             // If the multiply has more than two operands, we must get the
02165             // Y*Z term.
02166             SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
02167                                                 Mul->op_begin()+MulOp);
02168             MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
02169             InnerMul = getMulExpr(MulOps);
02170           }
02171           const SCEV *One = getConstant(Ty, 1);
02172           const SCEV *AddOne = getAddExpr(One, InnerMul);
02173           const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV);
02174           if (Ops.size() == 2) return OuterMul;
02175           if (AddOp < Idx) {
02176             Ops.erase(Ops.begin()+AddOp);
02177             Ops.erase(Ops.begin()+Idx-1);
02178           } else {
02179             Ops.erase(Ops.begin()+Idx);
02180             Ops.erase(Ops.begin()+AddOp-1);
02181           }
02182           Ops.push_back(OuterMul);
02183           return getAddExpr(Ops);
02184         }
02185 
02186       // Check this multiply against other multiplies being added together.
02187       for (unsigned OtherMulIdx = Idx+1;
02188            OtherMulIdx < Ops.size() && isa<SCEVMulExpr>(Ops[OtherMulIdx]);
02189            ++OtherMulIdx) {
02190         const SCEVMulExpr *OtherMul = cast<SCEVMulExpr>(Ops[OtherMulIdx]);
02191         // If MulOp occurs in OtherMul, we can fold the two multiplies
02192         // together.
02193         for (unsigned OMulOp = 0, e = OtherMul->getNumOperands();
02194              OMulOp != e; ++OMulOp)
02195           if (OtherMul->getOperand(OMulOp) == MulOpSCEV) {
02196             // Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E))
02197             const SCEV *InnerMul1 = Mul->getOperand(MulOp == 0);
02198             if (Mul->getNumOperands() != 2) {
02199               SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
02200                                                   Mul->op_begin()+MulOp);
02201               MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
02202               InnerMul1 = getMulExpr(MulOps);
02203             }
02204             const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0);
02205             if (OtherMul->getNumOperands() != 2) {
02206               SmallVector<const SCEV *, 4> MulOps(OtherMul->op_begin(),
02207                                                   OtherMul->op_begin()+OMulOp);
02208               MulOps.append(OtherMul->op_begin()+OMulOp+1, OtherMul->op_end());
02209               InnerMul2 = getMulExpr(MulOps);
02210             }
02211             const SCEV *InnerMulSum = getAddExpr(InnerMul1,InnerMul2);
02212             const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum);
02213             if (Ops.size() == 2) return OuterMul;
02214             Ops.erase(Ops.begin()+Idx);
02215             Ops.erase(Ops.begin()+OtherMulIdx-1);
02216             Ops.push_back(OuterMul);
02217             return getAddExpr(Ops);
02218           }
02219       }
02220     }
02221   }
02222 
02223   // If there are any add recurrences in the operands list, see if any other
02224   // added values are loop invariant.  If so, we can fold them into the
02225   // recurrence.
02226   while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
02227     ++Idx;
02228 
02229   // Scan over all recurrences, trying to fold loop invariants into them.
02230   for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
02231     // Scan all of the other operands to this add and add them to the vector if
02232     // they are loop invariant w.r.t. the recurrence.
02233     SmallVector<const SCEV *, 8> LIOps;
02234     const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
02235     const Loop *AddRecLoop = AddRec->getLoop();
02236     for (unsigned i = 0, e = Ops.size(); i != e; ++i)
02237       if (isLoopInvariant(Ops[i], AddRecLoop)) {
02238         LIOps.push_back(Ops[i]);
02239         Ops.erase(Ops.begin()+i);
02240         --i; --e;
02241       }
02242 
02243     // If we found some loop invariants, fold them into the recurrence.
02244     if (!LIOps.empty()) {
02245       //  NLI + LI + {Start,+,Step}  -->  NLI + {LI+Start,+,Step}
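            //  e.g., %n + {%a,+,1}<L>  -->  {(%n + %a),+,1}<L>, %n L-invariant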
02246       LIOps.push_back(AddRec->getStart());
02247 
02248       SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
02249                                              AddRec->op_end());
02250       AddRecOps[0] = getAddExpr(LIOps);
02251 
02252       // Build the new addrec. Propagate the NUW and NSW flags if both the
02253       // outer add and the inner addrec are guaranteed to have no overflow.
02254       // Always propagate NW.
02255       Flags = AddRec->getNoWrapFlags(setFlags(Flags, SCEV::FlagNW));
02256       const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop, Flags);
02257 
02258       // If all of the other operands were loop invariant, we are done.
02259       if (Ops.size() == 1) return NewRec;
02260 
02261       // Otherwise, add the folded AddRec by the non-invariant parts.
02262       for (unsigned i = 0;; ++i)
02263         if (Ops[i] == AddRec) {
02264           Ops[i] = NewRec;
02265           break;
02266         }
02267       return getAddExpr(Ops);
02268     }
02269 
02270     // Okay, if there weren't any loop invariants to be folded, check to see if
02271     // there are multiple AddRec's with the same loop induction variable being
02272     // added together.  If so, we can fold them.
02273     for (unsigned OtherIdx = Idx+1;
02274          OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
02275          ++OtherIdx)
02276       if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
02277         // Other + {A,+,B}<L> + {C,+,D}<L>  -->  Other + {A+C,+,B+D}<L>
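              //   e.g., %x + {1,+,2}<L> + {3,+,4}<L>  -->  %x + {4,+,6}<L>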
02278         SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
02279                                                AddRec->op_end());
02280         for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
02281              ++OtherIdx)
02282           if (const SCEVAddRecExpr *OtherAddRec =
02283                 dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]))
02284             if (OtherAddRec->getLoop() == AddRecLoop) {
02285               for (unsigned i = 0, e = OtherAddRec->getNumOperands();
02286                    i != e; ++i) {
02287                 if (i >= AddRecOps.size()) {
02288                   AddRecOps.append(OtherAddRec->op_begin()+i,
02289                                    OtherAddRec->op_end());
02290                   break;
02291                 }
02292                 AddRecOps[i] = getAddExpr(AddRecOps[i],
02293                                           OtherAddRec->getOperand(i));
02294               }
02295               Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
02296             }
02297         // Step size has changed, so we cannot guarantee no self-wraparound.
02298         Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap);
02299         return getAddExpr(Ops);
02300       }
02301 
02302     // Otherwise couldn't fold anything into this recurrence.  Move onto the
02303     // next one.
02304   }
02305 
02306   // Okay, it looks like we really DO need an add expr.  Check to see if we
02307   // already have one, otherwise create a new one.
02308   FoldingSetNodeID ID;
02309   ID.AddInteger(scAddExpr);
02310   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
02311     ID.AddPointer(Ops[i]);
02312   void *IP = nullptr;
02313   SCEVAddExpr *S =
02314     static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
02315   if (!S) {
02316     const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
02317     std::uninitialized_copy(Ops.begin(), Ops.end(), O);
02318     S = new (SCEVAllocator) SCEVAddExpr(ID.Intern(SCEVAllocator),
02319                                         O, Ops.size());
02320     UniqueSCEVs.InsertNode(S, IP);
02321   }
02322   S->setNoWrapFlags(Flags);
02323   return S;
02324 }
02325 
02326 static uint64_t umul_ov(uint64_t i, uint64_t j, bool &Overflow) {
02327   uint64_t k = i*j;
02328   if (j > 1 && k / j != i) Overflow = true;
02329   return k;
02330 }
02331 
02332 /// Compute the result of "n choose k", the binomial coefficient.  If an
02333 /// intermediate computation overflows, Overflow will be set and the returned
02334 /// value will be garbage. Overflow is not cleared in the absence of overflow.
02335 static uint64_t Choose(uint64_t n, uint64_t k, bool &Overflow) {
02336   // We use the multiplicative formula:
02337   //     n(n-1)(n-2)...(n-(k-1)) / k(k-1)(k-2)...1 .
02338   // At iteration i, we multiply by the i-th term of the numerator, n-(i-1),
02339   // and divide by the i-th term of the denominator, i.  This division always
02340   // produces an integral result, and helps reduce the chance of overflow in
02341   // the intermediate computations. However, we can still overflow even when
02342   // the final result would fit.
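        //
        // For example, Choose(6, 2): k stays 2, and the loop computes
        // r = (1 * 6) / 1 = 6, then r = (6 * 5) / 2 = 15.  Each division is
        // exact because the product of i consecutive integers is divisible
        // by i factorial.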
02343 
02344   if (n == 0 || n == k) return 1;
02345   if (k > n) return 0;
02346 
02347   if (k > n/2)
02348     k = n-k;
02349 
02350   uint64_t r = 1;
02351   for (uint64_t i = 1; i <= k; ++i) {
02352     r = umul_ov(r, n-(i-1), Overflow);
02353     r /= i;
02354   }
02355   return r;
02356 }
02357 
02358 /// Determine if any of the operands in this SCEV are a constant or if
02359 /// any of the add or multiply expressions in this SCEV contain a constant.
02360 static bool containsConstantSomewhere(const SCEV *StartExpr) {
02361   SmallVector<const SCEV *, 4> Ops;
02362   Ops.push_back(StartExpr);
02363   while (!Ops.empty()) {
02364     const SCEV *CurrentExpr = Ops.pop_back_val();
02365     if (isa<SCEVConstant>(*CurrentExpr))
02366       return true;
02367 
02368     if (isa<SCEVAddExpr>(*CurrentExpr) || isa<SCEVMulExpr>(*CurrentExpr)) {
02369       const auto *CurrentNAry = cast<SCEVNAryExpr>(CurrentExpr);
02370       Ops.append(CurrentNAry->op_begin(), CurrentNAry->op_end());
02371     }
02372   }
02373   return false;
02374 }
02375 
02376 /// getMulExpr - Get a canonical multiply expression, or something simpler if
02377 /// possible.
02378 const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
02379                                         SCEV::NoWrapFlags Flags) {
02380   assert(Flags == maskFlags(Flags, SCEV::FlagNUW | SCEV::FlagNSW) &&
02381          "only nuw or nsw allowed");
02382   assert(!Ops.empty() && "Cannot get empty mul!");
02383   if (Ops.size() == 1) return Ops[0];
02384 #ifndef NDEBUG
02385   Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
02386   for (unsigned i = 1, e = Ops.size(); i != e; ++i)
02387     assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
02388            "SCEVMulExpr operand types don't match!");
02389 #endif
02390 
02391   Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags);
02392 
02393   // Sort by complexity, this groups all similar expression types together.
02394   GroupByComplexity(Ops, LI);
02395 
02396   // If there are any constants, fold them together.
02397   unsigned Idx = 0;
02398   if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
02399 
02400     // C1*(C2+V) -> C1*C2 + C1*V
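          //   e.g., 2 * (3 + %v)  -->  6 + 2 * %v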
02401     if (Ops.size() == 2)
02402         if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1]))
02403           // If any of Add's ops are Adds or Muls with a constant,
02404           // apply this transformation as well.
02405           if (Add->getNumOperands() == 2)
02406             if (containsConstantSomewhere(Add))
02407               return getAddExpr(getMulExpr(LHSC, Add->getOperand(0)),
02408                                 getMulExpr(LHSC, Add->getOperand(1)));
02409 
02410     ++Idx;
02411     while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
02412       // We found two constants, fold them together!
02413       ConstantInt *Fold = ConstantInt::get(getContext(),
02414                                            LHSC->getValue()->getValue() *
02415                                            RHSC->getValue()->getValue());
02416       Ops[0] = getConstant(Fold);
02417       Ops.erase(Ops.begin()+1);  // Erase the folded element
02418       if (Ops.size() == 1) return Ops[0];
02419       LHSC = cast<SCEVConstant>(Ops[0]);
02420     }
02421 
02422     // If we are left with a constant one being multiplied, strip it off.
02423     if (cast<SCEVConstant>(Ops[0])->getValue()->equalsInt(1)) {
02424       Ops.erase(Ops.begin());
02425       --Idx;
02426     } else if (cast<SCEVConstant>(Ops[0])->getValue()->isZero()) {
02427       // If we have a multiply of zero, it will always be zero.
02428       return Ops[0];
02429     } else if (Ops[0]->isAllOnesValue()) {
02430       // If we have a mul by -1 of an add, try distributing the -1 among the
02431       // add operands.
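            //   e.g., -1 * (%x + %y)  -->  (-1 * %x) + (-1 * %y), kept only when
            //   at least one of the new terms folds past a plain mul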
02432       if (Ops.size() == 2) {
02433         if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) {
02434           SmallVector<const SCEV *, 4> NewOps;
02435           bool AnyFolded = false;
02436           for (SCEVAddRecExpr::op_iterator I = Add->op_begin(),
02437                  E = Add->op_end(); I != E; ++I) {
02438             const SCEV *Mul = getMulExpr(Ops[0], *I);
02439             if (!isa<SCEVMulExpr>(Mul)) AnyFolded = true;
02440             NewOps.push_back(Mul);
02441           }
02442           if (AnyFolded)
02443             return getAddExpr(NewOps);
02444         } else if (const SCEVAddRecExpr *AddRec =
02445                        dyn_cast<SCEVAddRecExpr>(Ops[1])) {
02447           // Negation preserves a recurrence's no self-wrap property.
02448           SmallVector<const SCEV *, 4> Operands;
02449           for (SCEVAddRecExpr::op_iterator I = AddRec->op_begin(),
02450                  E = AddRec->op_end(); I != E; ++I) {
02451             Operands.push_back(getMulExpr(Ops[0], *I));
02452           }
02453           return getAddRecExpr(Operands, AddRec->getLoop(),
02454                                AddRec->getNoWrapFlags(SCEV::FlagNW));
02455         }
02456       }
02457     }
02458 
02459     if (Ops.size() == 1)
02460       return Ops[0];
02461   }
02462 
02463   // Skip over the add expression until we get to a multiply.
02464   while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
02465     ++Idx;
02466 
02467   // If there are mul operands inline them all into this expression.
02468   if (Idx < Ops.size()) {
02469     bool DeletedMul = false;
02470     while (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) {
02471       // If we have a mul, expand the mul operands onto the end of the operands
02472       // list.
02473       Ops.erase(Ops.begin()+Idx);
02474       Ops.append(Mul->op_begin(), Mul->op_end());
02475       DeletedMul = true;
02476     }
02477 
02478     // If we deleted at least one mul, we added operands to the end of the list,
02479     // and they are not necessarily sorted.  Recurse to re-sort and re-simplify
02480     // any operands we just acquired.
02481     if (DeletedMul)
02482       return getMulExpr(Ops);
02483   }
02484 
02485   // If there are any add recurrences in the operands list, see if any other
02486   // multiplied values are loop invariant.  If so, we can fold them into the
02487   // recurrence.
02488   while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
02489     ++Idx;
02490 
02491   // Scan over all recurrences, trying to fold loop invariants into them.
02492   for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
02493     // Scan all of the other operands to this mul and add them to the vector if
02494     // they are loop invariant w.r.t. the recurrence.
02495     SmallVector<const SCEV *, 8> LIOps;
02496     const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
02497     const Loop *AddRecLoop = AddRec->getLoop();
02498     for (unsigned i = 0, e = Ops.size(); i != e; ++i)
02499       if (isLoopInvariant(Ops[i], AddRecLoop)) {
02500         LIOps.push_back(Ops[i]);
02501         Ops.erase(Ops.begin()+i);
02502         --i; --e;
02503       }
02504 
02505     // If we found some loop invariants, fold them into the recurrence.
02506     if (!LIOps.empty()) {
02507       //  NLI * LI * {Start,+,Step}  -->  NLI * {LI*Start,+,LI*Step}
02508       SmallVector<const SCEV *, 4> NewOps;
02509       NewOps.reserve(AddRec->getNumOperands());
02510       const SCEV *Scale = getMulExpr(LIOps);
02511       for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
02512         NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i)));
02513 
02514       // Build the new addrec. Propagate the NUW and NSW flags if both the
02515       // outer mul and the inner addrec are guaranteed to have no overflow.
02516       //
02517       // No self-wrap cannot be guaranteed after changing the step size, but
02518       // will be inferred if either NUW or NSW is true.
02519       Flags = AddRec->getNoWrapFlags(clearFlags(Flags, SCEV::FlagNW));
02520       const SCEV *NewRec = getAddRecExpr(NewOps, AddRecLoop, Flags);
02521 
02522       // If all of the other operands were loop invariant, we are done.
02523       if (Ops.size() == 1) return NewRec;
02524 
02525       // Otherwise, multiply the folded AddRec by the non-invariant parts.
02526       for (unsigned i = 0;; ++i)
02527         if (Ops[i] == AddRec) {
02528           Ops[i] = NewRec;
02529           break;
02530         }
02531       return getMulExpr(Ops);
02532     }
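          // For instance (illustrative), if c is invariant in L but x is not:
          //   x * c * {S,+,T}<L>  -->  x * {c*S,+,c*T}<L>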
02533 
02534     // Okay, if there weren't any loop invariants to be folded, check to see if
02535     // there are multiple AddRecs with the same loop induction variable being
02536     // multiplied together.  If so, we can fold them.
02537 
02538     // {A1,+,A2,+,...,+,An}<L> * {B1,+,B2,+,...,+,Bn}<L>
02539     // = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [
02540     //       choose(x, 2x)*choose(2x-y, x-z)*A_{y-z}*B_z
02541     //   ]]],+,...up to x=2n}.
02542     // Note that the arguments to choose() are always integers with values
02543     // known at compile time, never SCEV objects.
02544     //
02545     // The implementation avoids pointless extra computations when the two
02546     // addrecs are of different length (mathematically, it's equivalent to
02547     // an infinite stream of zeros on the right).
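          // As a small, hand-derived instance of the formula (a sketch, not an
          // identity taken from the source): for two affine addrecs,
          //   {A1,+,A2}<L> * {B1,+,B2}<L>
          //     = {A1*B1,+,A1*B2 + A2*B1 + A2*B2,+,2*A2*B2}<L>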
02548     bool OpsModified = false;
02549     for (unsigned OtherIdx = Idx+1;
02550          OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
02551          ++OtherIdx) {
02552       const SCEVAddRecExpr *OtherAddRec =
02553         dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]);
02554       if (!OtherAddRec || OtherAddRec->getLoop() != AddRecLoop)
02555         continue;
02556 
02557       bool Overflow = false;
02558       Type *Ty = AddRec->getType();
02559       bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64;
02560       SmallVector<const SCEV*, 7> AddRecOps;
02561       for (int x = 0, xe = AddRec->getNumOperands() +
02562              OtherAddRec->getNumOperands() - 1; x != xe && !Overflow; ++x) {
02563         const SCEV *Term = getConstant(Ty, 0);
02564         for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) {
02565           uint64_t Coeff1 = Choose(x, 2*x - y, Overflow);
02566           for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1),
02567                  ze = std::min(x+1, (int)OtherAddRec->getNumOperands());
02568                z < ze && !Overflow; ++z) {
02569             uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow);
02570             uint64_t Coeff;
02571             if (LargerThan64Bits)
02572               Coeff = umul_ov(Coeff1, Coeff2, Overflow);
02573             else
02574               Coeff = Coeff1*Coeff2;
02575             const SCEV *CoeffTerm = getConstant(Ty, Coeff);
02576             const SCEV *Term1 = AddRec->getOperand(y-z);
02577             const SCEV *Term2 = OtherAddRec->getOperand(z);
02578             Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1,Term2));
02579           }
02580         }
02581         AddRecOps.push_back(Term);
02582       }
02583       if (!Overflow) {
02584         const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRec->getLoop(),
02585                                               SCEV::FlagAnyWrap);
02586         if (Ops.size() == 2) return NewAddRec;
02587         Ops[Idx] = NewAddRec;
02588         Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
02589         OpsModified = true;
02590         AddRec = dyn_cast<SCEVAddRecExpr>(NewAddRec);
02591         if (!AddRec)
02592           break;
02593       }
02594     }
02595     if (OpsModified)
02596       return getMulExpr(Ops);
02597 
02598     // Otherwise couldn't fold anything into this recurrence.  Move onto the
02599     // next one.
02600   }
02601 
02602   // Okay, it looks like we really DO need a mul expr.  Check to see if we
02603   // already have one, otherwise create a new one.
02604   FoldingSetNodeID ID;
02605   ID.AddInteger(scMulExpr);
02606   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
02607     ID.AddPointer(Ops[i]);
02608   void *IP = nullptr;
02609   SCEVMulExpr *S =
02610     static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
02611   if (!S) {
02612     const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
02613     std::uninitialized_copy(Ops.begin(), Ops.end(), O);
02614     S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator),
02615                                         O, Ops.size());
02616     UniqueSCEVs.InsertNode(S, IP);
02617   }
02618   S->setNoWrapFlags(Flags);
02619   return S;
02620 }
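      // A hypothetical caller of the folder above (names are illustrative):
      //   const SCEV *Prod = SE.getMulExpr(SE.getSCEV(A), SE.getSCEV(B));
      // Because results are interned in UniqueSCEVs, structurally identical
      // products are pointer-equal, so clients may compare them with ==.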
02621 
02622 /// getUDivExpr - Get a canonical unsigned division expression, or something
02623 /// simpler if possible.
02624 const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
02625                                          const SCEV *RHS) {
02626   assert(getEffectiveSCEVType(LHS->getType()) ==
02627          getEffectiveSCEVType(RHS->getType()) &&
02628          "SCEVUDivExpr operand types don't match!");
02629 
02630   if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
02631     if (RHSC->getValue()->equalsInt(1))
02632       return LHS;                               // X udiv 1 --> X
02633     // If the denominator is zero, the result of the udiv is undefined. Don't
02634     // try to analyze it, because the resolution chosen here may differ from
02635     // the resolution chosen in other parts of the compiler.
02636     if (!RHSC->getValue()->isZero()) {
02637       // Determine if the division can be folded into the operands of
02638       // LHS.
02639       // TODO: Generalize this to non-constants by using known-bits information.
02640       Type *Ty = LHS->getType();
02641       unsigned LZ = RHSC->getValue()->getValue().countLeadingZeros();
02642       unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ - 1;
02643       // For non-power-of-two values, effectively round the value up to the
02644       // nearest power of two.
02645       if (!RHSC->getValue()->getValue().isPowerOf2())
02646         ++MaxShiftAmt;
02647       IntegerType *ExtTy =
02648         IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt);
02649       if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS))
02650         if (const SCEVConstant *Step =
02651             dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this))) {
02652           // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded.
02653           const APInt &StepInt = Step->getValue()->getValue();
02654           const APInt &DivInt = RHSC->getValue()->getValue();
02655           if (!StepInt.urem(DivInt) &&
02656               getZeroExtendExpr(AR, ExtTy) ==
02657               getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
02658                             getZeroExtendExpr(Step, ExtTy),
02659                             AR->getLoop(), SCEV::FlagAnyWrap)) {
02660             SmallVector<const SCEV *, 4> Operands;
02661             for (unsigned i = 0, e = AR->getNumOperands(); i != e; ++i)
02662               Operands.push_back(getUDivExpr(AR->getOperand(i), RHS));
02663             return getAddRecExpr(Operands, AR->getLoop(),
02664                                  SCEV::FlagNW);
02665           }
02666           // Get a canonical UDivExpr for a recurrence.
02667           // {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0.
02668           // We can currently only fold X%N if X is constant.
02669           const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart());
02670           if (StartC && !DivInt.urem(StepInt) &&
02671               getZeroExtendExpr(AR, ExtTy) ==
02672               getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
02673                             getZeroExtendExpr(Step, ExtTy),
02674                             AR->getLoop(), SCEV::FlagAnyWrap)) {
02675             const APInt &StartInt = StartC->getValue()->getValue();
02676             const APInt &StartRem = StartInt.urem(StepInt);
02677             if (StartRem != 0)
02678               LHS = getAddRecExpr(getConstant(StartInt - StartRem), Step,
02679                                   AR->getLoop(), SCEV::FlagNW);
02680           }
02681         }
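            // Worked example of the addrec fold above (illustrative):
            //   {8,+,4}<L> /u 2  -->  {4,+,2}<L>
            // since both the start and the step divide evenly by 2 and the
            // zero-extension check shows the addrec cannot wrap.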
02682       // (A*B)/C --> A*(B/C) if safe and B/C can be folded.
02683       if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) {
02684         SmallVector<const SCEV *, 4> Operands;
02685         for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i)
02686           Operands.push_back(getZeroExtendExpr(M->getOperand(i), ExtTy));
02687         if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands))
02688           // Find an operand that's safely divisible.
02689           for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
02690             const SCEV *Op = M->getOperand(i);
02691             const SCEV *Div = getUDivExpr(Op, RHSC);
02692             if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) {
02693               Operands = SmallVector<const SCEV *, 4>(M->op_begin(),
02694                                                       M->op_end());
02695               Operands[i] = Div;
02696               return getMulExpr(Operands);
02697             }
02698           }
02699       }
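            // E.g. (illustrative): (8 * x) /u 4  -->  2 * x, because 8 /u 4
            // folds to 2 and multiplying back by 4 recovers the operand.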
02700       // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded.
02701       if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(LHS)) {
02702         SmallVector<const SCEV *, 4> Operands;
02703         for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i)
02704           Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy));
02705         if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) {
02706           Operands.clear();
02707           for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) {
02708             const SCEV *Op = getUDivExpr(A->getOperand(i), RHS);
02709             if (isa<SCEVUDivExpr>(Op) ||
02710                 getMulExpr(Op, RHS) != A->getOperand(i))
02711               break;
02712             Operands.push_back(Op);
02713           }
02714           if (Operands.size() == A->getNumOperands())
02715             return getAddExpr(Operands);
02716         }
02717       }
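            // E.g. (illustrative): (16 + 4*x) /u 4  -->  4 + x, provided
            // every operand divides exactly; otherwise the fold is abandoned.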
02718 
02719       // Fold if both operands are constant.
02720       if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
02721         Constant *LHSCV = LHSC->getValue();
02722         Constant *RHSCV = RHSC->getValue();
02723         return getConstant(cast<ConstantInt>(ConstantExpr::getUDiv(LHSCV,
02724                                                                    RHSCV)));
02725       }
02726     }
02727   }
02728 
02729   FoldingSetNodeID ID;
02730   ID.AddInteger(scUDivExpr);
02731   ID.AddPointer(LHS);
02732   ID.AddPointer(RHS);
02733   void *IP = nullptr;
02734   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
02735   SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator),
02736                                              LHS, RHS);
02737   UniqueSCEVs.InsertNode(S, IP);
02738   return S;
02739 }
02740 
02741 static const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) {
02742   APInt A = C1->getValue()->getValue().abs();
02743   APInt B = C2->getValue()->getValue().abs();
02744   uint32_t ABW = A.getBitWidth();
02745   uint32_t BBW = B.getBitWidth();
02746 
02747   if (ABW > BBW)
02748     B = B.zext(ABW);
02749   else if (ABW < BBW)
02750     A = A.zext(BBW);
02751 
02752   return APIntOps::GreatestCommonDivisor(A, B);
02753 }
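      // E.g. (illustrative): for the constants 12 and 8 this returns 4;
      // operands of different widths are first zero-extended to match.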
02754 
02755 /// getUDivExactExpr - Get a canonical unsigned division expression, or
02756 /// something simpler if possible. There is no representation for an exact udiv
02757 /// in SCEV IR, but we can attempt to remove factors from the LHS and RHS.
02758 /// We can't do this when it's not exact because the udiv may be clearing bits.
02759 const SCEV *ScalarEvolution::getUDivExactExpr(const SCEV *LHS,
02760                                               const SCEV *RHS) {
02761   // TODO: we could try to find factors in all sorts of things, but for now we
02762   // just deal with u/exact (multiply, constant). See SCEVDivision towards the
02763   // end of this file for inspiration.
02764 
02765   const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS);
02766   if (!Mul)
02767     return getUDivExpr(LHS, RHS);
02768 
02769   if (const SCEVConstant *RHSCst = dyn_cast<SCEVConstant>(RHS)) {
02770     // If the mulexpr multiplies by a constant, then that constant must be the
02771     // first element of the mulexpr.
02772     if (const SCEVConstant *LHSCst =
02773             dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
02774       if (LHSCst == RHSCst) {
02775         SmallVector<const SCEV *, 2> Operands;
02776         Operands.append(Mul->op_begin() + 1, Mul->op_end());
02777         return getMulExpr(Operands);
02778       }
02779 
02780       // We can't just assume that LHSCst divides RHSCst cleanly, it could be
02781       // that there's a factor provided by one of the other terms. We need to
02782       // check.
02783       APInt Factor = gcd(LHSCst, RHSCst);
02784       if (!Factor.isIntN(1)) {
02785         LHSCst = cast<SCEVConstant>(
02786             getConstant(LHSCst->getValue()->getValue().udiv(Factor)));
02787         RHSCst = cast<SCEVConstant>(
02788             getConstant(RHSCst->getValue()->getValue().udiv(Factor)));
02789         SmallVector<const SCEV *, 2> Operands;
02790         Operands.push_back(LHSCst);
02791         Operands.append(Mul->op_begin() + 1, Mul->op_end());
02792         LHS = getMulExpr(Operands);
02793         RHS = RHSCst;
02794         Mul = dyn_cast<SCEVMulExpr>(LHS);
02795         if (!Mul)
02796           return getUDivExactExpr(LHS, RHS);
02797       }
02798     }
02799   }
02800 
02801   for (int i = 0, e = Mul->getNumOperands(); i != e; ++i) {
02802     if (Mul->getOperand(i) == RHS) {
02803       SmallVector<const SCEV *, 2> Operands;
02804       Operands.append(Mul->op_begin(), Mul->op_begin() + i);
02805       Operands.append(Mul->op_begin() + i + 1, Mul->op_end());
02806       return getMulExpr(Operands);
02807     }
02808   }
02809 
02810   return getUDivExpr(LHS, RHS);
02811 }
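      // A worked example of the factoring above (illustrative): for
      // getUDivExactExpr(6 * x, 3), gcd(6, 3) = 3, so the operands become
      // (2 * x) and 1, and the division then folds away, leaving 2 * x.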
02812 
02813 /// getAddRecExpr - Get an add recurrence expression for the specified loop.
02814 /// Simplify the expression as much as possible.
02815 const SCEV *ScalarEvolution::getAddRecExpr(const SCEV *Start, const SCEV *Step,
02816                                            const Loop *L,
02817                                            SCEV::NoWrapFlags Flags) {
02818   SmallVector<const SCEV *, 4> Operands;
02819   Operands.push_back(Start);
02820   if (const SCEVAddRecExpr *StepChrec = dyn_cast<SCEVAddRecExpr>(Step))
02821     if (StepChrec->getLoop() == L) {
02822       Operands.append(StepChrec->op_begin(), StepChrec->op_end());
02823       return getAddRecExpr(Operands, L, maskFlags(Flags, SCEV::FlagNW));
02824     }
02825 
02826   Operands.push_back(Step);
02827   return getAddRecExpr(Operands, L, Flags);
02828 }
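      // E.g. (illustrative): if Step is itself {A,+,B}<L>, the recurrence
      // {Start,+,{A,+,B}<L>}<L> is flattened to {Start,+,A,+,B}<L>.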
02829 
02830 /// getAddRecExpr - Get an add recurrence expression for the specified loop.
02831 /// Simplify the expression as much as possible.
02832 const SCEV *
02833 ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
02834                                const Loop *L, SCEV::NoWrapFlags Flags) {
02835   if (Operands.size() == 1) return Operands[0];
02836 #ifndef NDEBUG
02837   Type *ETy = getEffectiveSCEVType(Operands[0]->getType());
02838   for (unsigned i = 1, e = Operands.size(); i != e; ++i)
02839     assert(getEffectiveSCEVType(Operands[i]->getType()) == ETy &&
02840            "SCEVAddRecExpr operand types don't match!");
02841   for (unsigned i = 0, e = Operands.size(); i != e; ++i)
02842     assert(isLoopInvariant(Operands[i], L) &&
02843            "SCEVAddRecExpr operand is not loop-invariant!");
02844 #endif
02845 
02846   if (Operands.back()->isZero()) {
02847     Operands.pop_back();
02848     return getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); // {X,+,0}  -->  X
02849   }
02850 
02851   // It's tempting to call getMaxBackedgeTakenCount here and
02852   // use that information to infer NUW and NSW flags. However, computing a
02853   // BE count requires calling getAddRecExpr, so we may not yet have a
02854   // meaningful BE count at this point (and if we don't, we'd be stuck
02855   // with a SCEVCouldNotCompute as the cached BE count).
02856 
02857   Flags = StrengthenNoWrapFlags(this, scAddRecExpr, Operands, Flags);
02858 
02859   // Canonicalize nested AddRecs by nesting them in order of loop depth.
02860   if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) {
02861     const Loop *NestedLoop = NestedAR->getLoop();
02862     if (L->contains(NestedLoop) ?
02863         (L->getLoopDepth() < NestedLoop->getLoopDepth()) :
02864         (!NestedLoop->contains(L) &&
02865          DT->dominates(L->getHeader(), NestedLoop->getHeader()))) {
02866       SmallVector<const SCEV *, 4> NestedOperands(NestedAR->op_begin(),
02867                                                   NestedAR->op_end());
02868       Operands[0] = NestedAR->getStart();
02869       // AddRecs require their operands be loop-invariant with respect to their
02870       // loops. Don't perform this transformation if it would break this
02871       // requirement.
02872       bool AllInvariant = true;
02873       for (unsigned i = 0, e = Operands.size(); i != e; ++i)
02874         if (!isLoopInvariant(Operands[i], L)) {
02875           AllInvariant = false;
02876           break;
02877         }
02878       if (AllInvariant) {
02879         // Create a recurrence for the outer loop with the same step size.
02880         //
02881         // The outer recurrence keeps its NW flag but only keeps NUW/NSW if the
02882         // inner recurrence has the same property.
02883         SCEV::NoWrapFlags OuterFlags =
02884           maskFlags(Flags, SCEV::FlagNW | NestedAR->getNoWrapFlags());
02885 
02886         NestedOperands[0] = getAddRecExpr(Operands, L, OuterFlags);
02887         AllInvariant = true;
02888         for (unsigned i = 0, e = NestedOperands.size(); i != e; ++i)
02889           if (!isLoopInvariant(NestedOperands[i], NestedLoop)) {
02890             AllInvariant = false;
02891             break;
02892           }
02893         if (AllInvariant) {
02894           // Ok, both add recurrences are valid after the transformation.
02895           //
02896           // The inner recurrence keeps its NW flag but only keeps NUW/NSW if
02897           // the outer recurrence has the same property.
02898           SCEV::NoWrapFlags InnerFlags =
02899             maskFlags(NestedAR->getNoWrapFlags(), SCEV::FlagNW | Flags);
02900           return getAddRecExpr(NestedOperands, NestedLoop, InnerFlags);
02901         }
02902       }
02903       // Reset Operands to its original state.
02904       Operands[0] = NestedAR;
02905     }
02906   }
02907 
02908   // Okay, it looks like we really DO need an addrec expr.  Check to see if we
02909   // already have one, otherwise create a new one.
02910   FoldingSetNodeID ID;
02911   ID.AddInteger(scAddRecExpr);
02912   for (unsigned i = 0, e = Operands.size(); i != e; ++i)
02913     ID.AddPointer(Operands[i]);
02914   ID.AddPointer(L);
02915   void *IP = nullptr;
02916   SCEVAddRecExpr *S =
02917     static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
02918   if (!S) {
02919     const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Operands.size());
02920     std::uninitialized_copy(Operands.begin(), Operands.end(), O);
02921     S = new (SCEVAllocator) SCEVAddRecExpr(ID.Intern(SCEVAllocator),
02922                                            O, Operands.size(), L);
02923     UniqueSCEVs.InsertNode(S, IP);
02924   }
02925   S->setNoWrapFlags(Flags);
02926   return S;
02927 }
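      // A sketch of the nesting canonicalization above (hand-derived, so
      // treat it as an approximation): with Li nested inside Lo,
      //   {{X,+,Y}<Li>,+,Z}<Lo>  -->  {{X,+,Z}<Lo>,+,Y}<Li>
      // so the recurrence over the deeper loop Li ends up outermost.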
02928 
02929 const SCEV *
02930 ScalarEvolution::getGEPExpr(Type *PointeeType, const SCEV *BaseExpr,
02931                             const SmallVectorImpl<const SCEV *> &IndexExprs,
02932                             bool InBounds) {
02933   // getSCEV(Base)->getType() has the same address space as Base->getType()
02934   // because SCEV::getType() preserves the address space.
02935   Type *IntPtrTy = getEffectiveSCEVType(BaseExpr->getType());
02936   // FIXME(PR23527): Don't blindly transfer the inbounds flag from the GEP
02937   // instruction to its SCEV, because the Instruction may be guarded by control
02938   // flow and the no-overflow bits may not be valid for the expression in any
02939   // context.
02940   SCEV::NoWrapFlags Wrap = InBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
02941 
02942   const SCEV *TotalOffset = getConstant(IntPtrTy, 0);
02943   // The address space is unimportant; the only thing we do with CurTy is
02944   // get its element type.
02945   Type *CurTy = PointerType::getUnqual(PointeeType);
02946   for (const SCEV *IndexExpr : IndexExprs) {
02947     // Compute the (potentially symbolic) offset in bytes for this index.
02948     if (StructType *STy = dyn_cast<StructType>(CurTy)) {
02949       // For a struct, add the member offset.
02950       ConstantInt *Index = cast<SCEVConstant>(IndexExpr)->getValue();
02951       unsigned FieldNo = Index->getZExtValue();
02952       const SCEV *FieldOffset = getOffsetOfExpr(IntPtrTy, STy, FieldNo);
02953 
02954       // Add the field offset to the running total offset.
02955       TotalOffset = getAddExpr(TotalOffset, FieldOffset);
02956 
02957       // Update CurTy to the type of the field at Index.
02958       CurTy = STy->getTypeAtIndex(Index);
02959     } else {
02960       // Update CurTy to its element type.
02961       CurTy = cast<SequentialType>(CurTy)->getElementType();
02962       // For an array, add the element offset, explicitly scaled.
02963       const SCEV *ElementSize = getSizeOfExpr(IntPtrTy, CurTy);
02964       // Getelementptr indices are signed.
02965       IndexExpr = getTruncateOrSignExtend(IndexExpr, IntPtrTy);
02966 
02967       // Multiply the index by the element size to compute the element offset.
02968       const SCEV *LocalOffset = getMulExpr(IndexExpr, ElementSize, Wrap);
02969 
02970       // Add the element offset to the running total offset.
02971       TotalOffset = getAddExpr(TotalOffset, LocalOffset);
02972     }
02973   }
02974 
02975   // Add the total offset from all the GEP indices to the base.
02976   return getAddExpr(BaseExpr, TotalOffset, Wrap);
02977 }
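      // A worked example (illustrative, assuming a typical 64-bit data layout
      // where i32 has size 4): for
      //   getelementptr inbounds [10 x i32], [10 x i32]* %p, i64 0, i64 %i
      // the offset accumulates as 0 * 40 + 4 * %i, so the result is
      //   %p + 4 * %i, with nsw carried over from the inbounds flag.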
02978 
02979 const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS,
02980                                          const SCEV *RHS) {
02981   SmallVector<const SCEV *, 2> Ops;
02982   Ops.push_back(LHS);
02983   Ops.push_back(RHS);
02984   return getSMaxExpr(Ops);
02985 }
02986 
02987 const SCEV *
02988 ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
02989   assert(!Ops.empty() && "Cannot get empty smax!");
02990   if (Ops.size() == 1) return Ops[0];
02991 #ifndef NDEBUG
02992   Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
02993   for (unsigned i = 1, e = Ops.size(); i != e; ++i)
02994     assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
02995            "SCEVSMaxExpr operand types don't match!");
02996 #endif
02997 
02998   // Sort by complexity, this groups all similar expression types together.
02999   GroupByComplexity(Ops, LI);
03000 
03001   // If there are any constants, fold them together.
03002   unsigned Idx = 0;
03003   if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
03004     ++Idx;
03005     assert(Idx < Ops.size());
03006     while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
03007       // We found two constants, fold them together!
03008       ConstantInt *Fold = ConstantInt::get(getContext(),
03009                               APIntOps::smax(LHSC->getValue()->getValue(),
03010                                              RHSC->getValue()->getValue()));
03011       Ops[0] = getConstant(Fold);
03012       Ops.erase(Ops.begin()+1);  // Erase the folded element
03013       if (Ops.size() == 1) return Ops[0];
03014       LHSC = cast<SCEVConstant>(Ops[0]);
03015     }
03016 
03017     // If we are left with a constant minimum-int, strip it off.
03018     if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(true)) {
03019       Ops.erase(Ops.begin());
03020       --Idx;
03021     } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(true)) {
03022       // If we have an smax with a constant maximum-int, it will always be
03023       // maximum-int.
03024       return Ops[0];
03025     }
03026 
03027     if (Ops.size() == 1) return Ops[0];
03028   }
03029 
03030   // Find the first SMax
03031   while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scSMaxExpr)
03032     ++Idx;
03033 
03034   // Check to see if one of the operands is an SMax. If so, expand its operands
03035   // onto our operand list, and recurse to simplify.
03036   if (Idx < Ops.size()) {
03037     bool DeletedSMax = false;
03038     while (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(Ops[Idx])) {
03039       Ops.erase(Ops.begin()+Idx);
03040       Ops.append(SMax->op_begin(), SMax->op_end());
03041       DeletedSMax = true;
03042     }
03043 
03044     if (DeletedSMax)
03045       return getSMaxExpr(Ops);
03046   }
03047 
03048   // Okay, check to see if the same value occurs in the operand list twice.  If
03049   // so, delete one.  Since we sorted the list, these values are required to
03050   // be adjacent.
03051   for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
03052     //  X smax Y smax Y  -->  X smax Y
03053     //  X smax Y         -->  X, if X is always greater than or equal to Y
03054     if (Ops[i] == Ops[i+1] ||
03055         isKnownPredicate(ICmpInst::ICMP_SGE, Ops[i], Ops[i+1])) {
03056       Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2);
03057       --i; --e;
03058     } else if (isKnownPredicate(ICmpInst::ICMP_SLE, Ops[i], Ops[i+1])) {
03059       Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
03060       --i; --e;
03061     }
03062 
03063   if (Ops.size() == 1) return Ops[0];
03064 
03065   assert(!Ops.empty() && "Reduced smax down to nothing!");
03066 
03067   // Okay, it looks like we really DO need an smax expr.  Check to see if we
03068   // already have one, otherwise create a new one.
03069   FoldingSetNodeID ID;
03070   ID.AddInteger(scSMaxExpr);
03071   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
03072     ID.AddPointer(Ops[i]);
03073   void *IP = nullptr;
03074   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
03075   const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
03076   std::uninitialized_copy(Ops.begin(), Ops.end(), O);
03077   SCEV *S = new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator),
03078                                              O, Ops.size());
03079   UniqueSCEVs.InsertNode(S, IP);
03080   return S;
03081 }
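      // Examples of the folds above (illustrative):
      //   smax(5, 7, x)        -->  smax(7, x)     (constants folded)
      //   smax(INT_MIN, x)     -->  x              (minimum-int stripped)
      //   smax(x, smax(y, z))  -->  smax(x, y, z)  (nested smax inlined)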
03082 
03083 const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS,
03084                                          const SCEV *RHS) {
03085   SmallVector<const SCEV *, 2> Ops;
03086   Ops.push_back(LHS);
03087   Ops.push_back(RHS);
03088   return getUMaxExpr(Ops);
03089 }
03090 
03091 const SCEV *
03092 ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
03093   assert(!Ops.empty() && "Cannot get empty umax!");
03094   if (Ops.size() == 1) return Ops[0];
03095 #ifndef NDEBUG
03096   Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
03097   for (unsigned i = 1, e = Ops.size(); i != e; ++i)
03098     assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
03099            "SCEVUMaxExpr operand types don't match!");
03100 #endif
03101 
03102   // Sort by complexity, this groups all similar expression types together.
03103   GroupByComplexity(Ops, LI);
03104 
03105   // If there are any constants, fold them together.
03106   unsigned Idx = 0;
03107   if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
03108     ++Idx;
03109     assert(Idx < Ops.size());
03110     while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
03111       // We found two constants, fold them together!
03112       ConstantInt *Fold = ConstantInt::get(getContext(),
03113                               APIntOps::umax(LHSC->getValue()->getValue(),
03114                                              RHSC->getValue()->getValue()));
03115       Ops[0] = getConstant(Fold);
03116       Ops.erase(Ops.begin()+1);  // Erase the folded element
03117       if (Ops.size() == 1) return Ops[0];
03118       LHSC = cast<SCEVConstant>(Ops[0]);
03119     }
03120 
03121     // If we are left with a constant minimum-int, strip it off.
03122     if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(false)) {
03123       Ops.erase(Ops.begin());
03124       --Idx;
03125     } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(false)) {
03126       // If we have an umax with a constant maximum-int, it will always be
03127       // maximum-int.
03128       return Ops[0];
03129     }
03130 
03131     if (Ops.size() == 1) return Ops[0];
03132   }
03133 
03134   // Find the first UMax
03135   while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scUMaxExpr)
03136     ++Idx;
03137 
03138   // Check to see if one of the operands is a UMax. If so, expand its operands
03139   // onto our operand list, and recurse to simplify.
03140   if (Idx < Ops.size()) {
03141     bool DeletedUMax = false;
03142     while (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(Ops[Idx])) {
03143       Ops.erase(Ops.begin()+Idx);
03144       Ops.append(UMax->op_begin(), UMax->op_end());
03145       DeletedUMax = true;
03146     }
03147 
03148     if (DeletedUMax)
03149       return getUMaxExpr(Ops);
03150   }
03151 
03152   // Okay, check to see if the same value occurs in the operand list twice.  If
03153   // so, delete one.  Since we sorted the list, these values are required to
03154   // be adjacent.
03155   for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
03156     //  X umax Y umax Y  -->  X umax Y
03157     //  X umax Y         -->  X, if X is always greater than or equal to Y
03158     if (Ops[i] == Ops[i+1] ||
03159         isKnownPredicate(ICmpInst::ICMP_UGE, Ops[i], Ops[i+1])) {
03160       Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2);
03161       --i; --e;
03162     } else if (isKnownPredicate(ICmpInst::ICMP_ULE, Ops[i], Ops[i+1])) {
03163       Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
03164       --i; --e;
03165     }
03166 
03167   if (Ops.size() == 1) return Ops[0];
03168 
03169   assert(!Ops.empty() && "Reduced umax down to nothing!");
03170 
03171   // Okay, it looks like we really DO need a umax expr.  Check to see if we
03172   // already have one, otherwise create a new one.
03173   FoldingSetNodeID ID;
03174   ID.AddInteger(scUMaxExpr);
03175   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
03176     ID.AddPointer(Ops[i]);
03177   void *IP = nullptr;
03178   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
03179   const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
03180   std::uninitialized_copy(Ops.begin(), Ops.end(), O);
03181   SCEV *S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator),
03182                                              O, Ops.size());
03183   UniqueSCEVs.InsertNode(S, IP);
03184   return S;
03185 }
03186 
03187 const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS,
03188                                          const SCEV *RHS) {
03189   // ~smax(~x, ~y) == smin(x, y).
03190   return getNotSCEV(getSMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
03191 }
03192 
03193 const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
03194                                          const SCEV *RHS) {
03195   // ~umax(~x, ~y) == umin(x, y).
03196   return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
03197 }
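      // Sanity check of the identity above with 8-bit values (illustrative):
      // for x = 3, y = 5: ~3 = 252, ~5 = 250, umax(252, 250) = 252, and
      // ~252 = 3 = umin(3, 5).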
03198 
03199 const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) {
03200   // We can bypass creating a target-independent
03201   // constant expression and then folding it back into a ConstantInt.
03202   // This is just a compile-time optimization.
03203   return getConstant(IntTy,
03204                      F->getParent()->getDataLayout().getTypeAllocSize(AllocTy));
03205 }
03206 
03207 const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy,
03208                                              StructType *STy,
03209                                              unsigned FieldNo) {
03210   // We can bypass creating a target-independent
03211   // constant expression and then folding it back into a ConstantInt.
03212   // This is just a compile-time optimization.
03213   return getConstant(
03214       IntTy,
03215       F->getParent()->getDataLayout().getStructLayout(STy)->getElementOffset(
03216           FieldNo));
03217 }
03218 
03219 const SCEV *ScalarEvolution::getUnknown(Value *V) {
03220   // Don't attempt to do anything other than create a SCEVUnknown object
03221   // here.  createSCEV only calls getUnknown after checking for all other
03222   // interesting possibilities, and any other code that calls getUnknown
03223   // is doing so in order to hide a value from SCEV canonicalization.
03224 
03225   FoldingSetNodeID ID;
03226   ID.AddInteger(scUnknown);
03227   ID.AddPointer(V);
03228   void *IP = nullptr;
03229   if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) {
03230     assert(cast<SCEVUnknown>(S)->getValue() == V &&
03231            "Stale SCEVUnknown in uniquing map!");
03232     return S;
03233   }
03234   SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V, this,
03235                                             FirstUnknown);
03236   FirstUnknown = cast<SCEVUnknown>(S);
03237   UniqueSCEVs.InsertNode(S, IP);
03238   return S;
03239 }
03240 
03241 //===----------------------------------------------------------------------===//
03242 //            Basic SCEV Analysis and PHI Idiom Recognition Code
03243 //
03244 
03245 /// isSCEVable - Test if values of the given type are analyzable within
03246 /// the SCEV framework. This primarily includes integer types, and it
03247 /// can optionally include pointer types if the ScalarEvolution class
03248 /// has access to target-specific information.
03249 bool ScalarEvolution::isSCEVable(Type *Ty) const {
03250   // Integers and pointers are always SCEVable.
03251   return Ty->isIntegerTy() || Ty->isPointerTy();
03252 }
03253 
03254 /// getTypeSizeInBits - Return the size in bits of the specified type,
03255 /// for which isSCEVable must return true.
03256 uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const {
03257   assert(isSCEVable(Ty) && "Type is not SCEVable!");
03258   return F->getParent()->getDataLayout().getTypeSizeInBits(Ty);
03259 }
03260 
03261 /// getEffectiveSCEVType - Return a type with the same bitwidth as
03262 /// the given type and which represents how SCEV will treat the given
03263 /// type, for which isSCEVable must return true. For pointer types,
03264 /// this is the pointer-sized integer type.
03265 Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const {
03266   assert(isSCEVable(Ty) && "Type is not SCEVable!");
03267 
03268   if (Ty->isIntegerTy()) {
03269     return Ty;
03270   }
03271 
03272   // The only other supported type is pointer.
03273   assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!");
03274   return F->getParent()->getDataLayout().getIntPtrType(Ty);
03275 }
03276 
03277 const SCEV *ScalarEvolution::getCouldNotCompute() {
03278   return &CouldNotCompute;
03279 }
03280 
03281 namespace {
03282   // Helper class working with SCEVTraversal to figure out if a SCEV contains
03283   // a SCEVUnknown with a null value-pointer. FindInvalidSCEVUnknown::FindOne
03284   // is set iff we find such a SCEVUnknown.
03285   //
03286   struct FindInvalidSCEVUnknown {
03287     bool FindOne;
03288     FindInvalidSCEVUnknown() { FindOne = false; }
03289     bool follow(const SCEV *S) {
03290       switch (static_cast<SCEVTypes>(S->getSCEVType())) {
03291       case scConstant:
03292         return false;
03293       case scUnknown:
03294         if (!cast<SCEVUnknown>(S)->getValue())
03295           FindOne = true;
03296         return false;
03297       default:
03298         return true;
03299       }
03300     }
03301     bool isDone() const { return FindOne; }
03302   };
03303 }
03304 
03305 bool ScalarEvolution::checkValidity(const SCEV *S) const {
03306   FindInvalidSCEVUnknown F;
03307   SCEVTraversal<FindInvalidSCEVUnknown> ST(F);
03308   ST.visitAll(S);
03309 
03310   return !F.FindOne;
03311 }
03312 
03313 /// getSCEV - Return an existing SCEV if it exists, otherwise analyze the
03314 /// expression and create a new one.
03315 const SCEV *ScalarEvolution::getSCEV(Value *V) {
03316   assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
03317 
03318   ValueExprMapType::iterator I = ValueExprMap.find_as(V);
03319   if (I != ValueExprMap.end()) {
03320     const SCEV *S = I->second;
03321     if (checkValidity(S))
03322       return S;
03323     else
03324       ValueExprMap.erase(I);
03325   }
03326   const SCEV *S = createSCEV(V);
03327 
03328   // The process of creating a SCEV for V may have caused other SCEVs
03329   // to have been created, so it's necessary to insert the new entry
03330   // from scratch, rather than trying to remember the insert position
03331   // above.
03332   ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S));
03333   return S;
03334 }
03335 
03336 /// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V
03337 ///
03338 const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V) {
03339   if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
03340     return getConstant(
03341                cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue())));
03342 
03343   Type *Ty = V->getType();
03344   Ty = getEffectiveSCEVType(Ty);
03345   return getMulExpr(V,
03346                   getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))));
03347 }
03348 
03349 /// getNotSCEV - Return a SCEV corresponding to ~V = -1-V
03350 const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
03351   if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
03352     return getConstant(
03353                 cast<ConstantInt>(ConstantExpr::getNot(VC->getValue())));
03354 
03355   Type *Ty = V->getType();
03356   Ty = getEffectiveSCEVType(Ty);
03357   const SCEV *AllOnes =
03358                    getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty)));
03359   return getMinusSCEV(AllOnes, V);
03360 }
03361 
03362 /// getMinusSCEV - Return LHS-RHS.  Minus is represented in SCEV as A+B*-1.
03363 const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
03364                                           SCEV::NoWrapFlags Flags) {
03365   assert(!maskFlags(Flags, SCEV::FlagNUW) && "subtraction does not have NUW");
03366 
03367   // Fast path: X - X --> 0.
03368   if (LHS == RHS)
03369     return getConstant(LHS->getType(), 0);
03370 
03371   // X - Y --> X + -Y.
03372   // X -(nsw || nuw) Y --> X + -Y.
03373   return getAddExpr(LHS, getNegativeSCEV(RHS));
03374 }
03375 
03376 /// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the
03377 /// input value to the specified type.  If the type must be extended, it is zero
03378 /// extended.
03379 const SCEV *
03380 ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, Type *Ty) {
03381   Type *SrcTy = V->getType();
03382   assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
03383          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
03384          "Cannot truncate or zero extend with non-integer arguments!");
03385   if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
03386     return V;  // No conversion
03387   if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
03388     return getTruncateExpr(V, Ty);
03389   return getZeroExtendExpr(V, Ty);
03390 }
03391 
03392 /// getTruncateOrSignExtend - Return a SCEV corresponding to a conversion of the
03393 /// input value to the specified type.  If the type must be extended, it is sign
03394 /// extended.
03395 const SCEV *
03396 ScalarEvolution::getTruncateOrSignExtend(const SCEV *V,
03397                                          Type *Ty) {
03398   Type *SrcTy = V->getType();
03399   assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
03400          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
03401          "Cannot truncate or zero extend with non-integer arguments!");
03402   if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
03403     return V;  // No conversion
03404   if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
03405     return getTruncateExpr(V, Ty);
03406   return getSignExtendExpr(V, Ty);
03407 }
03408 
03409 /// getNoopOrZeroExtend - Return a SCEV corresponding to a conversion of the
03410 /// input value to the specified type.  If the type must be extended, it is zero
03411 /// extended.  The conversion must not be narrowing.
03412 const SCEV *
03413 ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, Type *Ty) {
03414   Type *SrcTy = V->getType();
03415   assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
03416          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
03417          "Cannot noop or zero extend with non-integer arguments!");
03418   assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
03419          "getNoopOrZeroExtend cannot truncate!");
03420   if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
03421     return V;  // No conversion
03422   return getZeroExtendExpr(V, Ty);
03423 }
03424 
03425 /// getNoopOrSignExtend - Return a SCEV corresponding to a conversion of the
03426 /// input value to the specified type.  If the type must be extended, it is sign
03427 /// extended.  The conversion must not be narrowing.
03428 const SCEV *
03429 ScalarEvolution::getNoopOrSignExtend(const SCEV *V, Type *Ty) {
03430   Type *SrcTy = V->getType();
03431   assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
03432          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
03433          "Cannot noop or sign extend with non-integer arguments!");
03434   assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
03435          "getNoopOrSignExtend cannot truncate!");
03436   if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
03437     return V;  // No conversion
03438   return getSignExtendExpr(V, Ty);
03439 }
03440 
03441 /// getNoopOrAnyExtend - Return a SCEV corresponding to a conversion of
03442 /// the input value to the specified type. If the type must be extended,
03443 /// it is extended with unspecified bits. The conversion must not be
03444 /// narrowing.
03445 const SCEV *
03446 ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, Type *Ty) {
03447   Type *SrcTy = V->getType();
03448   assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
03449          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
03450          "Cannot noop or any extend with non-integer arguments!");
03451   assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
03452          "getNoopOrAnyExtend cannot truncate!");
03453   if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
03454     return V;  // No conversion
03455   return getAnyExtendExpr(V, Ty);
03456 }
03457 
03458 /// getTruncateOrNoop - Return a SCEV corresponding to a conversion of the
03459 /// input value to the specified type.  The conversion must not be widening.
03460 const SCEV *
03461 ScalarEvolution::getTruncateOrNoop(const SCEV *V, Type *Ty) {
03462   Type *SrcTy = V->getType();
03463   assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
03464          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
03465          "Cannot truncate or noop with non-integer arguments!");
03466   assert(getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) &&
03467          "getTruncateOrNoop cannot extend!");
03468   if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
03469     return V;  // No conversion
03470   return getTruncateExpr(V, Ty);
03471 }
03472 
03473 /// getUMaxFromMismatchedTypes - Promote the operands to the wider of
03474 /// the types using zero-extension, and then perform a umax operation
03475 /// with them.
03476 const SCEV *ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV *LHS,
03477                                                         const SCEV *RHS) {
03478   const SCEV *PromotedLHS = LHS;
03479   const SCEV *PromotedRHS = RHS;
03480 
03481   if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
03482     PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
03483   else
03484     PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType());
03485 
03486   return getUMaxExpr(PromotedLHS, PromotedRHS);
03487 }
03488 
03489 /// getUMinFromMismatchedTypes - Promote the operands to the wider of
03490 /// the types using zero-extension, and then perform a umin operation
03491 /// with them.
03492 const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS,
03493                                                         const SCEV *RHS) {
03494   const SCEV *PromotedLHS = LHS;
03495   const SCEV *PromotedRHS = RHS;
03496 
03497   if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
03498     PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
03499   else
03500     PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType());
03501 
03502   return getUMinExpr(PromotedLHS, PromotedRHS);
03503 }
03504 
03505 /// getPointerBase - Transitively follow the chain of pointer-type operands
03506 /// until reaching a SCEV that does not have a single pointer operand. This
03507 /// returns a SCEVUnknown pointer for well-formed pointer-type expressions,
03508 /// but corner cases do exist.
03509 const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) {
03510   // A pointer operand may evaluate to a nonpointer expression, such as null.
03511   if (!V->getType()->isPointerTy())
03512     return V;
03513 
03514   if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(V)) {
03515     return getPointerBase(Cast->getOperand());
03516   }
03517   else if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(V)) {
03518     const SCEV *PtrOp = nullptr;
03519     for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
03520          I != E; ++I) {
03521       if ((*I)->getType()->isPointerTy()) {
03522         // Cannot find the base of an expression with multiple pointer operands.
03523         if (PtrOp)
03524           return V;
03525         PtrOp = *I;
03526       }
03527     }
03528     if (!PtrOp)
03529       return V;
03530     return getPointerBase(PtrOp);
03531   }
03532   return V;
03533 }
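      // E.g. (illustrative): for the expression (%p + 4 * %i) the only
      // pointer operand is %p, so this returns the SCEVUnknown for %p.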
03534 
03535 /// PushDefUseChildren - Push users of the given Instruction
03536 /// onto the given Worklist.
03537 static void
03538 PushDefUseChildren(Instruction *I,
03539                    SmallVectorImpl<Instruction *> &Worklist) {
03540   // Push the def-use children onto the Worklist stack.
03541   for (User *U : I->users())
03542     Worklist.push_back(cast<Instruction>(U));
03543 }
03544 
03545 /// ForgetSymbolicName - This looks up computed SCEV values for all
03546 /// instructions that depend on the given instruction and removes them from
03547 /// the ValueExprMap if they reference SymName. This is used during PHI
03548 /// resolution.
03549 void
03550 ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) {
03551   SmallVector<Instruction *, 16> Worklist;
03552   PushDefUseChildren(PN, Worklist);
03553 
03554   SmallPtrSet<Instruction *, 8> Visited;
03555   Visited.insert(PN);
03556   while (!Worklist.empty()) {
03557     Instruction *I = Worklist.pop_back_val();
03558     if (!Visited.insert(I).second)
03559       continue;
03560 
03561     ValueExprMapType::iterator It =
03562       ValueExprMap.find_as(static_cast<Value *>(I));
03563     if (It != ValueExprMap.end()) {
03564       const SCEV *Old = It->second;
03565 
03566       // Short-circuit the def-use traversal if the symbolic name
03567       // ceases to appear in expressions.
03568       if (Old != SymName && !hasOperand(Old, SymName))
03569         continue;
03570 
03571       // SCEVUnknown for a PHI either means that it has an unrecognized
03572       // structure, it's a PHI that's in the process of being computed
03573       // by createNodeForPHI, or it's a single-value PHI. In the first case,
03574       // additional loop trip count information isn't going to change anything.
03575       // In the second case, createNodeForPHI will perform the necessary
03576       // updates on its own when it gets to that point. In the third, we do
03577       // want to forget the SCEVUnknown.
03578       if (!isa<PHINode>(I) ||
03579           !isa<SCEVUnknown>(Old) ||
03580           (I != PN && Old == SymName)) {
03581         forgetMemoizedResults(Old);
03582         ValueExprMap.erase(It);
03583       }
03584     }
03585 
03586     PushDefUseChildren(I, Worklist);
03587   }
03588 }
03589 
03590 /// createNodeForPHI - PHI nodes have two cases.  Either the PHI node exists in
03591 /// a loop header, making it a potential recurrence, or it doesn't.
03592 ///
03593 const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
03594   if (const Loop *L = LI->getLoopFor(PN->getParent()))
03595     if (L->getHeader() == PN->getParent()) {
03596       // The loop may have multiple entrances or multiple exits; we can analyze
03597       // this phi as an addrec if it has a unique entry value and a unique
03598       // backedge value.
03599       Value *BEValueV = nullptr, *StartValueV = nullptr;
03600       for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
03601         Value *V = PN->getIncomingValue(i);
03602         if (L->contains(PN->getIncomingBlock(i))) {
03603           if (!BEValueV) {
03604             BEValueV = V;
03605           } else if (BEValueV != V) {
03606             BEValueV = nullptr;
03607             break;
03608           }
03609         } else if (!StartValueV) {
03610           StartValueV = V;
03611         } else if (StartValueV != V) {
03612           StartValueV = nullptr;
03613           break;
03614         }
03615       }
03616       if (BEValueV && StartValueV) {
03617         // While we are analyzing this PHI node, handle its value symbolically.
03618         const SCEV *SymbolicName = getUnknown(PN);
03619         assert(ValueExprMap.find_as(PN) == ValueExprMap.end() &&
03620                "PHI node already processed?");
03621         ValueExprMap.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName));
03622 
03623         // Using this symbolic name for the PHI, analyze the value coming around
03624         // the back-edge.
03625         const SCEV *BEValue = getSCEV(BEValueV);
03626 
03627         // NOTE: If BEValue is loop invariant, we know that the PHI node just
03628         // has a special value for the first iteration of the loop.
03629 
03630         // If the value coming around the backedge is an add with the symbolic
03631         // value we just inserted, then we found a simple induction variable!
03632         if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BEValue)) {
03633           // If there is a single occurrence of the symbolic value, replace it
03634           // with a recurrence.
03635           unsigned FoundIndex = Add->getNumOperands();
03636           for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
03637             if (Add->getOperand(i) == SymbolicName)
03638               if (FoundIndex == e) {
03639                 FoundIndex = i;
03640                 break;
03641               }
03642 
03643           if (FoundIndex != Add->getNumOperands()) {
03644             // Create an add with everything but the specified operand.
03645             SmallVector<const SCEV *, 8> Ops;
03646             for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
03647               if (i != FoundIndex)
03648                 Ops.push_back(Add->getOperand(i));
03649             const SCEV *Accum = getAddExpr(Ops);
03650 
03651             // This is not a valid addrec if the step amount is varying each
03652             // loop iteration, but is not itself an addrec in this loop.
03653             if (isLoopInvariant(Accum, L) ||
03654                 (isa<SCEVAddRecExpr>(Accum) &&
03655                  cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
03656               SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
03657 
03658               // If the increment doesn't overflow, then neither the addrec nor
03659               // the post-increment will overflow.
03660               if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV)) {
03661                 if (OBO->getOperand(0) == PN) {
03662                   if (OBO->hasNoUnsignedWrap())
03663                     Flags = setFlags(Flags, SCEV::FlagNUW);
03664                   if (OBO->hasNoSignedWrap())
03665                     Flags = setFlags(Flags, SCEV::FlagNSW);
03666                 }
03667               } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(BEValueV)) {
03668                 // If the increment is an inbounds GEP, then we know the address
03669                 // space cannot be wrapped around. We cannot make any guarantee
03670                 // about signed or unsigned overflow because pointers are
03671                 // unsigned but we may have a negative index from the base
03672                 // pointer. We can guarantee that no unsigned wrap occurs if the
03673                 // indices form a positive value.
03674                 if (GEP->isInBounds() && GEP->getOperand(0) == PN) {
03675                   Flags = setFlags(Flags, SCEV::FlagNW);
03676 
03677                   const SCEV *Ptr = getSCEV(GEP->getPointerOperand());
03678                   if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr)))
03679                     Flags = setFlags(Flags, SCEV::FlagNUW);
03680                 }
03681 
03682                 // We cannot transfer nuw and nsw flags from subtraction
03683                 // operations -- sub nuw X, Y is not the same as add nuw X, -Y
03684                 // for instance.
03685               }
03686 
03687               const SCEV *StartVal = getSCEV(StartValueV);
03688               const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);
03689 
03690               // Since the no-wrap flags are on the increment, they apply to the
03691               // post-incremented value as well.
03692               if (isLoopInvariant(Accum, L))
03693                 (void)getAddRecExpr(getAddExpr(StartVal, Accum),
03694                                     Accum, L, Flags);
03695 
03696               // Okay, for the entire analysis of this edge we assumed the PHI
03697               // to be symbolic.  We now need to go back and purge all of the
03698               // entries for the scalars that use the symbolic expression.
03699               ForgetSymbolicName(PN, SymbolicName);
03700               ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
03701               return PHISCEV;
03702             }
03703           }
03704         } else if (const SCEVAddRecExpr *AddRec =
03705                      dyn_cast<SCEVAddRecExpr>(BEValue)) {
03706           // Otherwise, this could be a loop like this:
03707           //     i = 0;  for (j = 1; ..; ++j) { ....  i = j; }
03708           // In this case, j = {1,+,1}  and BEValue is j.
03709           // Because the other in-value of i (0) fits the evolution of BEValue,
03710           // i really is an addrec evolution.
03711           if (AddRec->getLoop() == L && AddRec->isAffine()) {
03712             const SCEV *StartVal = getSCEV(StartValueV);
03713 
03714             // If StartVal = j.start - j.stride, we can use StartVal as the
03715             // initial value of the addrec evolution.
03716             if (StartVal == getMinusSCEV(AddRec->getOperand(0),
03717                                          AddRec->getOperand(1))) {
03718               // FIXME: For constant StartVal, we should be able to infer
03719               // no-wrap flags.
03720               const SCEV *PHISCEV =
03721                 getAddRecExpr(StartVal, AddRec->getOperand(1), L,
03722                               SCEV::FlagAnyWrap);
03723 
03724               // Okay, for the entire analysis of this edge we assumed the PHI
03725               // to be symbolic.  We now need to go back and purge all of the
03726               // entries for the scalars that use the symbolic expression.
03727               ForgetSymbolicName(PN, SymbolicName);
03728               ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
03729               return PHISCEV;
03730             }
03731           }
03732         }
03733       }
03734     }
03735 
03736   // If the PHI has a single incoming value, follow that value, unless the
03737   // PHI's incoming blocks are in a different loop, in which case doing so
03738   // risks breaking LCSSA form. Instcombine would normally zap these, but
03739   // it doesn't have DominatorTree information, so it may miss cases.
03740   if (Value *V =
03741           SimplifyInstruction(PN, F->getParent()->getDataLayout(), TLI, DT, AC))
03742     if (LI->replacementPreservesLCSSAForm(PN, V))
03743       return getSCEV(V);
03744 
03745   // If it's not a loop phi, we can't handle it yet.
03746   return getUnknown(PN);
03747 }
03748 
03749 /// createNodeForGEP - Expand GEP instructions into add and multiply
03750 /// operations. This allows them to be analyzed by regular SCEV code.
03751 ///
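      /// As an illustrative sketch (hypothetical IR), a GEP like
      ///
      ///   %addr = getelementptr inbounds i32, i32* %base, i64 %i
      ///
      /// is modeled as the SCEV (%base + 4 * %i): the base pointer plus the
      /// index scaled by the element size, so the ordinary add and multiply
      /// folders apply to address arithmetic too.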
03752 const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
03753   Value *Base = GEP->getOperand(0);
03754   // Don't attempt to analyze GEPs over unsized objects.
03755   if (!Base->getType()->getPointerElementType()->isSized())
03756     return getUnknown(GEP);
03757 
03758   SmallVector<const SCEV *, 4> IndexExprs;
03759   for (auto Index = GEP->idx_begin(); Index != GEP->idx_end(); ++Index)
03760     IndexExprs.push_back(getSCEV(*Index));
03761   return getGEPExpr(GEP->getSourceElementType(), getSCEV(Base), IndexExprs,
03762                     GEP->isInBounds());
03763 }
03764 
03765 /// GetMinTrailingZeros - Determine the minimum number of zero bits that S is
03766 /// guaranteed to end in (at every loop iteration).  It is, at the same time,
03767 /// the minimum number of times S is divisible by 2.  For example, given {4,+,8}
03768 /// it returns 2.  If S is guaranteed to be 0, it returns the bitwidth of S.
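      /// Concretely, {4,+,8} takes the values 4, 12, 20, 28, ...; every one is
      /// divisible by 4 (two trailing zero bits) but not all by 8, matching
      /// min(tz(4), tz(8)) = min(2, 3) = 2 from the addrec case below.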
03769 uint32_t
03770 ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
03771   if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
03772     return C->getValue()->getValue().countTrailingZeros();
03773 
03774   if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(S))
03775     return std::min(GetMinTrailingZeros(T->getOperand()),
03776                     (uint32_t)getTypeSizeInBits(T->getType()));
03777 
03778   if (const SCEVZeroExtendExpr *E = dyn_cast<SCEVZeroExtendExpr>(S)) {
03779     uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
03780     return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ?
03781              getTypeSizeInBits(E->getType()) : OpRes;
03782   }
03783 
03784   if (const SCEVSignExtendExpr *E = dyn_cast<SCEVSignExtendExpr>(S)) {
03785     uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
03786     return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ?
03787              getTypeSizeInBits(E->getType()) : OpRes;
03788   }
03789 
03790   if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
03791     // The result is the min of all operands' results.
03792     uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
03793     for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
03794       MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
03795     return MinOpRes;
03796   }
03797 
03798   if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
03799     // The result is the sum of all operands' results.
03800     uint32_t SumOpRes = GetMinTrailingZeros(M->getOperand(0));
03801     uint32_t BitWidth = getTypeSizeInBits(M->getType());
03802     for (unsigned i = 1, e = M->getNumOperands();
03803          SumOpRes != BitWidth && i != e; ++i)
03804       SumOpRes = std::min(SumOpRes + GetMinTrailingZeros(M->getOperand(i)),
03805                           BitWidth);
03806     return SumOpRes;
03807   }
03808 
03809   if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
03810     // The result is the min of all operands' results.
03811     uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
03812     for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
03813       MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
03814     return MinOpRes;
03815   }
03816 
03817   if (const SCEVSMaxExpr *M = dyn_cast<SCEVSMaxExpr>(S)) {
03818     // The result is the min of all operands' results.
03819     uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
03820     for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
03821       MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
03822     return MinOpRes;
03823   }
03824 
03825   if (const SCEVUMaxExpr *M = dyn_cast<SCEVUMaxExpr>(S)) {
03826     // The result is the min of all operands' results.
03827     uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
03828     for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
03829       MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
03830     return MinOpRes;
03831   }
03832 
03833   if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
03834     // For a SCEVUnknown, ask ValueTracking.
03835     unsigned BitWidth = getTypeSizeInBits(U->getType());
03836     APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
03837     computeKnownBits(U->getValue(), Zeros, Ones,
03838                      F->getParent()->getDataLayout(), 0, AC, nullptr, DT);
03839     return Zeros.countTrailingOnes();
03840   }
03841 
03842   // SCEVUDivExpr
03843   return 0;
03844 }
03845 
03846 /// GetRangeFromMetadata - Helper method to assign a range to V from
03847 /// metadata present in the IR.
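      /// For example (illustrative IR), a load annotated with
      ///
      ///   %v = load i8, i8* %p, !range !0
      ///   !0 = !{i8 0, i8 10, i8 64, i8 100}
      ///
      /// yields the union of [0, 10) and [64, 100) as the returned range.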
03848 static Optional<ConstantRange> GetRangeFromMetadata(Value *V) {
03849   if (Instruction *I = dyn_cast<Instruction>(V)) {
03850     if (MDNode *MD = I->getMetadata(LLVMContext::MD_range)) {
03851       ConstantRange TotalRange(
03852           cast<IntegerType>(I->getType())->getBitWidth(), false);
03853 
03854       unsigned NumRanges = MD->getNumOperands() / 2;
03855       assert(NumRanges >= 1);
03856 
03857       for (unsigned i = 0; i < NumRanges; ++i) {
03858         ConstantInt *Lower =
03859             mdconst::extract<ConstantInt>(MD->getOperand(2 * i + 0));
03860         ConstantInt *Upper =
03861             mdconst::extract<ConstantInt>(MD->getOperand(2 * i + 1));
03862         ConstantRange Range(Lower->getValue(), Upper->getValue());
03863         TotalRange = TotalRange.unionWith(Range);
03864       }
03865 
03866       return TotalRange;
03867     }
03868   }
03869 
03870   return None;
03871 }
03872 
03873 /// getRange - Determine the range for a particular SCEV.  If SignHint is
03874 /// HINT_RANGE_UNSIGNED (resp. HINT_RANGE_SIGNED) then getRange prefers ranges
03875 /// with a "cleaner" unsigned (resp. signed) representation.
03876 ///
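      /// For instance, an i8 value confined to {-2, -1, 0, 1} is the compact
      /// signed range [-2, 2) but the wrapping unsigned range [254, 2); the
      /// hint picks the cache and representation the caller will consume.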
03877 ConstantRange
03878 ScalarEvolution::getRange(const SCEV *S,
03879                           ScalarEvolution::RangeSignHint SignHint) {
03880   DenseMap<const SCEV *, ConstantRange> &Cache =
03881       SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED ? UnsignedRanges
03882                                                        : SignedRanges;
03883 
03884   // See if we've computed this range already.
03885   DenseMap<const SCEV *, ConstantRange>::iterator I = Cache.find(S);
03886   if (I != Cache.end())
03887     return I->second;
03888 
03889   if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
03890     return setRange(C, SignHint, ConstantRange(C->getValue()->getValue()));
03891 
03892   unsigned BitWidth = getTypeSizeInBits(S->getType());
03893   ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);
03894 
03895   // If the value has known zeros, the maximum value will have those known zeros
03896   // as well.
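        // E.g. with BitWidth = 8 and TZ = 2, the unsigned hint produces the
        // range [0, 253): 252 = 0b11111100 is the largest i8 value with two
        // trailing zero bits.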
03897   uint32_t TZ = GetMinTrailingZeros(S);
03898   if (TZ != 0) {
03899     if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED)
03900       ConservativeResult =
03901           ConstantRange(APInt::getMinValue(BitWidth),
03902                         APInt::getMaxValue(BitWidth).lshr(TZ).shl(TZ) + 1);
03903     else
03904       ConservativeResult = ConstantRange(
03905           APInt::getSignedMinValue(BitWidth),
03906           APInt::getSignedMaxValue(BitWidth).ashr(TZ).shl(TZ) + 1);
03907   }
03908 
03909   if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
03910     ConstantRange X = getRange(Add->getOperand(0), SignHint);
03911     for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
03912       X = X.add(getRange(Add->getOperand(i), SignHint));
03913     return setRange(Add, SignHint, ConservativeResult.intersectWith(X));
03914   }
03915 
03916   if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
03917     ConstantRange X = getRange(Mul->getOperand(0), SignHint);
03918     for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
03919       X = X.multiply(getRange(Mul->getOperand(i), SignHint));
03920     return setRange(Mul, SignHint, ConservativeResult.intersectWith(X));
03921   }
03922 
03923   if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
03924     ConstantRange X = getRange(SMax->getOperand(0), SignHint);
03925     for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
03926       X = X.smax(getRange(SMax->getOperand(i), SignHint));
03927     return setRange(SMax, SignHint, ConservativeResult.intersectWith(X));
03928   }
03929 
03930   if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
03931     ConstantRange X = getRange(UMax->getOperand(0), SignHint);
03932     for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
03933       X = X.umax(getRange(UMax->getOperand(i), SignHint));
03934     return setRange(UMax, SignHint, ConservativeResult.intersectWith(X));
03935   }
03936 
03937   if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
03938     ConstantRange X = getRange(UDiv->getLHS(), SignHint);
03939     ConstantRange Y = getRange(UDiv->getRHS(), SignHint);
03940     return setRange(UDiv, SignHint,
03941                     ConservativeResult.intersectWith(X.udiv(Y)));
03942   }
03943 
03944   if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
03945     ConstantRange X = getRange(ZExt->getOperand(), SignHint);
03946     return setRange(ZExt, SignHint,
03947                     ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
03948   }
03949 
03950   if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
03951     ConstantRange X = getRange(SExt->getOperand(), SignHint);
03952     return setRange(SExt, SignHint,
03953                     ConservativeResult.intersectWith(X.signExtend(BitWidth)));
03954   }
03955 
03956   if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
03957     ConstantRange X = getRange(Trunc->getOperand(), SignHint);
03958     return setRange(Trunc, SignHint,
03959                     ConservativeResult.intersectWith(X.truncate(BitWidth)));
03960   }
03961 
03962   if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
03963     // If there's no unsigned wrap, the value will never be less than its
03964     // initial value.
03965     if (AddRec->getNoWrapFlags(SCEV::FlagNUW))
03966       if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart()))
03967         if (!C->getValue()->isZero())
03968           ConservativeResult =
03969             ConservativeResult.intersectWith(
03970               ConstantRange(C->getValue()->getValue(), APInt(BitWidth, 0)));
03971 
03972     // If there's no signed wrap, and all the operands have the same sign or
03973     // zero, the value won't ever change sign.
03974     if (AddRec->getNoWrapFlags(SCEV::FlagNSW)) {
03975       bool AllNonNeg = true;
03976       bool AllNonPos = true;
03977       for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
03978         if (!isKnownNonNegative(AddRec->getOperand(i))) AllNonNeg = false;
03979         if (!isKnownNonPositive(AddRec->getOperand(i))) AllNonPos = false;
03980       }
03981       if (AllNonNeg)
03982         ConservativeResult = ConservativeResult.intersectWith(
03983           ConstantRange(APInt(BitWidth, 0),
03984                         APInt::getSignedMinValue(BitWidth)));
03985       else if (AllNonPos)
03986         ConservativeResult = ConservativeResult.intersectWith(
03987           ConstantRange(APInt::getSignedMinValue(BitWidth),
03988                         APInt(BitWidth, 1)));
03989     }
03990 
03991     // TODO: non-affine addrec
03992     if (AddRec->isAffine()) {
03993       Type *Ty = AddRec->getType();
03994       const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
03995       if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
03996           getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
03997 
03998         // Check for overflow.  This must be done with ConstantRange arithmetic
03999         // because we could be called from within the ScalarEvolution overflow
04000         // checking code.
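              // Widening to 2 * BitWidth + 1 bits makes Start +
              // MaxBECount * Step immune to wrapping; if the wide computation
              // agrees with the extension of the narrow one, the narrow
              // ranges did not overflow.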
04001 
04002         MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);
04003         ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
04004         ConstantRange ZExtMaxBECountRange =
04005             MaxBECountRange.zextOrTrunc(BitWidth * 2 + 1);
04006 
04007         const SCEV *Start = AddRec->getStart();
04008         const SCEV *Step = AddRec->getStepRecurrence(*this);
04009         ConstantRange StepSRange = getSignedRange(Step);
04010         ConstantRange SExtStepSRange = StepSRange.sextOrTrunc(BitWidth * 2 + 1);
04011 
04012         ConstantRange StartURange = getUnsignedRange(Start);
04013         ConstantRange EndURange =
04014             StartURange.add(MaxBECountRange.multiply(StepSRange));
04015 
04016         // Check for unsigned overflow.
04017         ConstantRange ZExtStartURange =
04018             StartURange.zextOrTrunc(BitWidth * 2 + 1);
04019         ConstantRange ZExtEndURange = EndURange.zextOrTrunc(BitWidth * 2 + 1);
04020         if (ZExtStartURange.add(ZExtMaxBECountRange.multiply(SExtStepSRange)) ==
04021             ZExtEndURange) {
04022           APInt Min = APIntOps::umin(StartURange.getUnsignedMin(),
04023                                      EndURange.getUnsignedMin());
04024           APInt Max = APIntOps::umax(StartURange.getUnsignedMax(),
04025                                      EndURange.getUnsignedMax());
04026           bool IsFullRange = Min.isMinValue() && Max.isMaxValue();
04027           if (!IsFullRange)
04028             ConservativeResult =
04029                 ConservativeResult.intersectWith(ConstantRange(Min, Max + 1));
04030         }
04031 
04032         ConstantRange StartSRange = getSignedRange(Start);
04033         ConstantRange EndSRange =
04034             StartSRange.add(MaxBECountRange.multiply(StepSRange));
04035 
04036         // Check for signed overflow. This must be done with ConstantRange
04037         // arithmetic because we could be called from within the ScalarEvolution
04038         // overflow checking code.
04039         ConstantRange SExtStartSRange =
04040             StartSRange.sextOrTrunc(BitWidth * 2 + 1);
04041         ConstantRange SExtEndSRange = EndSRange.sextOrTrunc(BitWidth * 2 + 1);
04042         if (SExtStartSRange.add(ZExtMaxBECountRange.multiply(SExtStepSRange)) ==
04043             SExtEndSRange) {
04044           APInt Min = APIntOps::smin(StartSRange.getSignedMin(),
04045                                      EndSRange.getSignedMin());
04046           APInt Max = APIntOps::smax(StartSRange.getSignedMax(),
04047                                      EndSRange.getSignedMax());
04048           bool IsFullRange = Min.isMinSignedValue() && Max.isMaxSignedValue();
04049           if (!IsFullRange)
04050             ConservativeResult =
04051                 ConservativeResult.intersectWith(ConstantRange(Min, Max + 1));
04052         }
04053       }
04054     }
04055 
04056     return setRange(AddRec, SignHint, ConservativeResult);
04057   }
04058 
04059   if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
04060     // Check if the IR explicitly contains !range metadata.
04061     Optional<ConstantRange> MDRange = GetRangeFromMetadata(U->getValue());
04062     if (MDRange.hasValue())
04063       ConservativeResult = ConservativeResult.intersectWith(MDRange.getValue());
04064 
04065     // Split here to avoid paying the compile-time cost of calling both
04066     // computeKnownBits and ComputeNumSignBits.  This restriction can be lifted
04067     // if needed.
04068     const DataLayout &DL = F->getParent()->getDataLayout();
04069     if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED) {
04070       // For a SCEVUnknown, ask ValueTracking.
04071       APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
04072       computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, AC, nullptr, DT);
04073       if (Ones != ~Zeros + 1)
04074         ConservativeResult =
04075             ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1));
04076     } else {
04077       assert(SignHint == ScalarEvolution::HINT_RANGE_SIGNED &&
04078              "generalize as needed!");
04079       unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, AC, nullptr, DT);
04080       if (NS > 1)
04081         ConservativeResult = ConservativeResult.intersectWith(
04082             ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
04083                           APInt::getSignedMaxValue(BitWidth).ashr(NS - 1) + 1));
04084     }
04085 
04086     return setRange(U, SignHint, ConservativeResult);
04087   }
04088 
04089   return setRange(S, SignHint, ConservativeResult);
04090 }
04091 
04092 /// createSCEV - We know that there is no SCEV for the specified value.
04093 /// Analyze the expression.
04094 ///
04095 const SCEV *ScalarEvolution::createSCEV(Value *V) {
04096   if (!isSCEVable(V->getType()))
04097     return getUnknown(V);
04098 
04099   unsigned Opcode = Instruction::UserOp1;
04100   if (Instruction *I = dyn_cast<Instruction>(V)) {
04101     Opcode = I->getOpcode();
04102 
04103     // Don't attempt to analyze instructions in blocks that aren't
04104     // reachable. Such instructions don't matter, and they aren't required
04105     // to obey basic rules for definitions dominating uses, which this
04106     // analysis depends on.
04107     if (!DT->isReachableFromEntry(I->getParent()))
04108       return getUnknown(V);
04109   } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
04110     Opcode = CE->getOpcode();
04111   else if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
04112     return getConstant(CI);
04113   else if (isa<ConstantPointerNull>(V))
04114     return getConstant(V->getType(), 0);
04115   else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
04116     return GA->mayBeOverridden() ? getUnknown(V) : getSCEV(GA->getAliasee());
04117   else
04118     return getUnknown(V);
04119 
04120   Operator *U = cast<Operator>(V);
04121   switch (Opcode) {
04122   case Instruction::Add: {
04123     // The simple thing to do would be to just call getSCEV on both operands
04124     // and call getAddExpr with the result. However if we're looking at a
04125     // bunch of things all added together, this can be quite inefficient,
04126     // because it leads to N-1 getAddExpr calls for N ultimate operands.
04127     // Instead, gather up all the operands and make a single getAddExpr call.
04128     // LLVM IR canonical form means we need only traverse the left operands.
04129     //
04130     // Don't apply this instruction's NSW or NUW flags to the new
04131     // expression. The instruction may be guarded by control flow that the
04132     // no-wrap behavior depends on. Non-control-equivalent instructions can be
04133     // mapped to the same SCEV expression, and it would be incorrect to transfer
04134     // NSW/NUW semantics to those operations.
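          // For instance, ((a + b) + c) + d is gathered into the single call
          // getAddExpr({d, c, b, a}) rather than three nested getAddExpr
          // calls, one per '+'.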
04135     SmallVector<const SCEV *, 4> AddOps;
04136     AddOps.push_back(getSCEV(U->getOperand(1)));
04137     for (Value *Op = U->getOperand(0); ; Op = U->getOperand(0)) {
04138       unsigned Opcode = Op->getValueID() - Value::InstructionVal;
04139       if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
04140         break;
04141       U = cast<Operator>(Op);
04142       const SCEV *Op1 = getSCEV(U->getOperand(1));
04143       if (Opcode == Instruction::Sub)
04144         AddOps.push_back(getNegativeSCEV(Op1));
04145       else
04146         AddOps.push_back(Op1);
04147     }
04148     AddOps.push_back(getSCEV(U->getOperand(0)));
04149     return getAddExpr(AddOps);
04150   }
04151   case Instruction::Mul: {
04152     // Don't transfer NSW/NUW for the same reason as AddExpr.
04153     SmallVector<const SCEV *, 4> MulOps;
04154     MulOps.push_back(getSCEV(U->getOperand(1)));
04155     for (Value *Op = U->getOperand(0);
04156          Op->getValueID() == Instruction::Mul + Value::InstructionVal;
04157          Op = U->getOperand(0)) {
04158       U = cast<Operator>(Op);
04159       MulOps.push_back(getSCEV(U->getOperand(1)));
04160     }
04161     MulOps.push_back(getSCEV(U->getOperand(0)));
04162     return getMulExpr(MulOps);
04163   }
04164   case Instruction::UDiv:
04165     return getUDivExpr(getSCEV(U->getOperand(0)),
04166                        getSCEV(U->getOperand(1)));
04167   case Instruction::Sub:
04168     return getMinusSCEV(getSCEV(U->getOperand(0)),
04169                         getSCEV(U->getOperand(1)));
04170   case Instruction::And:
04171     // For an expression like x&255 that merely masks off the high bits,
04172     // use zext(trunc(x)) as the SCEV expression.
04173     if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
04174       if (CI->isNullValue())
04175         return getSCEV(U->getOperand(1));
04176       if (CI->isAllOnesValue())
04177         return getSCEV(U->getOperand(0));
04178       const APInt &A = CI->getValue();
04179 
04180       // Instcombine's ShrinkDemandedConstant may strip bits out of
04181       // constants, obscuring what would otherwise be a low-bits mask.
04182       // Use computeKnownBits to compute what ShrinkDemandedConstant
04183       // knew about to reconstruct a low-bits mask value.
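            // Illustrative i8 example: A = 0b01111100 has LZ = 1 and TZ = 2,
            // so EffectiveMask = 0b01111100 and the 'and' is modeled as a
            // bit-field extract: divide by 2^TZ, truncate to the 5 mask bits,
            // zero-extend, and multiply back by 2^TZ.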
04184       unsigned LZ = A.countLeadingZeros();
04185       unsigned TZ = A.countTrailingZeros();
04186       unsigned BitWidth = A.getBitWidth();
04187       APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
04188       computeKnownBits(U->getOperand(0), KnownZero, KnownOne,
04189                        F->getParent()->getDataLayout(), 0, AC, nullptr, DT);
04190 
04191       APInt EffectiveMask =
04192           APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ);
04193       if ((LZ != 0 || TZ != 0) && !((~A & ~KnownZero) & EffectiveMask)) {
04194         const SCEV *MulCount = getConstant(
04195             ConstantInt::get(getContext(), APInt::getOneBitSet(BitWidth, TZ)));
04196         return getMulExpr(
04197             getZeroExtendExpr(
04198                 getTruncateExpr(
04199                     getUDivExactExpr(getSCEV(U->getOperand(0)), MulCount),
04200                     IntegerType::get(getContext(), BitWidth - LZ - TZ)),
04201                 U->getType()),
04202             MulCount);
04203       }
04204     }
04205     break;
04206 
04207   case Instruction::Or:
04208     // If the RHS of the Or is a constant, we may have something like:
04209     // X*4+1 which got turned into X*4|1.  Handle this as an Add so loop
04210     // optimizations will transparently handle this case.
04211     //
04212     // In order for this transformation to be safe, the LHS must be of the
04213     // form X*(2^n) and the Or constant must be less than 2^n.
04214     if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
04215       const SCEV *LHS = getSCEV(U->getOperand(0));
04216       const APInt &CIVal = CI->getValue();
04217       if (GetMinTrailingZeros(LHS) >=
04218           (CIVal.getBitWidth() - CIVal.countLeadingZeros())) {
04219         // Build a plain add SCEV.
04220         const SCEV *S = getAddExpr(LHS, getSCEV(CI));
04221         // If the LHS of the add was an addrec and it has no-wrap flags,
04222         // transfer the no-wrap flags, since an or won't introduce a wrap.
04223         if (const SCEVAddRecExpr *NewAR = dyn_cast<SCEVAddRecExpr>(S)) {
04224           const SCEVAddRecExpr *OldAR = cast<SCEVAddRecExpr>(LHS);
04225           const_cast<SCEVAddRecExpr *>(NewAR)->setNoWrapFlags(
04226             OldAR->getNoWrapFlags());
04227         }
04228         return S;
04229       }
04230     }
04231     break;
04232   case Instruction::Xor:
04233     if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
04234       // If the RHS of the xor is a signbit, then this is just an add.
04235       // Instcombine turns add of signbit into xor as a strength reduction step.
04236       if (CI->getValue().isSignBit())
04237         return getAddExpr(getSCEV(U->getOperand(0)),
04238                           getSCEV(U->getOperand(1)));
04239 
04240       // If the RHS of xor is -1, then this is a not operation.
04241       if (CI->isAllOnesValue())
04242         return getNotSCEV(getSCEV(U->getOperand(0)));
04243 
04244       // Model xor(and(x, C), C) as and(~x, C), if C is a low-bits mask.
04245       // This is a variant of the check for xor with -1, and it handles
04246       // the case where instcombine has trimmed non-demanded bits out
04247       // of an xor with -1.
04248       if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U->getOperand(0)))
04249         if (ConstantInt *LCI = dyn_cast<ConstantInt>(BO->getOperand(1)))
04250           if (BO->getOpcode() == Instruction::And &&
04251               LCI->getValue() == CI->getValue())
04252             if (const SCEVZeroExtendExpr *Z =
04253                   dyn_cast<SCEVZeroExtendExpr>(getSCEV(U->getOperand(0)))) {
04254               Type *UTy = U->getType();
04255               const SCEV *Z0 = Z->getOperand();
04256               Type *Z0Ty = Z0->getType();
04257               unsigned Z0TySize = getTypeSizeInBits(Z0Ty);
04258 
04259               // If C is a low-bits mask, the zero extend is serving to
04260               // mask off the high bits. Complement the operand and
04261               // re-apply the zext.
04262               if (APIntOps::isMask(Z0TySize, CI->getValue()))
04263                 return getZeroExtendExpr(getNotSCEV(Z0), UTy);
04264 
04265               // If C is a single bit, it may be in the sign-bit position
04266               // before the zero-extend. In this case, represent the xor
04267               // using an add, which is equivalent, and re-apply the zext.
04268               APInt Trunc = CI->getValue().trunc(Z0TySize);
04269               if (Trunc.zext(getTypeSizeInBits(UTy)) == CI->getValue() &&
04270                   Trunc.isSignBit())
04271                 return getZeroExtendExpr(getAddExpr(Z0, getConstant(Trunc)),
04272                                          UTy);
04273             }
04274     }
04275     break;
04276 
04277   case Instruction::Shl:
04278     // Turn shift left of a constant amount into a multiply.
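          // For example, X << 3 becomes X * 8.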
04279     if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) {
04280       uint32_t BitWidth = cast<IntegerType>(U->getType())->getBitWidth();
04281 
04282       // If the shift count is not less than the bitwidth, the result of
04283       // the shift is undefined. Don't try to analyze it, because the
04284       // resolution chosen here may differ from the resolution chosen in
04285       // other parts of the compiler.
04286       if (SA->getValue().uge(BitWidth))
04287         break;
04288 
04289       Constant *X = ConstantInt::get(getContext(),
04290         APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
04291       return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X));
04292     }
04293     break;
04294 
04295   case Instruction::LShr:
04296     // Turn logical shift right of a constant into an unsigned divide.
04297     if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) {
04298       uint32_t BitWidth = cast<IntegerType>(U->getType())->getBitWidth();
04299 
04300       // If the shift count is not less than the bitwidth, the result of
04301       // the shift is undefined. Don't try to analyze it, because the
04302       // resolution chosen here may differ from the resolution chosen in
04303       // other parts of the compiler.
04304       if (SA->getValue().uge(BitWidth))
04305         break;
04306 
04307       Constant *X = ConstantInt::get(getContext(),
04308         APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
04309       return getUDivExpr(getSCEV(U->getOperand(0)), getSCEV(X));
04310     }
04311     break;
04312 
04313   case Instruction::AShr:
04314     // For a two-shift sext-inreg, use sext(trunc(x)) as the SCEV expression.
04315     if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1)))
04316       if (Operator *L = dyn_cast<Operator>(U->getOperand(0)))
04317         if (L->getOpcode() == Instruction::Shl &&
04318             L->getOperand(1) == U->getOperand(1)) {
04319           uint64_t BitWidth = getTypeSizeInBits(U->getType());
04320 
04321           // If the shift count is not less than the bitwidth, the result of
04322           // the shift is undefined. Don't try to analyze it, because the
04323           // resolution chosen here may differ from the resolution chosen in
04324           // other parts of the compiler.
04325           if (CI->getValue().uge(BitWidth))
04326             break;
04327 
04328           uint64_t Amt = BitWidth - CI->getZExtValue();
04329           if (Amt == BitWidth)
04330             return getSCEV(L->getOperand(0));       // shift by zero --> noop
04331           return
04332             getSignExtendExpr(getTruncateExpr(getSCEV(L->getOperand(0)),
04333                                               IntegerType::get(getContext(),
04334                                                                Amt)),
04335                               U->getType());
04336         }
04337     break;
04338 
04339   case Instruction::Trunc:
04340     return getTruncateExpr(getSCEV(U->getOperand(0)), U->getType());
04341 
04342   case Instruction::ZExt:
04343     return getZeroExtendExpr(getSCEV(U->getOperand(0)), U->getType());
04344 
04345   case Instruction::SExt:
04346     return getSignExtendExpr(getSCEV(U->getOperand(0)), U->getType());
04347 
04348   case Instruction::BitCast:
04349     // BitCasts are no-op casts so we just eliminate the cast.
04350     if (isSCEVable(U->getType()) && isSCEVable(U->getOperand(0)->getType()))
04351       return getSCEV(U->getOperand(0));
04352     break;
04353 
04354   // It's tempting to handle inttoptr and ptrtoint as no-ops, however this can
04355   // lead to pointer expressions which cannot safely be expanded to GEPs,
04356   // because ScalarEvolution doesn't respect the GEP aliasing rules when
04357   // simplifying integer expressions.
04358 
04359   case Instruction::GetElementPtr:
04360     return createNodeForGEP(cast<GEPOperator>(U));
04361 
04362   case Instruction::PHI:
04363     return createNodeForPHI(cast<PHINode>(U));
04364 
04365   case Instruction::Select:
04366     // This could be a smax or umax that was lowered earlier.
04367     // Try to recover it.
04368     if (ICmpInst *ICI = dyn_cast<ICmpInst>(U->getOperand(0))) {
04369       Value *LHS = ICI->getOperand(0);
04370       Value *RHS = ICI->getOperand(1);
04371       switch (ICI->getPredicate()) {
04372       case ICmpInst::ICMP_SLT:
04373       case ICmpInst::ICMP_SLE:
04374         std::swap(LHS, RHS);
04375         // fall through
04376       case ICmpInst::ICMP_SGT:
04377       case ICmpInst::ICMP_SGE:
04378         // a >s b ? a+x : b+x  ->  smax(a, b)+x
04379         // a >s b ? b+x : a+x  ->  smin(a, b)+x
04380         if (getTypeSizeInBits(LHS->getType()) <=
04381             getTypeSizeInBits(U->getType())) {
04382           const SCEV *LS = getNoopOrSignExtend(getSCEV(LHS), U->getType());
04383           const SCEV *RS = getNoopOrSignExtend(getSCEV(RHS), U->getType());
04384           const SCEV *LA = getSCEV(U->getOperand(1));
04385           const SCEV *RA = getSCEV(U->getOperand(2));
04386           const SCEV *LDiff = getMinusSCEV(LA, LS);
04387           const SCEV *RDiff = getMinusSCEV(RA, RS);
04388           if (LDiff == RDiff)
04389             return getAddExpr(getSMaxExpr(LS, RS), LDiff);
04390           LDiff = getMinusSCEV(LA, RS);
04391           RDiff = getMinusSCEV(RA, LS);
04392           if (LDiff == RDiff)
04393             return getAddExpr(getSMinExpr(LS, RS), LDiff);
04394         }
04395         break;
04396       case ICmpInst::ICMP_ULT:
04397       case ICmpInst::ICMP_ULE:
04398         std::swap(LHS, RHS);
04399         // fall through
04400       case ICmpInst::ICMP_UGT:
04401       case ICmpInst::ICMP_UGE:
04402         // a >u b ? a+x : b+x  ->  umax(a, b)+x
04403         // a >u b ? b+x : a+x  ->  umin(a, b)+x
04404         if (getTypeSizeInBits(LHS->getType()) <=
04405             getTypeSizeInBits(U->getType())) {
04406           const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), U->getType());
04407           const SCEV *RS = getNoopOrZeroExtend(getSCEV(RHS), U->getType());
04408           const SCEV *LA = getSCEV(U->getOperand(1));
04409           const SCEV *RA = getSCEV(U->getOperand(2));
04410           const SCEV *LDiff = getMinusSCEV(LA, LS);
04411           const SCEV *RDiff = getMinusSCEV(RA, RS);
04412           if (LDiff == RDiff)
04413             return getAddExpr(getUMaxExpr(LS, RS), LDiff);
04414           LDiff = getMinusSCEV(LA, RS);
04415           RDiff = getMinusSCEV(RA, LS);
04416           if (LDiff == RDiff)
04417             return getAddExpr(getUMinExpr(LS, RS), LDiff);
04418         }
04419         break;
04420       case ICmpInst::ICMP_NE:
04421         // n != 0 ? n+x : 1+x  ->  umax(n, 1)+x
04422         if (getTypeSizeInBits(LHS->getType()) <=
04423                 getTypeSizeInBits(U->getType()) &&
04424             isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) {
04425           const SCEV *One = getConstant(U->getType(), 1);
04426           const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), U->getType());
04427           const SCEV *LA = getSCEV(U->getOperand(1));
04428           const SCEV *RA = getSCEV(U->getOperand(2));
04429           const SCEV *LDiff = getMinusSCEV(LA, LS);
04430           const SCEV *RDiff = getMinusSCEV(RA, One);
04431           if (LDiff == RDiff)
04432             return getAddExpr(getUMaxExpr(One, LS), LDiff);
04433         }
04434         break;
04435       case ICmpInst::ICMP_EQ:
04436         // n == 0 ? 1+x : n+x  ->  umax(n, 1)+x
04437         if (getTypeSizeInBits(LHS->getType()) <=
04438                 getTypeSizeInBits(U->getType()) &&
04439             isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) {
04440           const SCEV *One = getConstant(U->getType(), 1);
04441           const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), U->getType());
04442           const SCEV *LA = getSCEV(U->getOperand(1));
04443           const SCEV *RA = getSCEV(U->getOperand(2));
04444           const SCEV *LDiff = getMinusSCEV(LA, One);
04445           const SCEV *RDiff = getMinusSCEV(RA, LS);
04446           if (LDiff == RDiff)
04447             return getAddExpr(getUMaxExpr(One, LS), LDiff);
04448         }
04449         break;
04450       default:
04451         break;
04452       }
04453     }
04454 
04455   default: // We cannot analyze this expression.
04456     break;
04457   }
04458 
04459   return getUnknown(V);
04460 }
04461 
04462 
04463 
04464 //===----------------------------------------------------------------------===//
04465 //                   Iteration Count Computation Code
04466 //
04467 
04468 unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L) {
04469   if (BasicBlock *ExitingBB = L->getExitingBlock())
04470     return getSmallConstantTripCount(L, ExitingBB);
04471 
04472   // No trip count information for multiple exits.
04473   return 0;
04474 }
04475 
04476 /// getSmallConstantTripCount - Returns the maximum trip count of this loop as a
04477 /// normal unsigned value. Returns 0 if the trip count is unknown or not
04478 /// constant. Will also return 0 if the maximum trip count is very large (>=
04479 /// 2^32).
04480 ///
04481 /// This "trip count" assumes that control exits via ExitingBlock. More
04482 /// precisely, it is the number of times that control may reach ExitingBlock
04483 /// before taking the branch. For loops with multiple exits, it may not be the
04484 /// number times that the loop header executes because the loop may exit
04485 /// prematurely via another branch.
04486 unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L,
04487                                                     BasicBlock *ExitingBlock) {
04488   assert(ExitingBlock && "Must pass a non-null exiting block!");
04489   assert(L->isLoopExiting(ExitingBlock) &&
04490          "Exiting block must actually branch out of the loop!");
04491   const SCEVConstant *ExitCount =
04492       dyn_cast<SCEVConstant>(getExitCount(L, ExitingBlock));
04493   if (!ExitCount)
04494     return 0;
04495 
04496   ConstantInt *ExitConst = ExitCount->getValue();
04497 
04498   // Guard against huge trip counts.
04499   if (ExitConst->getValue().getActiveBits() > 32)
04500     return 0;
04501 
04502   // In case of integer overflow, this returns 0, which is correct.
04503   return ((unsigned)ExitConst->getZExtValue()) + 1;
04504 }
04505 
04506 unsigned ScalarEvolution::getSmallConstantTripMultiple(Loop *L) {
04507   if (BasicBlock *ExitingBB = L->getExitingBlock())
04508     return getSmallConstantTripMultiple(L, ExitingBB);
04509 
04510   // No trip multiple information for multiple exits.
04511   return 0;
04512 }
04513 
04514 /// getSmallConstantTripMultiple - Returns the largest constant divisor of the
04515 /// trip count of this loop as a normal unsigned value, if possible. This
04516 /// means that the actual trip count is always a multiple of the returned
04517 /// value (don't forget the trip count could very well be zero as well!).
04518 ///
04519 /// Returns 1 if the trip count is unknown or not guaranteed to be a
04520 /// multiple of a constant (which is also the case if the trip count is
04521 /// simply constant; use getSmallConstantTripCount for that case). It will
04522 /// also return 1 if the trip count is very large (>= 2^32).
04523 ///
04524 /// As explained in the comments for getSmallConstantTripCount, this assumes
04525 /// that control exits the loop via ExitingBlock.
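      ///
      /// For example, a backedge-taken count of 4 * %n - 1 gives a trip count
      /// that folds to the multiply 4 * %n, so the returned multiple is 4. A
      /// trip count of 2 * %n + 2 stays an add under SCEV's distribution,
      /// however, and conservatively yields 1 (see the FIXME below).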
04526 unsigned
04527 ScalarEvolution::getSmallConstantTripMultiple(Loop *L,
04528                                               BasicBlock *ExitingBlock) {
04529   assert(ExitingBlock && "Must pass a non-null exiting block!");
04530   assert(L->isLoopExiting(ExitingBlock) &&
04531          "Exiting block must actually branch out of the loop!");
04532   const SCEV *ExitCount = getExitCount(L, ExitingBlock);
04533   if (ExitCount == getCouldNotCompute())
04534     return 1;
04535 
04536   // Get the trip count from the BE count by adding 1.
04537   const SCEV *TCMul = getAddExpr(ExitCount,
04538                                  getConstant(ExitCount->getType(), 1));
04539   // FIXME: SCEV distributes multiplication as V1*C1 + V2*C1. We could attempt
04540   // to factor simple cases.
04541   if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(TCMul))
04542     TCMul = Mul->getOperand(0);
04543 
04544   const SCEVConstant *MulC = dyn_cast<SCEVConstant>(TCMul);
04545   if (!MulC)
04546     return 1;
04547 
04548   ConstantInt *Result = MulC->getValue();
04549 
04550   // Guard against huge trip counts (this requires checking
04551   // for zero to handle the case where the backedge count == -1 and the
04552   // addition wraps).
04553   if (!Result || Result->getValue().getActiveBits() > 32 ||
04554       Result->getValue().getActiveBits() == 0)
04555     return 1;
04556 
04557   return (unsigned)Result->getZExtValue();
04558 }
04559 
04560 /// getExitCount - Get the expression for the number of loop iterations for
04561 /// which this loop is guaranteed not to exit via ExitingBlock. Otherwise
04562 /// return SCEVCouldNotCompute.
04563 const SCEV *ScalarEvolution::getExitCount(Loop *L, BasicBlock *ExitingBlock) {
04564   return getBackedgeTakenInfo(L).getExact(ExitingBlock, this);
04565 }
04566 
04567 /// getBackedgeTakenCount - If the specified loop has a predictable
04568 /// backedge-taken count, return it, otherwise return a SCEVCouldNotCompute
04569 /// object. The backedge-taken count is the number of times the loop header
04570 /// will be branched to from within the loop. This is one less than the
04571 /// trip count of the loop, since it doesn't count the first iteration,
04572 /// when the header is branched to from outside the loop.
04573 ///
04574 /// Note that it is not valid to call this method on a loop without a
04575 /// loop-invariant backedge-taken count (see
04576 /// hasLoopInvariantBackedgeTakenCount).
04577 ///
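      /// For example, a loop whose body executes 5 times has a trip count of 5
      /// but a backedge-taken count of 4: the header is reached once from
      /// outside the loop and 4 times from the backedge.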
04578 const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L) {
04579   return getBackedgeTakenInfo(L).getExact(this);
04580 }
04581 
04582 /// getMaxBackedgeTakenCount - Similar to getBackedgeTakenCount, except
04583 /// return the least SCEV value that is known never to be less than the
04584 /// actual backedge taken count.
04585 const SCEV *ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) {
04586   return getBackedgeTakenInfo(L).getMax(this);
04587 }
04588 
04589 /// PushLoopPHIs - Push PHI nodes in the header of the given loop
04590 /// onto the given Worklist.
04591 static void
04592 PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) {
04593   BasicBlock *Header = L->getHeader();
04594 
04595   // Push all Loop-header PHIs onto the Worklist stack.
04596   for (BasicBlock::iterator I = Header->begin();
04597        PHINode *PN = dyn_cast<PHINode>(I); ++I)
04598     Worklist.push_back(PN);
04599 }
04600 
04601 const ScalarEvolution::BackedgeTakenInfo &
04602 ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
04603   // Initially insert an invalid entry for this loop. If the insertion
04604   // succeeds, proceed to actually compute a backedge-taken count and
04605   // update the value. The temporary CouldNotCompute value tells SCEV
04606   // code elsewhere that it shouldn't attempt to request a new
04607   // backedge-taken count, which could result in infinite recursion.
04608   std::pair<DenseMap<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair =
04609     BackedgeTakenCounts.insert(std::make_pair(L, BackedgeTakenInfo()));
04610   if (!Pair.second)
04611     return Pair.first->second;
04612 
04613   // ComputeBackedgeTakenCount may allocate memory for its result. Inserting it
04614   // into the BackedgeTakenCounts map transfers ownership. Otherwise, the result
04615   // must be cleared in this scope.
04616   BackedgeTakenInfo Result = ComputeBackedgeTakenCount(L);
04617 
04618   if (Result.getExact(this) != getCouldNotCompute()) {
04619     assert(isLoopInvariant(Result.getExact(this), L) &&
04620            isLoopInvariant(Result.getMax(this), L) &&
04621            "Computed backedge-taken count isn't loop invariant for loop!");
04622     ++NumTripCountsComputed;
04623   }
04624   else if (Result.getMax(this) == getCouldNotCompute() &&
04625            isa<PHINode>(L->getHeader()->begin())) {
04626     // Only count loops that have phi nodes as not being computable.
04627     ++NumTripCountsNotComputed;
04628   }
04629 
04630   // Now that we know more about the trip count for this loop, forget any
04631   // existing SCEV values for PHI nodes in this loop since they are only
04632   // conservative estimates made without the benefit of trip count
04633   // information. This is similar to the code in forgetLoop, except that
04634   // it handles SCEVUnknown PHI nodes specially.
04635   if (Result.hasAnyInfo()) {
04636     SmallVector<Instruction *, 16> Worklist;
04637     PushLoopPHIs(L, Worklist);
04638 
04639     SmallPtrSet<Instruction *, 8> Visited;
04640     while (!Worklist.empty()) {
04641       Instruction *I = Worklist.pop_back_val();
04642       if (!Visited.insert(I).second)
04643         continue;
04644 
04645       ValueExprMapType::iterator It =
04646         ValueExprMap.find_as(static_cast<Value *>(I));
04647       if (It != ValueExprMap.end()) {
04648         const SCEV *Old = It->second;
04649 
04650         // SCEVUnknown for a PHI either means that it has an unrecognized
04651         // structure, or it's a PHI that's in the process of being computed
04652         // by createNodeForPHI.  In the former case, additional loop trip
04653         // count information isn't going to change anything. In the latter
04654         // case, createNodeForPHI will perform the necessary updates on its
04655         // own when it gets to that point.
04656         if (!isa<PHINode>(I) || !isa<SCEVUnknown>(Old)) {
04657           forgetMemoizedResults(Old);
04658           ValueExprMap.erase(It);
04659         }
04660         if (PHINode *PN = dyn_cast<PHINode>(I))
04661           ConstantEvolutionLoopExitValue.erase(PN);
04662       }
04663 
04664       PushDefUseChildren(I, Worklist);
04665     }
04666   }
04667 
04668   // Re-lookup the insert position, since the call to
04669   // ComputeBackedgeTakenCount above could result in a
04670   // recursive call to getBackedgeTakenInfo (on a different
04671   // loop), which would invalidate the iterator computed
04672   // earlier.
04673   return BackedgeTakenCounts.find(L)->second = Result;
04674 }
04675 
04676 /// forgetLoop - This method should be called by the client when it has
04677 /// changed a loop in a way that may affect ScalarEvolution's ability to
04678 /// compute a trip count, or if the loop is deleted.
04679 void ScalarEvolution::forgetLoop(const Loop *L) {
04680   // Drop any stored trip count value.
04681   DenseMap<const Loop*, BackedgeTakenInfo>::iterator BTCPos =
04682     BackedgeTakenCounts.find(L);
04683   if (BTCPos != BackedgeTakenCounts.end()) {
04684     BTCPos->second.clear();
04685     BackedgeTakenCounts.erase(BTCPos);
04686   }
04687 
04688   // Drop information about expressions based on loop-header PHIs.
04689   SmallVector<Instruction *, 16> Worklist;
04690   PushLoopPHIs(L, Worklist);
04691 
04692   SmallPtrSet<Instruction *, 8> Visited;
04693   while (!Worklist.empty()) {
04694     Instruction *I = Worklist.pop_back_val();
04695     if (!Visited.insert(I).second)
04696       continue;
04697 
04698     ValueExprMapType::iterator It =
04699       ValueExprMap.find_as(static_cast<Value *>(I));
04700     if (It != ValueExprMap.end()) {
04701       forgetMemoizedResults(It->second);
04702       ValueExprMap.erase(It);
04703       if (PHINode *PN = dyn_cast<PHINode>(I))
04704         ConstantEvolutionLoopExitValue.erase(PN);
04705     }
04706 
04707     PushDefUseChildren(I, Worklist);
04708   }
04709 
04710   // Forget all contained loops too, to avoid dangling entries in the
04711   // ValuesAtScopes map.
04712   for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
04713     forgetLoop(*I);
04714 }
04715 
04716 /// forgetValue - This method should be called by the client when it has
04717 /// changed a value in a way that may affect its value, or which may
04718 /// disconnect it from a def-use chain linking it to a loop.
04719 void ScalarEvolution::forgetValue(Value *V) {
04720   Instruction *I = dyn_cast<Instruction>(V);
04721   if (!I) return;
04722 
04723   // Drop information about expressions based on loop-header PHIs.
04724   SmallVector<Instruction *, 16> Worklist;
04725   Worklist.push_back(I);
04726 
04727   SmallPtrSet<Instruction *, 8> Visited;
04728   while (!Worklist.empty()) {
04729     I = Worklist.pop_back_val();
04730     if (!Visited.insert(I).second)
04731       continue;
04732 
04733     ValueExprMapType::iterator It =
04734       ValueExprMap.find_as(static_cast<Value *>(I));
04735     if (It != ValueExprMap.end()) {
04736       forgetMemoizedResults(It->second);
04737       ValueExprMap.erase(It);
04738       if (PHINode *PN = dyn_cast<PHINode>(I))
04739         ConstantEvolutionLoopExitValue.erase(PN);
04740     }
04741 
04742     PushDefUseChildren(I, Worklist);
04743   }
04744 }
04745 
04746 /// getExact - Get the exact loop backedge taken count considering all loop
04747 /// exits. A computable result can only be returned for loops with a single exit.
04748 /// Returning the minimum taken count among all exits is incorrect because one
04749 /// of the loop's exit limits may have been skipped. HowFarToZero assumes that
04750 /// the limit of each loop test is never skipped. This is a valid assumption as
04751 /// long as the loop exits via that test. For precise results, it is the
04752 /// caller's responsibility to specify the relevant loop exit using
04753 /// getExact(ExitingBlock, SE).
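      ///
      /// For example, if one computable exit reports a not-taken count of 3
      /// and another reports %n, the counts disagree and SCEVCouldNotCompute
      /// is returned; a count comes back only when every exit agrees.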
04754 const SCEV *
04755 ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const {
04756   // If any exits were not computable, the loop is not computable.
04757   if (!ExitNotTaken.isCompleteList()) return SE->getCouldNotCompute();
04758 
04759   // We need exactly one computable exit.
04760   if (!ExitNotTaken.ExitingBlock) return SE->getCouldNotCompute();
04761   assert(ExitNotTaken.ExactNotTaken && "uninitialized not-taken info");
04762 
04763   const SCEV *BECount = nullptr;
04764   for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
04765        ENT != nullptr; ENT = ENT->getNextExit()) {
04766 
04767     assert(ENT->ExactNotTaken != SE->getCouldNotCompute() && "bad exit SCEV");
04768 
04769     if (!BECount)
04770       BECount = ENT->ExactNotTaken;
04771     else if (BECount != ENT->ExactNotTaken)
04772       return SE->getCouldNotCompute();
04773   }
04774   assert(BECount && "Invalid not taken count for loop exit");
04775   return BECount;
04776 }
04777 
04778 /// getExact - Get the exact not taken count for this loop exit.
04779 const SCEV *
04780 ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock,
04781                                              ScalarEvolution *SE) const {
04782   for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
04783        ENT != nullptr; ENT = ENT->getNextExit()) {
04784 
04785     if (ENT->ExitingBlock == ExitingBlock)
04786       return ENT->ExactNotTaken;
04787   }
04788   return SE->getCouldNotCompute();
04789 }
04790 
04791 /// getMax - Get the max backedge taken count for the loop.
04792 const SCEV *
04793 ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const {
04794   return Max ? Max : SE->getCouldNotCompute();
04795 }
04796 
04797 bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S,
04798                                                     ScalarEvolution *SE) const {
04799   if (Max && Max != SE->getCouldNotCompute() && SE->hasOperand(Max, S))
04800     return true;
04801 
04802   if (!ExitNotTaken.ExitingBlock)
04803     return false;
04804 
04805   for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
04806        ENT != nullptr; ENT = ENT->getNextExit()) {
04807 
04808     if (ENT->ExactNotTaken != SE->getCouldNotCompute()
04809         && SE->hasOperand(ENT->ExactNotTaken, S)) {
04810       return true;
04811     }
04812   }
04813   return false;
04814 }
04815 
04816 /// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each
04817 /// computable exit into a persistent ExitNotTakenInfo array.
04818 ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
04819   SmallVectorImpl< std::pair<BasicBlock *, const SCEV *> > &ExitCounts,
04820   bool Complete, const SCEV *MaxCount) : Max(MaxCount) {
04821 
04822   if (!Complete)
04823     ExitNotTaken.setIncomplete();
04824 
04825   unsigned NumExits = ExitCounts.size();
04826   if (NumExits == 0) return;
04827 
04828   ExitNotTaken.ExitingBlock = ExitCounts[0].first;
04829   ExitNotTaken.ExactNotTaken = ExitCounts[0].second;
04830   if (NumExits == 1) return;
04831 
04832   // Handle the rare case of multiple computable exits.
04833   ExitNotTakenInfo *ENT = new ExitNotTakenInfo[NumExits-1];
04834 
04835   ExitNotTakenInfo *PrevENT = &ExitNotTaken;
04836   for (unsigned i = 1; i < NumExits; ++i, PrevENT = ENT, ++ENT) {
04837     PrevENT->setNextExit(ENT);
04838     ENT->ExitingBlock = ExitCounts[i].first;
04839     ENT->ExactNotTaken = ExitCounts[i].second;
04840   }
04841 }
04842 
04843 /// clear - Invalidate this result and free the ExitNotTakenInfo array.
04844 void ScalarEvolution::BackedgeTakenInfo::clear() {
04845   ExitNotTaken.ExitingBlock = nullptr;
04846   ExitNotTaken.ExactNotTaken = nullptr;
04847   delete[] ExitNotTaken.getNextExit();
04848 }
04849 
04850 /// ComputeBackedgeTakenCount - Compute the number of times the backedge
04851 /// of the specified loop will execute.
04852 ScalarEvolution::BackedgeTakenInfo
04853 ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
04854   SmallVector<BasicBlock *, 8> ExitingBlocks;
04855   L->getExitingBlocks(ExitingBlocks);
04856 
04857   SmallVector<std::pair<BasicBlock *, const SCEV *>, 4> ExitCounts;
04858   bool CouldComputeBECount = true;
04859   BasicBlock *Latch = L->getLoopLatch(); // may be NULL.
04860   const SCEV *MustExitMaxBECount = nullptr;
04861   const SCEV *MayExitMaxBECount = nullptr;
04862 
04863   // Compute the ExitLimit for each loop exit. Use this to populate ExitCounts
04864   // and compute maxBECount.
04865   for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
04866     BasicBlock *ExitBB = ExitingBlocks[i];
04867     ExitLimit EL = ComputeExitLimit(L, ExitBB);
04868 
04869     // 1. For each exit that can be computed, add an entry to ExitCounts.
04870     // CouldComputeBECount is true only if all exits can be computed.
04871     if (EL.Exact == getCouldNotCompute())
04872       // We couldn't compute an exact value for this exit, so
04873       // we won't be able to compute an exact value for the loop.
04874       CouldComputeBECount = false;
04875     else
04876       ExitCounts.push_back(std::make_pair(ExitBB, EL.Exact));
04877 
04878     // 2. Derive the loop's MaxBECount from each exit's max number of
04879     // non-exiting iterations. Partition the loop exits into two kinds:
04880     // LoopMustExits and LoopMayExits.
04881     //
04882     // If the exit dominates the loop latch, it is a LoopMustExit otherwise it
04883     // is a LoopMayExit.  If any computable LoopMustExit is found, then
04884     // MaxBECount is the minimum EL.Max of computable LoopMustExits. Otherwise,
04885     // MaxBECount is conservatively the maximum EL.Max, where CouldNotCompute is
04886     // considered greater than any computable EL.Max.
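          // For example, with two computable exits where only the first
          // dominates the latch (a LoopMustExit with EL.Max = 10) and the
          // second does not (a LoopMayExit with EL.Max = 100), MaxBECount is
          // 10: every iteration must get past the dominating exit.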
04887     if (EL.Max != getCouldNotCompute() && Latch &&
04888         DT->dominates(ExitBB, Latch)) {
04889       if (!MustExitMaxBECount)
04890         MustExitMaxBECount = EL.Max;
04891       else {
04892         MustExitMaxBECount =
04893           getUMinFromMismatchedTypes(MustExitMaxBECount, EL.Max);
04894       }
04895     } else if (MayExitMaxBECount != getCouldNotCompute()) {
04896       if (!MayExitMaxBECount || EL.Max == getCouldNotCompute())
04897         MayExitMaxBECount = EL.Max;
04898       else {
04899         MayExitMaxBECount =
04900           getUMaxFromMismatchedTypes(MayExitMaxBECount, EL.Max);
04901       }
04902     }
04903   }
04904   const SCEV *MaxBECount = MustExitMaxBECount ? MustExitMaxBECount :
04905     (MayExitMaxBECount ? MayExitMaxBECount : getCouldNotCompute());
04906   return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount);
04907 }
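
// Illustrative sketch (hypothetical loop, not from the code above): in
//
//   for (i = 0; i != n; ++i)
//     if (c[i] && d[i]) break;
//
// the "i != n" test sits in a block that dominates the latch, so it is a
// LoopMustExit and its EL.Max alone can serve as MaxBECount.  The "break"
// is reached only when c[i] holds, so its block does not dominate the
// latch; it is a LoopMayExit and is consulted for MaxBECount only when no
// computable LoopMustExit exists.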
04908 
04909 /// ComputeExitLimit - Compute the number of times the backedge of the specified
04910 /// loop will execute if it exits via the specified block.
04911 ScalarEvolution::ExitLimit
04912 ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {
04913 
04914   // Okay, we've chosen an exiting block.  See what condition causes us to
04915   // exit at this block and remember the exit block and whether all other targets
04916   // lead to the loop header.
04917   bool MustExecuteLoopHeader = true;
04918   BasicBlock *Exit = nullptr;
04919   for (succ_iterator SI = succ_begin(ExitingBlock), SE = succ_end(ExitingBlock);
04920        SI != SE; ++SI)
04921     if (!L->contains(*SI)) {
04922       if (Exit) // Multiple exit successors.
04923         return getCouldNotCompute();
04924       Exit = *SI;
04925     } else if (*SI != L->getHeader()) {
04926       MustExecuteLoopHeader = false;
04927     }
04928 
04929   // At this point, we know we have a conditional branch that determines whether
04930   // the loop is exited.  However, we don't know if the branch is executed each
04931   // time through the loop.  If not, then the execution count of the branch will
04932   // not be equal to the trip count of the loop.
04933   //
04934   // Currently we check for this by checking to see if the Exit branch goes to
04935   // the loop header.  If so, we know it will always execute the same number of
04936   // times as the loop.  We also handle the case where the exit block *is* the
04937   // loop header.  This is common for un-rotated loops.
04938   //
04939   // If both of those tests fail, walk up the unique predecessor chain to the
04940   // header, stopping if there is an edge that doesn't exit the loop. If the
04941   // header is reached, the execution count of the branch will be equal to the
04942   // trip count of the loop.
04943   //
04944   //  More extensive analysis could be done to handle more cases here.
04945   //
04946   if (!MustExecuteLoopHeader && ExitingBlock != L->getHeader()) {
04947     // The simple checks failed, try climbing the unique predecessor chain
04948     // up to the header.
04949     bool Ok = false;
04950     for (BasicBlock *BB = ExitingBlock; BB; ) {
04951       BasicBlock *Pred = BB->getUniquePredecessor();
04952       if (!Pred)
04953         return getCouldNotCompute();
04954       TerminatorInst *PredTerm = Pred->getTerminator();
04955       for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i) {
04956         BasicBlock *PredSucc = PredTerm->getSuccessor(i);
04957         if (PredSucc == BB)
04958           continue;
04959         // If the predecessor has a successor that isn't BB and isn't
04960         // outside the loop, assume the worst.
04961         if (L->contains(PredSucc))
04962           return getCouldNotCompute();
04963       }
04964       if (Pred == L->getHeader()) {
04965         Ok = true;
04966         break;
04967       }
04968       BB = Pred;
04969     }
04970     if (!Ok)
04971       return getCouldNotCompute();
04972   }
04973 
04974   bool IsOnlyExit = (L->getExitingBlock() != nullptr);
04975   TerminatorInst *Term = ExitingBlock->getTerminator();
04976   if (BranchInst *BI = dyn_cast<BranchInst>(Term)) {
04977     assert(BI->isConditional() && "If unconditional, it can't be in loop!");
04978     // Proceed to the next level to examine the exit condition expression.
04979     return ComputeExitLimitFromCond(L, BI->getCondition(), BI->getSuccessor(0),
04980                                     BI->getSuccessor(1),
04981                                     /*ControlsExit=*/IsOnlyExit);
04982   }
04983 
04984   if (SwitchInst *SI = dyn_cast<SwitchInst>(Term))
04985     return ComputeExitLimitFromSingleExitSwitch(L, SI, Exit,
04986                                                 /*ControlsExit=*/IsOnlyExit);
04987 
04988   return getCouldNotCompute();
04989 }
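
// Illustrative sketch (hypothetical loops): in an un-rotated loop such as
// "while (i != n) ++i;" the exiting block *is* the header, so the exit
// branch runs exactly once per iteration and the simple checks above
// suffice.  The predecessor walk covers shapes where the header falls
// through to the exiting block: each predecessor on the chain has no other
// successor inside the loop, so every completed iteration passes through
// the exit test and its execution count equals the trip count.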
04990 
04991 /// ComputeExitLimitFromCond - Compute the number of times the
04992 /// backedge of the specified loop will execute if its exit condition
04993 /// were a conditional branch of ExitCond, TBB, and FBB.
04994 ///
04995 /// @param ControlsExit is true if ExitCond directly controls the exit
04996 /// branch. In this case, we can assume that the loop exits only if the
04997 /// condition is true and can infer that failing to meet the condition prior to
04998 /// integer wraparound results in undefined behavior.
04999 ScalarEvolution::ExitLimit
05000 ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
05001                                           Value *ExitCond,
05002                                           BasicBlock *TBB,
05003                                           BasicBlock *FBB,
05004                                           bool ControlsExit) {
05005   // Check if the controlling expression for this loop is an And or Or.
05006   if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) {
05007     if (BO->getOpcode() == Instruction::And) {
05008       // Recurse on the operands of the and.
05009       bool EitherMayExit = L->contains(TBB);
05010       ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
05011                                                ControlsExit && !EitherMayExit);
05012       ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
05013                                                ControlsExit && !EitherMayExit);
05014       const SCEV *BECount = getCouldNotCompute();
05015       const SCEV *MaxBECount = getCouldNotCompute();
05016       if (EitherMayExit) {
05017         // Both conditions must be true for the loop to continue executing.
05018         // Choose the less conservative count.
05019         if (EL0.Exact == getCouldNotCompute() ||
05020             EL1.Exact == getCouldNotCompute())
05021           BECount = getCouldNotCompute();
05022         else
05023           BECount = getUMinFromMismatchedTypes(EL0.Exact, EL1.Exact);
05024         if (EL0.Max == getCouldNotCompute())
05025           MaxBECount = EL1.Max;
05026         else if (EL1.Max == getCouldNotCompute())
05027           MaxBECount = EL0.Max;
05028         else
05029           MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max);
05030       } else {
05031         // Both conditions must be true at the same time for the loop to exit.
05032         // For now, be conservative.
05033         assert(L->contains(FBB) && "Loop block has no successor in loop!");
05034         if (EL0.Max == EL1.Max)
05035           MaxBECount = EL0.Max;
05036         if (EL0.Exact == EL1.Exact)
05037           BECount = EL0.Exact;
05038       }
05039 
05040       return ExitLimit(BECount, MaxBECount);
05041     }
05042     if (BO->getOpcode() == Instruction::Or) {
05043       // Recurse on the operands of the or.
05044       bool EitherMayExit = L->contains(FBB);
05045       ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
05046                                                ControlsExit && !EitherMayExit);
05047       ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
05048                                                ControlsExit && !EitherMayExit);
05049       const SCEV *BECount = getCouldNotCompute();
05050       const SCEV *MaxBECount = getCouldNotCompute();
05051       if (EitherMayExit) {
05052         // Both conditions must be false for the loop to continue executing.
05053         // Choose the less conservative count.
05054         if (EL0.Exact == getCouldNotCompute() ||
05055             EL1.Exact == getCouldNotCompute())
05056           BECount = getCouldNotCompute();
05057         else
05058           BECount = getUMinFromMismatchedTypes(EL0.Exact, EL1.Exact);
05059         if (EL0.Max == getCouldNotCompute())
05060           MaxBECount = EL1.Max;
05061         else if (EL1.Max == getCouldNotCompute())
05062           MaxBECount = EL0.Max;
05063         else
05064           MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max);
05065       } else {
05066         // Both conditions must be false at the same time for the loop to exit.
05067         // For now, be conservative.
05068         assert(L->contains(TBB) && "Loop block has no successor in loop!");
05069         if (EL0.Max == EL1.Max)
05070           MaxBECount = EL0.Max;
05071         if (EL0.Exact == EL1.Exact)
05072           BECount = EL0.Exact;
05073       }
05074 
05075       return ExitLimit(BECount, MaxBECount);
05076     }
05077   }
05078 
05079   // With an icmp, it may be feasible to compute an exact backedge-taken count.
05080   // Proceed to the next level to examine the icmp.
05081   if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond))
05082     return ComputeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit);
05083 
05084   // Check for a constant condition. These are normally stripped out by
05085   // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to
05086   // preserve the CFG and is temporarily leaving constant conditions
05087   // in place.
05088   if (ConstantInt *CI = dyn_cast<ConstantInt>(ExitCond)) {
05089     if (L->contains(FBB) == !CI->getZExtValue())
05090       // The backedge is always taken.
05091       return getCouldNotCompute();
05092     else
05093       // The backedge is never taken.
05094       return getConstant(CI->getType(), 0);
05095   }
05096 
05097   // If it's not an integer or pointer comparison then compute it the hard way.
05098   return ComputeExitCountExhaustively(L, ExitCond, !L->contains(TBB));
05099 }
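
// Illustrative sketch (hypothetical loop): for an exit branch such as
//
//   for (i = 0; i != n && i != m; ++i) { ... }
//
// the loop keeps running only while both subconditions hold, so each
// operand of the "and" is analyzed separately and the exact count is the
// unsigned minimum of the two sub-counts (whichever condition fails first
// ends the loop), matching the getUMinFromMismatchedTypes combination
// above.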
05100 
05101 /// ComputeExitLimitFromICmp - Compute the number of times the
05102 /// backedge of the specified loop will execute if its exit condition
05103 /// were a conditional branch of the ICmpInst ExitCond, TBB, and FBB.
05104 ScalarEvolution::ExitLimit
05105 ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L,
05106                                           ICmpInst *ExitCond,
05107                                           BasicBlock *TBB,
05108                                           BasicBlock *FBB,
05109                                           bool ControlsExit) {
05110 
05111   // If the condition was exit-on-true, convert it to exit-on-false.
05112   ICmpInst::Predicate Cond;
05113   if (!L->contains(FBB))
05114     Cond = ExitCond->getPredicate();
05115   else
05116     Cond = ExitCond->getInversePredicate();
05117 
05118   // Handle common loops like: for (X = "string"; *X; ++X)
05119   if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0)))
05120     if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) {
05121       ExitLimit ItCnt =
05122         ComputeLoadConstantCompareExitLimit(LI, RHS, L, Cond);
05123       if (ItCnt.hasAnyInfo())
05124         return ItCnt;
05125     }
05126 
05127   const SCEV *LHS = getSCEV(ExitCond->getOperand(0));
05128   const SCEV *RHS = getSCEV(ExitCond->getOperand(1));
05129 
05130   // Try to evaluate any dependencies out of the loop.
05131   LHS = getSCEVAtScope(LHS, L);
05132   RHS = getSCEVAtScope(RHS, L);
05133 
05134   // At this point, we would like to compute for how many iterations of
05135   // the loop the predicate will return true for these inputs.
05136   if (isLoopInvariant(LHS, L) && !isLoopInvariant(RHS, L)) {
05137     // If there is a loop-invariant operand, force it into the RHS.
05138     std::swap(LHS, RHS);
05139     Cond = ICmpInst::getSwappedPredicate(Cond);
05140   }
05141 
05142   // Simplify the operands before analyzing them.
05143   (void)SimplifyICmpOperands(Cond, LHS, RHS);
05144 
05145   // If we have a comparison of a chrec against a constant, try to use value
05146   // ranges to answer this query.
05147   if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS))
05148     if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS))
05149       if (AddRec->getLoop() == L) {
05150         // Form the constant range.
05151         ConstantRange CompRange(
05152             ICmpInst::makeConstantRange(Cond, RHSC->getValue()->getValue()));
05153 
05154         const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this);
05155         if (!isa<SCEVCouldNotCompute>(Ret)) return Ret;
05156       }
05157 
05158   switch (Cond) {
05159   case ICmpInst::ICMP_NE: {                     // while (X != Y)
05160     // Convert to: while (X-Y != 0)
05161     ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit);
05162     if (EL.hasAnyInfo()) return EL;
05163     break;
05164   }
05165   case ICmpInst::ICMP_EQ: {                     // while (X == Y)
05166     // Convert to: while (X-Y == 0)
05167     ExitLimit EL = HowFarToNonZero(getMinusSCEV(LHS, RHS), L);
05168     if (EL.hasAnyInfo()) return EL;
05169     break;
05170   }
05171   case ICmpInst::ICMP_SLT:
05172   case ICmpInst::ICMP_ULT: {                    // while (X < Y)
05173     bool IsSigned = Cond == ICmpInst::ICMP_SLT;
05174     ExitLimit EL = HowManyLessThans(LHS, RHS, L, IsSigned, ControlsExit);
05175     if (EL.hasAnyInfo()) return EL;
05176     break;
05177   }
05178   case ICmpInst::ICMP_SGT:
05179   case ICmpInst::ICMP_UGT: {                    // while (X > Y)
05180     bool IsSigned = Cond == ICmpInst::ICMP_SGT;
05181     ExitLimit EL = HowManyGreaterThans(LHS, RHS, L, IsSigned, ControlsExit);
05182     if (EL.hasAnyInfo()) return EL;
05183     break;
05184   }
05185   default:
05186 #if 0
05187     dbgs() << "ComputeBackedgeTakenCount ";
05188     if (ExitCond->getOperand(0)->getType()->isUnsigned())
05189       dbgs() << "[unsigned] ";
05190     dbgs() << *LHS << "   "
05191          << Instruction::getOpcodeName(Instruction::ICmp)
05192          << "   " << *RHS << "\n";
05193 #endif
05194     break;
05195   }
05196   return ComputeExitCountExhaustively(L, ExitCond, !L->contains(TBB));
05197 }
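
// Illustrative sketch (hypothetical loop): "for (i = 0; i < n; ++i)"
// reaches the switch above with Cond == ICMP_SLT in exit-on-false form
// (the backedge is taken while i < n), so HowManyLessThans solves the
// affine recurrence {0,+,1} against n.  Likewise "i != n" becomes
// "i - n != 0" and is handed to HowFarToZero.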
05198 
05199 ScalarEvolution::ExitLimit
05200 ScalarEvolution::ComputeExitLimitFromSingleExitSwitch(const Loop *L,
05201                                                       SwitchInst *Switch,
05202                                                       BasicBlock *ExitingBlock,
05203                                                       bool ControlsExit) {
05204   assert(!L->contains(ExitingBlock) && "Exit block must be outside the loop!");
05205 
05206   // Give up if the exit is the default dest of a switch.
05207   if (Switch->getDefaultDest() == ExitingBlock)
05208     return getCouldNotCompute();
05209 
05210   assert(L->contains(Switch->getDefaultDest()) &&
05211          "Default case must not exit the loop!");
05212   const SCEV *LHS = getSCEVAtScope(Switch->getCondition(), L);
05213   const SCEV *RHS = getConstant(Switch->findCaseDest(ExitingBlock));
05214 
05215   // while (X != Y) --> while (X-Y != 0)
05216   ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit);
05217   if (EL.hasAnyInfo())
05218     return EL;
05219 
05220   return getCouldNotCompute();
05221 }
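
// Illustrative sketch (hypothetical loop): a loop exited through a single
// switch case, e.g.
//
//   for (i = 0; ; ++i)
//     switch (i) { case 40: goto done; default: break; }
//
// is treated like "while (i != 40)": the case value is subtracted from
// the switch condition and HowFarToZero solves {-40,+,1} == 0, giving a
// backedge-taken count of 40.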
05222 
05223 static ConstantInt *
05224 EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C,
05225                                 ScalarEvolution &SE) {
05226   const SCEV *InVal = SE.getConstant(C);
05227   const SCEV *Val = AddRec->evaluateAtIteration(InVal, SE);
05228   assert(isa<SCEVConstant>(Val) &&
05229          "Evaluation of SCEV at constant didn't fold correctly?");
05230   return cast<SCEVConstant>(Val)->getValue();
05231 }
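
// Illustrative sketch: evaluating the constant chrec {5,+,3} at the
// constant iteration number 4 folds to 5 + 3*4 = 17; the assert above
// fires only if such a fully-constant evaluation somehow fails to fold
// down to a SCEVConstant.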
05232 
05233 /// ComputeLoadConstantCompareExitLimit - Given an exit condition of
05234 /// 'icmp op load X, cst', try to see if we can compute the backedge
05235 /// execution count.
05236 ScalarEvolution::ExitLimit
05237 ScalarEvolution::ComputeLoadConstantCompareExitLimit(
05238   LoadInst *LI,
05239   Constant *RHS,
05240   const Loop *L,
05241   ICmpInst::Predicate predicate) {
05242 
05243   if (LI->isVolatile()) return getCouldNotCompute();
05244 
05245   // Check to see if the loaded pointer is a getelementptr of a global.
05246   // TODO: Use SCEV instead of manually grubbing with GEPs.
05247   GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0));
05248   if (!GEP) return getCouldNotCompute();
05249 
05250   // Make sure that it is really a constant global we are gepping, with an
05251   // initializer, and make sure the first IDX is really 0.
05252   GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0));
05253   if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() ||
05254       GEP->getNumOperands() < 3 || !isa<Constant>(GEP->getOperand(1)) ||
05255       !cast<Constant>(GEP->getOperand(1))->isNullValue())
05256     return getCouldNotCompute();
05257 
05258   // Okay, we allow one non-constant index into the GEP instruction.
05259   Value *VarIdx = nullptr;
05260   std::vector<Constant*> Indexes;
05261   unsigned VarIdxNum = 0;
05262   for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i)
05263     if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
05264       Indexes.push_back(CI);
05265     } else if (!isa<ConstantInt>(GEP->getOperand(i))) {
05266       if (VarIdx) return getCouldNotCompute();  // Multiple non-constant idx's.
05267       VarIdx = GEP->getOperand(i);
05268       VarIdxNum = i-2;
05269       Indexes.push_back(nullptr);
05270     }
05271 
05272   // Loop-invariant loads may be a byproduct of loop optimization. Skip them.
05273   if (!VarIdx)
05274     return getCouldNotCompute();
05275 
05276   // Okay, we know we have a (load (gep GV, 0, X)) comparison with a constant.
05277   // Check to see if X is a loop-variant value now.
05278   const SCEV *Idx = getSCEV(VarIdx);
05279   Idx = getSCEVAtScope(Idx, L);
05280 
05281   // We can only recognize very limited forms of loop index expressions, in
05282   // particular, only affine AddRec's like {C1,+,C2}.
05283   const SCEVAddRecExpr *IdxExpr = dyn_cast<SCEVAddRecExpr>(Idx);
05284   if (!IdxExpr || !IdxExpr->isAffine() || isLoopInvariant(IdxExpr, L) ||
05285       !isa<SCEVConstant>(IdxExpr->getOperand(0)) ||
05286       !isa<SCEVConstant>(IdxExpr->getOperand(1)))
05287     return getCouldNotCompute();
05288 
05289   unsigned MaxSteps = MaxBruteForceIterations;
05290   for (unsigned IterationNum = 0; IterationNum != MaxSteps; ++IterationNum) {
05291     ConstantInt *ItCst = ConstantInt::get(
05292                            cast<IntegerType>(IdxExpr->getType()), IterationNum);
05293     ConstantInt *Val = EvaluateConstantChrecAtConstant(IdxExpr, ItCst, *this);
05294 
05295     // Form the GEP offset.
05296     Indexes[VarIdxNum] = Val;
05297 
05298     Constant *Result = ConstantFoldLoadThroughGEPIndices(GV->getInitializer(),
05299                                                          Indexes);
05300     if (!Result) break;  // Cannot compute!
05301 
05302     // Evaluate the condition for this iteration.
05303     Result = ConstantExpr::getICmp(predicate, Result, RHS);
05304     if (!isa<ConstantInt>(Result)) break;  // Couldn't decide for sure
05305     if (cast<ConstantInt>(Result)->getValue().isMinValue()) {
05306 #if 0
05307       dbgs() << "\n***\n*** Computed loop count " << *ItCst
05308              << "\n*** From global " << *GV << "*** BB: " << *L->getHeader()
05309              << "***\n";
05310 #endif
05311       ++NumArrayLenItCounts;
05312       return getConstant(ItCst);   // Found terminating iteration!
05313     }
05314   }
05315   return getCouldNotCompute();
05316 }
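
// Illustrative sketch (hypothetical global and loop): given
//
//   static const int T[] = {1, 2, 3, 0};
//   for (i = 0; T[i] != 0; ++i) { ... }
//
// the index is the affine chrec {0,+,1}; the probe above constant-folds
// the load on iterations 0..2 to nonzero values and on iteration 3 to 0,
// where the continue-condition "T[i] != 0" first becomes false, so the
// backedge-taken count is 3.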
05317 
05318 
05319 /// CanConstantFold - Return true if we can constant fold an instruction of the
05320 /// specified type, assuming that all operands were constants.
05321 static bool CanConstantFold(const Instruction *I) {
05322   if (isa<BinaryOperator>(I) || isa<CmpInst>(I) ||
05323       isa<SelectInst>(I) || isa<CastInst>(I) || isa<GetElementPtrInst>(I) ||
05324       isa<LoadInst>(I))
05325     return true;
05326 
05327   if (const CallInst *CI = dyn_cast<CallInst>(I))
05328     if (const Function *F = CI->getCalledFunction())
05329       return canConstantFoldCallTo(F);
05330   return false;
05331 }
05332 
05333 /// Determine whether this instruction can constant evolve within this loop
05334 /// assuming its operands can all constant evolve.
05335 static bool canConstantEvolve(Instruction *I, const Loop *L) {
05336   // An instruction outside of the loop can't be derived from a loop PHI.
05337   if (!L->contains(I)) return false;
05338 
05339   if (isa<PHINode>(I)) {
05340     // We don't currently keep track of the control flow needed to evaluate
05341     // PHIs, so we cannot handle PHIs inside of loops.
05342     return L->getHeader() == I->getParent();
05343   }
05344 
05345   // If we won't be able to constant fold this expression even if the operands
05346   // are constants, bail early.
05347   return CanConstantFold(I);
05348 }
05349 
05350 /// getConstantEvolvingPHIOperands - Implement getConstantEvolvingPHI by
05351 /// recursing through each instruction operand until reaching a loop header phi.
05352 static PHINode *
05353 getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L,
05354                                DenseMap<Instruction *, PHINode *> &PHIMap) {
05355 
05356   // We can evaluate this instruction if all of its operands are constant
05357   // or derived from a PHI node themselves.
05358   PHINode *PHI = nullptr;
05359   for (Instruction::op_iterator OpI = UseInst->op_begin(),
05360          OpE = UseInst->op_end(); OpI != OpE; ++OpI) {
05361 
05362     if (isa<Constant>(*OpI)) continue;
05363 
05364     Instruction *OpInst = dyn_cast<Instruction>(*OpI);
05365     if (!OpInst || !canConstantEvolve(OpInst, L)) return nullptr;
05366 
05367     PHINode *P = dyn_cast<PHINode>(OpInst);
05368     if (!P)
05369       // If this operand is already visited, reuse the prior result.
05370       // We may have P != PHI if this is the deepest point at which the
05371       // inconsistent paths meet.
05372       P = PHIMap.lookup(OpInst);
05373     if (!P) {
05374       // Recurse and memoize the results, whether a phi is found or not.
05375       // This recursive call invalidates pointers into PHIMap.
05376       P = getConstantEvolvingPHIOperands(OpInst, L, PHIMap);
05377       PHIMap[OpInst] = P;
05378     }
05379     if (!P)
05380       return nullptr;  // Not evolving from PHI
05381     if (PHI && PHI != P)
05382       return nullptr;  // Evolving from multiple different PHIs.
05383     PHI = P;
05384   }
05385   // This is an expression evolving from a constant PHI!
05386   return PHI;
05387 }
05388 
05389 /// getConstantEvolvingPHI - Given an LLVM value and a loop, return a PHI node
05390 /// in the loop that V is derived from.  We allow arbitrary operations along the
05391 /// way, but the operands of an operation must either be constants or a value
05392 /// derived from a constant PHI.  If this expression does not fit with these
05393 /// constraints, return null.
05394 static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
05395   Instruction *I = dyn_cast<Instruction>(V);
05396   if (!I || !canConstantEvolve(I, L)) return nullptr;
05397 
05398   if (PHINode *PN = dyn_cast<PHINode>(I)) {
05399     return PN;
05400   }
05401 
05402   // Record non-constant instructions contained by the loop.
05403   DenseMap<Instruction *, PHINode *> PHIMap;
05404   return getConstantEvolvingPHIOperands(I, L, PHIMap);
05405 }
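
// Illustrative sketch (hypothetical IR): for a header PHI
//
//   %i = phi i32 [ 0, %preheader ], [ %i.next, %latch ]
//
// an expression such as (%i * 2) + 3 constant evolves: every operand is a
// constant or derived from that single header PHI, so this returns %i's
// PHI node.  An expression mixing two different header PHIs yields null.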
05406 
05407 /// EvaluateExpression - Given an expression that passes the
05408 /// getConstantEvolvingPHI predicate, evaluate its value assuming the PHI
05409 /// nodes in the loop have the constant values given in the Vals map.  If we
05410 /// can't fold this expression for some reason, return null.
05411 static Constant *EvaluateExpression(Value *V, const Loop *L,
05412                                     DenseMap<Instruction *, Constant *> &Vals,
05413                                     const DataLayout &DL,
05414                                     const TargetLibraryInfo *TLI) {
05415   // Convenient constant check, but redundant for recursive calls.
05416   if (Constant *C = dyn_cast<Constant>(V)) return C;
05417   Instruction *I = dyn_cast<Instruction>(V);
05418   if (!I) return nullptr;
05419 
05420   if (Constant *C = Vals.lookup(I)) return C;
05421 
05422   // An instruction inside the loop depends on a value outside the loop that we
05423   // weren't given a mapping for, or a value such as a call inside the loop.
05424   if (!canConstantEvolve(I, L)) return nullptr;
05425 
05426   // An unmapped PHI can be due to a branch or another loop inside this loop,
05427   // or due to this not being the initial iteration through a loop where we
05428   // couldn't compute the evolution of this particular PHI last time.
05429   if (isa<PHINode>(I)) return nullptr;
05430 
05431   std::vector<Constant*> Operands(I->getNumOperands());
05432 
05433   for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
05434     Instruction *Operand = dyn_cast<Instruction>(I->getOperand(i));
05435     if (!Operand) {
05436       Operands[i] = dyn_cast<Constant>(I->getOperand(i));
05437       if (!Operands[i]) return nullptr;
05438       continue;
05439     }
05440     Constant *C = EvaluateExpression(Operand, L, Vals, DL, TLI);
05441     Vals[Operand] = C;
05442     if (!C) return nullptr;
05443     Operands[i] = C;
05444   }
05445 
05446   if (CmpInst *CI = dyn_cast<CmpInst>(I))
05447     return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
05448                                            Operands[1], DL, TLI);
05449   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
05450     if (!LI->isVolatile())
05451       return ConstantFoldLoadFromConstPtr(Operands[0], DL);
05452   }
05453   return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, DL,
05454                                   TLI);
05455 }
05456 
05457 /// getConstantEvolutionLoopExitValue - If the specified PHI is in the header
05458 /// of its containing loop, the loop executes a constant number of times, and
05459 /// the PHI node is just a recurrence involving constants, fold the PHI's
05460 /// loop-exit value to a constant.
05461 Constant *
05462 ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
05463                                                    const APInt &BEs,
05464                                                    const Loop *L) {
05465   DenseMap<PHINode*, Constant*>::const_iterator I =
05466     ConstantEvolutionLoopExitValue.find(PN);
05467   if (I != ConstantEvolutionLoopExitValue.end())
05468     return I->second;
05469 
05470   if (BEs.ugt(MaxBruteForceIterations))
05471     return ConstantEvolutionLoopExitValue[PN] = nullptr;  // Not going to evaluate it.
05472 
05473   Constant *&RetVal = ConstantEvolutionLoopExitValue[PN];
05474 
05475   DenseMap<Instruction *, Constant *> CurrentIterVals;
05476   BasicBlock *Header = L->getHeader();
05477   assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!");
05478 
05479   // Since the loop is canonicalized, the PHI node must have two entries.  One
05480   // entry must be a constant (coming in from outside of the loop), and the
05481   // second must be derived from the same PHI.
05482   bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
05483   PHINode *PHI = nullptr;
05484   for (BasicBlock::iterator I = Header->begin();
05485        (PHI = dyn_cast<PHINode>(I)); ++I) {
05486     Constant *StartCST =
05487       dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge));
05488     if (!StartCST) continue;
05489     CurrentIterVals[PHI] = StartCST;
05490   }
05491   if (!CurrentIterVals.count(PN))
05492     return RetVal = nullptr;
05493 
05494   Value *BEValue = PN->getIncomingValue(SecondIsBackedge);
05495 
05496   // Execute the loop symbolically to determine the exit value.
05497   if (BEs.getActiveBits() >= 32)
05498     return RetVal = nullptr; // More than 2^32-1 iterations?? Not doing it!
05499 
05500   unsigned NumIterations = BEs.getZExtValue(); // must be in range
05501   unsigned IterationNum = 0;
05502   const DataLayout &DL = F->getParent()->getDataLayout();
05503   for (; ; ++IterationNum) {
05504     if (IterationNum == NumIterations)
05505       return RetVal = CurrentIterVals[PN];  // Got exit value!
05506 
05507     // Compute the value of the PHIs for the next iteration.
05508     // EvaluateExpression adds non-phi values to the CurrentIterVals map.
05509     DenseMap<Instruction *, Constant *> NextIterVals;
05510     Constant *NextPHI =
05511         EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI);
05512     if (!NextPHI)
05513       return nullptr;        // Couldn't evaluate!
05514     NextIterVals[PN] = NextPHI;
05515 
05516     bool StoppedEvolving = NextPHI == CurrentIterVals[PN];
05517 
05518     // Also evaluate the other PHI nodes.  However, we don't get to stop if we
05519     // cease to be able to evaluate one of them or if they stop evolving,
05520     // because that doesn't necessarily prevent us from computing PN.
05521     SmallVector<std::pair<PHINode *, Constant *>, 8> PHIsToCompute;
05522     for (DenseMap<Instruction *, Constant *>::const_iterator
05523            I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){
05524       PHINode *PHI = dyn_cast<PHINode>(I->first);
05525       if (!PHI || PHI == PN || PHI->getParent() != Header) continue;
05526       PHIsToCompute.push_back(std::make_pair(PHI, I->second));
05527     }
05528     // We use two distinct loops because EvaluateExpression may invalidate any
05529     // iterators into CurrentIterVals.
05530     for (SmallVectorImpl<std::pair<PHINode *, Constant*> >::const_iterator
05531              I = PHIsToCompute.begin(), E = PHIsToCompute.end(); I != E; ++I) {
05532       PHINode *PHI = I->first;
05533       Constant *&NextPHI = NextIterVals[PHI];
05534       if (!NextPHI) {   // Not already computed.
05535         Value *BEValue = PHI->getIncomingValue(SecondIsBackedge);
05536         NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI);
05537       }
05538       if (NextPHI != I->second)
05539         StoppedEvolving = false;
05540     }
05541 
05542     // If all entries in CurrentIterVals == NextIterVals then we can stop
05543   // iterating; the loop can't continue to change.
05544     if (StoppedEvolving)
05545       return RetVal = CurrentIterVals[PN];
05546 
05547     CurrentIterVals.swap(NextIterVals);
05548   }
05549 }
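
// Illustrative sketch: with a backedge-taken count of 3, a header PHI
// that starts at 1 and doubles along the backedge evolves through 1, 2,
// 4, 8; the loop above replays those three iterations and returns the
// constant 8 as the exit value.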
05550 
05551 /// ComputeExitCountExhaustively - If the loop is known to execute a
05552 /// constant number of times (the condition evolves only from constants),
05553 /// try to evaluate a few iterations of the loop until the exit
05554 /// condition takes the value ExitWhen (true or false).  If we cannot
05555 /// evaluate the trip count of the loop, return getCouldNotCompute().
05556 const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
05557                                                           Value *Cond,
05558                                                           bool ExitWhen) {
05559   PHINode *PN = getConstantEvolvingPHI(Cond, L);
05560   if (!PN) return getCouldNotCompute();
05561 
05562   // If the loop is canonicalized, the PHI will have exactly two entries.
05563   // That's the only form we support here.
05564   if (PN->getNumIncomingValues() != 2) return getCouldNotCompute();
05565 
05566   DenseMap<Instruction *, Constant *> CurrentIterVals;
05567   BasicBlock *Header = L->getHeader();
05568   assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!");
05569 
05570   // One entry must be a constant (coming in from outside of the loop), and the
05571   // second must be derived from the same PHI.
05572   bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
05573   PHINode *PHI = nullptr;
05574   for (BasicBlock::iterator I = Header->begin();
05575        (PHI = dyn_cast<PHINode>(I)); ++I) {
05576     Constant *StartCST =
05577       dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge));
05578     if (!StartCST) continue;
05579     CurrentIterVals[PHI] = StartCST;
05580   }
05581   if (!CurrentIterVals.count(PN))
05582     return getCouldNotCompute();
05583 
05584   // Okay, we found a PHI node that defines the trip count of this loop.  Execute
05585   // the loop symbolically to determine when the condition gets a value of
05586   // "ExitWhen".
05587   unsigned MaxIterations = MaxBruteForceIterations;   // Limit analysis.
05588   const DataLayout &DL = F->getParent()->getDataLayout();
05589   for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){
05590     ConstantInt *CondVal = dyn_cast_or_null<ConstantInt>(
05591         EvaluateExpression(Cond, L, CurrentIterVals, DL, TLI));
05592 
05593     // Couldn't symbolically evaluate.
05594     if (!CondVal) return getCouldNotCompute();
05595 
05596     if (CondVal->getValue() == uint64_t(ExitWhen)) {
05597       ++NumBruteForceTripCountsComputed;
05598       return getConstant(Type::getInt32Ty(getContext()), IterationNum);
05599     }
05600 
05601     // Update all the PHI nodes for the next iteration.
05602     DenseMap<Instruction *, Constant *> NextIterVals;
05603 
05604     // Create a list of which PHIs we need to compute. We want to do this before
05605     // calling EvaluateExpression on them because that may invalidate iterators
05606     // into CurrentIterVals.
05607     SmallVector<PHINode *, 8> PHIsToCompute;
05608     for (DenseMap<Instruction *, Constant *>::const_iterator
05609            I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){
05610       PHINode *PHI = dyn_cast<PHINode>(I->first);
05611       if (!PHI || PHI->getParent() != Header) continue;
05612       PHIsToCompute.push_back(PHI);
05613     }
05614     for (SmallVectorImpl<PHINode *>::const_iterator I = PHIsToCompute.begin(),
05615              E = PHIsToCompute.end(); I != E; ++I) {
05616       PHINode *PHI = *I;
05617       Constant *&NextPHI = NextIterVals[PHI];
05618       if (NextPHI) continue;    // Already computed!
05619 
05620       Value *BEValue = PHI->getIncomingValue(SecondIsBackedge);
05621       NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI);
05622     }
05623     CurrentIterVals.swap(NextIterVals);
05624   }
05625 
05626   // Too many iterations were needed to evaluate.
05627   return getCouldNotCompute();
05628 }
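
// Illustrative sketch (hypothetical loop): a recurrence with no affine
// form, e.g.
//
//   for (i = 2; i != 256; i = i * i) { ... }
//
// evolves through 2, 4, 16, 256.  Symbolically executing the header PHI
// from its constant start value shows the exit condition first matching
// ExitWhen on iteration 3, so a backedge-taken count of 3 is returned.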
05629 
05630 /// getSCEVAtScope - Return a SCEV expression for the specified value
05631 /// at the specified scope in the program.  The L value specifies a loop
05632 /// nest in which to evaluate the expression: null means the top-level
05633 /// scope, and a non-null loop means the point immediately inside that loop.
05634 ///
05635 /// This method can be used to compute the exit value for a variable defined
05636 /// in a loop by querying what the value will hold in the parent loop.
05637 ///
05638 /// In the case that a relevant loop exit value cannot be computed, the
05639 /// original value V is returned.
05640 const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
05641   // Check to see if we've folded this expression at this loop before.
05642   SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values = ValuesAtScopes[V];
05643   for (unsigned u = 0; u < Values.size(); u++) {
05644     if (Values[u].first == L)
05645       return Values[u].second ? Values[u].second : V;
05646   }
05647   Values.push_back(std::make_pair(L, static_cast<const SCEV *>(nullptr)));
05648   // Otherwise compute it.
05649   const SCEV *C = computeSCEVAtScope(V, L);
05650   SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values2 = ValuesAtScopes[V];
05651   for (unsigned u = Values2.size(); u > 0; u--) {
05652     if (Values2[u - 1].first == L) {
05653       Values2[u - 1].second = C;
05654       break;
05655     }
05656   }
05657   return C;
05658 }
05659 
05660 /// This builds up a Constant using the ConstantExpr interface.  That way, we
05661 /// will return Constants for objects which aren't represented by a
05662 /// SCEVConstant, because SCEVConstant is restricted to ConstantInt.
05663 /// Returns NULL if the SCEV isn't representable as a Constant.
05664 static Constant *BuildConstantFromSCEV(const SCEV *V) {
05665   switch (static_cast<SCEVTypes>(V->getSCEVType())) {
05666     case scCouldNotCompute:
05667     case scAddRecExpr:
05668       break;
05669     case scConstant:
05670       return cast<SCEVConstant>(V)->getValue();
05671     case scUnknown:
05672       return dyn_cast<Constant>(cast<SCEVUnknown>(V)->getValue());
05673     case scSignExtend: {
05674       const SCEVSignExtendExpr *SS = cast<SCEVSignExtendExpr>(V);
05675       if (Constant *CastOp = BuildConstantFromSCEV(SS->getOperand()))
05676         return ConstantExpr::getSExt(CastOp, SS->getType());
05677       break;
05678     }
05679     case scZeroExtend: {
05680       const SCEVZeroExtendExpr *SZ = cast<SCEVZeroExtendExpr>(V);
05681       if (Constant *CastOp = BuildConstantFromSCEV(SZ->getOperand()))
05682         return ConstantExpr::getZExt(CastOp, SZ->getType());
05683       break;
05684     }
05685     case scTruncate: {
05686       const SCEVTruncateExpr *ST = cast<SCEVTruncateExpr>(V);
05687       if (Constant *CastOp = BuildConstantFromSCEV(ST->getOperand()))
05688         return ConstantExpr::getTrunc(CastOp, ST->getType());
05689       break;
05690     }
05691     case scAddExpr: {
05692       const SCEVAddExpr *SA = cast<SCEVAddExpr>(V);
05693       if (Constant *C = BuildConstantFromSCEV(SA->getOperand(0))) {
05694         if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) {
05695           unsigned AS = PTy->getAddressSpace();
05696           Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
05697           C = ConstantExpr::getBitCast(C, DestPtrTy);
05698         }
05699         for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) {
05700           Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i));
05701           if (!C2) return nullptr;
05702 
05703           // First pointer!
05704           if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) {
05705             unsigned AS = C2->getType()->getPointerAddressSpace();
05706             std::swap(C, C2);
05707             Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
05708             // The offsets have been converted to bytes.  We can add bytes to an
05709             // i8* by GEP with the byte count in the first index.
05710             C = ConstantExpr::getBitCast(C, DestPtrTy);
05711           }
05712 
05713           // Don't bother trying to sum two pointers. We probably can't
05714           // statically compute a load that results from it anyway.
05715           if (C2->getType()->isPointerTy())
05716             return nullptr;
05717 
05718           if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) {
05719             if (PTy->getElementType()->isStructTy())
05720               C2 = ConstantExpr::getIntegerCast(
05721                   C2, Type::getInt32Ty(C->getContext()), true);
05722             C = ConstantExpr::getGetElementPtr(PTy->getElementType(), C, C2);
05723           } else
05724             C = ConstantExpr::getAdd(C, C2);
05725         }
05726         return C;
05727       }
05728       break;
05729     }
05730     case scMulExpr: {
05731       const SCEVMulExpr *SM = cast<SCEVMulExpr>(V);
05732       if (Constant *C = BuildConstantFromSCEV(SM->getOperand(0))) {
05733         // Don't bother with pointers at all.
05734         if (C->getType()->isPointerTy()) return nullptr;
05735         for (unsigned i = 1, e = SM->getNumOperands(); i != e; ++i) {
05736           Constant *C2 = BuildConstantFromSCEV(SM->getOperand(i));
05737           if (!C2 || C2->getType()->isPointerTy()) return nullptr;
05738           C = ConstantExpr::getMul(C, C2);
05739         }
05740         return C;
05741       }
05742       break;
05743     }
05744     case scUDivExpr: {
05745       const SCEVUDivExpr *SU = cast<SCEVUDivExpr>(V);
05746       if (Constant *LHS = BuildConstantFromSCEV(SU->getLHS()))
05747         if (Constant *RHS = BuildConstantFromSCEV(SU->getRHS()))
05748           if (LHS->getType() == RHS->getType())
05749             return ConstantExpr::getUDiv(LHS, RHS);
05750       break;
05751     }
05752     case scSMaxExpr:
05753     case scUMaxExpr:
05754       break; // TODO: smax, umax.
05755   }
05756   return nullptr;
05757 }
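
// Illustrative sketch: a SCEVAddExpr of a global pointer and the constant
// 8 is rebuilt here as a constant GEP: the pointer is bitcast to i8* and
// indexed by the byte offset 8.  A sum of two pointers, or a SCEVMulExpr
// involving a pointer, gives up and returns null instead.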
05758 
05759 const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
05760   if (isa<SCEVConstant>(V)) return V;
05761 
05762   // If this instruction is evolved from a constant-evolving PHI, compute the
05763   // exit value from the loop without using SCEVs.
05764   if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) {
05765     if (Instruction *I = dyn_cast<Instruction>(SU->getValue())) {
05766       const Loop *LI = (*this->LI)[I->getParent()];
05767       if (LI && LI->getParentLoop() == L)  // Looking for loop exit value.
05768         if (PHINode *PN = dyn_cast<PHINode>(I))
05769           if (PN->getParent() == LI->getHeader()) {
05770             // Okay, there is no closed form solution for the PHI node.  Check
05771             // to see if the loop that contains it has a known backedge-taken
05772             // count.  If so, we may be able to force computation of the exit
05773             // value.
05774             const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI);
05775             if (const SCEVConstant *BTCC =
05776                   dyn_cast<SCEVConstant>(BackedgeTakenCount)) {
05777               // Okay, we know how many times the containing loop executes.  If
05778               // this is a constant evolving PHI node, get the final value at
05779               // the specified iteration number.
05780               Constant *RV = getConstantEvolutionLoopExitValue(PN,
05781                                                    BTCC->getValue()->getValue(),
05782                                                                LI);
05783               if (RV) return getSCEV(RV);
05784             }
05785           }
05786 
05787       // Okay, this is an expression that we cannot symbolically evaluate
05788       // into a SCEV.  Check to see if it's possible to symbolically evaluate
05789       // the arguments into constants, and if so, try to constant propagate the
05790       // result.  This is particularly useful for computing loop exit values.
05791       if (CanConstantFold(I)) {
05792         SmallVector<Constant *, 4> Operands;
05793         bool MadeImprovement = false;
05794         for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
05795           Value *Op = I->getOperand(i);
05796           if (Constant *C = dyn_cast<Constant>(Op)) {
05797             Operands.push_back(C);
05798             continue;
05799           }
05800 
05801           // If any of the operands is non-constant and if they are
05802           // non-integer and non-pointer, don't even try to analyze them
05803           // with scev techniques.
05804           if (!isSCEVable(Op->getType()))
05805             return V;
05806 
05807           const SCEV *OrigV = getSCEV(Op);
05808           const SCEV *OpV = getSCEVAtScope(OrigV, L);
05809           MadeImprovement |= OrigV != OpV;
05810 
05811           Constant *C = BuildConstantFromSCEV(OpV);
05812           if (!C) return V;
05813           if (C->getType() != Op->getType())
05814             C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
05815                                                               Op->getType(),
05816                                                               false),
05817                                       C, Op->getType());
05818           Operands.push_back(C);
05819         }
05820 
05821         // Check to see if getSCEVAtScope actually made an improvement.
05822         if (MadeImprovement) {
05823           Constant *C = nullptr;
05824           const DataLayout &DL = F->getParent()->getDataLayout();
05825           if (const CmpInst *CI = dyn_cast<CmpInst>(I))
05826             C = ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
05827                                                 Operands[1], DL, TLI);
05828           else if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
05829             if (!LI->isVolatile())
05830               C = ConstantFoldLoadFromConstPtr(Operands[0], DL);
05831           } else
05832             C = ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands,
05833                                          DL, TLI);
05834           if (!C) return V;
05835           return getSCEV(C);
05836         }
05837       }
05838     }
05839 
05840     // This is some other type of SCEVUnknown, just return it.
05841     return V;
05842   }
05843 
05844   if (const SCEVCommutativeExpr *Comm = dyn_cast<SCEVCommutativeExpr>(V)) {
05845     // Avoid performing the look-up in the common case where the specified
05846     // expression has no loop-variant portions.
05847     for (unsigned i = 0, e = Comm->getNumOperands(); i != e; ++i) {
05848       const SCEV *OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
05849       if (OpAtScope != Comm->getOperand(i)) {
05850         // Okay, at least one of these operands is loop variant but might be
05851         // foldable.  Build a new instance of the folded commutative expression.
05852         SmallVector<const SCEV *, 8> NewOps(Comm->op_begin(),
05853                                             Comm->op_begin()+i);
05854         NewOps.push_back(OpAtScope);
05855 
05856         for (++i; i != e; ++i) {
05857           OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
05858           NewOps.push_back(OpAtScope);
05859         }
05860         if (isa<SCEVAddExpr>(Comm))
05861           return getAddExpr(NewOps);
05862         if (isa<SCEVMulExpr>(Comm))
05863           return getMulExpr(NewOps);
05864         if (isa<SCEVSMaxExpr>(Comm))
05865           return getSMaxExpr(NewOps);
05866         if (isa<SCEVUMaxExpr>(Comm))
05867           return getUMaxExpr(NewOps);
05868         llvm_unreachable("Unknown commutative SCEV type!");
05869       }
05870     }
05871     // If we got here, all operands are loop invariant.
05872     return Comm;
05873   }
05874 
05875   if (const SCEVUDivExpr *Div = dyn_cast<SCEVUDivExpr>(V)) {
05876     const SCEV *LHS = getSCEVAtScope(Div->getLHS(), L);
05877     const SCEV *RHS = getSCEVAtScope(Div->getRHS(), L);
05878     if (LHS == Div->getLHS() && RHS == Div->getRHS())
05879       return Div;   // must be loop invariant
05880     return getUDivExpr(LHS, RHS);
05881   }
05882 
05883   // If this is a loop recurrence for a loop that does not contain L, then we
05884   // are dealing with the final value computed by the loop.
05885   if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V)) {
05886     // First, attempt to evaluate each operand.
05887     // Avoid performing the look-up in the common case where the specified
05888     // expression has no loop-variant portions.
05889     for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
05890       const SCEV *OpAtScope = getSCEVAtScope(AddRec->getOperand(i), L);
05891       if (OpAtScope == AddRec->getOperand(i))
05892         continue;
05893 
05894       // Okay, at least one of these operands is loop variant but might be
05895       // foldable.  Build a new instance of the folded addrec expression.
05896       SmallVector<const SCEV *, 8> NewOps(AddRec->op_begin(),
05897                                           AddRec->op_begin()+i);
05898       NewOps.push_back(OpAtScope);
05899       for (++i; i != e; ++i)
05900         NewOps.push_back(getSCEVAtScope(AddRec->getOperand(i), L));
05901 
05902       const SCEV *FoldedRec =
05903         getAddRecExpr(NewOps, AddRec->getLoop(),
05904                       AddRec->getNoWrapFlags(SCEV::FlagNW));
05905       AddRec = dyn_cast<SCEVAddRecExpr>(FoldedRec);
05906       // The addrec may be folded to a nonrecurrence, for example, if the
05907       // induction variable is multiplied by zero after constant folding. Go
05908       // ahead and return the folded value.
05909       if (!AddRec)
05910         return FoldedRec;
05911       break;
05912     }
05913 
05914     // If the scope is outside the addrec's loop, evaluate it by using the
05915     // loop exit value of the addrec.
05916     if (!AddRec->getLoop()->contains(L)) {
05917       // To evaluate this recurrence, we need to know how many times the AddRec
05918       // loop iterates.  Compute this now.
05919       const SCEV *BackedgeTakenCount = getBackedgeTakenCount(AddRec->getLoop());
05920       if (BackedgeTakenCount == getCouldNotCompute()) return AddRec;
05921 
05922       // Then, evaluate the AddRec.
05923       return AddRec->evaluateAtIteration(BackedgeTakenCount, *this);
05924     }
05925 
05926     return AddRec;
05927   }
05928 
05929   if (const SCEVZeroExtendExpr *Cast = dyn_cast<SCEVZeroExtendExpr>(V)) {
05930     const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
05931     if (Op == Cast->getOperand())
05932       return Cast;  // must be loop invariant
05933     return getZeroExtendExpr(Op, Cast->getType());
05934   }
05935 
05936   if (const SCEVSignExtendExpr *Cast = dyn_cast<SCEVSignExtendExpr>(V)) {
05937     const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
05938     if (Op == Cast->getOperand())
05939       return Cast;  // must be loop invariant
05940     return getSignExtendExpr(Op, Cast->getType());
05941   }
05942 
05943   if (const SCEVTruncateExpr *Cast = dyn_cast<SCEVTruncateExpr>(V)) {
05944     const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
05945     if (Op == Cast->getOperand())
05946       return Cast;  // must be loop invariant
05947     return getTruncateExpr(Op, Cast->getType());
05948   }
05949 
05950   llvm_unreachable("Unknown SCEV type!");
05951 }
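
// Illustrative sketch: for the addrec {0,+,2} of an inner loop whose
// backedge-taken count is 9, asking for its value at the scope of the
// enclosing loop evaluates the chrec at iteration 9, yielding 18, the
// value the expression holds in the inner loop's final iteration.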
05952 
05953 /// getSCEVAtScope - This is a convenience function which does
05954 /// getSCEVAtScope(getSCEV(V), L).
05955 const SCEV *ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) {
05956   return getSCEVAtScope(getSCEV(V), L);
05957 }
05958 
05959 /// SolveLinEquationWithOverflow - Finds the minimum unsigned root of the
05960 /// following equation:
05961 ///
05962 ///     A * X = B (mod N)
05963 ///
05964 /// where N = 2^BW and BW is the common bit width of A and B. The signedness of
05965 /// A and B isn't important.
05966 ///
05967 /// If the equation does not have a solution, SCEVCouldNotCompute is returned.
05968 static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const APInt &B,
05969                                                ScalarEvolution &SE) {
05970   uint32_t BW = A.getBitWidth();
05971   assert(BW == B.getBitWidth() && "Bit widths must be the same.");
05972   assert(A != 0 && "A must be non-zero.");
05973 
05974   // 1. D = gcd(A, N)
05975   //
05976   // The gcd of A and N may have only one prime factor: 2. The number of
05977   // trailing zeros in A is the multiplicity of that factor in A.
05978   uint32_t Mult2 = A.countTrailingZeros();
05979   // D = 2^Mult2
05980 
05981   // 2. Check if B is divisible by D.
05982   //
05983   // B is divisible by D if and only if the multiplicity of prime factor 2 for B
05984   // is not less than the multiplicity of this prime factor for D.
05985   if (B.countTrailingZeros() < Mult2)
05986     return SE.getCouldNotCompute();
05987 
05988   // 3. Compute I: the multiplicative inverse of (A / D) in arithmetic
05989   // modulo (N / D).
05990   //
05991   // (N / D) may need BW+1 bits in its representation.  Hence, we'll use this
05992   // bit width during computations.
05993   APInt AD = A.lshr(Mult2).zext(BW + 1);  // AD = A / D
05994   APInt Mod(BW + 1, 0);
05995   Mod.setBit(BW - Mult2);  // Mod = N / D
05996   APInt I = AD.multiplicativeInverse(Mod);
05997 
05998   // 4. Compute the minimum unsigned root of the equation:
05999   // I * (B / D) mod (N / D)
06000   APInt Result = (I * B.lshr(Mult2).zext(BW + 1)).urem(Mod);
06001 
06002   // The result is guaranteed to be less than 2^BW so we may truncate it to BW
06003   // bits.
06004   return SE.getConstant(Result.trunc(BW));
06005 }
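
// Illustrative worked example, with BW = 8: solving 4 * X = 12 (mod 256)
// gives Mult2 = 2 (D = 4); B = 12 has two trailing zeros, so step 2
// passes; AD = 1, Mod = 2^6 = 64, I = 1, and Result = (12 >> 2) mod 64 =
// 3.  Indeed 4 * 3 = 12 (mod 256).  Solving 4 * X = 6 (mod 256) fails at
// step 2, since 6 has only one trailing zero bit, and this returns
// SCEVCouldNotCompute.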
06006 
06007 /// SolveQuadraticEquation - Find the roots of the quadratic equation for the
06008 /// given quadratic chrec {L,+,M,+,N}.  This returns either the two roots (which
06009 /// might be the same) or two SCEVCouldNotCompute objects.
06010 ///
06011 static std::pair<const SCEV *,const SCEV *>
06012 SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
06013   assert(AddRec->getNumOperands() == 3 && "This is not a quadratic chrec!");
06014   const SCEVConstant *LC = dyn_cast<SCEVConstant>(AddRec->getOperand(0));
06015   const SCEVConstant *MC = dyn_cast<SCEVConstant>(AddRec->getOperand(1));
06016   const SCEVConstant *NC = dyn_cast<SCEVConstant>(AddRec->getOperand(2));
06017 
06018   // We currently can only solve this if the coefficients are constants.
06019   if (!LC || !MC || !NC) {
06020     const SCEV *CNC = SE.getCouldNotCompute();
06021     return std::make_pair(CNC, CNC);
06022   }
06023 
06024   uint32_t BitWidth = LC->getValue()->getValue().getBitWidth();
06025   const APInt &L = LC->getValue()->getValue();
06026   const APInt &M = MC->getValue()->getValue();
06027   const APInt &N = NC->getValue()->getValue();
06028   APInt Two(BitWidth, 2);
06029   APInt Four(BitWidth, 4);
06030 
06031   {
06032     using namespace APIntOps;
06033     const APInt& C = L;
06034     // Convert from chrec coefficients to polynomial coefficients AX^2+BX+C.
06035     // The B coefficient is M-N/2.
06036     APInt B(M);
06037     B -= sdiv(N,Two);
06038 
06039     // The A coefficient is N/2
06040     APInt A(N.sdiv(Two));
06041 
06042     // Compute the B^2-4AC term.
06043     APInt SqrtTerm(B);
06044     SqrtTerm *= B;
06045     SqrtTerm -= Four * (A * C);
06046 
06047     if (SqrtTerm.