//===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis ------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of the scalar evolution expander,
// which is used to generate the code corresponding to a given scalar evolution
// expression.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace PatternMatch;

/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
/// reusing an existing cast if a suitable one exists, moving an existing
/// cast if a suitable one exists but isn't in the right place, or
/// creating a new one.
Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
                                       Instruction::CastOps Op,
                                       BasicBlock::iterator IP) {
  // This function must be called with the builder having a valid insertion
  // point. It doesn't need to be the actual IP where the uses of the returned
  // cast will be added, but it must dominate such IP.
  // We use this precondition to produce a cast that will dominate all its
  // uses. In particular, this is crucial for the case where the builder's
  // insertion point *is* the point where we were asked to put the cast.
  // Since we don't know the builder's insertion point is actually
  // where the uses will be added (only that it dominates it), we are
  // not allowed to move it.
  BasicBlock::iterator BIP = Builder.GetInsertPoint();

  Instruction *Ret = nullptr;

  // Check to see if there is already a cast!
  for (User *U : V->users())
    if (U->getType() == Ty)
      if (CastInst *CI = dyn_cast<CastInst>(U))
        if (CI->getOpcode() == Op) {
          // If the cast isn't where we want it, create a new cast at IP.
          // Likewise, do not reuse a cast at BIP because it must dominate
          // instructions that might be inserted before BIP.
          if (BasicBlock::iterator(CI) != IP || BIP == IP) {
            // Create a new cast, and leave the old cast in place in case
            // it is being used as an insert point. Clear its operand
            // so that it doesn't hold anything live.
            Ret = CastInst::Create(Op, V, Ty, "", &*IP);
            Ret->takeName(CI);
            CI->replaceAllUsesWith(Ret);
            CI->setOperand(0, UndefValue::get(V->getType()));
            break;
          }
          Ret = CI;
          break;
        }

  // Create a new cast.
  if (!Ret)
    Ret = CastInst::Create(Op, V, Ty, V->getName(), &*IP);

  // We assert at the end of the function since IP might point to an
  // instruction with different dominance properties than a cast
  // (an invoke for example) and not dominate BIP (but the cast does).
  assert(SE.DT.dominates(Ret, &*BIP));

  rememberInstruction(Ret);
  return Ret;
}

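/// Find a suitable insertion point next to I: start at the instruction after
/// it (or at the normal destination of an invoke), then skip past any phis
/// and EH pads, falling back to MustDominate's first insertion point for an
/// unsplittable catchswitch.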
static BasicBlock::iterator findInsertPointAfter(Instruction *I,
                                                 BasicBlock *MustDominate) {
  BasicBlock::iterator IP = ++I->getIterator();
  if (auto *II = dyn_cast<InvokeInst>(I))
    IP = II->getNormalDest()->begin();

  while (isa<PHINode>(IP))
    ++IP;

  if (isa<FuncletPadInst>(IP) || isa<LandingPadInst>(IP)) {
    ++IP;
  } else if (isa<CatchSwitchInst>(IP)) {
    IP = MustDominate->getFirstInsertionPt();
  } else {
    assert(!IP->isEHPad() && "unexpected eh pad!");
  }

  return IP;
}

/// InsertNoopCastOfTo - Insert a cast of V to the specified type,
/// which must be possible with a noop cast, doing what we can to share
/// the casts.
Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
  Instruction::CastOps Op = CastInst::getCastOpcode(V, false, Ty, false);
  assert((Op == Instruction::BitCast ||
          Op == Instruction::PtrToInt ||
          Op == Instruction::IntToPtr) &&
         "InsertNoopCastOfTo cannot perform non-noop casts!");
  assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) &&
         "InsertNoopCastOfTo cannot change sizes!");

  // Short-circuit unnecessary bitcasts.
  if (Op == Instruction::BitCast) {
    if (V->getType() == Ty)
      return V;
    if (CastInst *CI = dyn_cast<CastInst>(V)) {
      if (CI->getOperand(0)->getType() == Ty)
        return CI->getOperand(0);
    }
  }
  // Short-circuit unnecessary inttoptr<->ptrtoint casts.
  if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) &&
      SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) {
    if (CastInst *CI = dyn_cast<CastInst>(V))
      if ((CI->getOpcode() == Instruction::PtrToInt ||
           CI->getOpcode() == Instruction::IntToPtr) &&
          SE.getTypeSizeInBits(CI->getType()) ==
          SE.getTypeSizeInBits(CI->getOperand(0)->getType()))
        return CI->getOperand(0);
    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
      if ((CE->getOpcode() == Instruction::PtrToInt ||
           CE->getOpcode() == Instruction::IntToPtr) &&
          SE.getTypeSizeInBits(CE->getType()) ==
          SE.getTypeSizeInBits(CE->getOperand(0)->getType()))
        return CE->getOperand(0);
  }

  // Fold a cast of a constant.
  if (Constant *C = dyn_cast<Constant>(V))
    return ConstantExpr::getCast(Op, C, Ty);

  // Cast the argument at the beginning of the entry block, after
  // any bitcasts of other arguments.
  if (Argument *A = dyn_cast<Argument>(V)) {
    BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin();
    while ((isa<BitCastInst>(IP) &&
            isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) &&
            cast<BitCastInst>(IP)->getOperand(0) != A) ||
           isa<DbgInfoIntrinsic>(IP))
      ++IP;
    return ReuseOrCreateCast(A, Ty, Op, IP);
  }

  // Cast the instruction immediately after the instruction.
  Instruction *I = cast<Instruction>(V);
  BasicBlock::iterator IP = findInsertPointAfter(I, Builder.GetInsertBlock());
  return ReuseOrCreateCast(I, Ty, Op, IP);
}

/// InsertBinop - Insert the specified binary operator, doing a small amount
/// of work to avoid inserting an obviously redundant operation.
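/// For example, an identical binop (carrying no nsw/nuw/exact flags) found
/// within a few instructions above the insertion point is reused rather than
/// re-emitted.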
Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
                                 Value *LHS, Value *RHS) {
  // Fold a binop with constant operands.
  if (Constant *CLHS = dyn_cast<Constant>(LHS))
    if (Constant *CRHS = dyn_cast<Constant>(RHS))
      return ConstantExpr::get(Opcode, CLHS, CRHS);

  // Do a quick scan to see if we have this binop nearby. If so, reuse it.
  unsigned ScanLimit = 6;
  BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
  // Scanning starts from the last instruction before the insertion point.
  BasicBlock::iterator IP = Builder.GetInsertPoint();
  if (IP != BlockBegin) {
    --IP;
    for (; ScanLimit; --IP, --ScanLimit) {
      // Don't count dbg.value against the ScanLimit, to avoid perturbing the
      // generated code.
      if (isa<DbgInfoIntrinsic>(IP))
        ScanLimit++;

      // Conservatively, do not use any instruction which has any of wrap/exact
      // flags installed.
      // TODO: Instead of simply disabling poison instructions we can be clever
      // here and match SCEV to this instruction.
      auto canGeneratePoison = [](Instruction *I) {
        if (isa<OverflowingBinaryOperator>(I) &&
            (I->hasNoSignedWrap() || I->hasNoUnsignedWrap()))
          return true;
        if (isa<PossiblyExactOperator>(I) && I->isExact())
          return true;
        return false;
      };
      if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS &&
          IP->getOperand(1) == RHS && !canGeneratePoison(&*IP))
        return &*IP;
      if (IP == BlockBegin) break;
    }
  }

  // Save the original insertion point so we can restore it when we're done.
  DebugLoc Loc = Builder.GetInsertPoint()->getDebugLoc();
  SCEVInsertPointGuard Guard(Builder, this);

  // Move the insertion point out of as many loops as we can.
  while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
    if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break;
    BasicBlock *Preheader = L->getLoopPreheader();
    if (!Preheader) break;

    // Ok, move up a level.
    Builder.SetInsertPoint(Preheader->getTerminator());
  }

  // If we haven't found this binop, insert it.
  Instruction *BO = cast<Instruction>(Builder.CreateBinOp(Opcode, LHS, RHS));
  BO->setDebugLoc(Loc);
  rememberInstruction(BO);

  return BO;
}

/// FactorOutConstant - Test if S is divisible by Factor, using signed
/// division. If so, update S with Factor divided out and return true.
/// S need not be evenly divisible if a reasonable remainder can be
/// computed.
/// TODO: When ScalarEvolution gets a SCEVSDivExpr, this can be made
/// unnecessary; in its place, just signed-divide Ops[i] by the scale and
/// check to see if the divide was folded.
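/// For example, with Factor = 4: S = 10 becomes S = 2 with 2 added to
/// Remainder (10 = 2 * 4 + 2), and S = (8 * %x) becomes S = (2 * %x).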
static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder,
                              const SCEV *Factor, ScalarEvolution &SE,
                              const DataLayout &DL) {
  // Everything is divisible by one.
  if (Factor->isOne())
    return true;

  // x/x == 1.
  if (S == Factor) {
    S = SE.getConstant(S->getType(), 1);
    return true;
  }

  // For a Constant, check for a multiple of the given factor.
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
    // 0/x == 0.
    if (C->isZero())
      return true;
    // Check for divisibility.
    if (const SCEVConstant *FC = dyn_cast<SCEVConstant>(Factor)) {
      ConstantInt *CI =
          ConstantInt::get(SE.getContext(), C->getAPInt().sdiv(FC->getAPInt()));
      // If the quotient is zero and the remainder is non-zero, reject
      // the value at this scale. It will be considered for subsequent
      // smaller scales.
      if (!CI->isZero()) {
        const SCEV *Div = SE.getConstant(CI);
        S = Div;
        Remainder = SE.getAddExpr(
            Remainder, SE.getConstant(C->getAPInt().srem(FC->getAPInt())));
        return true;
      }
    }
  }

  // In a Mul, check if there is a constant operand which is a multiple
  // of the given factor.
  if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
    // Size is known, check if there is a constant operand which is a multiple
    // of the given factor. If so, we can factor it.
    const SCEVConstant *FC = cast<SCEVConstant>(Factor);
    if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
      if (!C->getAPInt().srem(FC->getAPInt())) {
        SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
        NewMulOps[0] = SE.getConstant(C->getAPInt().sdiv(FC->getAPInt()));
        S = SE.getMulExpr(NewMulOps);
        return true;
      }
  }

  // In an AddRec, check if both start and step are divisible.
  if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
    const SCEV *Step = A->getStepRecurrence(SE);
    const SCEV *StepRem = SE.getConstant(Step->getType(), 0);
    if (!FactorOutConstant(Step, StepRem, Factor, SE, DL))
      return false;
    if (!StepRem->isZero())
      return false;
    const SCEV *Start = A->getStart();
    if (!FactorOutConstant(Start, Remainder, Factor, SE, DL))
      return false;
    S = SE.getAddRecExpr(Start, Step, A->getLoop(),
                         A->getNoWrapFlags(SCEV::FlagNW));
    return true;
  }

  return false;
}

/// SimplifyAddOperands - Sort and simplify a list of add operands. NumAddRecs
/// is the number of SCEVAddRecExprs present, which are kept at the end of
/// the list.
///
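/// For example, Ops = [%x, 2, 3, {0,+,1}] simplifies to [5, %x, {0,+,1}]:
/// the non-addrec operands are folded through SE.getAddExpr while the
/// addrec stays at the end.
///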
static void SimplifyAddOperands(SmallVectorImpl<const SCEV *> &Ops,
                                Type *Ty,
                                ScalarEvolution &SE) {
  unsigned NumAddRecs = 0;
  for (unsigned i = Ops.size(); i > 0 && isa<SCEVAddRecExpr>(Ops[i-1]); --i)
    ++NumAddRecs;
  // Group Ops into non-addrecs and addrecs.
  SmallVector<const SCEV *, 8> NoAddRecs(Ops.begin(), Ops.end() - NumAddRecs);
  SmallVector<const SCEV *, 8> AddRecs(Ops.end() - NumAddRecs, Ops.end());
  // Let ScalarEvolution sort and simplify the non-addrecs list.
  const SCEV *Sum = NoAddRecs.empty() ?
                    SE.getConstant(Ty, 0) :
                    SE.getAddExpr(NoAddRecs);
  // If it returned an add, use the operands. Otherwise it simplified
  // the sum into a single value, so just use that.
  Ops.clear();
  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Sum))
    Ops.append(Add->op_begin(), Add->op_end());
  else if (!Sum->isZero())
    Ops.push_back(Sum);
  // Then append the addrecs.
  Ops.append(AddRecs.begin(), AddRecs.end());
}

/// SplitAddRecs - Flatten a list of add operands, moving addrec start values
/// out to the top level. For example, convert {a + b,+,c} to a, b, {0,+,c}.
/// This helps expose more opportunities for folding parts of the expressions
/// into GEP indices.
///
static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops,
                         Type *Ty,
                         ScalarEvolution &SE) {
  // Find the addrecs.
  SmallVector<const SCEV *, 8> AddRecs;
  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
    while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Ops[i])) {
      const SCEV *Start = A->getStart();
      if (Start->isZero()) break;
      const SCEV *Zero = SE.getConstant(Ty, 0);
      AddRecs.push_back(SE.getAddRecExpr(Zero,
                                         A->getStepRecurrence(SE),
                                         A->getLoop(),
                                         A->getNoWrapFlags(SCEV::FlagNW)));
      if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Start)) {
        Ops[i] = Zero;
        Ops.append(Add->op_begin(), Add->op_end());
        e += Add->getNumOperands();
      } else {
        Ops[i] = Start;
      }
    }
  if (!AddRecs.empty()) {
    // Add the addrecs onto the end of the list.
    Ops.append(AddRecs.begin(), AddRecs.end());
    // Resort the operand list, moving any constants to the front.
    SimplifyAddOperands(Ops, Ty, SE);
  }
}

/// expandAddToGEP - Expand an addition expression with a pointer type into
/// a GEP instead of using ptrtoint+arithmetic+inttoptr. This helps
/// BasicAliasAnalysis and other passes analyze the result. See the rules
/// for getelementptr vs. inttoptr in
/// http://llvm.org/docs/LangRef.html#pointeraliasing
/// for details.
///
/// Design note: The correctness of using getelementptr here depends on
/// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as
/// they may introduce pointer arithmetic which may not be safely converted
/// into getelementptr.
///
/// Design note: It might seem desirable for this function to be more
/// loop-aware. If some of the indices are loop-invariant while others
/// aren't, it might seem desirable to emit multiple GEPs, keeping the
/// loop-invariant portions of the overall computation outside the loop.
/// However, there are a few reasons this is not done here. Hoisting simple
/// arithmetic is a low-level optimization that often isn't very
/// important until late in the optimization process. In fact, passes
/// like InstructionCombining will combine GEPs, even if it means
/// pushing loop-invariant computation down into loops, so even if the
/// GEPs were split here, the work would quickly be undone. The
/// LoopStrengthReduction pass, which is usually run quite late (and
/// after the last InstructionCombining pass), takes care of hoisting
/// loop-invariant portions of expressions, after considering what
/// can be folded using target addressing modes.
///
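/// As an illustration (names and types assumed for the example): with a base
/// of type %S* where %S = type { i32, i32 } is 8 bytes wide, the offset
/// expression (4 + 8 * %i) factors into an element index %i plus a constant
/// field offset of 4, so this function emits
///   getelementptr %S, %S* %base, i64 %i, i32 1
/// instead of ptrtoint/add/inttoptr arithmetic.
///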
Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
                                    const SCEV *const *op_end,
                                    PointerType *PTy,
                                    Type *Ty,
                                    Value *V) {
  Type *OriginalElTy = PTy->getElementType();
  Type *ElTy = OriginalElTy;
  SmallVector<Value *, 4> GepIndices;
  SmallVector<const SCEV *, 8> Ops(op_begin, op_end);
  bool AnyNonZeroIndices = false;

  // Split AddRecs up into parts as either of the parts may be usable
  // without the other.
  SplitAddRecs(Ops, Ty, SE);

  Type *IntPtrTy = DL.getIntPtrType(PTy);

  // Descend down the pointer's type and attempt to convert the other
  // operands into GEP indices, at each level. The first index in a GEP
  // indexes into the array implied by the pointer operand; the rest of
  // the indices index into the element or field type selected by the
  // preceding index.
  for (;;) {
    // If the scale size is not 0, attempt to factor out a scale for
    // array indexing.
    SmallVector<const SCEV *, 8> ScaledOps;
    if (ElTy->isSized()) {
      const SCEV *ElSize = SE.getSizeOfExpr(IntPtrTy, ElTy);
      if (!ElSize->isZero()) {
        SmallVector<const SCEV *, 8> NewOps;
        for (const SCEV *Op : Ops) {
          const SCEV *Remainder = SE.getConstant(Ty, 0);
          if (FactorOutConstant(Op, Remainder, ElSize, SE, DL)) {
            // Op now has ElSize factored out.
            ScaledOps.push_back(Op);
            if (!Remainder->isZero())
              NewOps.push_back(Remainder);
            AnyNonZeroIndices = true;
          } else {
            // The operand was not divisible, so add it to the list of operands
            // we'll scan next iteration.
            NewOps.push_back(Op);
          }
        }
        // If we made any changes, update Ops.
        if (!ScaledOps.empty()) {
          Ops = NewOps;
          SimplifyAddOperands(Ops, Ty, SE);
        }
      }
    }

    // Record the scaled array index for this level of the type. If
    // we didn't find any operands that could be factored, tentatively
    // assume that element zero was selected (since the zero offset
    // would obviously be folded away).
    Value *Scaled = ScaledOps.empty() ?
                    Constant::getNullValue(Ty) :
                    expandCodeFor(SE.getAddExpr(ScaledOps), Ty);
    GepIndices.push_back(Scaled);

    // Collect struct field index operands.
    while (StructType *STy = dyn_cast<StructType>(ElTy)) {
      bool FoundFieldNo = false;
      // An empty struct has no fields.
      if (STy->getNumElements() == 0) break;
      // Field offsets are known. See if a constant offset falls within any of
      // the struct fields.
      if (Ops.empty())
        break;
      if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0]))
        if (SE.getTypeSizeInBits(C->getType()) <= 64) {
          const StructLayout &SL = *DL.getStructLayout(STy);
          uint64_t FullOffset = C->getValue()->getZExtValue();
          if (FullOffset < SL.getSizeInBytes()) {
            unsigned ElIdx = SL.getElementContainingOffset(FullOffset);
            GepIndices.push_back(
                ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx));
            ElTy = STy->getTypeAtIndex(ElIdx);
            Ops[0] =
                SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx));
            AnyNonZeroIndices = true;
            FoundFieldNo = true;
          }
        }
      // If no struct field offsets were found, tentatively assume that
      // field zero was selected (since the zero offset would obviously
      // be folded away).
      if (!FoundFieldNo) {
        ElTy = STy->getTypeAtIndex(0u);
        GepIndices.push_back(
            Constant::getNullValue(Type::getInt32Ty(Ty->getContext())));
      }
    }

    if (ArrayType *ATy = dyn_cast<ArrayType>(ElTy))
      ElTy = ATy->getElementType();
    else
      break;
  }

  // If none of the operands were convertible to proper GEP indices, cast
  // the base to i8* and do an ugly getelementptr with that. It's still
  // better than ptrtoint+arithmetic+inttoptr at least.
  if (!AnyNonZeroIndices) {
    // Cast the base to i8*.
    V = InsertNoopCastOfTo(V,
                           Type::getInt8PtrTy(Ty->getContext(),
                                              PTy->getAddressSpace()));

    assert(!isa<Instruction>(V) ||
           SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint()));

    // Expand the operands for a plain byte offset.
    Value *Idx = expandCodeFor(SE.getAddExpr(Ops), Ty);

    // Fold a GEP with constant operands.
    if (Constant *CLHS = dyn_cast<Constant>(V))
      if (Constant *CRHS = dyn_cast<Constant>(Idx))
        return ConstantExpr::getGetElementPtr(Type::getInt8Ty(Ty->getContext()),
                                              CLHS, CRHS);

    // Do a quick scan to see if we have this GEP nearby. If so, reuse it.
    unsigned ScanLimit = 6;
    BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
    // Scanning starts from the last instruction before the insertion point.
    BasicBlock::iterator IP = Builder.GetInsertPoint();
    if (IP != BlockBegin) {
      --IP;
      for (; ScanLimit; --IP, --ScanLimit) {
        // Don't count dbg.value against the ScanLimit, to avoid perturbing the
        // generated code.
        if (isa<DbgInfoIntrinsic>(IP))
          ScanLimit++;
        if (IP->getOpcode() == Instruction::GetElementPtr &&
            IP->getOperand(0) == V && IP->getOperand(1) == Idx)
          return &*IP;
        if (IP == BlockBegin) break;
      }
    }

    // Save the original insertion point so we can restore it when we're done.
    SCEVInsertPointGuard Guard(Builder, this);

    // Move the insertion point out of as many loops as we can.
    while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
      if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break;
      BasicBlock *Preheader = L->getLoopPreheader();
      if (!Preheader) break;

      // Ok, move up a level.
      Builder.SetInsertPoint(Preheader->getTerminator());
    }

    // Emit a GEP.
    Value *GEP = Builder.CreateGEP(Builder.getInt8Ty(), V, Idx, "uglygep");
    rememberInstruction(GEP);

    return GEP;
  }

  {
    SCEVInsertPointGuard Guard(Builder, this);

    // Move the insertion point out of as many loops as we can.
    while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
      if (!L->isLoopInvariant(V)) break;

      bool AnyIndexNotLoopInvariant = any_of(
          GepIndices, [L](Value *Op) { return !L->isLoopInvariant(Op); });

      if (AnyIndexNotLoopInvariant)
        break;

      BasicBlock *Preheader = L->getLoopPreheader();
      if (!Preheader) break;

      // Ok, move up a level.
      Builder.SetInsertPoint(Preheader->getTerminator());
    }

    // Insert a pretty getelementptr. Note that this GEP is not marked inbounds,
    // because ScalarEvolution may have changed the address arithmetic to
    // compute a value which is beyond the end of the allocated object.
    Value *Casted = V;
    if (V->getType() != PTy)
      Casted = InsertNoopCastOfTo(Casted, PTy);
    Value *GEP = Builder.CreateGEP(OriginalElTy, Casted, GepIndices, "scevgep");
    Ops.push_back(SE.getUnknown(GEP));
    rememberInstruction(GEP);
  }

  return expand(SE.getAddExpr(Ops));
}

Value *SCEVExpander::expandAddToGEP(const SCEV *Op, PointerType *PTy, Type *Ty,
                                    Value *V) {
  const SCEV *const Ops[1] = {Op};
  return expandAddToGEP(Ops, Ops + 1, PTy, Ty, V);
}

/// PickMostRelevantLoop - Given two loops pick the one that's most relevant for
/// SCEV expansion. If they are nested, this is the most nested. If they are
/// neighboring, pick the later.
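/// ("Later" here means the loop whose header is dominated by the other
/// loop's header, per the dominator checks below.)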
static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B,
                                        DominatorTree &DT) {
  if (!A) return B;
  if (!B) return A;
  if (A->contains(B)) return B;
  if (B->contains(A)) return A;
  if (DT.dominates(A->getHeader(), B->getHeader())) return B;
  if (DT.dominates(B->getHeader(), A->getHeader())) return A;
  return A; // Arbitrarily break the tie.
}

/// getRelevantLoop - Get the most relevant loop associated with the given
/// expression, according to PickMostRelevantLoop.
const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
  // Test whether we've already computed the most relevant loop for this SCEV.
  auto Pair = RelevantLoops.insert(std::make_pair(S, nullptr));
  if (!Pair.second)
    return Pair.first->second;

  if (isa<SCEVConstant>(S))
    // A constant has no relevant loops.
    return nullptr;
  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
    if (const Instruction *I = dyn_cast<Instruction>(U->getValue()))
      return Pair.first->second = SE.LI.getLoopFor(I->getParent());
    // A non-instruction has no relevant loops.
    return nullptr;
  }
  if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S)) {
    const Loop *L = nullptr;
    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
      L = AR->getLoop();
    for (const SCEV *Op : N->operands())
      L = PickMostRelevantLoop(L, getRelevantLoop(Op), SE.DT);
    return RelevantLoops[N] = L;
  }
  if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) {
    const Loop *Result = getRelevantLoop(C->getOperand());
    return RelevantLoops[C] = Result;
  }
  if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
    const Loop *Result = PickMostRelevantLoop(
        getRelevantLoop(D->getLHS()), getRelevantLoop(D->getRHS()), SE.DT);
    return RelevantLoops[D] = Result;
  }
  llvm_unreachable("Unexpected SCEV type!");
}

namespace {

/// LoopCompare - Compare loops by PickMostRelevantLoop.
class LoopCompare {
  DominatorTree &DT;
public:
  explicit LoopCompare(DominatorTree &dt) : DT(dt) {}

  bool operator()(std::pair<const Loop *, const SCEV *> LHS,
                  std::pair<const Loop *, const SCEV *> RHS) const {
    // Keep pointer operands sorted at the front.
    if (LHS.second->getType()->isPointerTy() !=
        RHS.second->getType()->isPointerTy())
      return LHS.second->getType()->isPointerTy();

    // Compare loops with PickMostRelevantLoop.
    if (LHS.first != RHS.first)
      return PickMostRelevantLoop(LHS.first, RHS.first, DT) != LHS.first;

    // If one operand is a non-constant negative and the other is not,
    // put the non-constant negative on the right so that a sub can
    // be used instead of a negate and add.
    if (LHS.second->isNonConstantNegative()) {
      if (!RHS.second->isNonConstantNegative())
        return false;
    } else if (RHS.second->isNonConstantNegative())
      return true;

    // Otherwise they are equivalent according to this comparison.
    return false;
  }
};

}

Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
  Type *Ty = SE.getEffectiveSCEVType(S->getType());

  // Collect all the add operands in a loop, along with their associated loops.
  // Iterate in reverse so that constants are emitted last, all else equal, and
  // so that pointer operands are inserted first, which the code below relies on
  // to form more involved GEPs.
  SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
  for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(S->op_end()),
       E(S->op_begin()); I != E; ++I)
    OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));

  // Sort by loop. Use a stable sort so that constants follow non-constants and
  // pointer operands precede non-pointer operands.
  std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(SE.DT));

  // Emit instructions to add all the operands. Hoist as much as possible
  // out of loops, and form meaningful getelementptrs where possible.
  Value *Sum = nullptr;
  for (auto I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E;) {
    const Loop *CurLoop = I->first;
    const SCEV *Op = I->second;
    if (!Sum) {
      // This is the first operand. Just expand it.
      Sum = expand(Op);
      ++I;
    } else if (PointerType *PTy = dyn_cast<PointerType>(Sum->getType())) {
      // The running sum expression is a pointer. Try to form a getelementptr
      // at this level with that as the base.
      SmallVector<const SCEV *, 4> NewOps;
      for (; I != E && I->first == CurLoop; ++I) {
        // If the operand is a SCEVUnknown and not an instruction, peek through
        // it, to enable more of it to be folded into the GEP.
        const SCEV *X = I->second;
        if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(X))
          if (!isa<Instruction>(U->getValue()))
            X = SE.getSCEV(U->getValue());
        NewOps.push_back(X);
      }
      Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, Sum);
    } else if (PointerType *PTy = dyn_cast<PointerType>(Op->getType())) {
      // The running sum is an integer, and there's a pointer at this level.
      // Try to form a getelementptr. If the running sum is instructions,
      // use a SCEVUnknown to avoid re-analyzing them.
      SmallVector<const SCEV *, 4> NewOps;
      NewOps.push_back(isa<Instruction>(Sum) ? SE.getUnknown(Sum) :
                                               SE.getSCEV(Sum));
      for (++I; I != E && I->first == CurLoop; ++I)
        NewOps.push_back(I->second);
      Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, expand(Op));
    } else if (Op->isNonConstantNegative()) {
      // Instead of doing a negate and add, just do a subtract.
      Value *W = expandCodeFor(SE.getNegativeSCEV(Op), Ty);
      Sum = InsertNoopCastOfTo(Sum, Ty);
      Sum = InsertBinop(Instruction::Sub, Sum, W);
      ++I;
    } else {
      // A simple add.
      Value *W = expandCodeFor(Op, Ty);
      Sum = InsertNoopCastOfTo(Sum, Ty);
      // Canonicalize a constant to the RHS.
      if (isa<Constant>(Sum)) std::swap(Sum, W);
      Sum = InsertBinop(Instruction::Add, Sum, W);
      ++I;
    }
  }

  return Sum;
}

Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
  Type *Ty = SE.getEffectiveSCEVType(S->getType());

  // Collect all the mul operands in a loop, along with their associated loops.
  // Iterate in reverse so that constants are emitted last, all else equal.
  SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
  for (std::reverse_iterator<SCEVMulExpr::op_iterator> I(S->op_end()),
       E(S->op_begin()); I != E; ++I)
    OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));

  // Sort by loop. Use a stable sort so that constants follow non-constants.
  std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(SE.DT));

  // Emit instructions to mul all the operands. Hoist as much as possible
  // out of loops.
  Value *Prod = nullptr;
  auto I = OpsAndLoops.begin();

  // Expand the calculation of X pow N in the following manner:
  // Let N = P1 + P2 + ... + PK, where all P are powers of 2. Then:
  // X pow N = (X pow P1) * (X pow P2) * ... * (X pow PK).
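  // For example, N = 13 = 1 + 4 + 8 costs three squarings (X^2, X^4, X^8)
  // plus two multiplies to combine X, X^4, and X^8 into the result.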
  const auto ExpandOpBinPowN = [this, &I, &OpsAndLoops, &Ty]() {
    auto E = I;
    // Calculate how many times the same operand from the same loop is included
    // into this power.
    uint64_t Exponent = 0;
    const uint64_t MaxExponent = UINT64_MAX >> 1;
    // No one sane will ever try to calculate such huge exponents, but if we
    // need this, we stop on UINT64_MAX / 2 because we need to exit the loop
    // below when the power of 2 exceeds our Exponent, and we want it to be
    // 1u << 31 at most to not deal with unsigned overflow.
    while (E != OpsAndLoops.end() && *I == *E && Exponent != MaxExponent) {
      ++Exponent;
      ++E;
    }
    assert(Exponent > 0 && "Trying to calculate a zeroth exponent of operand?");

    // Calculate powers with exponents 1, 2, 4, 8 etc. and include those of them
    // that are needed into the result.
    Value *P = expandCodeFor(I->second, Ty);
    Value *Result = nullptr;
    if (Exponent & 1)
      Result = P;
    for (uint64_t BinExp = 2; BinExp <= Exponent; BinExp <<= 1) {
      P = InsertBinop(Instruction::Mul, P, P);
      if (Exponent & BinExp)
        Result = Result ? InsertBinop(Instruction::Mul, Result, P) : P;
    }

    I = E;
    assert(Result && "Nothing was expanded?");
    return Result;
  };

  while (I != OpsAndLoops.end()) {
    if (!Prod) {
      // This is the first operand. Just expand it.
      Prod = ExpandOpBinPowN();
    } else if (I->second->isAllOnesValue()) {
      // Instead of doing a multiply by negative one, just do a negate.
      Prod = InsertNoopCastOfTo(Prod, Ty);
      Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod);
      ++I;
    } else {
      // A simple mul.
      Value *W = ExpandOpBinPowN();
      Prod = InsertNoopCastOfTo(Prod, Ty);
      // Canonicalize a constant to the RHS.
      if (isa<Constant>(Prod)) std::swap(Prod, W);
      const APInt *RHS;
      if (match(W, m_Power2(RHS))) {
        // Canonicalize Prod*(1<<C) to Prod<<C.
        assert(!Ty->isVectorTy() && "vector types are not SCEVable");
        Prod = InsertBinop(Instruction::Shl, Prod,
                           ConstantInt::get(Ty, RHS->logBase2()));
      } else {
        Prod = InsertBinop(Instruction::Mul, Prod, W);
      }
    }
  }

  return Prod;
}

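/// Expand a udiv. A divisor that is a constant power of two is emitted as a
/// right shift; e.g. a udiv by 8 becomes an lshr by 3.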
Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
  Type *Ty = SE.getEffectiveSCEVType(S->getType());

  Value *LHS = expandCodeFor(S->getLHS(), Ty);
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) {
    const APInt &RHS = SC->getAPInt();
    if (RHS.isPowerOf2())
      return InsertBinop(Instruction::LShr, LHS,
                         ConstantInt::get(Ty, RHS.logBase2()));
  }

  Value *RHS = expandCodeFor(S->getRHS(), Ty);
  return InsertBinop(Instruction::UDiv, LHS, RHS);
}

/// Move parts of Base into Rest to leave Base with the minimal
/// expression that provides a pointer operand suitable for a
/// GEP expansion.
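/// For example, Base = (%n + %ptr) becomes Base = %ptr, with %n folded into
/// Rest, exposing the bare pointer for use as a GEP base.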
static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest,
                              ScalarEvolution &SE) {
  while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Base)) {
    Base = A->getStart();
    Rest = SE.getAddExpr(Rest,
                         SE.getAddRecExpr(SE.getConstant(A->getType(), 0),
                                          A->getStepRecurrence(SE),
                                          A->getLoop(),
                                          A->getNoWrapFlags(SCEV::FlagNW)));
  }
  if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(Base)) {
    Base = A->getOperand(A->getNumOperands()-1);
    SmallVector<const SCEV *, 8> NewAddOps(A->op_begin(), A->op_end());
    NewAddOps.back() = Rest;
    Rest = SE.getAddExpr(NewAddOps);
    ExposePointerBase(Base, Rest, SE);
  }
}

876 : /// the PHI. If so, it may be reused by expanded expressions.
877 12 : bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV,
878 : const Loop *L) {
879 16 : if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV) ||
880 1 : (isa<CastInst>(IncV) && !isa<BitCastInst>(IncV)))
881 : return false;
882 : // If any of the operands don't dominate the insert position, bail.
883 : // Addrec operands are always loop-invariant, so this can only happen
884 : // if there are instructions which haven't been hoisted.
885 15 : if (L == IVIncInsertLoop) {
886 0 : for (User::op_iterator OI = IncV->op_begin()+1,
887 0 : OE = IncV->op_end(); OI != OE; ++OI)
888 : if (Instruction *OInst = dyn_cast<Instruction>(OI))
889 0 : if (!SE.DT.dominates(OInst, IVIncInsertPos))
890 : return false;
891 : }
892 : // Advance to the next instruction.
893 15 : IncV = dyn_cast<Instruction>(IncV->getOperand(0));
894 : if (!IncV)
895 : return false;
896 :
897 13 : if (IncV->mayHaveSideEffects())
898 : return false;
899 :
900 13 : if (IncV == PN)
901 : return true;
902 :
903 : return isNormalAddRecExprPHI(PN, IncV, L);
904 : }
905 :
906 : /// getIVIncOperand returns an induction variable increment's induction
907 : /// variable operand.
908 : ///
909 : /// If allowScale is set, any type of GEP is allowed as long as the nonIV
910 : /// operands dominate InsertPos.
911 : ///
912 : /// If allowScale is not set, ensure that a GEP increment conforms to one of the
913 : /// simple patterns generated by getAddRecExprPHILiterally and
914 : /// expandAddtoGEP. If the pattern isn't recognized, return NULL.
915 6284 : Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV,
Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV,
                                           Instruction *InsertPos,
                                           bool allowScale) {
  if (IncV == InsertPos)
    return nullptr;

  switch (IncV->getOpcode()) {
  default:
    return nullptr;
  // Check for a simple Add/Sub or GEP of a loop invariant step.
  case Instruction::Add:
  case Instruction::Sub: {
    Instruction *OInst = dyn_cast<Instruction>(IncV->getOperand(1));
    if (!OInst || SE.DT.dominates(OInst, InsertPos))
      return dyn_cast<Instruction>(IncV->getOperand(0));
    return nullptr;
  }
  case Instruction::BitCast:
    return dyn_cast<Instruction>(IncV->getOperand(0));
  case Instruction::GetElementPtr:
    for (auto I = IncV->op_begin() + 1, E = IncV->op_end(); I != E; ++I) {
      if (isa<Constant>(*I))
        continue;
      if (Instruction *OInst = dyn_cast<Instruction>(*I)) {
        if (!SE.DT.dominates(OInst, InsertPos))
          return nullptr;
      }
      if (allowScale) {
        // allow any kind of GEP as long as it can be hoisted.
        continue;
      }
      // This must be a pointer addition of constants (pretty), which is already
      // handled, or some number of address-size elements (ugly). Ugly geps
      // have 2 operands. i1* is used by the expander to represent an
      // address-size element.
      if (IncV->getNumOperands() != 2)
        return nullptr;
      unsigned AS = cast<PointerType>(IncV->getType())->getAddressSpace();
      if (IncV->getType() != Type::getInt1PtrTy(SE.getContext(), AS)
          && IncV->getType() != Type::getInt8PtrTy(SE.getContext(), AS))
        return nullptr;
      break;
    }
    return dyn_cast<Instruction>(IncV->getOperand(0));
  }
}

963 : /// stack of saved builders has 'I' as its insert point, update it to point to
964 : /// the instruction after 'I'. This is intended to be used when the instruction
965 : /// 'I' is being moved. If this fixup is not done and 'I' is moved to a
966 : /// different block, the inconsistent insert point (with a mismatched
967 : /// Instruction and Block) can lead to an instruction being inserted in a block
968 : /// other than its parent.
969 288 : void SCEVExpander::fixupInsertPoints(Instruction *I) {
970 : BasicBlock::iterator It(*I);
971 : BasicBlock::iterator NewInsertPt = std::next(It);
972 288 : if (Builder.GetInsertPoint() == It)
973 2 : Builder.SetInsertPoint(&*NewInsertPt);
974 289 : for (auto *InsertPtGuard : InsertPointGuards)
975 1 : if (InsertPtGuard->GetInsertPoint() == It)
976 : InsertPtGuard->SetInsertPoint(NewInsertPt);
977 288 : }
978 :
979 : /// hoistStep - Attempt to hoist a simple IV increment above InsertPos to make
980 : /// it available to other uses in this loop. Recursively hoist any operands,
981 : /// until we reach a value that dominates InsertPos.
982 6087 : bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos) {
983 6087 : if (SE.DT.dominates(IncV, InsertPos))
984 : return true;
985 :
986 : // InsertPos must itself dominate IncV so that IncV's new position satisfies
987 : // its existing users.
988 589 : if (isa<PHINode>(InsertPos) ||
989 291 : !SE.DT.dominates(InsertPos->getParent(), IncV->getParent()))
990 7 : return false;
991 :
992 291 : if (!SE.LI.movementPreservesLCSSAForm(IncV, InsertPos))
993 : return false;
994 :
995 : // Check that the chain of IV operands leading back to Phi can be hoisted.
996 : SmallVector<Instruction*, 4> IVIncs;
997 : for(;;) {
998 296 : Instruction *Oper = getIVIncOperand(IncV, InsertPos, /*allowScale*/true);
999 296 : if (!Oper)
1000 : return false;
1001 : // IncV is safe to hoist.
1002 292 : IVIncs.push_back(IncV);
1003 292 : IncV = Oper;
1004 292 : if (SE.DT.dominates(IncV, InsertPos))
1005 : break;
1006 : }
1007 575 : for (auto I = IVIncs.rbegin(), E = IVIncs.rend(); I != E; ++I) {
1008 288 : fixupInsertPoints(*I);
1009 288 : (*I)->moveBefore(InsertPos);
1010 : }
1011 : return true;
1012 : }
1013 :
1014 : /// Determine if this cyclic phi is in a form that would have been generated by
1015 : /// LSR. We don't care if the phi was actually expanded in this pass, as long
1016 : /// as it is in a low-cost form, for example, no implied multiplication. This
1017 : /// should match any patterns generated by getAddRecExprPHILiterally and
1018 : /// expandAddtoGEP.
1019 5914 : bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV,
1020 : const Loop *L) {
1021 : for(Instruction *IVOper = IncV;
1022 5988 : (IVOper = getIVIncOperand(IVOper, L->getLoopPreheader()->getTerminator(),
1023 5988 : /*allowScale=*/false));) {
1024 5931 : if (IVOper == PN)
1025 : return true;
1026 : }
1027 : return false;
1028 : }
1029 :
1030 : /// expandIVInc - Expand an IV increment at Builder's current InsertPos.
1031 : /// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may
1032 : /// need to materialize IV increments elsewhere to handle difficult situations.
1033 3647 : Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
1034 : Type *ExpandTy, Type *IntTy,
1035 : bool useSubtract) {
1036 : Value *IncV;
1037 : // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
1038 3647 : if (ExpandTy->isPointerTy()) {
1039 : PointerType *GEPPtrTy = cast<PointerType>(ExpandTy);
1040 : // If the step isn't constant, don't use an implicitly scaled GEP, because
1041 : // that would require a multiply inside the loop.
1042 991 : if (!isa<ConstantInt>(StepV))
1043 104 : GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()),
1044 : GEPPtrTy->getAddressSpace());
1045 991 : IncV = expandAddToGEP(SE.getSCEV(StepV), GEPPtrTy, IntTy, PN);
1046 991 : if (IncV->getType() != PN->getType()) {
1047 434 : IncV = Builder.CreateBitCast(IncV, PN->getType());
1048 434 : rememberInstruction(IncV);
1049 : }
1050 : } else {
1051 2656 : IncV = useSubtract ?
1052 39 : Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") :
1053 7929 : Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next");
1054 2656 : rememberInstruction(IncV);
1055 : }
1056 3647 : return IncV;
1057 : }
1058 :
1059 : /// Hoist the addrec instruction chain rooted in the loop phi above the
1060 : /// position. This routine assumes that this is possible (has been checked).
1061 5742 : void SCEVExpander::hoistBeforePos(DominatorTree *DT, Instruction *InstToHoist,
1062 : Instruction *Pos, PHINode *LoopPhi) {
1063 : do {
1064 5742 : if (DT->dominates(InstToHoist, Pos))
1065 : break;
1066 : // Make sure the increment is where we want it. But don't move it
1067 : // down past a potential existing post-inc user.
1068 0 : fixupInsertPoints(InstToHoist);
1069 0 : InstToHoist->moveBefore(Pos);
1070 : Pos = InstToHoist;
1071 0 : InstToHoist = cast<Instruction>(InstToHoist->getOperand(0));
1072 0 : } while (InstToHoist != LoopPhi);
1073 5742 : }
1074 :
1075 : /// Check whether we can cheaply express the requested SCEV in terms of
1076 : /// the available PHI SCEV by truncation and/or inversion of the step.
1077 22 : static bool canBeCheaplyTransformed(ScalarEvolution &SE,
1078 : const SCEVAddRecExpr *Phi,
1079 : const SCEVAddRecExpr *Requested,
1080 : bool &InvertStep) {
1081 22 : Type *PhiTy = SE.getEffectiveSCEVType(Phi->getType());
1082 22 : Type *RequestedTy = SE.getEffectiveSCEVType(Requested->getType());
1083 :
1084 22 : if (RequestedTy->getIntegerBitWidth() > PhiTy->getIntegerBitWidth())
1085 : return false;
1086 :
1087 : // Try truncate it if necessary.
1088 22 : Phi = dyn_cast<SCEVAddRecExpr>(SE.getTruncateOrNoop(Phi, RequestedTy));
1089 : if (!Phi)
1090 : return false;
1091 :
1092 : // Check whether truncation will help.
1093 22 : if (Phi == Requested) {
1094 0 : InvertStep = false;
1095 0 : return true;
1096 : }
1097 :
1098 : // Check whether inverting will help: {R,+,-1} == R - {0,+,1}.
1099 22 : if (SE.getAddExpr(Requested->getStart(),
1100 : SE.getNegativeSCEV(Requested)) == Phi) {
1101 2 : InvertStep = true;
1102 2 : return true;
1103 : }
1104 :
1105 : return false;
1106 : }
1107 :
1108 3633 : static bool IsIncrementNSW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
1109 3633 : if (!isa<IntegerType>(AR->getType()))
1110 : return false;
1111 :
1112 : unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
1113 2641 : Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2);
1114 2641 : const SCEV *Step = AR->getStepRecurrence(SE);
1115 2641 : const SCEV *OpAfterExtend = SE.getAddExpr(SE.getSignExtendExpr(Step, WideTy),
1116 : SE.getSignExtendExpr(AR, WideTy));
1117 : const SCEV *ExtendAfterOp =
1118 2641 : SE.getSignExtendExpr(SE.getAddExpr(AR, Step), WideTy);
1119 2641 : return ExtendAfterOp == OpAfterExtend;
1120 : }
1121 :
1122 3633 : static bool IsIncrementNUW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
1123 3633 : if (!isa<IntegerType>(AR->getType()))
1124 : return false;
1125 :
1126 : unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
1127 2641 : Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2);
1128 2641 : const SCEV *Step = AR->getStepRecurrence(SE);
1129 2641 : const SCEV *OpAfterExtend = SE.getAddExpr(SE.getZeroExtendExpr(Step, WideTy),
1130 : SE.getZeroExtendExpr(AR, WideTy));
1131 : const SCEV *ExtendAfterOp =
1132 2641 : SE.getZeroExtendExpr(SE.getAddExpr(AR, Step), WideTy);
1133 2641 : return ExtendAfterOp == OpAfterExtend;
1134 : }
1135 :
1136 : /// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
1137 : /// the base addrec, which is the addrec without any non-loop-dominating
1138 : /// values, and return the PHI.
1139 : PHINode *
1140 9461 : SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
1141 : const Loop *L,
1142 : Type *ExpandTy,
1143 : Type *IntTy,
1144 : Type *&TruncTy,
1145 : bool &InvertStep) {
1146 : assert((!IVIncInsertLoop||IVIncInsertPos) && "Uninitialized insert position");
1147 :
1148 : // Reuse a previously-inserted PHI, if present.
1149 9461 : BasicBlock *LatchBlock = L->getLoopLatch();
1150 9461 : if (LatchBlock) {
1151 : PHINode *AddRecPhiMatch = nullptr;
1152 : Instruction *IncV = nullptr;
1153 9461 : TruncTy = nullptr;
1154 9461 : InvertStep = false;
1155 :
1156 : // Only try partially matching scevs that need truncation and/or
1157 : // step-inversion if we know this loop is outside the current loop.
1158 : bool TryNonMatchingSCEV =
1159 18628 : IVIncInsertLoop &&
1160 18334 : SE.DT.properlyDominates(LatchBlock, IVIncInsertLoop->getHeader());
1161 :
1162 29826 : for (PHINode &PN : L->getHeader()->phis()) {
1163 16717 : if (!SE.isSCEVable(PN.getType()))
1164 : continue;
1165 :
1166 16034 : const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&PN));
1167 : if (!PhiSCEV)
1168 : continue;
1169 :
1170 : bool IsMatchingSCEV = PhiSCEV == Normalized;
1171 : // We only handle truncation and inversion of phi recurrences for the
1172 : // expanded expression if the expanded expression's loop dominates the
1173 : // loop we insert to. Check now, so we can bail out early.
1174 13904 : if (!IsMatchingSCEV && !TryNonMatchingSCEV)
1175 : continue;
1176 :
1177 : // TODO: this possibly can be reworked to avoid this cast at all.
1178 : Instruction *TempIncV =
1179 5890 : dyn_cast<Instruction>(PN.getIncomingValueForBlock(LatchBlock));
1180 : if (!TempIncV)
1181 : continue;
1182 :
1183 : // Check whether we can reuse this PHI node.
1184 5889 : if (LSRMode) {
1185 5877 : if (!isExpandedAddRecExprPHI(&PN, TempIncV, L))
1186 : continue;
1187 5826 : if (L == IVIncInsertLoop && !hoistIVInc(TempIncV, IVIncInsertPos))
1188 : continue;
1189 : } else {
1190 12 : if (!isNormalAddRecExprPHI(&PN, TempIncV, L))
1191 : continue;
1192 : }
1193 :
1194 : // Stop if we have found an exact match SCEV.
1195 5835 : if (IsMatchingSCEV) {
1196 : IncV = TempIncV;
1197 5813 : TruncTy = nullptr;
1198 5813 : InvertStep = false;
1199 : AddRecPhiMatch = &PN;
1200 5813 : break;
1201 : }
1202 :
1203 : // Try whether the phi can be translated into the requested form
1204 : // (truncated and/or offset by a constant).
1205 44 : if ((!TruncTy || InvertStep) &&
1206 22 : canBeCheaplyTransformed(SE, PhiSCEV, Normalized, InvertStep)) {
1207 : // Record the phi node. But don't stop we might find an exact match
1208 : // later.
1209 : AddRecPhiMatch = &PN;
1210 : IncV = TempIncV;
1211 2 : TruncTy = SE.getEffectiveSCEVType(Normalized->getType());
1212 : }
1213 : }
1214 :
1215 9461 : if (AddRecPhiMatch) {
1216 : // Potentially, move the increment. We have made sure in
1217 : // isExpandedAddRecExprPHI or hoistIVInc that this is possible.
1218 5815 : if (L == IVIncInsertLoop)
1219 5742 : hoistBeforePos(&SE.DT, IncV, IVIncInsertPos, AddRecPhiMatch);
1220 :
1221 : // Ok, the add recurrence looks usable.
1222 : // Remember this PHI, even in post-inc mode.
1223 5815 : InsertedValues.insert(AddRecPhiMatch);
1224 : // Remember the increment.
1225 5815 : rememberInstruction(IncV);
1226 5815 : return AddRecPhiMatch;
1227 : }
1228 : }
1229 :
1230 : // Save the original insertion point so we can restore it when we're done.
1231 7292 : SCEVInsertPointGuard Guard(Builder, this);
1232 :
1233 : // Another AddRec may need to be recursively expanded below. For example, if
1234 : // this AddRec is quadratic, the StepV may itself be an AddRec in this
1235 : // loop. Remove this loop from the PostIncLoops set before expanding such
1236 : // AddRecs. Otherwise, we cannot find a valid position for the step
1237 : // (i.e. StepV can never dominate its loop header). Ideally, we could do
1238 : // SavedIncLoops.swap(PostIncLoops), but we generally have a single element,
1239 : // so it's not worth implementing SmallPtrSet::swap.
1240 : PostIncLoopSet SavedPostIncLoops = PostIncLoops;
1241 3646 : PostIncLoops.clear();
1242 :
1243 : // Expand code for the start value into the loop preheader.
1244 : assert(L->getLoopPreheader() &&
1245 : "Can't expand add recurrences without a loop preheader!");
1246 3646 : Value *StartV = expandCodeFor(Normalized->getStart(), ExpandTy,
1247 : L->getLoopPreheader()->getTerminator());
1248 :
1249 : // StartV must have been be inserted into L's preheader to dominate the new
1250 : // phi.
1251 : assert(!isa<Instruction>(StartV) ||
1252 : SE.DT.properlyDominates(cast<Instruction>(StartV)->getParent(),
1253 : L->getHeader()));
1254 :
1255 : // Expand code for the step value. Do this before creating the PHI so that PHI
1256 : // reuse code doesn't see an incomplete PHI.
1257 3646 : const SCEV *Step = Normalized->getStepRecurrence(SE);
1258 : // If the stride is negative, insert a sub instead of an add for the increment
1259 : // (unless it's a constant, because subtracts of constants are canonicalized
1260 : // to adds).
1261 3646 : bool useSubtract = !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
1262 : if (useSubtract)
1263 13 : Step = SE.getNegativeSCEV(Step);
1264 : // Expand the step somewhere that dominates the loop header.
1265 3646 : Value *StepV = expandCodeFor(Step, IntTy, &L->getHeader()->front());
1266 :
1267 : // The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if
1268 : // we actually do emit an addition. It does not apply if we emit a
1269 : // subtraction.
1270 3646 : bool IncrementIsNUW = !useSubtract && IsIncrementNUW(SE, Normalized);
1271 3646 : bool IncrementIsNSW = !useSubtract && IsIncrementNSW(SE, Normalized);
1272 :
1273 : // Create the PHI.
1274 : BasicBlock *Header = L->getHeader();
1275 3646 : Builder.SetInsertPoint(Header, Header->begin());
1276 : pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
1277 7292 : PHINode *PN = Builder.CreatePHI(ExpandTy, std::distance(HPB, HPE),
1278 3646 : Twine(IVName) + ".iv");
1279 3646 : rememberInstruction(PN);
1280 :
1281 : // Create the step instructions and populate the PHI.
1282 10938 : for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
1283 : BasicBlock *Pred = *HPI;
1284 :
1285 : // Add a start value.
1286 7292 : if (!L->contains(Pred)) {
1287 3646 : PN->addIncoming(StartV, Pred);
1288 : continue;
1289 : }
1290 :
1291 : // Create a step value and add it to the PHI.
1292 : // If IVIncInsertLoop is non-null and equal to the addrec's loop, insert the
1293 : // instructions at IVIncInsertPos.
1294 3646 : Instruction *InsertPos = L == IVIncInsertLoop ?
1295 : IVIncInsertPos : Pred->getTerminator();
1296 3646 : Builder.SetInsertPoint(InsertPos);
1297 3646 : Value *IncV = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
1298 :
1299 3646 : if (isa<OverflowingBinaryOperator>(IncV)) {
1300 2655 : if (IncrementIsNUW)
1301 839 : cast<BinaryOperator>(IncV)->setHasNoUnsignedWrap();
1302 2655 : if (IncrementIsNSW)
1303 1489 : cast<BinaryOperator>(IncV)->setHasNoSignedWrap();
1304 : }
1305 3646 : PN->addIncoming(IncV, Pred);
1306 : }
1307 :
1308 : // After expanding subexpressions, restore the PostIncLoops set so the caller
1309 : // can ensure that IVIncrement dominates the current uses.
1310 : PostIncLoops = SavedPostIncLoops;
1311 :
1312 : // Remember this PHI, even in post-inc mode.
1313 3646 : InsertedValues.insert(PN);
1314 :
1315 : return PN;
1316 : }
1317 :
1318 9461 : Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
1319 : Type *STy = S->getType();
1320 9461 : Type *IntTy = SE.getEffectiveSCEVType(STy);
1321 9461 : const Loop *L = S->getLoop();
1322 :
1323 : // Determine a normalized form of this expression, which is the expression
1324 : // before any post-inc adjustment is made.
1325 : const SCEVAddRecExpr *Normalized = S;
1326 9461 : if (PostIncLoops.count(L)) {
1327 : PostIncLoopSet Loops;
1328 3822 : Loops.insert(L);
1329 3822 : Normalized = cast<SCEVAddRecExpr>(normalizeForPostIncUse(S, Loops, SE));
1330 : }
1331 :
1332 : // Strip off any non-loop-dominating component from the addrec start.
1333 9461 : const SCEV *Start = Normalized->getStart();
1334 : const SCEV *PostLoopOffset = nullptr;
1335 18922 : if (!SE.properlyDominates(Start, L->getHeader())) {
1336 : PostLoopOffset = Start;
1337 1 : Start = SE.getConstant(Normalized->getType(), 0);
1338 1 : Normalized = cast<SCEVAddRecExpr>(
1339 1 : SE.getAddRecExpr(Start, Normalized->getStepRecurrence(SE),
1340 : Normalized->getLoop(),
1341 : Normalized->getNoWrapFlags(SCEV::FlagNW)));
1342 : }
1343 :
1344 : // Strip off any non-loop-dominating component from the addrec step.
1345 9461 : const SCEV *Step = Normalized->getStepRecurrence(SE);
1346 : const SCEV *PostLoopScale = nullptr;
1347 18922 : if (!SE.dominates(Step, L->getHeader())) {
1348 : PostLoopScale = Step;
1349 0 : Step = SE.getConstant(Normalized->getType(), 1);
1350 0 : if (!Start->isZero()) {
1351 : // The normalization below assumes that Start is constant zero, so if
1352 : // it isn't, re-associate Start to PostLoopOffset.
1353 : assert(!PostLoopOffset && "Start not-null but PostLoopOffset set?");
1354 : PostLoopOffset = Start;
1355 0 : Start = SE.getConstant(Normalized->getType(), 0);
1356 : }
1357 : Normalized =
1358 0 : cast<SCEVAddRecExpr>(SE.getAddRecExpr(
1359 : Start, Step, Normalized->getLoop(),
1360 : Normalized->getNoWrapFlags(SCEV::FlagNW)));
1361 : }
1362 :
1363 : // Expand the core addrec. If we need post-loop scaling, force it to
1364 : // expand to an integer type to avoid the need for additional casting.
1365 9461 : Type *ExpandTy = PostLoopScale ? IntTy : STy;
1366 : // We can't use a pointer type for the addrec if the pointer type is
1367 : // non-integral.
1368 : Type *AddRecPHIExpandTy =
1369 9461 : DL.isNonIntegralPointerType(STy) ? Normalized->getType() : ExpandTy;
1370 :
1371 : // In some cases, we decide to reuse an existing phi node but need to truncate
1372 : // it and/or invert the step.
1373 9461 : Type *TruncTy = nullptr;
1374 9461 : bool InvertStep = false;
1375 9461 : PHINode *PN = getAddRecExprPHILiterally(Normalized, L, AddRecPHIExpandTy,
1376 : IntTy, TruncTy, InvertStep);
1377 :
1378 : // Accommodate post-inc mode, if necessary.
1379 : Value *Result;
1380 9461 : if (!PostIncLoops.count(L))
1381 : Result = PN;
1382 : else {
1383 : // In PostInc mode, use the post-incremented value.
1384 3822 : BasicBlock *LatchBlock = L->getLoopLatch();
1385 : assert(LatchBlock && "PostInc mode requires a unique loop latch!");
1386 3822 : Result = PN->getIncomingValueForBlock(LatchBlock);
1387 :
1388 : // For an expansion to use the postinc form, the client must call
1389 : // expandCodeFor with an InsertPoint that is either outside the PostIncLoop
1390 : // or dominated by IVIncInsertPos.
1391 7644 : if (isa<Instruction>(Result) &&
1392 7644 : !SE.DT.dominates(cast<Instruction>(Result),
1393 : &*Builder.GetInsertPoint())) {
1394 : // The induction variable's postinc expansion does not dominate this use.
1395 : // IVUsers tries to prevent this case, so it is rare. However, it can
1396 : // happen when an IVUser outside the loop is not dominated by the latch
1397 : // block. Adjusting IVIncInsertPos before expansion begins cannot handle
1398 : // all cases. Consider a phi outside the loop whose operand is replaced during
1399 : // expansion with the value of the postinc user. Without fundamentally
1400 : // changing the way postinc users are tracked, the only remedy is
1401 : // inserting an extra IV increment. StepV might fold into PostLoopOffset,
1402 : // but hopefully expandCodeFor handles that.
1403 : bool useSubtract =
1404 1 : !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
1405 : if (useSubtract)
1406 0 : Step = SE.getNegativeSCEV(Step);
1407 : Value *StepV;
1408 : {
1409 : // Expand the step somewhere that dominates the loop header.
1410 2 : SCEVInsertPointGuard Guard(Builder, this);
1411 1 : StepV = expandCodeFor(Step, IntTy, &L->getHeader()->front());
1412 : }
1413 1 : Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
1414 : }
1415 : }
1416 :
1417 : // We have decided to reuse an induction variable of a dominating loop. Apply
1418 : // truncation and/or inversion of the step.
1419 9461 : if (TruncTy) {
1420 2 : Type *ResTy = Result->getType();
1421 : // Normalize the result type.
1422 2 : if (ResTy != SE.getEffectiveSCEVType(ResTy))
1423 0 : Result = InsertNoopCastOfTo(Result, SE.getEffectiveSCEVType(ResTy));
1424 : // Truncate the result.
1425 2 : if (TruncTy != Result->getType()) {
1426 2 : Result = Builder.CreateTrunc(Result, TruncTy);
1427 2 : rememberInstruction(Result);
1428 : }
1429 : // Invert the result.
1430 2 : if (InvertStep) {
1431 6 : Result = Builder.CreateSub(expandCodeFor(Normalized->getStart(), TruncTy),
1432 : Result);
1433 2 : rememberInstruction(Result);
1434 : }
1435 : }
1436 :
1437 : // Re-apply any non-loop-dominating scale.
1438 9461 : if (PostLoopScale) {
1439 : assert(S->isAffine() && "Can't linearly scale non-affine recurrences.");
1440 0 : Result = InsertNoopCastOfTo(Result, IntTy);
1441 0 : Result = Builder.CreateMul(Result,
1442 : expandCodeFor(PostLoopScale, IntTy));
1443 0 : rememberInstruction(Result);
1444 : }
1445 :
1446 : // Re-apply any non-loop-dominating offset.
1447 9461 : if (PostLoopOffset) {
1448 : if (PointerType *PTy = dyn_cast<PointerType>(ExpandTy)) {
1449 2 : if (Result->getType()->isIntegerTy()) {
1450 1 : Value *Base = expandCodeFor(PostLoopOffset, ExpandTy);
1451 1 : Result = expandAddToGEP(SE.getUnknown(Result), PTy, IntTy, Base);
1452 : } else {
1453 0 : Result = expandAddToGEP(PostLoopOffset, PTy, IntTy, Result);
1454 : }
1455 : } else {
1456 0 : Result = InsertNoopCastOfTo(Result, IntTy);
1457 0 : Result = Builder.CreateAdd(Result,
1458 : expandCodeFor(PostLoopOffset, IntTy));
1459 0 : rememberInstruction(Result);
1460 : }
1461 : }
1462 :
1463 9461 : return Result;
1464 : }
1465 :
1466 9843 : Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
1467 9843 : if (!CanonicalMode) return expandAddRecExprLiterally(S);
1468 :
1469 382 : Type *Ty = SE.getEffectiveSCEVType(S->getType());
1470 382 : const Loop *L = S->getLoop();
1471 :
1472 : // First check for an existing canonical IV in a suitable type.
1473 : PHINode *CanonicalIV = nullptr;
1474 382 : if (PHINode *PN = L->getCanonicalInductionVariable())
1475 234 : if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty))
1476 : CanonicalIV = PN;
1477 :
1478 : // Rewrite an AddRec in terms of the canonical induction variable, if
1479 : // its type is narrower.
1480 222 : if (CanonicalIV &&
1481 222 : SE.getTypeSizeInBits(CanonicalIV->getType()) >
1482 222 : SE.getTypeSizeInBits(Ty)) {
1483 11 : SmallVector<const SCEV *, 4> NewOps(S->getNumOperands());
1484 33 : for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i)
1485 44 : NewOps[i] = SE.getAnyExtendExpr(S->op_begin()[i], CanonicalIV->getType());
1486 22 : Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(),
1487 : S->getNoWrapFlags(SCEV::FlagNW)));
1488 : BasicBlock::iterator NewInsertPt =
1489 11 : findInsertPointAfter(cast<Instruction>(V), Builder.GetInsertBlock());
1490 11 : V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), nullptr,
1491 11 : &*NewInsertPt);
1492 : return V;
1493 : }
1494 :
1495 : // {X,+,F} --> X + {0,+,F}
1496 742 : if (!S->getStart()->isZero()) {
1497 173 : SmallVector<const SCEV *, 4> NewOps(S->op_begin(), S->op_end());
1498 346 : NewOps[0] = SE.getConstant(Ty, 0);
1499 346 : const SCEV *Rest = SE.getAddRecExpr(NewOps, L,
1500 : S->getNoWrapFlags(SCEV::FlagNW));
1501 :
1502 : // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the
1503 : // comments on expandAddToGEP for details.
1504 173 : const SCEV *Base = S->getStart();
1505 : // Dig into the expression to find the pointer base for a GEP.
1506 173 : const SCEV *ExposedRest = Rest;
1507 173 : ExposePointerBase(Base, ExposedRest, SE);
1508 : // If we found a pointer, expand the AddRec with a GEP.
1509 173 : if (PointerType *PTy = dyn_cast<PointerType>(Base->getType())) {
1510 : // Make sure the Base isn't something exotic, such as a multiplied
1511 : // or divided pointer value. In those cases, the result type isn't
1512 : // actually a pointer type.
1513 130 : if (!isa<SCEVMulExpr>(Base) && !isa<SCEVUDivExpr>(Base)) {
1514 65 : Value *StartV = expand(Base);
1515 : assert(StartV->getType() == PTy && "Pointer type mismatch for GEP!");
1516 65 : return expandAddToGEP(ExposedRest, PTy, Ty, StartV);
1517 : }
1518 : }
1519 :
1520 : // Just do a normal add. Pre-expand the operands to suppress folding.
1521 : //
1522 : // The LHS and RHS values are factored out of the expand call to make the
1523 : // output independent of the argument evaluation order.
1524 216 : const SCEV *AddExprLHS = SE.getUnknown(expand(S->getStart()));
1525 108 : const SCEV *AddExprRHS = SE.getUnknown(expand(Rest));
1526 108 : return expand(SE.getAddExpr(AddExprLHS, AddExprRHS));
1527 : }
1528 :
1529 : // If we don't yet have a canonical IV, create one.
1530 198 : if (!CanonicalIV) {
1531 : // Create and insert the PHI node for the induction variable in the
1532 : // specified loop.
1533 : BasicBlock *Header = L->getHeader();
1534 : pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
1535 87 : CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar",
1536 : &Header->front());
1537 87 : rememberInstruction(CanonicalIV);
1538 :
1539 : SmallSet<BasicBlock *, 4> PredSeen;
1540 87 : Constant *One = ConstantInt::get(Ty, 1);
1541 265 : for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
1542 : BasicBlock *HP = *HPI;
1543 178 : if (!PredSeen.insert(HP).second) {
1544 : // There must be an incoming value for each predecessor, even the
1545 : // duplicates!
1546 0 : CanonicalIV->addIncoming(CanonicalIV->getIncomingValueForBlock(HP), HP);
1547 0 : continue;
1548 : }
1549 :
1550 178 : if (L->contains(HP)) {
1551 : // Insert a unit add instruction right before the terminator
1552 : // corresponding to the back-edge.
1553 91 : Instruction *Add = BinaryOperator::CreateAdd(CanonicalIV, One,
1554 : "indvar.next",
1555 : HP->getTerminator());
1556 91 : Add->setDebugLoc(HP->getTerminator()->getDebugLoc());
1557 91 : rememberInstruction(Add);
1558 91 : CanonicalIV->addIncoming(Add, HP);
1559 : } else {
1560 87 : CanonicalIV->addIncoming(Constant::getNullValue(Ty), HP);
1561 : }
1562 : }
1563 : }
1564 :
1565 : // {0,+,1} --> Insert a canonical induction variable into the loop!
1566 198 : if (S->isAffine() && S->getOperand(1)->isOne()) {
1567 : assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) &&
1568 : "IVs with types different from the canonical IV should "
1569 : "already have been handled!");
1570 : return CanonicalIV;
1571 : }
1572 :
1573 : // {0,+,F} --> {0,+,1} * F
1574 :
1575 : // If this is a simple linear addrec, emit it now as a special case.
1576 84 : if (S->isAffine()) // {0,+,F} --> i*F
1577 : return
1578 252 : expand(SE.getTruncateOrNoop(
1579 84 : SE.getMulExpr(SE.getUnknown(CanonicalIV),
1580 : SE.getNoopOrAnyExtend(S->getOperand(1),
1581 : CanonicalIV->getType())),
1582 84 : Ty));
1583 :
1584 : // If this is a chain of recurrences, turn it into a closed form, using the
1585 : // folders, then expandCodeFor the closed form. This allows the folders to
1586 : // simplify the expression without having to build a bunch of special code
1587 : // into this folder.
1588 0 : const SCEV *IH = SE.getUnknown(CanonicalIV); // Get I as a "symbolic" SCEV.
1589 :
1590 : // Promote S up to the canonical IV type, if the cast is foldable.
1591 : const SCEV *NewS = S;
1592 0 : const SCEV *Ext = SE.getNoopOrAnyExtend(S, CanonicalIV->getType());
1593 0 : if (isa<SCEVAddRecExpr>(Ext))
1594 : NewS = Ext;
1595 :
1596 0 : const SCEV *V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE);
1597 : //cerr << "Evaluated: " << *this << "\n to: " << *V << "\n";
1598 :
1599 : // Truncate the result down to the original type, if needed.
1600 0 : const SCEV *T = SE.getTruncateOrNoop(V, Ty);
1601 0 : return expand(T);
1602 : }
1603 :
1604 218 : Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) {
1605 218 : Type *Ty = SE.getEffectiveSCEVType(S->getType());
1606 218 : Value *V = expandCodeFor(S->getOperand(),
1607 218 : SE.getEffectiveSCEVType(S->getOperand()->getType()));
1608 218 : Value *I = Builder.CreateTrunc(V, Ty);
1609 218 : rememberInstruction(I);
1610 218 : return I;
1611 : }
1612 :
1613 414 : Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) {
1614 414 : Type *Ty = SE.getEffectiveSCEVType(S->getType());
1615 414 : Value *V = expandCodeFor(S->getOperand(),
1616 414 : SE.getEffectiveSCEVType(S->getOperand()->getType()));
1617 414 : Value *I = Builder.CreateZExt(V, Ty);
1618 414 : rememberInstruction(I);
1619 414 : return I;
1620 : }
1621 :
1622 149 : Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
1623 149 : Type *Ty = SE.getEffectiveSCEVType(S->getType());
1624 149 : Value *V = expandCodeFor(S->getOperand(),
1625 149 : SE.getEffectiveSCEVType(S->getOperand()->getType()));
1626 149 : Value *I = Builder.CreateSExt(V, Ty);
1627 149 : rememberInstruction(I);
1628 149 : return I;
1629 : }
1630 :
1631 379 : Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
1632 758 : Value *LHS = expand(S->getOperand(S->getNumOperands()-1));
1633 379 : Type *Ty = LHS->getType();
1634 782 : for (int i = S->getNumOperands()-2; i >= 0; --i) {
1635 : // In the case of mixed integer and pointer types, do the
1636 : // rest of the comparisons as integer.
1637 806 : if (S->getOperand(i)->getType() != Ty) {
1638 0 : Ty = SE.getEffectiveSCEVType(Ty);
1639 0 : LHS = InsertNoopCastOfTo(LHS, Ty);
1640 : }
1641 806 : Value *RHS = expandCodeFor(S->getOperand(i), Ty);
1642 403 : Value *ICmp = Builder.CreateICmpSGT(LHS, RHS);
1643 403 : rememberInstruction(ICmp);
1644 403 : Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax");
1645 403 : rememberInstruction(Sel);
1646 : LHS = Sel;
1647 : }
1648 : // In the case of mixed integer and pointer types, cast the
1649 : // final result back to the pointer type.
1650 379 : if (LHS->getType() != S->getType())
1651 0 : LHS = InsertNoopCastOfTo(LHS, S->getType());
1652 379 : return LHS;
1653 : }
1654 :
1655 108 : Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
1656 216 : Value *LHS = expand(S->getOperand(S->getNumOperands()-1));
1657 108 : Type *Ty = LHS->getType();
1658 216 : for (int i = S->getNumOperands()-2; i >= 0; --i) {
1659 : // In the case of mixed integer and pointer types, do the
1660 : // rest of the comparisons as integer.
1661 216 : if (S->getOperand(i)->getType() != Ty) {
1662 3 : Ty = SE.getEffectiveSCEVType(Ty);
1663 3 : LHS = InsertNoopCastOfTo(LHS, Ty);
1664 : }
1665 216 : Value *RHS = expandCodeFor(S->getOperand(i), Ty);
1666 108 : Value *ICmp = Builder.CreateICmpUGT(LHS, RHS);
1667 108 : rememberInstruction(ICmp);
1668 108 : Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax");
1669 108 : rememberInstruction(Sel);
1670 : LHS = Sel;
1671 : }
1672 : // In the case of mixed integer and pointer types, cast the
1673 : // final result back to the pointer type.
1674 108 : if (LHS->getType() != S->getType())
1675 2 : LHS = InsertNoopCastOfTo(LHS, S->getType());
1676 108 : return LHS;
1677 : }
1678 :
1679 21284 : Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty,
1680 : Instruction *IP) {
1681 : setInsertPoint(IP);
1682 21284 : return expandCodeFor(SH, Ty);
1683 : }
1684 :
1685 105697 : Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) {
1686 : // Expand the code for this SCEV.
1687 105697 : Value *V = expand(SH);
1688 105697 : if (Ty) {
1689 : assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) &&
1690 : "non-trivial casts should be done with the SCEVs directly!");
1691 78489 : V = InsertNoopCastOfTo(V, Ty);
1692 : }
1693 105697 : return V;
1694 : }
1695 :
1696 : ScalarEvolution::ValueOffsetPair
1697 100340 : SCEVExpander::FindValueInExprValueMap(const SCEV *S,
1698 : const Instruction *InsertPt) {
1699 100340 : SetVector<ScalarEvolution::ValueOffsetPair> *Set = SE.getSCEVValues(S);
1700 : // If the expansion is not in CanonicalMode, and the SCEV contains any
1701 : // scAddRecExpr subexpression, it is required to expand the SCEV literally.
1702 100340 : if (CanonicalMode || !SE.containsAddRecurrence(S)) {
1703 : // If S is scConstant, it may be worse to reuse an existing Value.
1704 90503 : if (S->getSCEVType() != scConstant && Set) {
1705 : // Choose a Value from the set which dominates the insertPt.
1706 : // insertPt should be inside the Value's parent loop so as not to break
1707 : // the LCSSA form.
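: // (Reusing a value defined inside a loop at a point outside that loop would
: // create a use that bypasses the LCSSA phis, hence the contains(InsertPt)
: // guard below.)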
1708 20921 : for (auto const &VOPair : *Set) {
1709 14134 : Value *V = VOPair.first;
1710 14134 : ConstantInt *Offset = VOPair.second;
1711 : Instruction *EntInst = nullptr;
1712 14134 : if (V && isa<Instruction>(V) && (EntInst = cast<Instruction>(V)) &&
1713 13843 : S->getType() == V->getType() &&
1714 12510 : EntInst->getFunction() == InsertPt->getFunction() &&
1715 21874 : SE.DT.dominates(EntInst, InsertPt) &&
1716 7448 : (SE.LI.getLoopFor(EntInst->getParent()) == nullptr ||
1717 1485 : SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt)))
1718 5741 : return {V, Offset};
1719 : }
1720 : }
1721 : }
1722 94599 : return {nullptr, nullptr};
1723 : }
1724 :
1725 : // The expansion of SCEV will either reuse a previous Value in ExprValueMap,
1726 : // or expand the SCEV literally. Specifically, if the expansion is in LSRMode,
1727 : // and the SCEV contains any scAddRecExpr subexpression, it will be expanded
1728 : // literally, to prevent LSR's transformed SCEV from being reverted. Otherwise,
1729 : // the expansion will try to reuse a Value from ExprValueMap, and only when that
1730 : // fails, expand the SCEV literally.
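: // For illustration: if an instruction %a = add i64 %x, %y already exists and
: // dominates the insertion point, expanding the SCEV (%x + %y) can return %a
: // directly instead of materializing a second add.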
1731 138395 : Value *SCEVExpander::expand(const SCEV *S) {
1732 : // Compute an insertion point for this SCEV object. Hoist the instructions
1733 : // as far out in the loop nest as possible.
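: // (For example, expanding a loop-invariant (4 * %n) while the builder sits
: // inside the loop body will place the mul in the preheader instead, so all
: // iterations share one instruction.)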
1734 : Instruction *InsertPt = &*Builder.GetInsertPoint();
1735 138395 : for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());;
1736 40159 : L = L->getParentLoop())
1737 178554 : if (SE.isLoopInvariant(S, L)) {
1738 81907 : if (!L) break;
1739 40159 : if (BasicBlock *Preheader = L->getLoopPreheader())
1740 : InsertPt = Preheader->getTerminator();
1741 : else {
1742 : // LSR sets the insertion point for AddRec start/step values to the
1743 : // block start to simplify value reuse, even though it's an invalid
1744 : // position. SCEVExpander must correct for this in all cases.
1745 : InsertPt = &*L->getHeader()->getFirstInsertionPt();
1746 : }
1747 : } else {
1748 : // We can move the insertion point only if there are no div or rem operations;
1749 : // otherwise we risk moving it past the check for a zero denominator.
1750 : auto SafeToHoist = [](const SCEV *S) {
1751 : return !SCEVExprContains(S, [](const SCEV *S) {
1752 : if (const auto *D = dyn_cast<SCEVUDivExpr>(S)) {
1753 : if (const auto *SC = dyn_cast<SCEVConstant>(D->getRHS()))
1754 : // Division by non-zero constants can be hoisted.
1755 : return SC->getValue()->isZero();
1756 : // All other divisions should not be moved as they may be
1757 : // divisions by zero and should be kept within the
1758 : // conditions of the surrounding loops that guard their
1759 : // execution (see PR35406).
1760 : return true;
1761 : }
1762 : return false;
1763 : });
1764 : };
1765 : // If the SCEV is computable at this level, insert it into the header
1766 : // after the PHIs (and after any other instructions that we've inserted
1767 : // there) so that it is guaranteed to dominate any user inside the loop.
1768 108782 : if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L) &&
1769 : SafeToHoist(S))
1770 : InsertPt = &*L->getHeader()->getFirstInsertionPt();
1771 154676 : while (InsertPt->getIterator() != Builder.GetInsertPoint() &&
1772 39038 : (isInsertedInstruction(InsertPt) ||
1773 : isa<DbgInfoIntrinsic>(InsertPt))) {
1774 : InsertPt = &*std::next(InsertPt->getIterator());
1775 : }
1776 : break;
1777 40159 : }
1778 :
1779 : // Check to see if we already expanded this here.
1780 138395 : auto I = InsertedExpressions.find(std::make_pair(S, InsertPt));
1781 138395 : if (I != InsertedExpressions.end())
1782 40637 : return I->second;
1783 :
1784 97758 : SCEVInsertPointGuard Guard(Builder, this);
1785 97758 : Builder.SetInsertPoint(InsertPt);
1786 :
1787 : // Expand the expression into instructions.
1788 97758 : ScalarEvolution::ValueOffsetPair VO = FindValueInExprValueMap(S, InsertPt);
1789 97758 : Value *V = VO.first;
1790 :
1791 97758 : if (!V)
1792 92335 : V = visit(S);
1793 5423 : else if (VO.second) {
1794 104 : if (PointerType *Vty = dyn_cast<PointerType>(V->getType())) {
1795 2 : Type *Ety = Vty->getPointerElementType();
1796 : int64_t Offset = VO.second->getSExtValue();
1797 2 : int64_t ESize = SE.getTypeSizeInBits(Ety);
1798 2 : if ((Offset * 8) % ESize == 0) {
1799 : ConstantInt *Idx =
1800 2 : ConstantInt::getSigned(VO.second->getType(), -(Offset * 8) / ESize);
1801 2 : V = Builder.CreateGEP(Ety, V, Idx, "scevgep");
1802 : } else {
1803 : ConstantInt *Idx =
1804 2 : ConstantInt::getSigned(VO.second->getType(), -Offset);
1805 : unsigned AS = Vty->getAddressSpace();
1806 3 : V = Builder.CreateBitCast(V, Type::getInt8PtrTy(SE.getContext(), AS));
1807 1 : V = Builder.CreateGEP(Type::getInt8Ty(SE.getContext()), V, Idx,
1808 : "uglygep");
1809 1 : V = Builder.CreateBitCast(V, Vty);
1810 : }
1811 : } else {
1812 204 : V = Builder.CreateSub(V, VO.second);
1813 : }
1814 : }
1815 : // Remember the expanded value for this SCEV at this location.
1816 : //
1817 : // This is independent of PostIncLoops. The mapped value simply materializes
1818 : // the expression at this insertion point. If the mapped value happened to be
1819 : // a postinc expansion, it could be reused by a non-postinc user, but only if
1820 : // its insertion point was already at the head of the loop.
1821 195516 : InsertedExpressions[std::make_pair(S, InsertPt)] = V;
1822 : return V;
1823 : }
1824 :
1825 46866 : void SCEVExpander::rememberInstruction(Value *I) {
1826 46866 : if (!PostIncLoops.empty())
1827 3921 : InsertedPostIncValues.insert(I);
1828 : else
1829 42945 : InsertedValues.insert(I);
1830 46866 : }
1831 :
1832 : /// getOrInsertCanonicalInductionVariable - This method returns the
1833 : /// canonical induction variable of the specified type for the specified
1834 : /// loop (inserting one if there is none). A canonical induction variable
1835 : /// starts at zero and steps by one on each iteration.
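: /// A sketch of the typical result (block names are illustrative):
: ///   %indvar = phi i64 [ 0, %preheader ], [ %indvar.next, %latch ]
: ///   %indvar.next = add i64 %indvar, 1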
1836 : PHINode *
1837 0 : SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
1838 : Type *Ty) {
1839 : assert(Ty->isIntegerTy() && "Can only insert integer induction variables!");
1840 :
1841 : // Build a SCEV for {0,+,1}<L>.
1842 : // Conservatively use FlagAnyWrap for now.
1843 0 : const SCEV *H = SE.getAddRecExpr(SE.getConstant(Ty, 0),
1844 : SE.getConstant(Ty, 1), L, SCEV::FlagAnyWrap);
1845 :
1846 : // Emit code for it.
1847 0 : SCEVInsertPointGuard Guard(Builder, this);
1848 : PHINode *V =
1849 0 : cast<PHINode>(expandCodeFor(H, nullptr, &L->getHeader()->front()));
1850 :
1851 0 : return V;
1852 : }
1853 :
1854 : /// replaceCongruentIVs - Check for congruent phis in this loop header and
1855 : /// replace them with their most canonical representative. Return the number of
1856 : /// phis eliminated.
1857 : ///
1858 : /// This does not depend on any SCEVExpander state but should be used in
1859 : /// the same context that SCEVExpander is used.
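: /// For instance, two header phis that both compute {0,+,1}<%loop> are
: /// congruent; the later one is replaced with the representative (via a
: /// trunc/bitcast when the widths differ) and queued on DeadInsts.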
1860 : unsigned
1861 13637 : SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
1862 : SmallVectorImpl<WeakTrackingVH> &DeadInsts,
1863 : const TargetTransformInfo *TTI) {
1864 : // Find integer phis in order of increasing width.
1865 : SmallVector<PHINode*, 8> Phis;
1866 49065 : for (PHINode &PN : L->getHeader()->phis())
1867 21791 : Phis.push_back(&PN);
1868 :
1869 13637 : if (TTI)
1870 : llvm::sort(Phis, [](Value *LHS, Value *RHS) {
1871 : // Put pointers at the back and make sure pointer < pointer = false.
1872 : if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
1873 : return RHS->getType()->isIntegerTy() && !LHS->getType()->isIntegerTy();
1874 : return RHS->getType()->getPrimitiveSizeInBits() <
1875 : LHS->getType()->getPrimitiveSizeInBits();
1876 : });
1877 :
1878 : unsigned NumElim = 0;
1879 : DenseMap<const SCEV *, PHINode *> ExprToIVMap;
1880 : // Process phis from wide to narrow. Map wide phis to their truncation
1881 : // so narrow phis can reuse them.
1882 35428 : for (PHINode *Phi : Phis) {
1883 : auto SimplifyPHINode = [&](PHINode *PN) -> Value * {
1884 : if (Value *V = SimplifyInstruction(PN, {DL, &SE.TLI, &SE.DT, &SE.AC}))
1885 : return V;
1886 : if (!SE.isSCEVable(PN->getType()))
1887 : return nullptr;
1888 : auto *Const = dyn_cast<SCEVConstant>(SE.getSCEV(PN));
1889 : if (!Const)
1890 : return nullptr;
1891 : return Const->getValue();
1892 21791 : };
1893 :
1894 : // Fold constant phis. They may be congruent to other constant phis and
1895 : // would confuse the logic below that expects proper IVs.
1896 21791 : if (Value *V = SimplifyPHINode(Phi)) {
1897 15 : if (V->getType() != Phi->getType())
1898 21732 : continue;
1899 14 : Phi->replaceAllUsesWith(V);
1900 14 : DeadInsts.emplace_back(Phi);
1901 14 : ++NumElim;
1902 : DEBUG_WITH_TYPE(DebugType, dbgs()
1903 : << "INDVARS: Eliminated constant iv: " << *Phi << '\n');
1904 14 : continue;
1905 : }
1906 :
1907 21776 : if (!SE.isSCEVable(Phi->getType()))
1908 : continue;
1909 :
1910 19601 : PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)];
1911 19601 : if (!OrigPhiRef) {
1912 19540 : OrigPhiRef = Phi;
1913 46474 : if (Phi->getType()->isIntegerTy() && TTI &&
1914 7394 : TTI->isTruncateFree(Phi->getType(), Phis.back()->getType())) {
1915 : // This phi can be freely truncated to the narrowest phi type. Map the
1916 : // truncated expression to it so it will be reused for narrow types.
1917 : const SCEV *TruncExpr =
1918 1280 : SE.getTruncateExpr(SE.getSCEV(Phi), Phis.back()->getType());
1919 640 : ExprToIVMap[TruncExpr] = Phi;
1920 : }
1921 19540 : continue;
1922 : }
1923 :
1924 : // Replacing a pointer phi with an integer phi or vice-versa doesn't make
1925 : // sense.
1926 183 : if (OrigPhiRef->getType()->isPointerTy() != Phi->getType()->isPointerTy())
1927 : continue;
1928 :
1929 59 : if (BasicBlock *LatchBlock = L->getLoopLatch()) {
1930 59 : Instruction *OrigInc = dyn_cast<Instruction>(
1931 : OrigPhiRef->getIncomingValueForBlock(LatchBlock));
1932 : Instruction *IsomorphicInc =
1933 59 : dyn_cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock));
1934 :
1935 59 : if (OrigInc && IsomorphicInc) {
1936 : // If this phi has the same width but is more canonical, replace the
1937 : // original with it. As part of the "more canonical" determination,
1938 : // respect a prior decision to use an IV chain.
1939 57 : if (OrigPhiRef->getType() == Phi->getType() &&
1940 33 : !(ChainedPhis.count(Phi) ||
1941 90 : isExpandedAddRecExprPHI(OrigPhiRef, OrigInc, L)) &&
1942 61 : (ChainedPhis.count(Phi) ||
1943 4 : isExpandedAddRecExprPHI(Phi, IsomorphicInc, L))) {
1944 : std::swap(OrigPhiRef, Phi);
1945 : std::swap(OrigInc, IsomorphicInc);
1946 : }
1947 : // Replacing the congruent phi is sufficient because acyclic
1948 : // redundancy elimination, CSE/GVN, should handle the
1949 : // rest. However, once SCEV proves that a phi is congruent,
1950 : // it's often the head of an IV user cycle that is isomorphic
1951 : // with the original phi. It's worth eagerly cleaning up the
1952 : // common case of a single IV increment so that DeleteDeadPHIs
1953 : // can remove cycles that had postinc uses.
1954 : const SCEV *TruncExpr =
1955 57 : SE.getTruncateOrNoop(SE.getSCEV(OrigInc), IsomorphicInc->getType());
1956 55 : if (OrigInc != IsomorphicInc &&
1957 110 : TruncExpr == SE.getSCEV(IsomorphicInc) &&
1958 166 : SE.LI.replacementPreservesLCSSAForm(IsomorphicInc, OrigInc) &&
1959 54 : hoistIVInc(OrigInc, IsomorphicInc)) {
1960 : DEBUG_WITH_TYPE(DebugType,
1961 : dbgs() << "INDVARS: Eliminated congruent iv.inc: "
1962 : << *IsomorphicInc << '\n');
1963 : Value *NewInc = OrigInc;
1964 43 : if (OrigInc->getType() != IsomorphicInc->getType()) {
1965 : Instruction *IP = nullptr;
1966 : if (PHINode *PN = dyn_cast<PHINode>(OrigInc))
1967 1 : IP = &*PN->getParent()->getFirstInsertionPt();
1968 : else
1969 : IP = OrigInc->getNextNode();
1970 :
1971 20 : IRBuilder<> Builder(IP);
1972 20 : Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc());
1973 40 : NewInc = Builder.CreateTruncOrBitCast(
1974 : OrigInc, IsomorphicInc->getType(), IVName);
1975 : }
1976 43 : IsomorphicInc->replaceAllUsesWith(NewInc);
1977 43 : DeadInsts.emplace_back(IsomorphicInc);
1978 : }
1979 : }
1980 : }
1981 : DEBUG_WITH_TYPE(DebugType, dbgs() << "INDVARS: Eliminated congruent iv: "
1982 : << *Phi << '\n');
1983 59 : ++NumElim;
1984 59 : Value *NewIV = OrigPhiRef;
1985 59 : if (OrigPhiRef->getType() != Phi->getType()) {
1986 24 : IRBuilder<> Builder(&*L->getHeader()->getFirstInsertionPt());
1987 24 : Builder.SetCurrentDebugLocation(Phi->getDebugLoc());
1988 48 : NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName);
1989 : }
1990 59 : Phi->replaceAllUsesWith(NewIV);
1991 59 : DeadInsts.emplace_back(Phi);
1992 : }
1993 13637 : return NumElim;
1994 : }
1995 :
1996 0 : Value *SCEVExpander::getExactExistingExpansion(const SCEV *S,
1997 : const Instruction *At, Loop *L) {
1998 : Optional<ScalarEvolution::ValueOffsetPair> VO =
1999 0 : getRelatedExistingExpansion(S, At, L);
2000 0 : if (VO && VO.getValue().second == nullptr)
2001 0 : return VO.getValue().first;
2002 : return nullptr;
2003 : }
2004 :
2005 : Optional<ScalarEvolution::ValueOffsetPair>
2006 2602 : SCEVExpander::getRelatedExistingExpansion(const SCEV *S, const Instruction *At,
2007 : Loop *L) {
2008 : using namespace llvm::PatternMatch;
2009 :
2010 : SmallVector<BasicBlock *, 4> ExitingBlocks;
2011 2602 : L->getExitingBlocks(ExitingBlocks);
2012 :
2013 : // Look for suitable value in simple conditions at the loop exits.
2014 5685 : for (BasicBlock *BB : ExitingBlocks) {
2015 : ICmpInst::Predicate Pred;
2016 : Instruction *LHS, *RHS;
2017 : BasicBlock *TrueBB, *FalseBB;
2018 :
2019 3103 : if (!match(BB->getTerminator(),
2020 3103 : m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)),
2021 : TrueBB, FalseBB)))
2022 2386 : continue;
2023 :
2024 717 : if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At))
2025 1 : return ScalarEvolution::ValueOffsetPair(LHS, nullptr);
2026 :
2027 716 : if (SE.getSCEV(RHS) == S && SE.DT.dominates(RHS, At))
2028 19 : return ScalarEvolution::ValueOffsetPair(RHS, nullptr);
2029 : }
2030 :
2031 : // Use expand's logic for reusing a previous Value in
2032 : // ExprValueMap.
2033 2582 : ScalarEvolution::ValueOffsetPair VO = FindValueInExprValueMap(S, At);
2034 2582 : if (VO.first)
2035 : return VO;
2036 :
2037 : // There is potential to make this significantly smarter, but this simple
2038 : // heuristic already gets some interesting cases.
2039 :
2040 : // Can not find suitable value.
2041 : return None;
2042 : }
2043 :
2044 6235 : bool SCEVExpander::isHighCostExpansionHelper(
2045 : const SCEV *S, Loop *L, const Instruction *At,
2046 : SmallPtrSetImpl<const SCEV *> &Processed) {
2047 :
2048 : // If we can find an existing value for this SCEV available at the point "At",
2049 : // then consider the expression cheap.
2050 8717 : if (At && getRelatedExistingExpansion(S, At, L))
2051 : return false;
2052 :
2053 : // Zero/One operand expressions
2054 11796 : switch (S->getSCEVType()) {
2055 : case scUnknown:
2056 : case scConstant:
2057 : return false;
2058 17 : case scTruncate:
2059 17 : return isHighCostExpansionHelper(cast<SCEVTruncateExpr>(S)->getOperand(),
2060 17 : L, At, Processed);
2061 10 : case scZeroExtend:
2062 10 : return isHighCostExpansionHelper(cast<SCEVZeroExtendExpr>(S)->getOperand(),
2063 10 : L, At, Processed);
2064 11 : case scSignExtend:
2065 11 : return isHighCostExpansionHelper(cast<SCEVSignExtendExpr>(S)->getOperand(),
2066 11 : L, At, Processed);
2067 : }
2068 :
2069 2351 : if (!Processed.insert(S).second)
2070 : return false;
2071 :
2072 : if (auto *UDivExpr = dyn_cast<SCEVUDivExpr>(S)) {
2073 : // If the divisor is a power of two and the SCEV type fits in a native
2074 : // integer, consider the division cheap irrespective of whether it occurs in
2075 : // the user code since it can be lowered into a right shift.
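: // (For example, an i64 udiv by 8 lowers to "lshr i64 %x, 3".)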
2076 540 : if (auto *SC = dyn_cast<SCEVConstant>(UDivExpr->getRHS()))
2077 537 : if (SC->getAPInt().isPowerOf2()) {
2078 : const DataLayout &DL =
2079 420 : L->getHeader()->getParent()->getParent()->getDataLayout();
2080 : unsigned Width = cast<IntegerType>(UDivExpr->getType())->getBitWidth();
2081 420 : return DL.isIllegalInteger(Width);
2082 : }
2083 :
2084 : // UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or
2085 : // HowManyLessThans produced to compute a precise expression, rather than a
2086 : // UDiv from the user's code. If we can't find a UDiv in the code with some
2087 : // simple searching, assume the former and consider UDivExpr expensive to
2088 : // compute.
2089 120 : BasicBlock *ExitingBB = L->getExitingBlock();
2090 120 : if (!ExitingBB)
2091 : return true;
2092 :
2093 : // At the beginning of this function we already tried to find an existing
2094 : // value for plain 'S'. Now try to look up 'S + 1' since it is a common
2095 : // pattern involving division. This is just a simple search heuristic.
2096 120 : if (!At)
2097 : At = &ExitingBB->back();
2098 120 : if (!getRelatedExistingExpansion(
2099 240 : SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), At, L))
2100 : return true;
2101 : }
2102 :
2103 : // HowManyLessThans uses a Max expression whenever the loop is not guarded by
2104 : // the exit condition.
2105 1812 : if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S))
2106 : return true;
2107 :
2108 : // Recurse past nary expressions, which commonly occur in the
2109 : // BackedgeTakenCount. They may already exist in program code, and if not,
2110 : // they are not too expensive to rematerialize.
2111 : if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(S)) {
2112 4924 : for (auto *Op : NAry->operands())
2113 3540 : if (isHighCostExpansionHelper(Op, L, At, Processed))
2114 : return true;
2115 : }
2116 :
2117 : // If we haven't recognized an expensive SCEV pattern, assume it's an
2118 : // expression produced by program code.
2119 : return false;
2120 : }
2121 :
2122 1040 : Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred,
2123 : Instruction *IP) {
2124 : assert(IP);
2125 1040 : switch (Pred->getKind()) {
2126 : case SCEVPredicate::P_Union:
2127 948 : return expandUnionPredicate(cast<SCEVUnionPredicate>(Pred), IP);
2128 : case SCEVPredicate::P_Equal:
2129 17 : return expandEqualPredicate(cast<SCEVEqualPredicate>(Pred), IP);
2130 : case SCEVPredicate::P_Wrap: {
2131 : auto *AddRecPred = cast<SCEVWrapPredicate>(Pred);
2132 75 : return expandWrapPredicate(AddRecPred, IP);
2133 : }
2134 : }
2135 0 : llvm_unreachable("Unknown SCEV predicate type");
2136 : }
2137 :
2138 17 : Value *SCEVExpander::expandEqualPredicate(const SCEVEqualPredicate *Pred,
2139 : Instruction *IP) {
2140 17 : Value *Expr0 = expandCodeFor(Pred->getLHS(), Pred->getLHS()->getType(), IP);
2141 17 : Value *Expr1 = expandCodeFor(Pred->getRHS(), Pred->getRHS()->getType(), IP);
2142 :
2143 17 : Builder.SetInsertPoint(IP);
2144 17 : auto *I = Builder.CreateICmpNE(Expr0, Expr1, "ident.check");
2145 17 : return I;
2146 : }
2147 :
2148 75 : Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
2149 : Instruction *Loc, bool Signed) {
2150 : assert(AR->isAffine() && "Cannot generate RT check for "
2151 : "non-affine expression");
2152 :
2153 75 : SCEVUnionPredicate Pred;
2154 : const SCEV *ExitCount =
2155 75 : SE.getPredicatedBackedgeTakenCount(AR->getLoop(), Pred);
2156 :
2157 : assert(ExitCount != SE.getCouldNotCompute() && "Invalid loop count");
2158 :
2159 75 : const SCEV *Step = AR->getStepRecurrence(SE);
2160 75 : const SCEV *Start = AR->getStart();
2161 :
2162 : Type *ARTy = AR->getType();
2163 75 : unsigned SrcBits = SE.getTypeSizeInBits(ExitCount->getType());
2164 75 : unsigned DstBits = SE.getTypeSizeInBits(ARTy);
2165 :
2166 : // The expression {Start,+,Step} has nusw/nssw if
2167 : // Step < 0, Start - |Step| * Backedge <= Start
2168 : // Step >= 0, Start + |Step| * Backedge > Start
2169 : // and |Step| * Backedge doesn't unsigned overflow.
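: // A worked example: for {%start,+,4}<i32> with backedge-taken count %n, the
: // code below computes |Step| = 4, multiplies via
: //   %mul = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 4, i32 %n)
: // and then compares %start +/- %mul.result against %start, OR-ing in
: // %mul.overflow.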
2170 :
2171 75 : IntegerType *CountTy = IntegerType::get(Loc->getContext(), SrcBits);
2172 75 : Builder.SetInsertPoint(Loc);
2173 75 : Value *TripCountVal = expandCodeFor(ExitCount, CountTy, Loc);
2174 :
2175 : IntegerType *Ty =
2176 75 : IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(ARTy));
2177 75 : Type *ARExpandTy = DL.isNonIntegralPointerType(ARTy) ? ARTy : Ty;
2178 :
2179 75 : Value *StepValue = expandCodeFor(Step, Ty, Loc);
2180 75 : Value *NegStepValue = expandCodeFor(SE.getNegativeSCEV(Step), Ty, Loc);
2181 75 : Value *StartValue = expandCodeFor(Start, ARExpandTy, Loc);
2182 :
2183 : ConstantInt *Zero =
2184 75 : ConstantInt::get(Loc->getContext(), APInt::getNullValue(DstBits));
2185 :
2186 75 : Builder.SetInsertPoint(Loc);
2187 : // Compute |Step|
2188 150 : Value *StepCompare = Builder.CreateICmp(ICmpInst::ICMP_SLT, StepValue, Zero);
2189 75 : Value *AbsStep = Builder.CreateSelect(StepCompare, NegStepValue, StepValue);
2190 :
2191 : // Get the backedge-taken count and truncate or zero-extend it to the AR type.
2192 75 : Value *TruncTripCount = Builder.CreateZExtOrTrunc(TripCountVal, Ty);
2193 75 : auto *MulF = Intrinsic::getDeclaration(Loc->getModule(),
2194 150 : Intrinsic::umul_with_overflow, Ty);
2195 :
2196 : // Compute |Step| * Backedge
2197 75 : CallInst *Mul = Builder.CreateCall(MulF, {AbsStep, TruncTripCount}, "mul");
2198 150 : Value *MulV = Builder.CreateExtractValue(Mul, 0, "mul.result");
2199 150 : Value *OfMul = Builder.CreateExtractValue(Mul, 1, "mul.overflow");
2200 :
2201 : // Compute:
2202 : // Start + |Step| * Backedge < Start
2203 : // Start - |Step| * Backedge > Start
2204 : Value *Add = nullptr, *Sub = nullptr;
2205 : if (PointerType *ARPtrTy = dyn_cast<PointerType>(ARExpandTy)) {
2206 2 : const SCEV *MulS = SE.getSCEV(MulV);
2207 2 : const SCEV *NegMulS = SE.getNegativeSCEV(MulS);
2208 4 : Add = Builder.CreateBitCast(expandAddToGEP(MulS, ARPtrTy, Ty, StartValue),
2209 : ARPtrTy);
2210 4 : Sub = Builder.CreateBitCast(
2211 : expandAddToGEP(NegMulS, ARPtrTy, Ty, StartValue), ARPtrTy);
2212 : } else {
2213 73 : Add = Builder.CreateAdd(StartValue, MulV);
2214 73 : Sub = Builder.CreateSub(StartValue, MulV);
2215 : }
2216 :
2217 116 : Value *EndCompareGT = Builder.CreateICmp(
2218 : Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue);
2219 :
2220 116 : Value *EndCompareLT = Builder.CreateICmp(
2221 : Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, Add, StartValue);
2222 :
2223 : // Select the answer based on the sign of Step.
2224 : Value *EndCheck =
2225 75 : Builder.CreateSelect(StepCompare, EndCompareGT, EndCompareLT);
2226 :
2227 : // If the backedge taken count type is larger than the AR type,
2228 : // check that we don't drop any bits by truncating it. If we are
2229 : // dropping bits, then we have overflow (unless the step is zero).
2230 75 : if (SE.getTypeSizeInBits(CountTy) > SE.getTypeSizeInBits(Ty)) {
2231 98 : auto MaxVal = APInt::getMaxValue(DstBits).zext(SrcBits);
2232 : auto *BackedgeCheck =
2233 49 : Builder.CreateICmp(ICmpInst::ICMP_UGT, TripCountVal,
2234 49 : ConstantInt::get(Loc->getContext(), MaxVal));
2235 49 : BackedgeCheck = Builder.CreateAnd(
2236 : BackedgeCheck, Builder.CreateICmp(ICmpInst::ICMP_NE, StepValue, Zero));
2237 :
2238 49 : EndCheck = Builder.CreateOr(EndCheck, BackedgeCheck);
2239 : }
2240 :
2241 75 : EndCheck = Builder.CreateOr(EndCheck, OfMul);
2242 75 : return EndCheck;
2243 : }
2244 :
2245 75 : Value *SCEVExpander::expandWrapPredicate(const SCEVWrapPredicate *Pred,
2246 : Instruction *IP) {
2247 75 : const auto *A = cast<SCEVAddRecExpr>(Pred->getExpr());
2248 : Value *NSSWCheck = nullptr, *NUSWCheck = nullptr;
2249 :
2250 : // Add a check for NUSW
2251 75 : if (Pred->getFlags() & SCEVWrapPredicate::IncrementNUSW)
2252 41 : NUSWCheck = generateOverflowCheck(A, IP, false);
2253 :
2254 : // Add a check for NSSW
2255 75 : if (Pred->getFlags() & SCEVWrapPredicate::IncrementNSSW)
2256 34 : NSSWCheck = generateOverflowCheck(A, IP, true);
2257 :
2258 75 : if (NUSWCheck && NSSWCheck)
2259 0 : return Builder.CreateOr(NUSWCheck, NSSWCheck);
2260 :
2261 75 : if (NUSWCheck)
2262 : return NUSWCheck;
2263 :
2264 34 : if (NSSWCheck)
2265 : return NSSWCheck;
2266 :
2267 0 : return ConstantInt::getFalse(IP->getContext());
2268 : }
2269 :
2270 948 : Value *SCEVExpander::expandUnionPredicate(const SCEVUnionPredicate *Union,
2271 : Instruction *IP) {
2272 948 : auto *BoolType = IntegerType::get(IP->getContext(), 1);
2273 948 : Value *Check = ConstantInt::getNullValue(BoolType);
2274 :
2275 : // Loop over all checks in this set.
2276 1040 : for (auto Pred : Union->getPredicates()) {
2277 92 : auto *NextCheck = expandCodeForPredicate(Pred, IP);
2278 92 : Builder.SetInsertPoint(IP);
2279 184 : Check = Builder.CreateOr(Check, NextCheck);
2280 : }
2281 :
2282 948 : return Check;
2283 : }
2284 :
2285 : namespace {
2286 : // Search for a SCEV subexpression that is not safe to expand. Any expression
2287 : // that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely
2288 : // UDiv expressions. We don't know if the UDiv is derived from an IR divide
2289 : // instruction, but the important thing is that we prove the denominator is
2290 : // nonzero before expansion.
2291 : //
2292 : // IVUsers already checks that IV-derived expressions are safe. So this check is
2293 : // only needed when the expression includes some subexpression that is not IV
2294 : // derived.
2295 : //
2296 : // Currently, we only allow division by a nonzero constant here. If this is
2297 : // inadequate, we could easily allow division by SCEVUnknown by using
2298 : // ValueTracking to check isKnownNonZero().
2299 : //
2300 : // We cannot generally expand recurrences unless the step dominates the loop
2301 : // header. The expander handles the special case of affine recurrences by
2302 : // scaling the recurrence outside the loop, but this technique isn't generally
2303 : // applicable. Expanding a nested recurrence outside a loop requires computing
2304 : // binomial coefficients. This could be done, but the recurrence has to be in a
2305 : // perfectly reduced form, which can't be guaranteed.
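: // Concretely: (%a /u %b) is unsafe because %b may be zero at the expansion
: // point, while (%a /u 8) is safe; a non-affine recurrence such as
: // {0,+,{0,+,%n}}<%L> is unsafe when its step does not dominate %L's header.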
2306 : struct SCEVFindUnsafe {
2307 : ScalarEvolution &SE;
2308 : bool IsUnsafe;
2309 :
2310 14375 : SCEVFindUnsafe(ScalarEvolution &se): SE(se), IsUnsafe(false) {}
2311 :
2312 0 : bool follow(const SCEV *S) {
2313 : if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
2314 0 : const SCEVConstant *SC = dyn_cast<SCEVConstant>(D->getRHS());
2315 0 : if (!SC || SC->getValue()->isZero()) {
2316 0 : IsUnsafe = true;
2317 0 : return false;
2318 : }
2319 : }
2320 : if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
2321 0 : const SCEV *Step = AR->getStepRecurrence(SE);
2322 0 : if (!AR->isAffine() && !SE.dominates(Step, AR->getLoop()->getHeader())) {
2323 0 : IsUnsafe = true;
2324 0 : return false;
2325 : }
2326 : }
2327 : return true;
2328 : }
2329 0 : bool isDone() const { return IsUnsafe; }
2330 : };
2331 : }
2332 :
2333 : namespace llvm {
2334 14375 : bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE) {
2335 : SCEVFindUnsafe Search(SE);
2336 14375 : visitAll(S, Search);
2337 14375 : return !Search.IsUnsafe;
2338 : }
2339 :
2340 189 : bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint,
2341 : ScalarEvolution &SE) {
2342 189 : return isSafeToExpand(S, SE) && SE.dominates(S, InsertionPoint->getParent());
2343 : }
2344 : }
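: // A usage sketch (assuming an SCEVExpander instance named Exp):
: //   if (isSafeToExpandAt(S, InsertPt, SE))
: //     Value *V = Exp.expandCodeFor(S, S->getType(), InsertPt);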