//===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of the scalar evolution expander,
// which is used to generate the code corresponding to a given scalar evolution
// expression.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace PatternMatch;
/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
/// reusing an existing cast if a suitable one exists, replacing a suitable
/// existing cast that isn't in the right place with a fresh cast at IP, or
/// creating a new one.
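/// For example (an illustrative case), if V already has a user
/// "%c = ptrtoint i8* %V to i64" sitting exactly at IP, that cast is
/// returned as-is rather than emitting a second, redundant ptrtoint.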
Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
                                       Instruction::CastOps Op,
                                       BasicBlock::iterator IP) {
  // This function must be called with the builder having a valid insertion
  // point. It doesn't need to be the actual IP where the uses of the returned
  // cast will be added, but it must dominate such IP.
  // We use this precondition to produce a cast that will dominate all its
  // uses. In particular, this is crucial for the case where the builder's
  // insertion point *is* the point where we were asked to put the cast.
  // Since we don't know the builder's insertion point is actually
  // where the uses will be added (only that it dominates it), we are
  // not allowed to move it.
  BasicBlock::iterator BIP = Builder.GetInsertPoint();

  Instruction *Ret = nullptr;

  // Check to see if there is already a cast!
  for (User *U : V->users())
    if (U->getType() == Ty)
      if (CastInst *CI = dyn_cast<CastInst>(U))
        if (CI->getOpcode() == Op) {
          // If the cast isn't where we want it, create a new cast at IP.
          // Likewise, do not reuse a cast at BIP because it must dominate
          // instructions that might be inserted before BIP.
          if (BasicBlock::iterator(CI) != IP || BIP == IP) {
            // Create a new cast, and leave the old cast in place in case
            // it is being used as an insert point. Clear its operand
            // so that it doesn't hold anything live.
            Ret = CastInst::Create(Op, V, Ty, "", &*IP);
            Ret->takeName(CI);
            CI->replaceAllUsesWith(Ret);
            CI->setOperand(0, UndefValue::get(V->getType()));
            break;
          }
          Ret = CI;
          break;
        }

  // Create a new cast.
  if (!Ret)
    Ret = CastInst::Create(Op, V, Ty, V->getName(), &*IP);

  // We assert at the end of the function since IP might point to an
  // instruction with different dominance properties than a cast
  // (an invoke for example) and not dominate BIP (but the cast does).
  assert(SE.DT.dominates(Ret, &*BIP));

  rememberInstruction(Ret);
  return Ret;
}

static BasicBlock::iterator findInsertPointAfter(Instruction *I,
                                                 BasicBlock *MustDominate) {
  BasicBlock::iterator IP = ++I->getIterator();
  if (auto *II = dyn_cast<InvokeInst>(I))
    IP = II->getNormalDest()->begin();

  while (isa<PHINode>(IP))
    ++IP;

  if (isa<FuncletPadInst>(IP) || isa<LandingPadInst>(IP)) {
    ++IP;
  } else if (isa<CatchSwitchInst>(IP)) {
    IP = MustDominate->getFirstInsertionPt();
  } else {
    assert(!IP->isEHPad() && "unexpected eh pad!");
  }

  return IP;
}

/// InsertNoopCastOfTo - Insert a cast of V to the specified type,
/// which must be possible with a noop cast, doing what we can to share
/// the casts.
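/// Here a noop cast is one that reinterprets the bits without changing them:
/// a bitcast between same-sized types, or a ptrtoint/inttoptr between a
/// pointer and an integer of equal width, which is what the asserts below
/// enforce.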
Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
  Instruction::CastOps Op = CastInst::getCastOpcode(V, false, Ty, false);
  assert((Op == Instruction::BitCast ||
          Op == Instruction::PtrToInt ||
          Op == Instruction::IntToPtr) &&
         "InsertNoopCastOfTo cannot perform non-noop casts!");
  assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) &&
         "InsertNoopCastOfTo cannot change sizes!");

  // Short-circuit unnecessary bitcasts.
  if (Op == Instruction::BitCast) {
    if (V->getType() == Ty)
      return V;
    if (CastInst *CI = dyn_cast<CastInst>(V)) {
      if (CI->getOperand(0)->getType() == Ty)
        return CI->getOperand(0);
    }
  }
  // Short-circuit unnecessary inttoptr<->ptrtoint casts.
  if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) &&
      SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) {
    if (CastInst *CI = dyn_cast<CastInst>(V))
      if ((CI->getOpcode() == Instruction::PtrToInt ||
           CI->getOpcode() == Instruction::IntToPtr) &&
          SE.getTypeSizeInBits(CI->getType()) ==
          SE.getTypeSizeInBits(CI->getOperand(0)->getType()))
        return CI->getOperand(0);
    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
      if ((CE->getOpcode() == Instruction::PtrToInt ||
           CE->getOpcode() == Instruction::IntToPtr) &&
          SE.getTypeSizeInBits(CE->getType()) ==
          SE.getTypeSizeInBits(CE->getOperand(0)->getType()))
        return CE->getOperand(0);
  }

  // Fold a cast of a constant.
  if (Constant *C = dyn_cast<Constant>(V))
    return ConstantExpr::getCast(Op, C, Ty);

  // Cast the argument at the beginning of the entry block, after
  // any bitcasts of other arguments.
  if (Argument *A = dyn_cast<Argument>(V)) {
    BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin();
    while ((isa<BitCastInst>(IP) &&
            isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) &&
            cast<BitCastInst>(IP)->getOperand(0) != A) ||
           isa<DbgInfoIntrinsic>(IP))
      ++IP;
    return ReuseOrCreateCast(A, Ty, Op, IP);
  }

  // Cast the instruction immediately after the instruction.
  Instruction *I = cast<Instruction>(V);
  BasicBlock::iterator IP = findInsertPointAfter(I, Builder.GetInsertBlock());
  return ReuseOrCreateCast(I, Ty, Op, IP);
}

/// InsertBinop - Insert the specified binary operator, doing a small amount
/// of work to avoid inserting an obviously redundant operation.
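/// For instance, if "%t = add i64 %a, %b" (carrying no wrap flags) appears
/// within the few instructions scanned before the insertion point, a request
/// for add(%a, %b) returns %t instead of emitting a duplicate add.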
Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
                                 Value *LHS, Value *RHS) {
  // Fold a binop with constant operands.
  if (Constant *CLHS = dyn_cast<Constant>(LHS))
    if (Constant *CRHS = dyn_cast<Constant>(RHS))
      return ConstantExpr::get(Opcode, CLHS, CRHS);

  // Do a quick scan to see if we have this binop nearby.  If so, reuse it.
  unsigned ScanLimit = 6;
  BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
  // Scanning starts from the last instruction before the insertion point.
  BasicBlock::iterator IP = Builder.GetInsertPoint();
  if (IP != BlockBegin) {
    --IP;
    for (; ScanLimit; --IP, --ScanLimit) {
      // Don't count dbg.value against the ScanLimit, to avoid perturbing the
      // generated code.
      if (isa<DbgInfoIntrinsic>(IP))
        ScanLimit++;

      // Conservatively, do not reuse any instruction that has wrap or exact
      // flags set, since such instructions can generate poison.
      // TODO: Instead of simply disabling poison-generating instructions, we
      //       could be clever here and match the SCEV to this instruction.
      auto canGeneratePoison = [](Instruction *I) {
        if (isa<OverflowingBinaryOperator>(I) &&
            (I->hasNoSignedWrap() || I->hasNoUnsignedWrap()))
          return true;
        if (isa<PossiblyExactOperator>(I) && I->isExact())
          return true;
        return false;
      };
      if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS &&
          IP->getOperand(1) == RHS && !canGeneratePoison(&*IP))
        return &*IP;
      if (IP == BlockBegin) break;
    }
  }

  // Save the original insertion point so we can restore it when we're done.
  DebugLoc Loc = Builder.GetInsertPoint()->getDebugLoc();
  SCEVInsertPointGuard Guard(Builder, this);

  // Move the insertion point out of as many loops as we can.
  while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
    if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break;
    BasicBlock *Preheader = L->getLoopPreheader();
    if (!Preheader) break;

    // Ok, move up a level.
    Builder.SetInsertPoint(Preheader->getTerminator());
  }

  // If we haven't found this binop, insert it.
  Instruction *BO = cast<Instruction>(Builder.CreateBinOp(Opcode, LHS, RHS));
  BO->setDebugLoc(Loc);
  rememberInstruction(BO);

  return BO;
}

/// FactorOutConstant - Test if S is divisible by Factor, using signed
/// division. If so, update S with Factor divided out and return true.
/// S need not be evenly divisible if a reasonable remainder can be
/// computed.
/// TODO: When ScalarEvolution gets a SCEVSDivExpr, this can be made
/// unnecessary; in its place, just signed-divide Ops[i] by the scale and
/// check to see if the divide was folded.
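/// For example, with S = 6 and Factor = 4, S becomes 1 and a remainder of 2
/// is accumulated, reflecting 6 == 1 * 4 + 2.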
static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder,
                              const SCEV *Factor, ScalarEvolution &SE,
                              const DataLayout &DL) {
  // Everything is divisible by one.
  if (Factor->isOne())
    return true;

  // x/x == 1.
  if (S == Factor) {
    S = SE.getConstant(S->getType(), 1);
    return true;
  }

  // For a Constant, check for a multiple of the given factor.
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
    // 0/x == 0.
    if (C->isZero())
      return true;
    // Check for divisibility.
    if (const SCEVConstant *FC = dyn_cast<SCEVConstant>(Factor)) {
      ConstantInt *CI =
          ConstantInt::get(SE.getContext(), C->getAPInt().sdiv(FC->getAPInt()));
      // If the quotient is zero and the remainder is non-zero, reject
      // the value at this scale. It will be considered for subsequent
      // smaller scales.
      if (!CI->isZero()) {
        const SCEV *Div = SE.getConstant(CI);
        S = Div;
        Remainder = SE.getAddExpr(
            Remainder, SE.getConstant(C->getAPInt().srem(FC->getAPInt())));
        return true;
      }
    }
  }

  // In a Mul, check if there is a constant operand which is a multiple
  // of the given factor.
  if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
    // Factor is known to be a constant at this point; check whether the Mul's
    // leading constant operand is divisible by it, so we can factor it out.
    const SCEVConstant *FC = cast<SCEVConstant>(Factor);
    if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
      if (!C->getAPInt().srem(FC->getAPInt())) {
        SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
        NewMulOps[0] = SE.getConstant(C->getAPInt().sdiv(FC->getAPInt()));
        S = SE.getMulExpr(NewMulOps);
        return true;
      }
  }

  // In an AddRec, check if both start and step are divisible.
  if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
    const SCEV *Step = A->getStepRecurrence(SE);
    const SCEV *StepRem = SE.getConstant(Step->getType(), 0);
    if (!FactorOutConstant(Step, StepRem, Factor, SE, DL))
      return false;
    if (!StepRem->isZero())
      return false;
    const SCEV *Start = A->getStart();
    if (!FactorOutConstant(Start, Remainder, Factor, SE, DL))
      return false;
    S = SE.getAddRecExpr(Start, Step, A->getLoop(),
                         A->getNoWrapFlags(SCEV::FlagNW));
    return true;
  }

  return false;
}

/// SimplifyAddOperands - Sort and simplify a list of add operands. NumAddRecs
/// is the number of SCEVAddRecExprs present, which are kept at the end of
/// the list.
///
static void SimplifyAddOperands(SmallVectorImpl<const SCEV *> &Ops,
                                Type *Ty,
                                ScalarEvolution &SE) {
  unsigned NumAddRecs = 0;
  for (unsigned i = Ops.size(); i > 0 && isa<SCEVAddRecExpr>(Ops[i-1]); --i)
    ++NumAddRecs;
  // Group Ops into non-addrecs and addrecs.
  SmallVector<const SCEV *, 8> NoAddRecs(Ops.begin(), Ops.end() - NumAddRecs);
  SmallVector<const SCEV *, 8> AddRecs(Ops.end() - NumAddRecs, Ops.end());
  // Let ScalarEvolution sort and simplify the non-addrecs list.
  const SCEV *Sum = NoAddRecs.empty() ?
                    SE.getConstant(Ty, 0) :
                    SE.getAddExpr(NoAddRecs);
  // If it returned an add, use the operands. Otherwise it simplified
  // the sum into a single value, so just use that.
  Ops.clear();
  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Sum))
    Ops.append(Add->op_begin(), Add->op_end());
  else if (!Sum->isZero())
    Ops.push_back(Sum);
  // Then append the addrecs.
  Ops.append(AddRecs.begin(), AddRecs.end());
}

/// SplitAddRecs - Flatten a list of add operands, moving addrec start values
/// out to the top level. For example, convert {a + b,+,c} to a, b, {0,+,c}.
/// This helps expose more opportunities for folding parts of the expressions
/// into GEP indices.
///
static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops,
                         Type *Ty,
                         ScalarEvolution &SE) {
  // Find the addrecs.
  SmallVector<const SCEV *, 8> AddRecs;
  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
    while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Ops[i])) {
      const SCEV *Start = A->getStart();
      if (Start->isZero()) break;
      const SCEV *Zero = SE.getConstant(Ty, 0);
      AddRecs.push_back(SE.getAddRecExpr(Zero,
                                         A->getStepRecurrence(SE),
                                         A->getLoop(),
                                         A->getNoWrapFlags(SCEV::FlagNW)));
      if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Start)) {
        Ops[i] = Zero;
        Ops.append(Add->op_begin(), Add->op_end());
        e += Add->getNumOperands();
      } else {
        Ops[i] = Start;
      }
    }
  if (!AddRecs.empty()) {
    // Add the addrecs onto the end of the list.
    Ops.append(AddRecs.begin(), AddRecs.end());
    // Resort the operand list, moving any constants to the front.
    SimplifyAddOperands(Ops, Ty, SE);
  }
}

/// expandAddToGEP - Expand an addition expression with a pointer type into
/// a GEP instead of using ptrtoint+arithmetic+inttoptr. This helps
/// BasicAliasAnalysis and other passes analyze the result. See the rules
/// for getelementptr vs. inttoptr in
/// http://llvm.org/docs/LangRef.html#pointeraliasing
/// for details.
///
/// Design note: The correctness of using getelementptr here depends on
/// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as
/// they may introduce pointer arithmetic which may not be safely converted
/// into getelementptr.
///
/// Design note: It might seem desirable for this function to be more
/// loop-aware. If some of the indices are loop-invariant while others
/// aren't, it might seem desirable to emit multiple GEPs, keeping the
/// loop-invariant portions of the overall computation outside the loop.
/// However, there are a few reasons this is not done here. Hoisting simple
/// arithmetic is a low-level optimization that often isn't very
/// important until late in the optimization process. In fact, passes
/// like InstructionCombining will combine GEPs, even if it means
/// pushing loop-invariant computation down into loops, so even if the
/// GEPs were split here, the work would quickly be undone. The
/// LoopStrengthReduction pass, which is usually run quite late (and
/// after the last InstructionCombining pass), takes care of hoisting
/// loop-invariant portions of expressions, after considering what
/// can be folded using target addressing modes.
///
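/// For example (illustrative, assuming 4-byte i32 elements): given a base
/// pointer %p of type i32* and the expression (%p + 4 * %i), the element
/// size is factored out of the offset and the expansion emits
/// "getelementptr i32, i32* %p, i64 %i" rather than the equivalent
/// ptrtoint/add/inttoptr sequence.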
Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
                                    const SCEV *const *op_end,
                                    PointerType *PTy,
                                    Type *Ty,
                                    Value *V) {
  Type *OriginalElTy = PTy->getElementType();
  Type *ElTy = OriginalElTy;
  SmallVector<Value *, 4> GepIndices;
  SmallVector<const SCEV *, 8> Ops(op_begin, op_end);
  bool AnyNonZeroIndices = false;

  // Split AddRecs up into parts as either of the parts may be usable
  // without the other.
  SplitAddRecs(Ops, Ty, SE);

  Type *IntPtrTy = DL.getIntPtrType(PTy);

  // Descend down the pointer's type and attempt to convert the other
  // operands into GEP indices, at each level. The first index in a GEP
  // indexes into the array implied by the pointer operand; the rest of
  // the indices index into the element or field type selected by the
  // preceding index.
  for (;;) {
    // If the scale size is not 0, attempt to factor out a scale for
    // array indexing.
    SmallVector<const SCEV *, 8> ScaledOps;
    if (ElTy->isSized()) {
      const SCEV *ElSize = SE.getSizeOfExpr(IntPtrTy, ElTy);
      if (!ElSize->isZero()) {
        SmallVector<const SCEV *, 8> NewOps;
        for (const SCEV *Op : Ops) {
          const SCEV *Remainder = SE.getConstant(Ty, 0);
          if (FactorOutConstant(Op, Remainder, ElSize, SE, DL)) {
            // Op now has ElSize factored out.
            ScaledOps.push_back(Op);
            if (!Remainder->isZero())
              NewOps.push_back(Remainder);
            AnyNonZeroIndices = true;
          } else {
            // The operand was not divisible, so add it to the list of operands
            // we'll scan next iteration.
            NewOps.push_back(Op);
          }
        }
        // If we made any changes, update Ops.
        if (!ScaledOps.empty()) {
          Ops = NewOps;
          SimplifyAddOperands(Ops, Ty, SE);
        }
      }
    }

    // Record the scaled array index for this level of the type. If
    // we didn't find any operands that could be factored, tentatively
    // assume that element zero was selected (since the zero offset
    // would obviously be folded away).
    Value *Scaled = ScaledOps.empty() ?
                    Constant::getNullValue(Ty) :
                    expandCodeFor(SE.getAddExpr(ScaledOps), Ty);
    GepIndices.push_back(Scaled);

    // Collect struct field index operands.
    while (StructType *STy = dyn_cast<StructType>(ElTy)) {
      bool FoundFieldNo = false;
      // An empty struct has no fields.
      if (STy->getNumElements() == 0) break;
      // Field offsets are known. See if a constant offset falls within any of
      // the struct fields.
      if (Ops.empty())
        break;
      if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0]))
        if (SE.getTypeSizeInBits(C->getType()) <= 64) {
          const StructLayout &SL = *DL.getStructLayout(STy);
          uint64_t FullOffset = C->getValue()->getZExtValue();
          if (FullOffset < SL.getSizeInBytes()) {
            unsigned ElIdx = SL.getElementContainingOffset(FullOffset);
            GepIndices.push_back(
                ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx));
            ElTy = STy->getTypeAtIndex(ElIdx);
            Ops[0] =
                SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx));
            AnyNonZeroIndices = true;
            FoundFieldNo = true;
          }
        }
      // If no struct field offsets were found, tentatively assume that
      // field zero was selected (since the zero offset would obviously
      // be folded away).
      if (!FoundFieldNo) {
        ElTy = STy->getTypeAtIndex(0u);
        GepIndices.push_back(
          Constant::getNullValue(Type::getInt32Ty(Ty->getContext())));
      }
    }

    if (ArrayType *ATy = dyn_cast<ArrayType>(ElTy))
      ElTy = ATy->getElementType();
    else
      break;
  }

  // If none of the operands were convertible to proper GEP indices, cast
  // the base to i8* and do an ugly getelementptr with that. It's still
  // better than ptrtoint+arithmetic+inttoptr at least.
  if (!AnyNonZeroIndices) {
    // Cast the base to i8*.
    V = InsertNoopCastOfTo(V,
       Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace()));

    assert(!isa<Instruction>(V) ||
           SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint()));

    // Expand the operands for a plain byte offset.
    Value *Idx = expandCodeFor(SE.getAddExpr(Ops), Ty);

    // Fold a GEP with constant operands.
    if (Constant *CLHS = dyn_cast<Constant>(V))
      if (Constant *CRHS = dyn_cast<Constant>(Idx))
        return ConstantExpr::getGetElementPtr(Type::getInt8Ty(Ty->getContext()),
                                              CLHS, CRHS);

    // Do a quick scan to see if we have this GEP nearby.  If so, reuse it.
    unsigned ScanLimit = 6;
    BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
    // Scanning starts from the last instruction before the insertion point.
    BasicBlock::iterator IP = Builder.GetInsertPoint();
    if (IP != BlockBegin) {
      --IP;
      for (; ScanLimit; --IP, --ScanLimit) {
        // Don't count dbg.value against the ScanLimit, to avoid perturbing the
        // generated code.
        if (isa<DbgInfoIntrinsic>(IP))
          ScanLimit++;
        if (IP->getOpcode() == Instruction::GetElementPtr &&
            IP->getOperand(0) == V && IP->getOperand(1) == Idx)
          return &*IP;
        if (IP == BlockBegin) break;
      }
    }

    // Save the original insertion point so we can restore it when we're done.
    SCEVInsertPointGuard Guard(Builder, this);

    // Move the insertion point out of as many loops as we can.
    while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
      if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break;
      BasicBlock *Preheader = L->getLoopPreheader();
      if (!Preheader) break;

      // Ok, move up a level.
      Builder.SetInsertPoint(Preheader->getTerminator());
    }

    // Emit a GEP.
    Value *GEP = Builder.CreateGEP(Builder.getInt8Ty(), V, Idx, "uglygep");
    rememberInstruction(GEP);

    return GEP;
  }

  {
    SCEVInsertPointGuard Guard(Builder, this);

    // Move the insertion point out of as many loops as we can.
    while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
      if (!L->isLoopInvariant(V)) break;

      bool AnyIndexNotLoopInvariant = any_of(
          GepIndices, [L](Value *Op) { return !L->isLoopInvariant(Op); });

      if (AnyIndexNotLoopInvariant)
        break;

      BasicBlock *Preheader = L->getLoopPreheader();
      if (!Preheader) break;

      // Ok, move up a level.
      Builder.SetInsertPoint(Preheader->getTerminator());
    }

    // Insert a pretty getelementptr. Note that this GEP is not marked inbounds,
    // because ScalarEvolution may have changed the address arithmetic to
    // compute a value which is beyond the end of the allocated object.
    Value *Casted = V;
    if (V->getType() != PTy)
      Casted = InsertNoopCastOfTo(Casted, PTy);
    Value *GEP = Builder.CreateGEP(OriginalElTy, Casted, GepIndices, "scevgep");
    Ops.push_back(SE.getUnknown(GEP));
    rememberInstruction(GEP);
  }

  return expand(SE.getAddExpr(Ops));
}

/// PickMostRelevantLoop - Given two loops pick the one that's most relevant for
/// SCEV expansion. If they are nested, this is the most nested. If they are
/// neighboring, pick the later one.
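/// For example, given two sibling loops where A's header dominates B's
/// header, B (the later loop) is picked.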
static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B,
                                        DominatorTree &DT) {
  if (!A) return B;
  if (!B) return A;
  if (A->contains(B)) return B;
  if (B->contains(A)) return A;
  if (DT.dominates(A->getHeader(), B->getHeader())) return B;
  if (DT.dominates(B->getHeader(), A->getHeader())) return A;
  return A; // Arbitrarily break the tie.
}

/// getRelevantLoop - Get the most relevant loop associated with the given
/// expression, according to PickMostRelevantLoop.
const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
  // Test whether we've already computed the most relevant loop for this SCEV.
  auto Pair = RelevantLoops.insert(std::make_pair(S, nullptr));
  if (!Pair.second)
    return Pair.first->second;

  if (isa<SCEVConstant>(S))
    // A constant has no relevant loops.
    return nullptr;
  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
    if (const Instruction *I = dyn_cast<Instruction>(U->getValue()))
      return Pair.first->second = SE.LI.getLoopFor(I->getParent());
    // A non-instruction has no relevant loops.
    return nullptr;
  }
  if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S)) {
    const Loop *L = nullptr;
    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
      L = AR->getLoop();
    for (const SCEV *Op : N->operands())
      L = PickMostRelevantLoop(L, getRelevantLoop(Op), SE.DT);
    return RelevantLoops[N] = L;
  }
  if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) {
    const Loop *Result = getRelevantLoop(C->getOperand());
    return RelevantLoops[C] = Result;
  }
  if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
    const Loop *Result = PickMostRelevantLoop(
        getRelevantLoop(D->getLHS()), getRelevantLoop(D->getRHS()), SE.DT);
    return RelevantLoops[D] = Result;
  }
  llvm_unreachable("Unexpected SCEV type!");
}

namespace {

/// LoopCompare - Compare loops by PickMostRelevantLoop.
class LoopCompare {
  DominatorTree &DT;
public:
  explicit LoopCompare(DominatorTree &dt) : DT(dt) {}

  bool operator()(std::pair<const Loop *, const SCEV *> LHS,
                  std::pair<const Loop *, const SCEV *> RHS) const {
    // Keep pointer operands sorted at the end.
    if (LHS.second->getType()->isPointerTy() !=
        RHS.second->getType()->isPointerTy())
      return LHS.second->getType()->isPointerTy();

    // Compare loops with PickMostRelevantLoop.
    if (LHS.first != RHS.first)
      return PickMostRelevantLoop(LHS.first, RHS.first, DT) != LHS.first;

    // If one operand is a non-constant negative and the other is not,
    // put the non-constant negative on the right so that a sub can
    // be used instead of a negate and add.
    if (LHS.second->isNonConstantNegative()) {
      if (!RHS.second->isNonConstantNegative())
        return false;
    } else if (RHS.second->isNonConstantNegative())
      return true;

    // Otherwise they are equivalent according to this comparison.
    return false;
  }
};

}

Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
  Type *Ty = SE.getEffectiveSCEVType(S->getType());

  // Collect all the add operands in a loop, along with their associated loops.
  // Iterate in reverse so that constants are emitted last, all else equal, and
  // so that pointer operands are inserted first, which the code below relies on
  // to form more involved GEPs.
  SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
  for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(S->op_end()),
       E(S->op_begin()); I != E; ++I)
    OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));

  // Sort by loop. Use a stable sort so that constants follow non-constants and
  // pointer operands precede non-pointer operands.
  std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(SE.DT));

  // Emit instructions to add all the operands. Hoist as much as possible
  // out of loops, and form meaningful getelementptrs where possible.
  Value *Sum = nullptr;
  for (auto I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E;) {
    const Loop *CurLoop = I->first;
    const SCEV *Op = I->second;
    if (!Sum) {
      // This is the first operand. Just expand it.
      Sum = expand(Op);
      ++I;
    } else if (PointerType *PTy = dyn_cast<PointerType>(Sum->getType())) {
      // The running sum expression is a pointer. Try to form a getelementptr
      // at this level with that as the base.
      SmallVector<const SCEV *, 4> NewOps;
      for (; I != E && I->first == CurLoop; ++I) {
        // If the operand is a SCEVUnknown wrapping a non-instruction value,
        // peek through it, to enable more of it to be folded into the GEP.
        const SCEV *X = I->second;
        if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(X))
          if (!isa<Instruction>(U->getValue()))
            X = SE.getSCEV(U->getValue());
        NewOps.push_back(X);
      }
      Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, Sum);
    } else if (PointerType *PTy = dyn_cast<PointerType>(Op->getType())) {
      // The running sum is an integer, and there's a pointer at this level.
      // Try to form a getelementptr. If the running sum is instructions,
      // use a SCEVUnknown to avoid re-analyzing them.
      SmallVector<const SCEV *, 4> NewOps;
      NewOps.push_back(isa<Instruction>(Sum) ? SE.getUnknown(Sum) :
                                               SE.getSCEV(Sum));
      for (++I; I != E && I->first == CurLoop; ++I)
        NewOps.push_back(I->second);
      Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, expand(Op));
    } else if (Op->isNonConstantNegative()) {
      // Instead of doing a negate and add, just do a subtract.
      Value *W = expandCodeFor(SE.getNegativeSCEV(Op), Ty);
      Sum = InsertNoopCastOfTo(Sum, Ty);
      Sum = InsertBinop(Instruction::Sub, Sum, W);
      ++I;
    } else {
      // A simple add.
      Value *W = expandCodeFor(Op, Ty);
      Sum = InsertNoopCastOfTo(Sum, Ty);
      // Canonicalize a constant to the RHS.
      if (isa<Constant>(Sum)) std::swap(Sum, W);
      Sum = InsertBinop(Instruction::Add, Sum, W);
      ++I;
    }
  }

  return Sum;
}

Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
  Type *Ty = SE.getEffectiveSCEVType(S->getType());

  // Collect all the mul operands in a loop, along with their associated loops.
  // Iterate in reverse so that constants are emitted last, all else equal.
  SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
  for (std::reverse_iterator<SCEVMulExpr::op_iterator> I(S->op_end()),
       E(S->op_begin()); I != E; ++I)
    OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));

  // Sort by loop. Use a stable sort so that constants follow non-constants.
  std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(SE.DT));

  // Emit instructions to mul all the operands. Hoist as much as possible
  // out of loops.
  Value *Prod = nullptr;
  auto I = OpsAndLoops.begin();

  // Expand the calculation of X pow N in the following manner:
  // Let N = P1 + P2 + ... + PK, where all P are powers of 2. Then:
  // X pow N = (X pow P1) * (X pow P2) * ... * (X pow PK).
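  // For example, N = 13 = 1 + 4 + 8 gives
  // X pow 13 = (X pow 1) * (X pow 4) * (X pow 8),
  // which takes three squarings and two extra multiplies instead of twelve
  // multiplies.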
  const auto ExpandOpBinPowN = [this, &I, &OpsAndLoops, &Ty]() {
    auto E = I;
    // Calculate how many times the same operand from the same loop is included
    // into this power.
    uint64_t Exponent = 0;
    const uint64_t MaxExponent = UINT64_MAX >> 1;
    // No one sane will ever try to calculate such huge exponents, but if we
    // need this, we stop on UINT64_MAX / 2 because we need to exit the loop
    // below when the power of 2 exceeds our Exponent, and we want it to be
    // 1u << 31 at most to not deal with unsigned overflow.
    while (E != OpsAndLoops.end() && *I == *E && Exponent != MaxExponent) {
      ++Exponent;
      ++E;
    }
    assert(Exponent > 0 && "Trying to calculate a zeroth exponent of operand?");

    // Calculate powers with exponents 1, 2, 4, 8, etc., and multiply into the
    // result those whose bit is set in Exponent.
    Value *P = expandCodeFor(I->second, Ty);
    Value *Result = nullptr;
    if (Exponent & 1)
      Result = P;
    for (uint64_t BinExp = 2; BinExp <= Exponent; BinExp <<= 1) {
      P = InsertBinop(Instruction::Mul, P, P);
      if (Exponent & BinExp)
        Result = Result ? InsertBinop(Instruction::Mul, Result, P) : P;
    }

    I = E;
    assert(Result && "Nothing was expanded?");
    return Result;
  };

  while (I != OpsAndLoops.end()) {
    if (!Prod) {
      // This is the first operand. Just expand it.
      Prod = ExpandOpBinPowN();
    } else if (I->second->isAllOnesValue()) {
      // Instead of doing a multiply by negative one, just do a negate.
      Prod = InsertNoopCastOfTo(Prod, Ty);
      Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod);
      ++I;
    } else {
      // A simple mul.
      Value *W = ExpandOpBinPowN();
      Prod = InsertNoopCastOfTo(Prod, Ty);
      // Canonicalize a constant to the RHS.
      if (isa<Constant>(Prod)) std::swap(Prod, W);
      const APInt *RHS;
      if (match(W, m_Power2(RHS))) {
        // Canonicalize Prod*(1<<C) to Prod<<C.
        assert(!Ty->isVectorTy() && "vector types are not SCEVable");
        Prod = InsertBinop(Instruction::Shl, Prod,
                           ConstantInt::get(Ty, RHS->logBase2()));
      } else {
        Prod = InsertBinop(Instruction::Mul, Prod, W);
      }
    }
  }

  return Prod;
}

Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
  Type *Ty = SE.getEffectiveSCEVType(S->getType());

  Value *LHS = expandCodeFor(S->getLHS(), Ty);
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) {
    const APInt &RHS = SC->getAPInt();
    if (RHS.isPowerOf2())
      return InsertBinop(Instruction::LShr, LHS,
                         ConstantInt::get(Ty, RHS.logBase2()));
  }

  Value *RHS = expandCodeFor(S->getRHS(), Ty);
  return InsertBinop(Instruction::UDiv, LHS, RHS);
}

/// Move parts of Base into Rest to leave Base with the minimal
/// expression that provides a pointer operand suitable for a
/// GEP expansion.
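/// For example (an illustrative case), Base = {16 + %p,+,8} is peeled down to
/// Base = %p, with the constant 16 and the zero-start recurrence {0,+,8}
/// folded into Rest.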
static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest,
                              ScalarEvolution &SE) {
  while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Base)) {
    Base = A->getStart();
    Rest = SE.getAddExpr(Rest,
                         SE.getAddRecExpr(SE.getConstant(A->getType(), 0),
                                          A->getStepRecurrence(SE),
                                          A->getLoop(),
                                          A->getNoWrapFlags(SCEV::FlagNW)));
  }
  if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(Base)) {
    Base = A->getOperand(A->getNumOperands()-1);
    SmallVector<const SCEV *, 8> NewAddOps(A->op_begin(), A->op_end());
    NewAddOps.back() = Rest;
    Rest = SE.getAddExpr(NewAddOps);
    ExposePointerBase(Base, Rest, SE);
  }
}

/// Determine if this is a well-behaved chain of instructions leading back to
/// the PHI. If so, it may be reused by expanded expressions.
bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV,
                                         const Loop *L) {
  if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV) ||
      (isa<CastInst>(IncV) && !isa<BitCastInst>(IncV)))
    return false;
  // If any of the operands don't dominate the insert position, bail.
  // Addrec operands are always loop-invariant, so this can only happen
  // if there are instructions which haven't been hoisted.
  if (L == IVIncInsertLoop) {
    for (User::op_iterator OI = IncV->op_begin()+1,
           OE = IncV->op_end(); OI != OE; ++OI)
      if (Instruction *OInst = dyn_cast<Instruction>(OI))
        if (!SE.DT.dominates(OInst, IVIncInsertPos))
          return false;
  }
  // Advance to the next instruction.
  IncV = dyn_cast<Instruction>(IncV->getOperand(0));
  if (!IncV)
    return false;

  if (IncV->mayHaveSideEffects())
    return false;

  if (IncV == PN)
    return true;

  return isNormalAddRecExprPHI(PN, IncV, L);
}

/// getIVIncOperand returns an induction variable increment's induction
/// variable operand.
///
/// If allowScale is set, any type of GEP is allowed as long as the nonIV
/// operands dominate InsertPos.
///
/// If allowScale is not set, ensure that a GEP increment conforms to one of the
/// simple patterns generated by getAddRecExprPHILiterally and
      908             : /// expandAddToGEP. If the pattern isn't recognized, return nullptr.
     909        4734 : Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV,
     910             :                                            Instruction *InsertPos,
     911             :                                            bool allowScale) {
     912        4734 :   if (IncV == InsertPos)
     913             :     return nullptr;
     914             : 
     915        4730 :   switch (IncV->getOpcode()) {
     916             :   default:
     917             :     return nullptr;
     918             :   // Check for a simple Add/Sub or GEP of a loop invariant step.
     919        3531 :   case Instruction::Add:
     920             :   case Instruction::Sub: {
     921        3531 :     Instruction *OInst = dyn_cast<Instruction>(IncV->getOperand(1));
     922         134 :     if (!OInst || SE.DT.dominates(OInst, InsertPos))
     923             :       return dyn_cast<Instruction>(IncV->getOperand(0));
     924             :     return nullptr;
     925             :   }
     926          38 :   case Instruction::BitCast:
     927          38 :     return dyn_cast<Instruction>(IncV->getOperand(0));
     928        1135 :   case Instruction::GetElementPtr:
     929        5709 :     for (auto I = IncV->op_begin() + 1, E = IncV->op_end(); I != E; ++I) {
     930        1179 :       if (isa<Constant>(*I))
     931        1151 :         continue;
     932             :       if (Instruction *OInst = dyn_cast<Instruction>(*I)) {
     933          19 :         if (!SE.DT.dominates(OInst, InsertPos))
     934             :           return nullptr;
     935             :       }
     936          27 :       if (allowScale) {
      937             :         // Allow any kind of GEP as long as it can be hoisted.
     938           1 :         continue;
     939             :       }
     940             :       // This must be a pointer addition of constants (pretty), which is already
      941             :       // handled, or some number of address-size elements (ugly). Ugly GEPs
     942             :       // have 2 operands. i1* is used by the expander to represent an
     943             :       // address-size element.
     944          26 :       if (IncV->getNumOperands() != 2)
     945             :         return nullptr;
     946          26 :       unsigned AS = cast<PointerType>(IncV->getType())->getAddressSpace();
     947          52 :       if (IncV->getType() != Type::getInt1PtrTy(SE.getContext(), AS)
     948          44 :           && IncV->getType() != Type::getInt8PtrTy(SE.getContext(), AS))
     949             :         return nullptr;
     950             :       break;
     951        1108 :     }
     952             :     return dyn_cast<Instruction>(IncV->getOperand(0));
     953             :   }
     954             : }
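                      : 
                      : // GEP increments this recognizes, illustratively (IR not from this file):
                      : //
                      : //   %iv.next = getelementptr i32, i32* %iv, i64 1      ; "pretty": constant
                      : //   %iv.next = getelementptr i8, i8* %iv, i64 %step    ; "ugly": 2 operands,
                      : //                                                      ; address-size elements
                      : //
                      : // With allowScale set, any GEP whose non-IV operands dominate InsertPos
                      : // is accepted instead.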
     955             : 
     956             : /// If the insert point of the current builder or any of the builders on the
     957             : /// stack of saved builders has 'I' as its insert point, update it to point to
     958             : /// the instruction after 'I'.  This is intended to be used when the instruction
     959             : /// 'I' is being moved.  If this fixup is not done and 'I' is moved to a
     960             : /// different block, the inconsistent insert point (with a mismatched
     961             : /// Instruction and Block) can lead to an instruction being inserted in a block
     962             : /// other than its parent.
     963         266 : void SCEVExpander::fixupInsertPoints(Instruction *I) {
     964             :   BasicBlock::iterator It(*I);
     965             :   BasicBlock::iterator NewInsertPt = std::next(It);
     966         266 :   if (Builder.GetInsertPoint() == It)
     967           2 :     Builder.SetInsertPoint(&*NewInsertPt);
     968         268 :   for (auto *InsertPtGuard : InsertPointGuards)
     969           1 :     if (InsertPtGuard->GetInsertPoint() == It)
     970             :       InsertPtGuard->SetInsertPoint(NewInsertPt);
     971         266 : }
     972             : 
      973             : /// hoistIVInc - Attempt to hoist a simple IV increment above InsertPos to make
     974             : /// it available to other uses in this loop. Recursively hoist any operands,
     975             : /// until we reach a value that dominates InsertPos.
     976        4564 : bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos) {
     977        4564 :   if (SE.DT.dominates(IncV, InsertPos))
     978             :       return true;
     979             : 
     980             :   // InsertPos must itself dominate IncV so that IncV's new position satisfies
     981             :   // its existing users.
     982         545 :   if (isa<PHINode>(InsertPos) ||
     983         269 :       !SE.DT.dominates(InsertPos->getParent(), IncV->getParent()))
     984             :     return false;
     985             : 
     986         269 :   if (!SE.LI.movementPreservesLCSSAForm(IncV, InsertPos))
     987             :     return false;
     988             : 
     989             :   // Check that the chain of IV operands leading back to Phi can be hoisted.
     990             :   SmallVector<Instruction*, 4> IVIncs;
     991             :   for(;;) {
     992         274 :     Instruction *Oper = getIVIncOperand(IncV, InsertPos, /*allowScale*/true);
     993         274 :     if (!Oper)
     994             :       return false;
     995             :     // IncV is safe to hoist.
     996         270 :     IVIncs.push_back(IncV);
     997         270 :     IncV = Oper;
     998         270 :     if (SE.DT.dominates(IncV, InsertPos))
     999             :       break;
    1000             :   }
    1001         265 :   for (auto I = IVIncs.rbegin(), E = IVIncs.rend(); I != E; ++I) {
    1002         266 :     fixupInsertPoints(*I);
    1003         266 :     (*I)->moveBefore(InsertPos);
    1004             :   }
    1005             :   return true;
    1006             : }
    1007             : 
    1008             : /// Determine if this cyclic phi is in a form that would have been generated by
    1009             : /// LSR. We don't care if the phi was actually expanded in this pass, as long
    1010             : /// as it is in a low-cost form, for example, no implied multiplication. This
    1011             : /// should match any patterns generated by getAddRecExprPHILiterally and
     1012             : /// expandAddToGEP.
    1013        4407 : bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV,
    1014             :                                            const Loop *L) {
    1015             :   for(Instruction *IVOper = IncV;
    1016        4460 :       (IVOper = getIVIncOperand(IVOper, L->getLoopPreheader()->getTerminator(),
    1017        4460 :                                 /*allowScale=*/false));) {
    1018        4404 :     if (IVOper == PN)
    1019             :       return true;
    1020             :   }
    1021             :   return false;
    1022             : }
    1023             : 
    1024             : /// expandIVInc - Expand an IV increment at Builder's current InsertPos.
    1025             : /// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may
    1026             : /// need to materialize IV increments elsewhere to handle difficult situations.
    1027        3271 : Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
    1028             :                                  Type *ExpandTy, Type *IntTy,
    1029             :                                  bool useSubtract) {
    1030             :   Value *IncV;
    1031             :   // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
    1032        3271 :   if (ExpandTy->isPointerTy()) {
    1033             :     PointerType *GEPPtrTy = cast<PointerType>(ExpandTy);
    1034             :     // If the step isn't constant, don't use an implicitly scaled GEP, because
    1035             :     // that would require a multiply inside the loop.
    1036         901 :     if (!isa<ConstantInt>(StepV))
    1037         150 :       GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()),
    1038             :                                   GEPPtrTy->getAddressSpace());
    1039         901 :     const SCEV *const StepArray[1] = { SE.getSCEV(StepV) };
    1040         901 :     IncV = expandAddToGEP(StepArray, StepArray+1, GEPPtrTy, IntTy, PN);
    1041         901 :     if (IncV->getType() != PN->getType()) {
    1042         792 :       IncV = Builder.CreateBitCast(IncV, PN->getType());
    1043         396 :       rememberInstruction(IncV);
    1044             :     }
    1045             :   } else {
    1046        4740 :     IncV = useSubtract ?
    1047        2396 :       Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") :
    1048        7084 :       Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next");
    1049        2370 :     rememberInstruction(IncV);
    1050             :   }
    1051        3271 :   return IncV;
    1052             : }
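                      : 
                      : // Emitted increments, illustratively: an integer phi gets
                      : //   %x.iv.next = add i64 %x.iv, %step     (sub when useSubtract is set)
                      : // while a pointer phi gets a GEP; when the step is not a ConstantInt the
                      : // GEP goes through an i1* ("address-sized element") pointer plus a bitcast
                      : // back, so no multiply is introduced inside the loop.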
    1053             : 
     1054             : /// Hoist the addrec instruction chain rooted in the loop phi above the given
     1055             : /// position. This routine assumes that this is possible (it has been checked).
    1056        4244 : void SCEVExpander::hoistBeforePos(DominatorTree *DT, Instruction *InstToHoist,
    1057             :                                   Instruction *Pos, PHINode *LoopPhi) {
    1058             :   do {
    1059        4244 :     if (DT->dominates(InstToHoist, Pos))
    1060             :       break;
    1061             :     // Make sure the increment is where we want it. But don't move it
    1062             :     // down past a potential existing post-inc user.
    1063           0 :     fixupInsertPoints(InstToHoist);
    1064           0 :     InstToHoist->moveBefore(Pos);
    1065             :     Pos = InstToHoist;
    1066           0 :     InstToHoist = cast<Instruction>(InstToHoist->getOperand(0));
    1067           0 :   } while (InstToHoist != LoopPhi);
    1068        4244 : }
    1069             : 
    1070             : /// Check whether we can cheaply express the requested SCEV in terms of
    1071             : /// the available PHI SCEV by truncation and/or inversion of the step.
    1072          13 : static bool canBeCheaplyTransformed(ScalarEvolution &SE,
    1073             :                                     const SCEVAddRecExpr *Phi,
    1074             :                                     const SCEVAddRecExpr *Requested,
    1075             :                                     bool &InvertStep) {
    1076          13 :   Type *PhiTy = SE.getEffectiveSCEVType(Phi->getType());
    1077          13 :   Type *RequestedTy = SE.getEffectiveSCEVType(Requested->getType());
    1078             : 
    1079          13 :   if (RequestedTy->getIntegerBitWidth() > PhiTy->getIntegerBitWidth())
    1080             :     return false;
    1081             : 
    1082             :   // Try truncate it if necessary.
    1083          13 :   Phi = dyn_cast<SCEVAddRecExpr>(SE.getTruncateOrNoop(Phi, RequestedTy));
    1084             :   if (!Phi)
    1085             :     return false;
    1086             : 
    1087             :   // Check whether truncation will help.
    1088          13 :   if (Phi == Requested) {
    1089           0 :     InvertStep = false;
    1090           0 :     return true;
    1091             :   }
    1092             : 
    1093             :   // Check whether inverting will help: {R,+,-1} == R - {0,+,1}.
    1094          26 :   if (SE.getAddExpr(Requested->getStart(),
    1095             :                     SE.getNegativeSCEV(Requested)) == Phi) {
    1096           2 :     InvertStep = true;
    1097           2 :     return true;
    1098             :   }
    1099             : 
    1100             :   return false;
    1101             : }
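                      : 
                      : // Worked example of the inversion check: with Phi = {0,+,1} and
                      : // Requested = {R,+,-1},
                      : //   Requested->getStart() + (-Requested) = R + {-R,+,1} = {0,+,1} == Phi,
                      : // so InvertStep is set and the requested value is later rebuilt as
                      : // R - Phi (see the TruncTy/InvertStep handling further down).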
    1102             : 
    1103        3257 : static bool IsIncrementNSW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
    1104        3257 :   if (!isa<IntegerType>(AR->getType()))
    1105             :     return false;
    1106             : 
    1107             :   unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
    1108        4712 :   Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2);
    1109        2356 :   const SCEV *Step = AR->getStepRecurrence(SE);
    1110        2356 :   const SCEV *OpAfterExtend = SE.getAddExpr(SE.getSignExtendExpr(Step, WideTy),
    1111        2356 :                                             SE.getSignExtendExpr(AR, WideTy));
    1112             :   const SCEV *ExtendAfterOp =
    1113        2356 :     SE.getSignExtendExpr(SE.getAddExpr(AR, Step), WideTy);
    1114        2356 :   return ExtendAfterOp == OpAfterExtend;
    1115             : }
    1116             : 
    1117        3257 : static bool IsIncrementNUW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
    1118        3257 :   if (!isa<IntegerType>(AR->getType()))
    1119             :     return false;
    1120             : 
    1121             :   unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
    1122        4712 :   Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2);
    1123        2356 :   const SCEV *Step = AR->getStepRecurrence(SE);
    1124        2356 :   const SCEV *OpAfterExtend = SE.getAddExpr(SE.getZeroExtendExpr(Step, WideTy),
    1125        2356 :                                             SE.getZeroExtendExpr(AR, WideTy));
    1126             :   const SCEV *ExtendAfterOp =
    1127        2356 :     SE.getZeroExtendExpr(SE.getAddExpr(AR, Step), WideTy);
    1128        2356 :   return ExtendAfterOp == OpAfterExtend;
    1129             : }
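                      : 
                      : // Both helpers encode the usual widening argument: a width-w increment of
                      : // AR by Step cannot wrap iff evaluating it in 2*w bits is a no-op, i.e.
                      : //   ext<2w>(AR + Step) == ext<2w>(AR) + ext<2w>(Step)
                      : // with sext for NSW and zext for NUW. SCEV constant-folds both sides, so
                      : // a pointer comparison of the folded expressions decides the question.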
    1130             : 
    1131             : /// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
    1132             : /// the base addrec, which is the addrec without any non-loop-dominating
    1133             : /// values, and return the PHI.
    1134             : PHINode *
    1135        7575 : SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
    1136             :                                         const Loop *L,
    1137             :                                         Type *ExpandTy,
    1138             :                                         Type *IntTy,
    1139             :                                         Type *&TruncTy,
    1140             :                                         bool &InvertStep) {
    1141             :   assert((!IVIncInsertLoop||IVIncInsertPos) && "Uninitialized insert position");
    1142             : 
    1143             :   // Reuse a previously-inserted PHI, if present.
    1144        7575 :   BasicBlock *LatchBlock = L->getLoopLatch();
    1145        7575 :   if (LatchBlock) {
    1146             :     PHINode *AddRecPhiMatch = nullptr;
    1147             :     Instruction *IncV = nullptr;
    1148        7575 :     TruncTy = nullptr;
    1149        7575 :     InvertStep = false;
    1150             : 
     1151             :     // Only try partially matching SCEVs that need truncation and/or
    1152             :     // step-inversion if we know this loop is outside the current loop.
    1153             :     bool TryNonMatchingSCEV =
    1154       14890 :         IVIncInsertLoop &&
    1155       14630 :         SE.DT.properlyDominates(LatchBlock, IVIncInsertLoop->getHeader());
    1156             : 
    1157        7575 :     for (PHINode &PN : L->getHeader()->phis()) {
    1158       12855 :       if (!SE.isSCEVable(PN.getType()))
    1159         485 :         continue;
    1160             : 
    1161       12370 :       const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&PN));
    1162        1231 :       if (!PhiSCEV)
    1163        1231 :         continue;
    1164             : 
    1165             :       bool IsMatchingSCEV = PhiSCEV == Normalized;
    1166             :       // We only handle truncation and inversion of phi recurrences for the
    1167             :       // expanded expression if the expanded expression's loop dominates the
    1168             :       // loop we insert to. Check now, so we can bail out early.
    1169       11139 :       if (!IsMatchingSCEV && !TryNonMatchingSCEV)
    1170        6769 :           continue;
    1171             : 
    1172             :       // TODO: this possibly can be reworked to avoid this cast at all.
    1173             :       Instruction *TempIncV =
    1174        4370 :           dyn_cast<Instruction>(PN.getIncomingValueForBlock(LatchBlock));
    1175           1 :       if (!TempIncV)
    1176           1 :         continue;
    1177             : 
    1178             :       // Check whether we can reuse this PHI node.
    1179        4369 :       if (LSRMode) {
    1180        4359 :         if (!isExpandedAddRecExprPHI(&PN, TempIncV, L))
    1181          50 :           continue;
    1182        4309 :         if (L == IVIncInsertLoop && !hoistIVInc(TempIncV, IVIncInsertPos))
    1183           0 :           continue;
    1184             :       } else {
    1185          10 :         if (!isNormalAddRecExprPHI(&PN, TempIncV, L))
    1186           3 :           continue;
    1187             :       }
    1188             : 
    1189             :       // Stop if we have found an exact match SCEV.
    1190        4316 :       if (IsMatchingSCEV) {
    1191             :         IncV = TempIncV;
    1192        4303 :         TruncTy = nullptr;
    1193        4303 :         InvertStep = false;
    1194             :         AddRecPhiMatch = &PN;
    1195        4303 :         break;
    1196             :       }
    1197             : 
    1198             :       // Try whether the phi can be translated into the requested form
    1199             :       // (truncated and/or offset by a constant).
    1200          26 :       if ((!TruncTy || InvertStep) &&
    1201          13 :           canBeCheaplyTransformed(SE, PhiSCEV, Normalized, InvertStep)) {
     1202             :         // Record the phi node, but don't stop; we might find an exact
     1203             :         // match later.
    1204             :         AddRecPhiMatch = &PN;
    1205             :         IncV = TempIncV;
    1206           4 :         TruncTy = SE.getEffectiveSCEVType(Normalized->getType());
    1207             :       }
    1208             :     }
    1209             : 
    1210        7575 :     if (AddRecPhiMatch) {
    1211             :       // Potentially, move the increment. We have made sure in
    1212             :       // isExpandedAddRecExprPHI or hoistIVInc that this is possible.
    1213        4305 :       if (L == IVIncInsertLoop)
    1214        4244 :         hoistBeforePos(&SE.DT, IncV, IVIncInsertPos, AddRecPhiMatch);
    1215             : 
    1216             :       // Ok, the add recurrence looks usable.
    1217             :       // Remember this PHI, even in post-inc mode.
    1218        4305 :       InsertedValues.insert(AddRecPhiMatch);
    1219             :       // Remember the increment.
    1220        4305 :       rememberInstruction(IncV);
    1221        4305 :       return AddRecPhiMatch;
    1222             :     }
    1223             :   }
    1224             : 
    1225             :   // Save the original insertion point so we can restore it when we're done.
    1226        6540 :   SCEVInsertPointGuard Guard(Builder, this);
    1227             : 
    1228             :   // Another AddRec may need to be recursively expanded below. For example, if
    1229             :   // this AddRec is quadratic, the StepV may itself be an AddRec in this
    1230             :   // loop. Remove this loop from the PostIncLoops set before expanding such
    1231             :   // AddRecs. Otherwise, we cannot find a valid position for the step
    1232             :   // (i.e. StepV can never dominate its loop header).  Ideally, we could do
    1233             :   // SavedIncLoops.swap(PostIncLoops), but we generally have a single element,
    1234             :   // so it's not worth implementing SmallPtrSet::swap.
    1235             :   PostIncLoopSet SavedPostIncLoops = PostIncLoops;
    1236        3270 :   PostIncLoops.clear();
    1237             : 
    1238             :   // Expand code for the start value into the loop preheader.
    1239             :   assert(L->getLoopPreheader() &&
    1240             :          "Can't expand add recurrences without a loop preheader!");
    1241        3270 :   Value *StartV = expandCodeFor(Normalized->getStart(), ExpandTy,
    1242        6540 :                                 L->getLoopPreheader()->getTerminator());
    1243             : 
    1244             :   // StartV must have been be inserted into L's preheader to dominate the new
    1245             :   // phi.
    1246             :   assert(!isa<Instruction>(StartV) ||
    1247             :          SE.DT.properlyDominates(cast<Instruction>(StartV)->getParent(),
    1248             :                                  L->getHeader()));
    1249             : 
    1250             :   // Expand code for the step value. Do this before creating the PHI so that PHI
    1251             :   // reuse code doesn't see an incomplete PHI.
    1252        3270 :   const SCEV *Step = Normalized->getStepRecurrence(SE);
    1253             :   // If the stride is negative, insert a sub instead of an add for the increment
    1254             :   // (unless it's a constant, because subtracts of constants are canonicalized
    1255             :   // to adds).
    1256        3270 :   bool useSubtract = !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
    1257             :   if (useSubtract)
    1258          13 :     Step = SE.getNegativeSCEV(Step);
    1259             :   // Expand the step somewhere that dominates the loop header.
    1260        3270 :   Value *StepV = expandCodeFor(Step, IntTy, &L->getHeader()->front());
    1261             : 
    1262             :   // The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if
    1263             :   // we actually do emit an addition.  It does not apply if we emit a
    1264             :   // subtraction.
    1265        3270 :   bool IncrementIsNUW = !useSubtract && IsIncrementNUW(SE, Normalized);
    1266        3270 :   bool IncrementIsNSW = !useSubtract && IsIncrementNSW(SE, Normalized);
    1267             : 
    1268             :   // Create the PHI.
    1269             :   BasicBlock *Header = L->getHeader();
    1270        3270 :   Builder.SetInsertPoint(Header, Header->begin());
    1271             :   pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
    1272        6540 :   PHINode *PN = Builder.CreatePHI(ExpandTy, std::distance(HPB, HPE),
    1273        6540 :                                   Twine(IVName) + ".iv");
    1274        3270 :   rememberInstruction(PN);
    1275             : 
    1276             :   // Create the step instructions and populate the PHI.
    1277       13080 :   for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
    1278             :     BasicBlock *Pred = *HPI;
    1279             : 
    1280             :     // Add a start value.
    1281        9810 :     if (!L->contains(Pred)) {
    1282        3270 :       PN->addIncoming(StartV, Pred);
    1283             :       continue;
    1284             :     }
    1285             : 
    1286             :     // Create a step value and add it to the PHI.
    1287             :     // If IVIncInsertLoop is non-null and equal to the addrec's loop, insert the
    1288             :     // instructions at IVIncInsertPos.
    1289        3270 :     Instruction *InsertPos = L == IVIncInsertLoop ?
    1290             :       IVIncInsertPos : Pred->getTerminator();
    1291        3270 :     Builder.SetInsertPoint(InsertPos);
    1292        3270 :     Value *IncV = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
    1293             : 
    1294        3270 :     if (isa<OverflowingBinaryOperator>(IncV)) {
    1295        2369 :       if (IncrementIsNUW)
    1296         973 :         cast<BinaryOperator>(IncV)->setHasNoUnsignedWrap();
    1297        2369 :       if (IncrementIsNSW)
    1298        1512 :         cast<BinaryOperator>(IncV)->setHasNoSignedWrap();
    1299             :     }
    1300        3270 :     PN->addIncoming(IncV, Pred);
    1301             :   }
    1302             : 
    1303             :   // After expanding subexpressions, restore the PostIncLoops set so the caller
    1304             :   // can ensure that IVIncrement dominates the current uses.
    1305             :   PostIncLoops = SavedPostIncLoops;
    1306             : 
    1307             :   // Remember this PHI, even in post-inc mode.
    1308        3270 :   InsertedValues.insert(PN);
    1309             : 
    1310             :   return PN;
    1311             : }
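                      : 
                      : // When no existing phi is reused, the function above materializes,
                      : // illustratively:
                      : //
                      : //   preheader:
                      : //     ; StartV expanded here; StepV somewhere dominating the header
                      : //   header:
                      : //     %x.iv = phi ty [ StartV, %preheader ], [ %x.iv.next, %latch ]
                      : //   latch (or IVIncInsertPos):
                      : //     %x.iv.next = add ty %x.iv, StepV   ; nuw/nsw set when proven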
    1312             : 
    1313        7575 : Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
    1314             :   Type *STy = S->getType();
    1315        7575 :   Type *IntTy = SE.getEffectiveSCEVType(STy);
    1316        7575 :   const Loop *L = S->getLoop();
    1317             : 
    1318             :   // Determine a normalized form of this expression, which is the expression
    1319             :   // before any post-inc adjustment is made.
    1320             :   const SCEVAddRecExpr *Normalized = S;
    1321        7575 :   if (PostIncLoops.count(L)) {
    1322             :     PostIncLoopSet Loops;
    1323        2923 :     Loops.insert(L);
    1324        2923 :     Normalized = cast<SCEVAddRecExpr>(normalizeForPostIncUse(S, Loops, SE));
    1325             :   }
    1326             : 
    1327             :   // Strip off any non-loop-dominating component from the addrec start.
    1328        7575 :   const SCEV *Start = Normalized->getStart();
    1329             :   const SCEV *PostLoopOffset = nullptr;
    1330       15150 :   if (!SE.properlyDominates(Start, L->getHeader())) {
    1331             :     PostLoopOffset = Start;
    1332           2 :     Start = SE.getConstant(Normalized->getType(), 0);
    1333           2 :     Normalized = cast<SCEVAddRecExpr>(
    1334           1 :       SE.getAddRecExpr(Start, Normalized->getStepRecurrence(SE),
    1335             :                        Normalized->getLoop(),
    1336             :                        Normalized->getNoWrapFlags(SCEV::FlagNW)));
    1337             :   }
    1338             : 
    1339             :   // Strip off any non-loop-dominating component from the addrec step.
    1340        7575 :   const SCEV *Step = Normalized->getStepRecurrence(SE);
    1341             :   const SCEV *PostLoopScale = nullptr;
    1342       15150 :   if (!SE.dominates(Step, L->getHeader())) {
    1343             :     PostLoopScale = Step;
    1344           0 :     Step = SE.getConstant(Normalized->getType(), 1);
    1345           0 :     if (!Start->isZero()) {
    1346             :         // The normalization below assumes that Start is constant zero, so if
    1347             :         // it isn't re-associate Start to PostLoopOffset.
    1348             :         assert(!PostLoopOffset && "Start not-null but PostLoopOffset set?");
    1349             :         PostLoopOffset = Start;
    1350           0 :         Start = SE.getConstant(Normalized->getType(), 0);
    1351             :     }
    1352             :     Normalized =
    1353           0 :       cast<SCEVAddRecExpr>(SE.getAddRecExpr(
    1354             :                              Start, Step, Normalized->getLoop(),
    1355             :                              Normalized->getNoWrapFlags(SCEV::FlagNW)));
    1356             :   }
    1357             : 
    1358             :   // Expand the core addrec. If we need post-loop scaling, force it to
    1359             :   // expand to an integer type to avoid the need for additional casting.
    1360        7575 :   Type *ExpandTy = PostLoopScale ? IntTy : STy;
    1361             :   // We can't use a pointer type for the addrec if the pointer type is
    1362             :   // non-integral.
    1363             :   Type *AddRecPHIExpandTy =
    1364        7575 :       DL.isNonIntegralPointerType(STy) ? Normalized->getType() : ExpandTy;
    1365             : 
    1366             :   // In some cases, we decide to reuse an existing phi node but need to truncate
    1367             :   // it and/or invert the step.
    1368        7575 :   Type *TruncTy = nullptr;
    1369        7575 :   bool InvertStep = false;
    1370             :   PHINode *PN = getAddRecExprPHILiterally(Normalized, L, AddRecPHIExpandTy,
    1371        7575 :                                           IntTy, TruncTy, InvertStep);
    1372             : 
    1373             :   // Accommodate post-inc mode, if necessary.
    1374             :   Value *Result;
    1375        7575 :   if (!PostIncLoops.count(L))
    1376             :     Result = PN;
    1377             :   else {
    1378             :     // In PostInc mode, use the post-incremented value.
    1379        2923 :     BasicBlock *LatchBlock = L->getLoopLatch();
    1380             :     assert(LatchBlock && "PostInc mode requires a unique loop latch!");
    1381        2923 :     Result = PN->getIncomingValueForBlock(LatchBlock);
    1382             : 
    1383             :     // For an expansion to use the postinc form, the client must call
    1384             :     // expandCodeFor with an InsertPoint that is either outside the PostIncLoop
    1385             :     // or dominated by IVIncInsertPos.
    1386        5846 :     if (isa<Instruction>(Result) &&
    1387        5846 :         !SE.DT.dominates(cast<Instruction>(Result),
    1388             :                          &*Builder.GetInsertPoint())) {
    1389             :       // The induction variable's postinc expansion does not dominate this use.
    1390             :       // IVUsers tries to prevent this case, so it is rare. However, it can
    1391             :       // happen when an IVUser outside the loop is not dominated by the latch
    1392             :       // block. Adjusting IVIncInsertPos before expansion begins cannot handle
     1393             :       // all cases. Consider a phi outside the loop whose operand is replaced
     1394             :       // during expansion with the value of the postinc user. Without fundamentally
    1394             :       // expansion with the value of the postinc user. Without fundamentally
    1395             :       // changing the way postinc users are tracked, the only remedy is
    1396             :       // inserting an extra IV increment. StepV might fold into PostLoopOffset,
    1397             :       // but hopefully expandCodeFor handles that.
    1398             :       bool useSubtract =
    1399           1 :         !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
    1400             :       if (useSubtract)
    1401           0 :         Step = SE.getNegativeSCEV(Step);
    1402             :       Value *StepV;
    1403             :       {
    1404             :         // Expand the step somewhere that dominates the loop header.
    1405           2 :         SCEVInsertPointGuard Guard(Builder, this);
    1406           1 :         StepV = expandCodeFor(Step, IntTy, &L->getHeader()->front());
    1407             :       }
    1408           1 :       Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
    1409             :     }
    1410             :   }
    1411             : 
    1412             :   // We have decided to reuse an induction variable of a dominating loop. Apply
    1413             :   // truncation and/or inversion of the step.
    1414        7575 :   if (TruncTy) {
    1415           2 :     Type *ResTy = Result->getType();
    1416             :     // Normalize the result type.
    1417           2 :     if (ResTy != SE.getEffectiveSCEVType(ResTy))
    1418           0 :       Result = InsertNoopCastOfTo(Result, SE.getEffectiveSCEVType(ResTy));
    1419             :     // Truncate the result.
    1420           2 :     if (TruncTy != Result->getType()) {
    1421           4 :       Result = Builder.CreateTrunc(Result, TruncTy);
    1422           2 :       rememberInstruction(Result);
    1423             :     }
    1424             :     // Invert the result.
    1425           2 :     if (InvertStep) {
    1426           6 :       Result = Builder.CreateSub(expandCodeFor(Normalized->getStart(), TruncTy),
    1427             :                                  Result);
    1428           2 :       rememberInstruction(Result);
    1429             :     }
    1430             :   }
    1431             : 
    1432             :   // Re-apply any non-loop-dominating scale.
    1433        7575 :   if (PostLoopScale) {
    1434             :     assert(S->isAffine() && "Can't linearly scale non-affine recurrences.");
    1435           0 :     Result = InsertNoopCastOfTo(Result, IntTy);
    1436           0 :     Result = Builder.CreateMul(Result,
    1437             :                                expandCodeFor(PostLoopScale, IntTy));
    1438           0 :     rememberInstruction(Result);
    1439             :   }
    1440             : 
    1441             :   // Re-apply any non-loop-dominating offset.
    1442        7575 :   if (PostLoopOffset) {
    1443             :     if (PointerType *PTy = dyn_cast<PointerType>(ExpandTy)) {
    1444           2 :       if (Result->getType()->isIntegerTy()) {
    1445           1 :         Value *Base = expandCodeFor(PostLoopOffset, ExpandTy);
    1446           1 :         const SCEV *const OffsetArray[1] = {SE.getUnknown(Result)};
    1447           1 :         Result = expandAddToGEP(OffsetArray, OffsetArray + 1, PTy, IntTy, Base);
    1448             :       } else {
    1449           0 :         const SCEV *const OffsetArray[1] = {PostLoopOffset};
    1450             :         Result =
    1451           0 :             expandAddToGEP(OffsetArray, OffsetArray + 1, PTy, IntTy, Result);
    1452             :       }
    1453             :     } else {
    1454           0 :       Result = InsertNoopCastOfTo(Result, IntTy);
    1455           0 :       Result = Builder.CreateAdd(Result,
    1456             :                                  expandCodeFor(PostLoopOffset, IntTy));
    1457           0 :       rememberInstruction(Result);
    1458             :     }
    1459             :   }
    1460             : 
    1461        7575 :   return Result;
    1462             : }
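                      : 
                      : // Normalization example, illustratively: for S = {%b,+,4}<%L> where %b
                      : // does not dominate %L's header, the base addrec {0,+,4}<%L> is expanded
                      : // to a phi and %b is re-applied afterwards as PostLoopOffset (a GEP for
                      : // pointers, an add otherwise); a non-loop-dominating step is split off as
                      : // PostLoopScale the same way and re-applied as a multiply.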
    1463             : 
    1464        7946 : Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
    1465        7946 :   if (!CanonicalMode) return expandAddRecExprLiterally(S);
    1466             : 
    1467         742 :   Type *Ty = SE.getEffectiveSCEVType(S->getType());
    1468         371 :   const Loop *L = S->getLoop();
    1469             : 
    1470             :   // First check for an existing canonical IV in a suitable type.
    1471             :   PHINode *CanonicalIV = nullptr;
    1472         371 :   if (PHINode *PN = L->getCanonicalInductionVariable())
    1473         227 :     if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty))
    1474             :       CanonicalIV = PN;
    1475             : 
    1476             :   // Rewrite an AddRec in terms of the canonical induction variable, if
    1477             :   // its type is more narrow.
    1478         215 :   if (CanonicalIV &&
    1479         215 :       SE.getTypeSizeInBits(CanonicalIV->getType()) >
    1480         215 :       SE.getTypeSizeInBits(Ty)) {
    1481          20 :     SmallVector<const SCEV *, 4> NewOps(S->getNumOperands());
    1482          30 :     for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i)
    1483          40 :       NewOps[i] = SE.getAnyExtendExpr(S->op_begin()[i], CanonicalIV->getType());
    1484          20 :     Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(),
    1485          10 :                                        S->getNoWrapFlags(SCEV::FlagNW)));
    1486             :     BasicBlock::iterator NewInsertPt =
    1487          10 :         findInsertPointAfter(cast<Instruction>(V), Builder.GetInsertBlock());
    1488          10 :     V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), nullptr,
    1489             :                       &*NewInsertPt);
    1490             :     return V;
    1491             :   }
    1492             : 
    1493             :   // {X,+,F} --> X + {0,+,F}
    1494         722 :   if (!S->getStart()->isZero()) {
    1495         169 :     SmallVector<const SCEV *, 4> NewOps(S->op_begin(), S->op_end());
    1496         338 :     NewOps[0] = SE.getConstant(Ty, 0);
    1497         169 :     const SCEV *Rest = SE.getAddRecExpr(NewOps, L,
    1498         169 :                                         S->getNoWrapFlags(SCEV::FlagNW));
    1499             : 
    1500             :     // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the
    1501             :     // comments on expandAddToGEP for details.
    1502         338 :     const SCEV *Base = S->getStart();
    1503         169 :     const SCEV *RestArray[1] = { Rest };
    1504             :     // Dig into the expression to find the pointer base for a GEP.
    1505         169 :     ExposePointerBase(Base, RestArray[0], SE);
    1506             :     // If we found a pointer, expand the AddRec with a GEP.
    1507         169 :     if (PointerType *PTy = dyn_cast<PointerType>(Base->getType())) {
    1508             :       // Make sure the Base isn't something exotic, such as a multiplied
    1509             :       // or divided pointer value. In those cases, the result type isn't
    1510             :       // actually a pointer type.
    1511         130 :       if (!isa<SCEVMulExpr>(Base) && !isa<SCEVUDivExpr>(Base)) {
    1512          65 :         Value *StartV = expand(Base);
    1513             :         assert(StartV->getType() == PTy && "Pointer type mismatch for GEP!");
    1514          65 :         return expandAddToGEP(RestArray, RestArray+1, PTy, Ty, StartV);
    1515             :       }
    1516             :     }
    1517             : 
    1518             :     // Just do a normal add. Pre-expand the operands to suppress folding.
    1519             :     //
    1520             :     // The LHS and RHS values are factored out of the expand call to make the
    1521             :     // output independent of the argument evaluation order.
    1522         208 :     const SCEV *AddExprLHS = SE.getUnknown(expand(S->getStart()));
    1523         104 :     const SCEV *AddExprRHS = SE.getUnknown(expand(Rest));
    1524         104 :     return expand(SE.getAddExpr(AddExprLHS, AddExprRHS));
    1525             :   }
    1526             : 
    1527             :   // If we don't yet have a canonical IV, create one.
    1528         192 :   if (!CanonicalIV) {
    1529             :     // Create and insert the PHI node for the induction variable in the
    1530             :     // specified loop.
    1531             :     BasicBlock *Header = L->getHeader();
    1532             :     pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
    1533         168 :     CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar",
    1534             :                                   &Header->front());
    1535          84 :     rememberInstruction(CanonicalIV);
    1536             : 
    1537             :     SmallSet<BasicBlock *, 4> PredSeen;
    1538          84 :     Constant *One = ConstantInt::get(Ty, 1);
    1539         340 :     for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
    1540             :       BasicBlock *HP = *HPI;
    1541         172 :       if (!PredSeen.insert(HP).second) {
    1542             :         // There must be an incoming value for each predecessor, even the
    1543             :         // duplicates!
    1544           0 :         CanonicalIV->addIncoming(CanonicalIV->getIncomingValueForBlock(HP), HP);
    1545           0 :         continue;
    1546             :       }
    1547             : 
    1548         172 :       if (L->contains(HP)) {
    1549             :         // Insert a unit add instruction right before the terminator
    1550             :         // corresponding to the back-edge.
    1551          88 :         Instruction *Add = BinaryOperator::CreateAdd(CanonicalIV, One,
    1552             :                                                      "indvar.next",
    1553             :                                                      HP->getTerminator());
    1554          88 :         Add->setDebugLoc(HP->getTerminator()->getDebugLoc());
    1555          88 :         rememberInstruction(Add);
    1556          88 :         CanonicalIV->addIncoming(Add, HP);
    1557             :       } else {
    1558          84 :         CanonicalIV->addIncoming(Constant::getNullValue(Ty), HP);
    1559             :       }
    1560             :     }
    1561             :   }
    1562             : 
    1563             :   // {0,+,1} --> Insert a canonical induction variable into the loop!
    1564         384 :   if (S->isAffine() && S->getOperand(1)->isOne()) {
    1565             :     assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) &&
    1566             :            "IVs with types different from the canonical IV should "
    1567             :            "already have been handled!");
    1568             :     return CanonicalIV;
    1569             :   }
    1570             : 
    1571             :   // {0,+,F} --> {0,+,1} * F
    1572             : 
    1573             :   // If this is a simple linear addrec, emit it now as a special case.
    1574          83 :   if (S->isAffine())    // {0,+,F} --> i*F
    1575             :     return
    1576         249 :       expand(SE.getTruncateOrNoop(
    1577          83 :         SE.getMulExpr(SE.getUnknown(CanonicalIV),
    1578             :                       SE.getNoopOrAnyExtend(S->getOperand(1),
    1579             :                                             CanonicalIV->getType())),
    1580          83 :         Ty));
    1581             : 
    1582             :   // If this is a chain of recurrences, turn it into a closed form, using the
    1583             :   // folders, then expandCodeFor the closed form.  This allows the folders to
    1584             :   // simplify the expression without having to build a bunch of special code
    1585             :   // into this folder.
    1586           0 :   const SCEV *IH = SE.getUnknown(CanonicalIV);   // Get I as a "symbolic" SCEV.
    1587             : 
    1588             :   // Promote S up to the canonical IV type, if the cast is foldable.
    1589             :   const SCEV *NewS = S;
    1590           0 :   const SCEV *Ext = SE.getNoopOrAnyExtend(S, CanonicalIV->getType());
    1591           0 :   if (isa<SCEVAddRecExpr>(Ext))
    1592             :     NewS = Ext;
    1593             : 
    1594           0 :   const SCEV *V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE);
    1595             :   //cerr << "Evaluated: " << *this << "\n     to: " << *V << "\n";
    1596             : 
    1597             :   // Truncate the result down to the original type, if needed.
    1598           0 :   const SCEV *T = SE.getTruncateOrNoop(V, Ty);
    1599           0 :   return expand(T);
    1600             : }
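                      : 
                      : // Canonical-mode picture, illustratively: a non-unit affine addrec
                      : // {0,+,%s}<%L> is expanded as a multiple of the canonical IV,
                      : //
                      : //   %indvar      = phi i64 [ 0, %preheader ], [ %indvar.next, %latch ]
                      : //   %indvar.next = add i64 %indvar, 1
                      : //   %v           = mul i64 %indvar, %s
                      : //
                      : // while higher-order recurrences are folded through evaluateAtIteration.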
    1601             : 
    1602         222 : Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) {
    1603         222 :   Type *Ty = SE.getEffectiveSCEVType(S->getType());
    1604         222 :   Value *V = expandCodeFor(S->getOperand(),
    1605         444 :                            SE.getEffectiveSCEVType(S->getOperand()->getType()));
    1606         444 :   Value *I = Builder.CreateTrunc(V, Ty);
    1607         222 :   rememberInstruction(I);
    1608         222 :   return I;
    1609             : }
    1610             : 
    1611         414 : Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) {
    1612         414 :   Type *Ty = SE.getEffectiveSCEVType(S->getType());
    1613         414 :   Value *V = expandCodeFor(S->getOperand(),
    1614         828 :                            SE.getEffectiveSCEVType(S->getOperand()->getType()));
    1615         828 :   Value *I = Builder.CreateZExt(V, Ty);
    1616         414 :   rememberInstruction(I);
    1617         414 :   return I;
    1618             : }
    1619             : 
    1620         151 : Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
    1621         151 :   Type *Ty = SE.getEffectiveSCEVType(S->getType());
    1622         151 :   Value *V = expandCodeFor(S->getOperand(),
    1623         302 :                            SE.getEffectiveSCEVType(S->getOperand()->getType()));
    1624         302 :   Value *I = Builder.CreateSExt(V, Ty);
    1625         151 :   rememberInstruction(I);
    1626         151 :   return I;
    1627             : }
    1628             : 
    1629         348 : Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
    1630         696 :   Value *LHS = expand(S->getOperand(S->getNumOperands()-1));
    1631         348 :   Type *Ty = LHS->getType();
    1632         720 :   for (int i = S->getNumOperands()-2; i >= 0; --i) {
    1633             :     // In the case of mixed integer and pointer types, do the
    1634             :     // rest of the comparisons as integer.
    1635         744 :     if (S->getOperand(i)->getType() != Ty) {
    1636           0 :       Ty = SE.getEffectiveSCEVType(Ty);
    1637           0 :       LHS = InsertNoopCastOfTo(LHS, Ty);
    1638             :     }
    1639         744 :     Value *RHS = expandCodeFor(S->getOperand(i), Ty);
    1640         744 :     Value *ICmp = Builder.CreateICmpSGT(LHS, RHS);
    1641         372 :     rememberInstruction(ICmp);
    1642         372 :     Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax");
    1643         372 :     rememberInstruction(Sel);
    1644             :     LHS = Sel;
    1645             :   }
    1646             :   // In the case of mixed integer and pointer types, cast the
    1647             :   // final result back to the pointer type.
    1648         696 :   if (LHS->getType() != S->getType())
    1649           0 :     LHS = InsertNoopCastOfTo(LHS, S->getType());
    1650         348 :   return LHS;
    1651             : }
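                      : 
                      : // Expansion shape for the max expressions, illustratively: each operand
                      : // folds in as a compare plus select, e.g. smax(%a, %b) becomes
                      : //
                      : //   %cmp  = icmp sgt i64 %a, %b
                      : //   %smax = select i1 %cmp, i64 %a, i64 %b
                      : //
                      : // visitUMaxExpr below is identical apart from icmp ugt and the "umax"
                      : // name.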
    1652             : 
    1653          92 : Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
    1654         184 :   Value *LHS = expand(S->getOperand(S->getNumOperands()-1));
    1655          92 :   Type *Ty = LHS->getType();
    1656         184 :   for (int i = S->getNumOperands()-2; i >= 0; --i) {
    1657             :     // In the case of mixed integer and pointer types, do the
    1658             :     // rest of the comparisons as integer.
    1659         184 :     if (S->getOperand(i)->getType() != Ty) {
    1660           1 :       Ty = SE.getEffectiveSCEVType(Ty);
    1661           1 :       LHS = InsertNoopCastOfTo(LHS, Ty);
    1662             :     }
    1663         184 :     Value *RHS = expandCodeFor(S->getOperand(i), Ty);
    1664         184 :     Value *ICmp = Builder.CreateICmpUGT(LHS, RHS);
    1665          92 :     rememberInstruction(ICmp);
    1666          92 :     Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax");
    1667          92 :     rememberInstruction(Sel);
    1668             :     LHS = Sel;
    1669             :   }
    1670             :   // In the case of mixed integer and pointer types, cast the
    1671             :   // final result back to the pointer type.
    1672         184 :   if (LHS->getType() != S->getType())
    1673           1 :     LHS = InsertNoopCastOfTo(LHS, S->getType());
    1674          92 :   return LHS;
    1675             : }
    1676             : 
    1677       17926 : Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty,
    1678             :                                    Instruction *IP) {
    1679             :   setInsertPoint(IP);
    1680       17926 :   return expandCodeFor(SH, Ty);
    1681             : }
    1682             : 
    1683       84271 : Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) {
    1684             :   // Expand the code for this SCEV.
    1685       84271 :   Value *V = expand(SH);
    1686       84271 :   if (Ty) {
    1687             :     assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) &&
    1688             :            "non-trivial casts should be done with the SCEVs directly!");
    1689       63787 :     V = InsertNoopCastOfTo(V, Ty);
    1690             :   }
    1691       84271 :   return V;
    1692             : }
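                      : 
                      : // Minimal client-side sketch (illustrative; assumes SE, DL, and a loop L
                      : // with a preheader are in scope):
                      : //
                      : //   SCEVExpander Exp(SE, DL, "example");
                      : //   const SCEV *BTC = SE.getBackedgeTakenCount(L);
                      : //   Value *V = Exp.expandCodeFor(BTC, BTC->getType(),
                      : //                                L->getLoopPreheader()->getTerminator());
                      : //
                      : // Passing a null Type keeps the SCEV's own type and skips the no-op cast.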
    1693             : 
    1694             : ScalarEvolution::ValueOffsetPair
    1695       80028 : SCEVExpander::FindValueInExprValueMap(const SCEV *S,
    1696             :                                       const Instruction *InsertPt) {
    1697       80028 :   SetVector<ScalarEvolution::ValueOffsetPair> *Set = SE.getSCEVValues(S);
    1698             :   // If the expansion is not in CanonicalMode, and the SCEV contains any
     1699             :   // sub scAddRecExpr type SCEV, the SCEV must be expanded literally.
    1700       80028 :   if (CanonicalMode || !SE.containsAddRecurrence(S)) {
    1701             :     // If S is scConstant, it may be worse to reuse an existing Value.
    1702       72087 :     if (S->getSCEVType() != scConstant && Set) {
    1703             :       // Choose a Value from the set which dominates the insertPt.
    1704             :       // insertPt should be inside the Value's parent loop so as not to break
    1705             :       // the LCSSA form.
    1706       17702 :       for (auto const &VOPair : *Set) {
    1707       11792 :         Value *V = VOPair.first;
    1708       11792 :         ConstantInt *Offset = VOPair.second;
    1709             :         Instruction *EntInst = nullptr;
    1710       17823 :         if (V && isa<Instruction>(V) && (EntInst = cast<Instruction>(V)) &&
    1711       10788 :             S->getType() == V->getType() &&
    1712        9514 :             EntInst->getFunction() == InsertPt->getFunction() &&
    1713       17809 :             SE.DT.dominates(EntInst, InsertPt) &&
    1714        5737 :             (SE.LI.getLoopFor(EntInst->getParent()) == nullptr ||
    1715        1260 :              SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt)))
    1716        4343 :           return {V, Offset};
    1717             :       }
    1718             :     }
    1719             :   }
    1720       75685 :   return {nullptr, nullptr};
    1721             : }
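                      : 
                      : // The returned pair (V, Offset) is read by expand() below as
                      : // S == SCEV(V) - Offset: an integer reuse becomes "sub V, Offset", and a
                      : // pointer reuse becomes a GEP by -Offset scaled to the element size,
                      : // falling back to an i8* "uglygep" when the offset doesn't divide evenly.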
    1722             : 
    1723             : // The expansion of SCEV will either reuse a previous Value in ExprValueMap,
    1724             : // or expand the SCEV literally. Specifically, if the expansion is in LSRMode,
    1725             : // and the SCEV contains any sub scAddRecExpr type SCEV, it will be expanded
    1726             : // literally, to prevent LSR's transformed SCEV from being reverted. Otherwise,
    1727             : // the expansion will try to reuse Value from ExprValueMap, and only when it
    1728             : // fails, expand the SCEV literally.
    1729      108885 : Value *SCEVExpander::expand(const SCEV *S) {
    1730             :   // Compute an insertion point for this SCEV object. Hoist the instructions
    1731             :   // as far out in the loop nest as possible.
    1732             :   Instruction *InsertPt = &*Builder.GetInsertPoint();
    1733      138714 :   for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());;
    1734             :        L = L->getParentLoop())
    1735      138714 :     if (SE.isLoopInvariant(S, L)) {
    1736       65235 :       if (!L) break;
    1737       29829 :       if (BasicBlock *Preheader = L->getLoopPreheader())
    1738             :         InsertPt = Preheader->getTerminator();
    1739             :       else {
    1740             :         // LSR sets the insertion point for AddRec start/step values to the
    1741             :         // block start to simplify value reuse, even though it's an invalid
    1742             :         // position. SCEVExpander must correct for this in all cases.
    1743             :         InsertPt = &*L->getHeader()->getFirstInsertionPt();
    1744             :       }
    1745             :     } else {
     1746             :       // We can move the insertion point only if there are no div or rem operations;
     1747             :       // otherwise we risk moving it over a check for a zero denominator.
    1748             :       auto SafeToHoist = [](const SCEV *S) {
    1749       40618 :         return !SCEVExprContains(S, [](const SCEV *S) {
    1750             :                   if (const auto *D = dyn_cast<SCEVUDivExpr>(S)) {
    1751         445 :                     if (const auto *SC = dyn_cast<SCEVConstant>(D->getRHS()))
    1752             :                       // Division by non-zero constants can be hoisted.
    1753         431 :                       return SC->getValue()->isZero();
    1754             :                     // All other divisions should not be moved as they may be
    1755             :                     // divisions by zero and should be kept within the
    1756             :                     // conditions of the surrounding loops that guard their
    1757             :                     // execution (see PR35406).
    1758             :                     return true;
    1759             :                   }
    1760             :                   return false;
    1761             :                 });
    1762             :       };
    1763             :       // If the SCEV is computable at this level, insert it into the header
    1764             :       // after the PHIs (and after any other instructions that we've inserted
    1765             :       // there) so that it is guaranteed to dominate any user inside the loop.
    1766       83883 :       if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L) &&
    1767             :           SafeToHoist(S))
    1768             :         InsertPt = &*L->getHeader()->getFirstInsertionPt();
    1769      119570 :       while (InsertPt->getIterator() != Builder.GetInsertPoint() &&
    1770       30222 :              (isInsertedInstruction(InsertPt) ||
    1771             :               isa<DbgInfoIntrinsic>(InsertPt))) {
    1772             :         InsertPt = &*std::next(InsertPt->getIterator());
    1773             :       }
    1774             :       break;
    1775       29829 :     }
    1776             : 
    1777             :   // Check to see if we already expanded this here.
    1778      108885 :   auto I = InsertedExpressions.find(std::make_pair(S, InsertPt));
    1779      108885 :   if (I != InsertedExpressions.end())
    1780       31230 :     return I->second;
    1781             : 
    1782      155310 :   SCEVInsertPointGuard Guard(Builder, this);
    1783       77655 :   Builder.SetInsertPoint(InsertPt);
    1784             : 
    1785             :   // Expand the expression into instructions.
    1786       77655 :   ScalarEvolution::ValueOffsetPair VO = FindValueInExprValueMap(S, InsertPt);
    1787       77655 :   Value *V = VO.first;
    1788             : 
    1789       77655 :   if (!V)
    1790       73554 :     V = visit(S);
    1791        4101 :   else if (VO.second) {
    1792          52 :     if (PointerType *Vty = dyn_cast<PointerType>(V->getType())) {
    1793           2 :       Type *Ety = Vty->getPointerElementType();
    1794             :       int64_t Offset = VO.second->getSExtValue();
    1795           2 :       int64_t ESize = SE.getTypeSizeInBits(Ety);
    1796           2 :       if ((Offset * 8) % ESize == 0) {
    1797             :         ConstantInt *Idx =
    1798           2 :             ConstantInt::getSigned(VO.second->getType(), -(Offset * 8) / ESize);
    1799           2 :         V = Builder.CreateGEP(Ety, V, Idx, "scevgep");
    1800             :       } else {
    1801             :         ConstantInt *Idx =
    1802           2 :             ConstantInt::getSigned(VO.second->getType(), -Offset);
    1803             :         unsigned AS = Vty->getAddressSpace();
    1804           4 :         V = Builder.CreateBitCast(V, Type::getInt8PtrTy(SE.getContext(), AS));
    1805           2 :         V = Builder.CreateGEP(Type::getInt8Ty(SE.getContext()), V, Idx,
    1806             :                               "uglygep");
    1807           1 :         V = Builder.CreateBitCast(V, Vty);
    1808             :       }
    1809             :     } else {
    1810         100 :       V = Builder.CreateSub(V, VO.second);
    1811             :     }
    1812             :   }
    1813             :   // Remember the expanded value for this SCEV at this location.
    1814             :   //
    1815             :   // This is independent of PostIncLoops. The mapped value simply materializes
    1816             :   // the expression at this insertion point. If the mapped value happened to be
    1817             :   // a postinc expansion, it could be reused by a non-postinc user, but only if
    1818             :   // its insertion point was already at the head of the loop.
    1819      155310 :   InsertedExpressions[std::make_pair(S, InsertPt)] = V;
    1820             :   return V;
    1821             : }
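                     : // Note: InsertedExpressions is keyed on the (SCEV, insertion point) pair
                     : // above, so a second request to expand the same expression at the same
                     : // point returns the previously materialized Value instead of emitting
                     : // duplicate IR.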
    1822             : 
    1823       37262 : void SCEVExpander::rememberInstruction(Value *I) {
    1824       37262 :   if (!PostIncLoops.empty())
    1825        2888 :     InsertedPostIncValues.insert(I);
    1826             :   else
    1827       34374 :     InsertedValues.insert(I);
    1828       37262 : }
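                     : // Post-increment expansions are tracked separately from ordinary ones so
                     : // that they can be invalidated on their own when the post-increment loop
                     : // set changes.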
    1829             : 
    1830             : /// getOrInsertCanonicalInductionVariable - This method returns the
    1831             : /// canonical induction variable of the specified type for the specified
    1832             : /// loop (inserting one if there is none).  A canonical induction variable
    1833             : /// starts at zero and steps by one on each iteration.
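                     : ///
                     : /// For a loop %L this conceptually materializes IR such as (illustrative
                     : /// only; value names are made up):
                     : ///   %indvar      = phi i32 [ 0, %preheader ], [ %indvar.next, %latch ]
                     : ///   %indvar.next = add i32 %indvar, 1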
    1834             : PHINode *
    1835           0 : SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
    1836             :                                                     Type *Ty) {
    1837             :   assert(Ty->isIntegerTy() && "Can only insert integer induction variables!");
    1838             : 
    1839             :   // Build a SCEV for {0,+,1}<L>.
    1840             :   // Conservatively use FlagAnyWrap for now.
    1841           0 :   const SCEV *H = SE.getAddRecExpr(SE.getConstant(Ty, 0),
    1842           0 :                                    SE.getConstant(Ty, 1), L, SCEV::FlagAnyWrap);
    1843             : 
    1844             :   // Emit code for it.
    1845           0 :   SCEVInsertPointGuard Guard(Builder, this);
    1846             :   PHINode *V =
    1847           0 :       cast<PHINode>(expandCodeFor(H, nullptr, &L->getHeader()->front()));
    1848             : 
    1849           0 :   return V;
    1850             : }
    1851             : 
    1852             : /// replaceCongruentIVs - Check for congruent phis in this loop header and
    1853             : /// replace them with their most canonical representative. Return the number of
    1854             : /// phis eliminated.
    1855             : ///
    1856             : /// This does not depend on any SCEVExpander state but should be used in
    1857             : /// the same context that SCEVExpander is used.
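                     : ///
                     : /// For example (illustrative), two header phis that both compute the
                     : /// recurrence {0,+,1}<%L> are congruent: the later one is replaced by (a
                     : /// possibly truncated copy of) the first and queued in DeadInsts.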
    1858             : unsigned
    1859       10930 : SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
    1860             :                                   SmallVectorImpl<WeakTrackingVH> &DeadInsts,
    1861             :                                   const TargetTransformInfo *TTI) {
    1862             :   // Collect the phis; they are sorted below from widest to narrowest integer.
    1863             :   SmallVector<PHINode*, 8> Phis;
    1864       10930 :   for (PHINode &PN : L->getHeader()->phis())
    1865       15915 :     Phis.push_back(&PN);
    1866             : 
    1867       10930 :   if (TTI)
    1868       12683 :     llvm::sort(Phis.begin(), Phis.end(), [](Value *LHS, Value *RHS) {
    1869             :       // Sort pointer phis first, so Phis.back() is the narrowest integer phi, and make sure pointer < pointer = false.
    1870       29816 :       if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
    1871       20639 :         return RHS->getType()->isIntegerTy() && !LHS->getType()->isIntegerTy();
    1872        2982 :       return RHS->getType()->getPrimitiveSizeInBits() <
    1873        2982 :              LHS->getType()->getPrimitiveSizeInBits();
    1874             :     });
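                     :   // For example (illustrative), an i64 phi is now visited before an i32
                     :   // one, so the narrower phi can reuse a truncation of the wider IV below.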
    1875             : 
    1876             :   unsigned NumElim = 0;
    1877             :   DenseMap<const SCEV *, PHINode *> ExprToIVMap;
    1878             :   // Process phis from wide to narrow. Map wide phis to their truncation
    1879             :   // so narrow phis can reuse them.
    1880       42760 :   for (PHINode *Phi : Phis) {
    1881       15915 :     auto SimplifyPHINode = [&](PHINode *PN) -> Value * {
    1882       45726 :       if (Value *V = SimplifyInstruction(PN, {DL, &SE.TLI, &SE.DT, &SE.AC}))
    1883             :         return V;
    1884       15905 :       if (!SE.isSCEVable(PN->getType()))
    1885             :         return nullptr;
    1886       13896 :       auto *Const = dyn_cast<SCEVConstant>(SE.getSCEV(PN));
    1887             :       if (!Const)
    1888             :         return nullptr;
    1889           4 :       return Const->getValue();
    1890       15915 :     };
    1891             : 
    1892             :     // Fold constant phis. They may be congruent to other constant phis and
    1893             :     // would confuse the logic below that expects proper IVs.
    1894       15928 :     if (Value *V = SimplifyPHINode(Phi)) {
    1895          14 :       if (V->getType() != Phi->getType())
    1896       15847 :         continue;
    1897          13 :       Phi->replaceAllUsesWith(V);
    1898          13 :       DeadInsts.emplace_back(Phi);
    1899          13 :       ++NumElim;
    1900             :       DEBUG_WITH_TYPE(DebugType, dbgs()
    1901             :                       << "INDVARS: Eliminated constant iv: " << *Phi << '\n');
    1902          13 :       continue;
    1903             :     }
    1904             : 
    1905       15901 :     if (!SE.isSCEVable(Phi->getType()))
    1906        2009 :       continue;
    1907             : 
    1908       27784 :     PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)];
    1909       13892 :     if (!OrigPhiRef) {
    1910       13821 :       OrigPhiRef = Phi;
    1911       33566 :       if (Phi->getType()->isIntegerTy() && TTI &&
    1912        5924 :           TTI->isTruncateFree(Phi->getType(), Phis.back()->getType())) {
    1913             :         // This phi can be freely truncated to the narrowest phi type. Map the
    1914             :         // truncated expression to it so it will be reused for narrow types.
    1915             :         const SCEV *TruncExpr =
    1916         361 :           SE.getTruncateExpr(SE.getSCEV(Phi), Phis.back()->getType());
    1917         361 :         ExprToIVMap[TruncExpr] = Phi;
    1918             :       }
    1919       13821 :       continue;
    1920             :     }
    1921             : 
    1922             :     // Replacing a pointer phi with an integer phi or vice-versa doesn't make
    1923             :     // sense.
    1924         213 :     if (OrigPhiRef->getType()->isPointerTy() != Phi->getType()->isPointerTy())
    1925           2 :       continue;
    1926             : 
    1927          69 :     if (BasicBlock *LatchBlock = L->getLoopLatch()) {
    1928          69 :       Instruction *OrigInc = dyn_cast<Instruction>(
    1929             :           OrigPhiRef->getIncomingValueForBlock(LatchBlock));
    1930             :       Instruction *IsomorphicInc =
    1931         138 :           dyn_cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock));
    1932             : 
    1933          69 :       if (OrigInc && IsomorphicInc) {
    1934             :         // If this phi has the same width but is more canonical, replace the
    1935             :         // original with it. As part of the "more canonical" determination,
    1936             :         // respect a prior decision to use an IV chain.
    1937          67 :         if (OrigPhiRef->getType() == Phi->getType() &&
    1938         111 :             !(ChainedPhis.count(Phi) ||
    1939         111 :               isExpandedAddRecExprPHI(OrigPhiRef, OrigInc, L)) &&
    1940          73 :             (ChainedPhis.count(Phi) ||
    1941           4 :              isExpandedAddRecExprPHI(Phi, IsomorphicInc, L))) {
    1942             :           std::swap(OrigPhiRef, Phi);
    1943             :           std::swap(OrigInc, IsomorphicInc);
    1944             :         }
    1945             :         // Replacing the congruent phi is sufficient because acyclic
    1946             :         // redundancy elimination, CSE/GVN, should handle the
    1947             :         // rest. However, once SCEV proves that a phi is congruent,
    1948             :         // it's often the head of an IV user cycle that is isomorphic
    1949             :         // with the original phi. It's worth eagerly cleaning up the
    1950             :         // common case of a single IV increment so that DeleteDeadPHIs
    1951             :         // can remove cycles that had postinc uses.
    1952             :         const SCEV *TruncExpr =
    1953          67 :             SE.getTruncateOrNoop(SE.getSCEV(OrigInc), IsomorphicInc->getType());
    1954         132 :         if (OrigInc != IsomorphicInc &&
    1955         130 :             TruncExpr == SE.getSCEV(IsomorphicInc) &&
    1956         196 :             SE.LI.replacementPreservesLCSSAForm(IsomorphicInc, OrigInc) &&
    1957          64 :             hoistIVInc(OrigInc, IsomorphicInc)) {
    1958             :           DEBUG_WITH_TYPE(DebugType,
    1959             :                           dbgs() << "INDVARS: Eliminated congruent iv.inc: "
    1960             :                                  << *IsomorphicInc << '\n');
    1961             :           Value *NewInc = OrigInc;
    1962          53 :           if (OrigInc->getType() != IsomorphicInc->getType()) {
    1963             :             Instruction *IP = nullptr;
    1964             :             if (PHINode *PN = dyn_cast<PHINode>(OrigInc))
    1965           1 :               IP = &*PN->getParent()->getFirstInsertionPt();
    1966             :             else
    1967             :               IP = OrigInc->getNextNode();
    1968             : 
    1969          19 :             IRBuilder<> Builder(IP);
    1970          38 :             Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc());
    1971          38 :             NewInc = Builder.CreateTruncOrBitCast(
    1972             :                 OrigInc, IsomorphicInc->getType(), IVName);
    1973             :           }
    1974          53 :           IsomorphicInc->replaceAllUsesWith(NewInc);
    1975          53 :           DeadInsts.emplace_back(IsomorphicInc);
    1976             :         }
    1977             :       }
    1978             :     }
    1979             :     DEBUG_WITH_TYPE(DebugType, dbgs() << "INDVARS: Eliminated congruent iv: "
    1980             :                                       << *Phi << '\n');
    1981          69 :     ++NumElim;
    1982          69 :     Value *NewIV = OrigPhiRef;
    1983          69 :     if (OrigPhiRef->getType() != Phi->getType()) {
    1984          23 :       IRBuilder<> Builder(&*L->getHeader()->getFirstInsertionPt());
    1985          46 :       Builder.SetCurrentDebugLocation(Phi->getDebugLoc());
    1986          46 :       NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName);
    1987             :     }
    1988          69 :     Phi->replaceAllUsesWith(NewIV);
    1989          69 :     DeadInsts.emplace_back(Phi);
    1990             :   }
    1991       10930 :   return NumElim;
    1992             : }
    1993             : 
    1994         464 : Value *SCEVExpander::getExactExistingExpansion(const SCEV *S,
    1995             :                                                const Instruction *At, Loop *L) {
    1996             :   Optional<ScalarEvolution::ValueOffsetPair> VO =
    1997         464 :       getRelatedExistingExpansion(S, At, L);
    1998         464 :   if (VO && VO.getValue().second == nullptr)
    1999           6 :     return VO.getValue().first;
    2000             :   return nullptr;
    2001             : }
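                     : // Note: unlike getRelatedExistingExpansion, the wrapper above only accepts
                     : // a candidate whose offset is null, i.e. an existing value that matches S
                     : // exactly rather than merely up to a constant offset.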
    2002             : 
    2003             : Optional<ScalarEvolution::ValueOffsetPair>
    2004        2393 : SCEVExpander::getRelatedExistingExpansion(const SCEV *S, const Instruction *At,
    2005             :                                           Loop *L) {
    2006             :   using namespace llvm::PatternMatch;
    2007             : 
    2008             :   SmallVector<BasicBlock *, 4> ExitingBlocks;
    2009        2393 :   L->getExitingBlocks(ExitingBlocks);
    2010             : 
    2011             :   // Look for a suitable value in simple conditions at the loop exits.
    2012        8175 :   for (BasicBlock *BB : ExitingBlocks) {
    2013             :     ICmpInst::Predicate Pred;
    2014             :     Instruction *LHS, *RHS;
    2015             :     BasicBlock *TrueBB, *FalseBB;
    2016             : 
    2017        2911 :     if (!match(BB->getTerminator(),
    2018        2911 :                m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)),
    2019             :                     TrueBB, FalseBB)))
    2020        2491 :       continue;
    2021             : 
    2022         420 :     if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At))
    2023           2 :       return ScalarEvolution::ValueOffsetPair(LHS, nullptr);
    2024             : 
    2025         418 :     if (SE.getSCEV(RHS) == S && SE.DT.dominates(RHS, At))
    2026          18 :       return ScalarEvolution::ValueOffsetPair(RHS, nullptr);
    2027             :   }
    2028             : 
    2029             :   // Fall back to expand's logic for reusing a previously computed Value
    2030             :   // from the ExprValueMap.
    2031        2373 :   ScalarEvolution::ValueOffsetPair VO = FindValueInExprValueMap(S, At);
    2032        2373 :   if (VO.first)
    2033             :     return VO;
    2034             : 
    2035             :   // There is potential to make this significantly smarter, but this simple
    2036             :   // heuristic already gets some interesting cases.
    2037             : 
    2038             :   // Cannot find a suitable value.
    2039             :   return None;
    2040             : }
    2041             : 
    2042        4886 : bool SCEVExpander::isHighCostExpansionHelper(
    2043             :     const SCEV *S, Loop *L, const Instruction *At,
    2044             :     SmallPtrSetImpl<const SCEV *> &Processed) {
    2045             : 
    2046             :   // If we can find an existing value for this SCEV that is available at the
    2047             :   // point "At", then consider the expression cheap.
    2048        4886 :   if (At && getRelatedExistingExpansion(S, At, L))
    2049             :     return false;
    2050             : 
    2051             :   // Zero/One operand expressions
    2052        9264 :   switch (S->getSCEVType()) {
    2053             :   case scUnknown:
    2054             :   case scConstant:
    2055             :     return false;
    2056          15 :   case scTruncate:
    2057          15 :     return isHighCostExpansionHelper(cast<SCEVTruncateExpr>(S)->getOperand(),
    2058          15 :                                      L, At, Processed);
    2059          27 :   case scZeroExtend:
    2060          27 :     return isHighCostExpansionHelper(cast<SCEVZeroExtendExpr>(S)->getOperand(),
    2061          27 :                                      L, At, Processed);
    2062          11 :   case scSignExtend:
    2063          11 :     return isHighCostExpansionHelper(cast<SCEVSignExtendExpr>(S)->getOperand(),
    2064          11 :                                      L, At, Processed);
    2065             :   }
    2066             : 
    2067        1740 :   if (!Processed.insert(S).second)
    2068             :     return false;
    2069             : 
    2070             :   if (auto *UDivExpr = dyn_cast<SCEVUDivExpr>(S)) {
    2071             :     // If the divisor is a power of two and the SCEV type fits in a native
    2072             :     // integer, consider the division cheap irrespective of whether it occurs in
    2073             :     // the user code since it can be lowered into a right shift.
    2074         339 :     if (auto *SC = dyn_cast<SCEVConstant>(UDivExpr->getRHS()))
    2075         336 :       if (SC->getAPInt().isPowerOf2()) {
    2076             :         const DataLayout &DL =
    2077         238 :             L->getHeader()->getParent()->getParent()->getDataLayout();
    2078             :         unsigned Width = cast<IntegerType>(UDivExpr->getType())->getBitWidth();
    2079         238 :         return DL.isIllegalInteger(Width);
    2080             :       }
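                     :     // For example (illustrative), on a target where i32 is legal,
                     :     //   %q = udiv i32 %n, 8
                     :     // lowers to a single shift (%q = lshr i32 %n, 3) and is treated as cheap.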
    2081             : 
    2082             :     // UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or
    2083             :     // HowManyLessThans produced to compute a precise expression, rather than a
    2084             :     // UDiv from the user's code. If we can't find a UDiv in the code with some
    2085             :     // simple searching, assume the former and consider the UDivExpr
    2086             :     // expensive to compute.
    2087         101 :     BasicBlock *ExitingBB = L->getExitingBlock();
    2088         101 :     if (!ExitingBB)
    2089             :       return true;
    2090             : 
    2091             :     // At the beginning of this function we already tried to find an existing
    2092             :     // value for plain 'S'. Now try to look up 'S + 1', since it is a common
    2093             :     // pattern involving division. This is just a simple search heuristic.
    2094         101 :     if (!At)
    2095             :       At = &ExitingBB->back();
    2096         202 :     if (!getRelatedExistingExpansion(
    2097         202 :             SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), At, L))
    2098             :       return true;
    2099             :   }
    2100             : 
    2101             :   // HowManyLessThans uses a Max expression whenever the loop is not guarded by
    2102             :   // the exit condition.
    2103        1402 :   if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S))
    2104             :     return true;
    2105             : 
    2106             :   // Recurse past nary expressions, which commonly occur in the
    2107             :   // BackedgeTakenCount. They may already exist in program code, and if not,
    2108             :   // they are not too expensive to rematerialize.
    2109             :   if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(S)) {
    2110        6148 :     for (auto *Op : NAry->operands())
    2111        2690 :       if (isHighCostExpansionHelper(Op, L, At, Processed))
    2112             :         return true;
    2113             :   }
    2114             : 
    2115             :   // If we haven't recognized an expensive SCEV pattern, assume it's an
    2116             :   // expression produced by program code.
    2117             :   return false;
    2118             : }
    2119             : 
    2120         910 : Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred,
    2121             :                                             Instruction *IP) {
    2122             :   assert(IP);
    2123         910 :   switch (Pred->getKind()) {
    2124             :   case SCEVPredicate::P_Union:
    2125         821 :     return expandUnionPredicate(cast<SCEVUnionPredicate>(Pred), IP);
    2126             :   case SCEVPredicate::P_Equal:
    2127          17 :     return expandEqualPredicate(cast<SCEVEqualPredicate>(Pred), IP);
    2128             :   case SCEVPredicate::P_Wrap: {
    2129             :     auto *AddRecPred = cast<SCEVWrapPredicate>(Pred);
    2130          72 :     return expandWrapPredicate(AddRecPred, IP);
    2131             :   }
    2132             :   }
    2133           0 :   llvm_unreachable("Unknown SCEV predicate type");
    2134             : }
    2135             : 
    2136          17 : Value *SCEVExpander::expandEqualPredicate(const SCEVEqualPredicate *Pred,
    2137             :                                           Instruction *IP) {
    2138          17 :   Value *Expr0 = expandCodeFor(Pred->getLHS(), Pred->getLHS()->getType(), IP);
    2139          17 :   Value *Expr1 = expandCodeFor(Pred->getRHS(), Pred->getRHS()->getType(), IP);
    2140             : 
    2141          17 :   Builder.SetInsertPoint(IP);
    2142          34 :   auto *I = Builder.CreateICmpNE(Expr0, Expr1, "ident.check");
    2143          17 :   return I;
    2144             : }
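                     : // For example (illustrative), a predicate asserting %a == %b expands to
                     : //   %ident.check = icmp ne i64 %a, %b
                     : // so a 'true' result at runtime means the assumed equality was violated.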
    2145             : 
    2146          72 : Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
    2147             :                                            Instruction *Loc, bool Signed) {
    2148             :   assert(AR->isAffine() && "Cannot generate RT check for "
    2149             :                            "non-affine expression");
    2150             : 
    2151         144 :   SCEVUnionPredicate Pred;
    2152             :   const SCEV *ExitCount =
    2153          72 :       SE.getPredicatedBackedgeTakenCount(AR->getLoop(), Pred);
    2154             : 
    2155             :   assert(ExitCount != SE.getCouldNotCompute() && "Invalid loop count");
    2156             : 
    2157          72 :   const SCEV *Step = AR->getStepRecurrence(SE);
    2158          72 :   const SCEV *Start = AR->getStart();
    2159             : 
    2160          72 :   unsigned SrcBits = SE.getTypeSizeInBits(ExitCount->getType());
    2161         144 :   unsigned DstBits = SE.getTypeSizeInBits(AR->getType());
    2162             : 
    2163             :   // The expression {Start,+,Step} has nusw/nssw if
    2164             :   //   Step < 0, Start - |Step| * Backedge <= Start
    2165             :   //   Step >= 0, Start + |Step| * Backedge >= Start
    2166             :   // and |Step| * Backedge doesn't unsigned overflow.
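                     :   //
                     :   // Worked example (illustrative): for {250,+,10} over i8 with a backedge
                     :   // taken count of 3, |Step| * Backedge = 30 and 250 + 30 wraps i8 to 24,
                     :   // which is < 250, so the emitted check evaluates to true (overflow).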
    2167             : 
    2168          72 :   IntegerType *CountTy = IntegerType::get(Loc->getContext(), SrcBits);
    2169          72 :   Builder.SetInsertPoint(Loc);
    2170          72 :   Value *TripCountVal = expandCodeFor(ExitCount, CountTy, Loc);
    2171             : 
    2172             :   IntegerType *Ty =
    2173         144 :       IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(AR->getType()));
    2174             : 
    2175          72 :   Value *StepValue = expandCodeFor(Step, Ty, Loc);
    2176          72 :   Value *NegStepValue = expandCodeFor(SE.getNegativeSCEV(Step), Ty, Loc);
    2177          72 :   Value *StartValue = expandCodeFor(Start, Ty, Loc);
    2178             : 
    2179             :   ConstantInt *Zero =
    2180         144 :       ConstantInt::get(Loc->getContext(), APInt::getNullValue(DstBits));
    2181             : 
    2182          72 :   Builder.SetInsertPoint(Loc);
    2183             :   // Compute |Step|
    2184         144 :   Value *StepCompare = Builder.CreateICmp(ICmpInst::ICMP_SLT, StepValue, Zero);
    2185          72 :   Value *AbsStep = Builder.CreateSelect(StepCompare, NegStepValue, StepValue);
    2186             : 
    2187             :   // Get the backedge-taken count and truncate or extend it to the AR type.
    2188          72 :   Value *TruncTripCount = Builder.CreateZExtOrTrunc(TripCountVal, Ty);
    2189         144 :   auto *MulF = Intrinsic::getDeclaration(Loc->getModule(),
    2190          72 :                                          Intrinsic::umul_with_overflow, Ty);
    2191             : 
    2192             :   // Compute |Step| * Backedge
    2193         144 :   CallInst *Mul = Builder.CreateCall(MulF, {AbsStep, TruncTripCount}, "mul");
    2194         144 :   Value *MulV = Builder.CreateExtractValue(Mul, 0, "mul.result");
    2195         144 :   Value *OfMul = Builder.CreateExtractValue(Mul, 1, "mul.overflow");
    2196             : 
    2197             :   // Compute the wrap conditions:
    2198             :   //   Step >= 0: Start + |Step| * Backedge < Start
    2199             :   //   Step <  0: Start - |Step| * Backedge > Start
    2200          72 :   Value *Add = Builder.CreateAdd(StartValue, MulV);
    2201          72 :   Value *Sub = Builder.CreateSub(StartValue, MulV);
    2202             : 
    2203         144 :   Value *EndCompareGT = Builder.CreateICmp(
    2204          72 :       Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue);
    2205             : 
    2206         144 :   Value *EndCompareLT = Builder.CreateICmp(
    2207          72 :       Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, Add, StartValue);
    2208             : 
    2209             :   // Select the answer based on the sign of Step.
    2210             :   Value *EndCheck =
    2211          72 :       Builder.CreateSelect(StepCompare, EndCompareGT, EndCompareLT);
    2212             : 
    2213             :   // If the backedge taken count type is larger than the AR type,
    2214             :   // check that we don't drop any bits by truncating it. If we are
    2215             :   // dropping bits, then we have overflow (unless the step is zero).
    2216          72 :   if (SE.getTypeSizeInBits(CountTy) > SE.getTypeSizeInBits(Ty)) {
    2217          98 :     auto MaxVal = APInt::getMaxValue(DstBits).zext(SrcBits);
    2218             :     auto *BackedgeCheck =
    2219          49 :         Builder.CreateICmp(ICmpInst::ICMP_UGT, TripCountVal,
    2220          49 :                            ConstantInt::get(Loc->getContext(), MaxVal));
    2221          49 :     BackedgeCheck = Builder.CreateAnd(
    2222             :         BackedgeCheck, Builder.CreateICmp(ICmpInst::ICMP_NE, StepValue, Zero));
    2223             : 
    2224          49 :     EndCheck = Builder.CreateOr(EndCheck, BackedgeCheck);
    2225             :   }
    2226             : 
    2227          72 :   EndCheck = Builder.CreateOr(EndCheck, OfMul);
    2228          72 :   return EndCheck;
    2229             : }
    2230             : 
    2231          72 : Value *SCEVExpander::expandWrapPredicate(const SCEVWrapPredicate *Pred,
    2232             :                                          Instruction *IP) {
    2233          72 :   const auto *A = cast<SCEVAddRecExpr>(Pred->getExpr());
    2234             :   Value *NSSWCheck = nullptr, *NUSWCheck = nullptr;
    2235             : 
    2236             :   // Add a check for NUSW
    2237          72 :   if (Pred->getFlags() & SCEVWrapPredicate::IncrementNUSW)
    2238          39 :     NUSWCheck = generateOverflowCheck(A, IP, false);
    2239             : 
    2240             :   // Add a check for NSSW
    2241          72 :   if (Pred->getFlags() & SCEVWrapPredicate::IncrementNSSW)
    2242          33 :     NSSWCheck = generateOverflowCheck(A, IP, true);
    2243             : 
    2244          72 :   if (NUSWCheck && NSSWCheck)
    2245           0 :     return Builder.CreateOr(NUSWCheck, NSSWCheck);
    2246             : 
    2247          72 :   if (NUSWCheck)
    2248             :     return NUSWCheck;
    2249             : 
    2250          33 :   if (NSSWCheck)
    2251             :     return NSSWCheck;
    2252             : 
    2253           0 :   return ConstantInt::getFalse(IP->getContext());
    2254             : }
    2255             : 
    2256         821 : Value *SCEVExpander::expandUnionPredicate(const SCEVUnionPredicate *Union,
    2257             :                                           Instruction *IP) {
    2258         821 :   auto *BoolType = IntegerType::get(IP->getContext(), 1);
    2259         821 :   Value *Check = ConstantInt::getNullValue(BoolType);
    2260             : 
    2261             :   // Loop over all checks in this set.
    2262         999 :   for (auto Pred : Union->getPredicates()) {
    2263          89 :     auto *NextCheck = expandCodeForPredicate(Pred, IP);
    2264          89 :     Builder.SetInsertPoint(IP);
    2265         178 :     Check = Builder.CreateOr(Check, NextCheck);
    2266             :   }
    2267             : 
    2268         821 :   return Check;
    2269             : }
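                     : // For example (illustrative), a union of predicate checks %c1 and %c2
                     : // expands to a chain of i1 'or's:
                     : //   %u1 = or i1 false, %c1
                     : //   %u2 = or i1 %u1, %c2
                     : // where a 'true' result means at least one predicate was violated.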
    2270             : 
    2271             : namespace {
    2272             : // Search for a SCEV subexpression that is not safe to expand.  Any expression
    2273             : // that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely
    2274             : // UDiv expressions. We don't know if the UDiv is derived from an IR divide
    2275             : // instruction, but the important thing is that we prove the denominator is
    2276             : // nonzero before expansion.
    2277             : //
    2278             : // IVUsers already checks that IV-derived expressions are safe. So this check is
    2279             : // only needed when the expression includes some subexpression that is not IV
    2280             : // derived.
    2281             : //
    2282             : // Currently, we only allow division by a nonzero constant here. If this is
    2283             : // inadequate, we could easily allow division by SCEVUnknown by using
    2284             : // ValueTracking to check isKnownNonZero().
    2285             : //
    2286             : // We cannot generally expand recurrences unless the step dominates the loop
    2287             : // header. The expander handles the special case of affine recurrences by
    2288             : // scaling the recurrence outside the loop, but this technique isn't generally
    2289             : // applicable. Expanding a nested recurrence outside a loop requires computing
    2290             : // binomial coefficients. This could be done, but the recurrence has to be in a
    2291             : // perfectly reduced form, which can't be guaranteed.
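                     : //
                     : // For example (illustrative), (%a /u 4) is considered safe to expand,
                     : // while (%a /u %b) is rejected because %b is not a known-nonzero constant.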
    2292             : struct SCEVFindUnsafe {
    2293             :   ScalarEvolution &SE;
    2294             :   bool IsUnsafe;
    2295             : 
    2296       12309 :   SCEVFindUnsafe(ScalarEvolution &se): SE(se), IsUnsafe(false) {}
    2297             : 
    2298       45447 :   bool follow(const SCEV *S) {
    2299             :     if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
    2300         601 :       const SCEVConstant *SC = dyn_cast<SCEVConstant>(D->getRHS());
    2301        1138 :       if (!SC || SC->getValue()->isZero()) {
    2302          32 :         IsUnsafe = true;
    2303          32 :         return false;
    2304             :       }
    2305             :     }
    2306             :     if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
    2307        9490 :       const SCEV *Step = AR->getStepRecurrence(SE);
    2308        9496 :       if (!AR->isAffine() && !SE.dominates(Step, AR->getLoop()->getHeader())) {
    2309           0 :         IsUnsafe = true;
    2310           0 :         return false;
    2311             :       }
    2312             :     }
    2313             :     return true;
    2314             :   }
    2315             :   bool isDone() const { return IsUnsafe; }
    2316             : };
    2317             : }
    2318             : 
    2319             : namespace llvm {
    2320       12309 : bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE) {
    2321             :   SCEVFindUnsafe Search(SE);
    2322       12309 :   visitAll(S, Search);
    2323       12309 :   return !Search.IsUnsafe;
    2324             : }
    2325             : 
    2326         189 : bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint,
    2327             :                       ScalarEvolution &SE) {
    2328         189 :   return isSafeToExpand(S, SE) && SE.dominates(S, InsertionPoint->getParent());
    2329             : }
    2330             : }
