LLVM API Documentation

CodeGenPrepare.cpp
Go to the documentation of this file.
00001 //===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This pass munges the code in the input function to better prepare it for
00011 // SelectionDAG-based code generation. This works around limitations in it's
00012 // basic-block-at-a-time approach. It should eventually be removed.
00013 //
00014 //===----------------------------------------------------------------------===//
00015 
00016 #include "llvm/CodeGen/Passes.h"
00017 #include "llvm/ADT/DenseMap.h"
00018 #include "llvm/ADT/SmallSet.h"
00019 #include "llvm/ADT/Statistic.h"
00020 #include "llvm/Analysis/InstructionSimplify.h"
00021 #include "llvm/Analysis/TargetLibraryInfo.h"
00022 #include "llvm/Analysis/TargetTransformInfo.h"
00023 #include "llvm/IR/CallSite.h"
00024 #include "llvm/IR/Constants.h"
00025 #include "llvm/IR/DataLayout.h"
00026 #include "llvm/IR/DerivedTypes.h"
00027 #include "llvm/IR/Dominators.h"
00028 #include "llvm/IR/Function.h"
00029 #include "llvm/IR/GetElementPtrTypeIterator.h"
00030 #include "llvm/IR/IRBuilder.h"
00031 #include "llvm/IR/InlineAsm.h"
00032 #include "llvm/IR/Instructions.h"
00033 #include "llvm/IR/IntrinsicInst.h"
00034 #include "llvm/IR/MDBuilder.h"
00035 #include "llvm/IR/PatternMatch.h"
00036 #include "llvm/IR/Statepoint.h"
00037 #include "llvm/IR/ValueHandle.h"
00038 #include "llvm/IR/ValueMap.h"
00039 #include "llvm/Pass.h"
00040 #include "llvm/Support/CommandLine.h"
00041 #include "llvm/Support/Debug.h"
00042 #include "llvm/Support/raw_ostream.h"
00043 #include "llvm/Target/TargetLowering.h"
00044 #include "llvm/Target/TargetSubtargetInfo.h"
00045 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
00046 #include "llvm/Transforms/Utils/BuildLibCalls.h"
00047 #include "llvm/Transforms/Utils/BypassSlowDivision.h"
00048 #include "llvm/Transforms/Utils/Local.h"
00049 #include "llvm/Transforms/Utils/SimplifyLibCalls.h"
00050 using namespace llvm;
00051 using namespace llvm::PatternMatch;
00052 
00053 #define DEBUG_TYPE "codegenprepare"
00054 
00055 STATISTIC(NumBlocksElim, "Number of blocks eliminated");
00056 STATISTIC(NumPHIsElim,   "Number of trivial PHIs eliminated");
00057 STATISTIC(NumGEPsElim,   "Number of GEPs converted to casts");
00058 STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
00059                       "sunken Cmps");
00060 STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
00061                        "of sunken Casts");
00062 STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
00063                           "computations were sunk");
00064 STATISTIC(NumExtsMoved,  "Number of [s|z]ext instructions combined with loads");
00065 STATISTIC(NumExtUses,    "Number of uses of [s|z]ext instructions optimized");
00066 STATISTIC(NumRetsDup,    "Number of return instructions duplicated");
00067 STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
00068 STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
00069 STATISTIC(NumAndCmpsMoved, "Number of and/cmp's pushed into branches");
00070 STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
00071 
00072 static cl::opt<bool> DisableBranchOpts(
00073   "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
00074   cl::desc("Disable branch optimizations in CodeGenPrepare"));
00075 
00076 static cl::opt<bool>
00077     DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
00078                   cl::desc("Disable GC optimizations in CodeGenPrepare"));
00079 
00080 static cl::opt<bool> DisableSelectToBranch(
00081   "disable-cgp-select2branch", cl::Hidden, cl::init(false),
00082   cl::desc("Disable select to branch conversion."));
00083 
00084 static cl::opt<bool> AddrSinkUsingGEPs(
00085   "addr-sink-using-gep", cl::Hidden, cl::init(false),
00086   cl::desc("Address sinking in CGP using GEPs."));
00087 
00088 static cl::opt<bool> EnableAndCmpSinking(
00089    "enable-andcmp-sinking", cl::Hidden, cl::init(true),
00090    cl::desc("Enable sinkinig and/cmp into branches."));
00091 
00092 static cl::opt<bool> DisableStoreExtract(
00093     "disable-cgp-store-extract", cl::Hidden, cl::init(false),
00094     cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));
00095 
00096 static cl::opt<bool> StressStoreExtract(
00097     "stress-cgp-store-extract", cl::Hidden, cl::init(false),
00098     cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));
00099 
00100 static cl::opt<bool> DisableExtLdPromotion(
00101     "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
00102     cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
00103              "CodeGenPrepare"));
00104 
00105 static cl::opt<bool> StressExtLdPromotion(
00106     "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
00107     cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
00108              "optimization in CodeGenPrepare"));
00109 
00110 namespace {
00111 typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
00112 struct TypeIsSExt {
00113   Type *Ty;
00114   bool IsSExt;
00115   TypeIsSExt(Type *Ty, bool IsSExt) : Ty(Ty), IsSExt(IsSExt) {}
00116 };
00117 typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy;
00118 class TypePromotionTransaction;
00119 
00120   class CodeGenPrepare : public FunctionPass {
00121     /// TLI - Keep a pointer of a TargetLowering to consult for determining
00122     /// transformation profitability.
00123     const TargetMachine *TM;
00124     const TargetLowering *TLI;
00125     const TargetTransformInfo *TTI;
00126     const TargetLibraryInfo *TLInfo;
00127     DominatorTree *DT;
00128 
00129     /// CurInstIterator - As we scan instructions optimizing them, this is the
00130     /// next instruction to optimize.  Xforms that can invalidate this should
00131     /// update it.
00132     BasicBlock::iterator CurInstIterator;
00133 
00134     /// Keeps track of non-local addresses that have been sunk into a block.
00135     /// This allows us to avoid inserting duplicate code for blocks with
00136     /// multiple load/stores of the same address.
00137     ValueMap<Value*, Value*> SunkAddrs;
00138 
00139     /// Keeps track of all truncates inserted for the current function.
00140     SetOfInstrs InsertedTruncsSet;
00141     /// Keeps track of the type of the related instruction before their
00142     /// promotion for the current function.
00143     InstrToOrigTy PromotedInsts;
00144 
00145     /// ModifiedDT - If CFG is modified in anyway, dominator tree may need to
00146     /// be updated.
00147     bool ModifiedDT;
00148 
00149     /// OptSize - True if optimizing for size.
00150     bool OptSize;
00151 
00152   public:
00153     static char ID; // Pass identification, replacement for typeid
00154     explicit CodeGenPrepare(const TargetMachine *TM = nullptr)
00155         : FunctionPass(ID), TM(TM), TLI(nullptr), TTI(nullptr) {
00156         initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
00157       }
00158     bool runOnFunction(Function &F) override;
00159 
00160     const char *getPassName() const override { return "CodeGen Prepare"; }
00161 
00162     void getAnalysisUsage(AnalysisUsage &AU) const override {
00163       AU.addPreserved<DominatorTreeWrapperPass>();
00164       AU.addRequired<TargetLibraryInfoWrapperPass>();
00165       AU.addRequired<TargetTransformInfoWrapperPass>();
00166     }
00167 
00168   private:
00169     bool EliminateFallThrough(Function &F);
00170     bool EliminateMostlyEmptyBlocks(Function &F);
00171     bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
00172     void EliminateMostlyEmptyBlock(BasicBlock *BB);
00173     bool OptimizeBlock(BasicBlock &BB, bool& ModifiedDT);
00174     bool OptimizeInst(Instruction *I, bool& ModifiedDT);
00175     bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy);
00176     bool OptimizeInlineAsmInst(CallInst *CS);
00177     bool OptimizeCallInst(CallInst *CI, bool& ModifiedDT);
00178     bool MoveExtToFormExtLoad(Instruction *&I);
00179     bool OptimizeExtUses(Instruction *I);
00180     bool OptimizeSelectInst(SelectInst *SI);
00181     bool OptimizeShuffleVectorInst(ShuffleVectorInst *SI);
00182     bool OptimizeExtractElementInst(Instruction *Inst);
00183     bool DupRetToEnableTailCallOpts(BasicBlock *BB);
00184     bool PlaceDbgValues(Function &F);
00185     bool sinkAndCmp(Function &F);
00186     bool ExtLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI,
00187                         Instruction *&Inst,
00188                         const SmallVectorImpl<Instruction *> &Exts,
00189                         unsigned CreatedInst);
00190     bool splitBranchCondition(Function &F);
00191     bool simplifyOffsetableRelocate(Instruction &I);
00192   };
00193 }
00194 
00195 char CodeGenPrepare::ID = 0;
00196 INITIALIZE_TM_PASS(CodeGenPrepare, "codegenprepare",
00197                    "Optimize for code generation", false, false)
00198 
00199 FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) {
00200   return new CodeGenPrepare(TM);
00201 }
00202 
00203 bool CodeGenPrepare::runOnFunction(Function &F) {
00204   if (skipOptnoneFunction(F))
00205     return false;
00206 
00207   bool EverMadeChange = false;
00208   // Clear per function information.
00209   InsertedTruncsSet.clear();
00210   PromotedInsts.clear();
00211 
00212   ModifiedDT = false;
00213   if (TM)
00214     TLI = TM->getSubtargetImpl(F)->getTargetLowering();
00215   TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
00216   TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI();
00217   DominatorTreeWrapperPass *DTWP =
00218       getAnalysisIfAvailable<DominatorTreeWrapperPass>();
00219   DT = DTWP ? &DTWP->getDomTree() : nullptr;
00220   OptSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
00221                                            Attribute::OptimizeForSize);
00222 
00223   /// This optimization identifies DIV instructions that can be
00224   /// profitably bypassed and carried out with a shorter, faster divide.
00225   if (!OptSize && TLI && TLI->isSlowDivBypassed()) {
00226     const DenseMap<unsigned int, unsigned int> &BypassWidths =
00227        TLI->getBypassSlowDivWidths();
00228     for (Function::iterator I = F.begin(); I != F.end(); I++)
00229       EverMadeChange |= bypassSlowDivision(F, I, BypassWidths);
00230   }
00231 
00232   // Eliminate blocks that contain only PHI nodes and an
00233   // unconditional branch.
00234   EverMadeChange |= EliminateMostlyEmptyBlocks(F);
00235 
00236   // llvm.dbg.value is far away from the value then iSel may not be able
00237   // handle it properly. iSel will drop llvm.dbg.value if it can not
00238   // find a node corresponding to the value.
00239   EverMadeChange |= PlaceDbgValues(F);
00240 
00241   // If there is a mask, compare against zero, and branch that can be combined
00242   // into a single target instruction, push the mask and compare into branch
00243   // users. Do this before OptimizeBlock -> OptimizeInst ->
00244   // OptimizeCmpExpression, which perturbs the pattern being searched for.
00245   if (!DisableBranchOpts) {
00246     EverMadeChange |= sinkAndCmp(F);
00247     EverMadeChange |= splitBranchCondition(F);
00248   }
00249 
00250   bool MadeChange = true;
00251   while (MadeChange) {
00252     MadeChange = false;
00253     for (Function::iterator I = F.begin(); I != F.end(); ) {
00254       BasicBlock *BB = I++;
00255       bool ModifiedDTOnIteration = false;
00256       MadeChange |= OptimizeBlock(*BB, ModifiedDTOnIteration);
00257 
00258       // Restart BB iteration if the dominator tree of the Function was changed
00259       ModifiedDT |= ModifiedDTOnIteration;
00260       if (ModifiedDTOnIteration)
00261         break;
00262     }
00263     EverMadeChange |= MadeChange;
00264   }
00265 
00266   SunkAddrs.clear();
00267 
00268   if (!DisableBranchOpts) {
00269     MadeChange = false;
00270     SmallPtrSet<BasicBlock*, 8> WorkList;
00271     for (BasicBlock &BB : F) {
00272       SmallVector<BasicBlock *, 2> Successors(succ_begin(&BB), succ_end(&BB));
00273       MadeChange |= ConstantFoldTerminator(&BB, true);
00274       if (!MadeChange) continue;
00275 
00276       for (SmallVectorImpl<BasicBlock*>::iterator
00277              II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
00278         if (pred_begin(*II) == pred_end(*II))
00279           WorkList.insert(*II);
00280     }
00281 
00282     // Delete the dead blocks and any of their dead successors.
00283     MadeChange |= !WorkList.empty();
00284     while (!WorkList.empty()) {
00285       BasicBlock *BB = *WorkList.begin();
00286       WorkList.erase(BB);
00287       SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
00288 
00289       DeleteDeadBlock(BB);
00290 
00291       for (SmallVectorImpl<BasicBlock*>::iterator
00292              II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
00293         if (pred_begin(*II) == pred_end(*II))
00294           WorkList.insert(*II);
00295     }
00296 
00297     // Merge pairs of basic blocks with unconditional branches, connected by
00298     // a single edge.
00299     if (EverMadeChange || MadeChange)
00300       MadeChange |= EliminateFallThrough(F);
00301 
00302     if (MadeChange)
00303       ModifiedDT = true;
00304     EverMadeChange |= MadeChange;
00305   }
00306 
00307   if (!DisableGCOpts) {
00308     SmallVector<Instruction *, 2> Statepoints;
00309     for (BasicBlock &BB : F)
00310       for (Instruction &I : BB)
00311         if (isStatepoint(I))
00312           Statepoints.push_back(&I);
00313     for (auto &I : Statepoints)
00314       EverMadeChange |= simplifyOffsetableRelocate(*I);
00315   }
00316 
00317   if (ModifiedDT && DT)
00318     DT->recalculate(F);
00319 
00320   return EverMadeChange;
00321 }
00322 
00323 /// EliminateFallThrough - Merge basic blocks which are connected
00324 /// by a single edge, where one of the basic blocks has a single successor
00325 /// pointing to the other basic block, which has a single predecessor.
00326 bool CodeGenPrepare::EliminateFallThrough(Function &F) {
00327   bool Changed = false;
00328   // Scan all of the blocks in the function, except for the entry block.
00329   for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) {
00330     BasicBlock *BB = I++;
00331     // If the destination block has a single pred, then this is a trivial
00332     // edge, just collapse it.
00333     BasicBlock *SinglePred = BB->getSinglePredecessor();
00334 
00335     // Don't merge if BB's address is taken.
00336     if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue;
00337 
00338     BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
00339     if (Term && !Term->isConditional()) {
00340       Changed = true;
00341       DEBUG(dbgs() << "To merge:\n"<< *SinglePred << "\n\n\n");
00342       // Remember if SinglePred was the entry block of the function.
00343       // If so, we will need to move BB back to the entry position.
00344       bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
00345       MergeBasicBlockIntoOnlyPred(BB, DT);
00346 
00347       if (isEntry && BB != &BB->getParent()->getEntryBlock())
00348         BB->moveBefore(&BB->getParent()->getEntryBlock());
00349 
00350       // We have erased a block. Update the iterator.
00351       I = BB;
00352     }
00353   }
00354   return Changed;
00355 }
00356 
00357 /// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes,
00358 /// debug info directives, and an unconditional branch.  Passes before isel
00359 /// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for
00360 /// isel.  Start by eliminating these blocks so we can split them the way we
00361 /// want them.
00362 bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
00363   bool MadeChange = false;
00364   // Note that this intentionally skips the entry block.
00365   for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) {
00366     BasicBlock *BB = I++;
00367 
00368     // If this block doesn't end with an uncond branch, ignore it.
00369     BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
00370     if (!BI || !BI->isUnconditional())
00371       continue;
00372 
00373     // If the instruction before the branch (skipping debug info) isn't a phi
00374     // node, then other stuff is happening here.
00375     BasicBlock::iterator BBI = BI;
00376     if (BBI != BB->begin()) {
00377       --BBI;
00378       while (isa<DbgInfoIntrinsic>(BBI)) {
00379         if (BBI == BB->begin())
00380           break;
00381         --BBI;
00382       }
00383       if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
00384         continue;
00385     }
00386 
00387     // Do not break infinite loops.
00388     BasicBlock *DestBB = BI->getSuccessor(0);
00389     if (DestBB == BB)
00390       continue;
00391 
00392     if (!CanMergeBlocks(BB, DestBB))
00393       continue;
00394 
00395     EliminateMostlyEmptyBlock(BB);
00396     MadeChange = true;
00397   }
00398   return MadeChange;
00399 }
00400 
00401 /// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a
00402 /// single uncond branch between them, and BB contains no other non-phi
00403 /// instructions.
00404 bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB,
00405                                     const BasicBlock *DestBB) const {
00406   // We only want to eliminate blocks whose phi nodes are used by phi nodes in
00407   // the successor.  If there are more complex condition (e.g. preheaders),
00408   // don't mess around with them.
00409   BasicBlock::const_iterator BBI = BB->begin();
00410   while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
00411     for (const User *U : PN->users()) {
00412       const Instruction *UI = cast<Instruction>(U);
00413       if (UI->getParent() != DestBB || !isa<PHINode>(UI))
00414         return false;
00415       // If User is inside DestBB block and it is a PHINode then check
00416       // incoming value. If incoming value is not from BB then this is
00417       // a complex condition (e.g. preheaders) we want to avoid here.
00418       if (UI->getParent() == DestBB) {
00419         if (const PHINode *UPN = dyn_cast<PHINode>(UI))
00420           for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
00421             Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
00422             if (Insn && Insn->getParent() == BB &&
00423                 Insn->getParent() != UPN->getIncomingBlock(I))
00424               return false;
00425           }
00426       }
00427     }
00428   }
00429 
00430   // If BB and DestBB contain any common predecessors, then the phi nodes in BB
00431   // and DestBB may have conflicting incoming values for the block.  If so, we
00432   // can't merge the block.
00433   const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
00434   if (!DestBBPN) return true;  // no conflict.
00435 
00436   // Collect the preds of BB.
00437   SmallPtrSet<const BasicBlock*, 16> BBPreds;
00438   if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
00439     // It is faster to get preds from a PHI than with pred_iterator.
00440     for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
00441       BBPreds.insert(BBPN->getIncomingBlock(i));
00442   } else {
00443     BBPreds.insert(pred_begin(BB), pred_end(BB));
00444   }
00445 
00446   // Walk the preds of DestBB.
00447   for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
00448     BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
00449     if (BBPreds.count(Pred)) {   // Common predecessor?
00450       BBI = DestBB->begin();
00451       while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
00452         const Value *V1 = PN->getIncomingValueForBlock(Pred);
00453         const Value *V2 = PN->getIncomingValueForBlock(BB);
00454 
00455         // If V2 is a phi node in BB, look up what the mapped value will be.
00456         if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
00457           if (V2PN->getParent() == BB)
00458             V2 = V2PN->getIncomingValueForBlock(Pred);
00459 
00460         // If there is a conflict, bail out.
00461         if (V1 != V2) return false;
00462       }
00463     }
00464   }
00465 
00466   return true;
00467 }
00468 
00469 
00470 /// EliminateMostlyEmptyBlock - Eliminate a basic block that have only phi's and
00471 /// an unconditional branch in it.
00472 void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
00473   BranchInst *BI = cast<BranchInst>(BB->getTerminator());
00474   BasicBlock *DestBB = BI->getSuccessor(0);
00475 
00476   DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB);
00477 
00478   // If the destination block has a single pred, then this is a trivial edge,
00479   // just collapse it.
00480   if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
00481     if (SinglePred != DestBB) {
00482       // Remember if SinglePred was the entry block of the function.  If so, we
00483       // will need to move BB back to the entry position.
00484       bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
00485       MergeBasicBlockIntoOnlyPred(DestBB, DT);
00486 
00487       if (isEntry && BB != &BB->getParent()->getEntryBlock())
00488         BB->moveBefore(&BB->getParent()->getEntryBlock());
00489 
00490       DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
00491       return;
00492     }
00493   }
00494 
00495   // Otherwise, we have multiple predecessors of BB.  Update the PHIs in DestBB
00496   // to handle the new incoming edges it is about to have.
00497   PHINode *PN;
00498   for (BasicBlock::iterator BBI = DestBB->begin();
00499        (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
00500     // Remove the incoming value for BB, and remember it.
00501     Value *InVal = PN->removeIncomingValue(BB, false);
00502 
00503     // Two options: either the InVal is a phi node defined in BB or it is some
00504     // value that dominates BB.
00505     PHINode *InValPhi = dyn_cast<PHINode>(InVal);
00506     if (InValPhi && InValPhi->getParent() == BB) {
00507       // Add all of the input values of the input PHI as inputs of this phi.
00508       for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
00509         PN->addIncoming(InValPhi->getIncomingValue(i),
00510                         InValPhi->getIncomingBlock(i));
00511     } else {
00512       // Otherwise, add one instance of the dominating value for each edge that
00513       // we will be adding.
00514       if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
00515         for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
00516           PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
00517       } else {
00518         for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
00519           PN->addIncoming(InVal, *PI);
00520       }
00521     }
00522   }
00523 
00524   // The PHIs are now updated, change everything that refers to BB to use
00525   // DestBB and remove BB.
00526   BB->replaceAllUsesWith(DestBB);
00527   if (DT && !ModifiedDT) {
00528     BasicBlock *BBIDom  = DT->getNode(BB)->getIDom()->getBlock();
00529     BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock();
00530     BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom);
00531     DT->changeImmediateDominator(DestBB, NewIDom);
00532     DT->eraseNode(BB);
00533   }
00534   BB->eraseFromParent();
00535   ++NumBlocksElim;
00536 
00537   DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
00538 }
00539 
00540 // Computes a map of base pointer relocation instructions to corresponding
00541 // derived pointer relocation instructions given a vector of all relocate calls
00542 static void computeBaseDerivedRelocateMap(
00543     const SmallVectorImpl<User *> &AllRelocateCalls,
00544     DenseMap<IntrinsicInst *, SmallVector<IntrinsicInst *, 2>> &
00545         RelocateInstMap) {
00546   // Collect information in two maps: one primarily for locating the base object
00547   // while filling the second map; the second map is the final structure holding
00548   // a mapping between Base and corresponding Derived relocate calls
00549   DenseMap<std::pair<unsigned, unsigned>, IntrinsicInst *> RelocateIdxMap;
00550   for (auto &U : AllRelocateCalls) {
00551     GCRelocateOperands ThisRelocate(U);
00552     IntrinsicInst *I = cast<IntrinsicInst>(U);
00553     auto K = std::make_pair(ThisRelocate.basePtrIndex(),
00554                             ThisRelocate.derivedPtrIndex());
00555     RelocateIdxMap.insert(std::make_pair(K, I));
00556   }
00557   for (auto &Item : RelocateIdxMap) {
00558     std::pair<unsigned, unsigned> Key = Item.first;
00559     if (Key.first == Key.second)
00560       // Base relocation: nothing to insert
00561       continue;
00562 
00563     IntrinsicInst *I = Item.second;
00564     auto BaseKey = std::make_pair(Key.first, Key.first);
00565     IntrinsicInst *Base = RelocateIdxMap[BaseKey];
00566     if (!Base)
00567       // TODO: We might want to insert a new base object relocate and gep off
00568       // that, if there are enough derived object relocates.
00569       continue;
00570     RelocateInstMap[Base].push_back(I);
00571   }
00572 }
00573 
00574 // Accepts a GEP and extracts the operands into a vector provided they're all
00575 // small integer constants
00576 static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP,
00577                                           SmallVectorImpl<Value *> &OffsetV) {
00578   for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
00579     // Only accept small constant integer operands
00580     auto Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
00581     if (!Op || Op->getZExtValue() > 20)
00582       return false;
00583   }
00584 
00585   for (unsigned i = 1; i < GEP->getNumOperands(); i++)
00586     OffsetV.push_back(GEP->getOperand(i));
00587   return true;
00588 }
00589 
00590 // Takes a RelocatedBase (base pointer relocation instruction) and Targets to
00591 // replace, computes a replacement, and affects it.
00592 static bool
00593 simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase,
00594                           const SmallVectorImpl<IntrinsicInst *> &Targets) {
00595   bool MadeChange = false;
00596   for (auto &ToReplace : Targets) {
00597     GCRelocateOperands MasterRelocate(RelocatedBase);
00598     GCRelocateOperands ThisRelocate(ToReplace);
00599 
00600     assert(ThisRelocate.basePtrIndex() == MasterRelocate.basePtrIndex() &&
00601            "Not relocating a derived object of the original base object");
00602     if (ThisRelocate.basePtrIndex() == ThisRelocate.derivedPtrIndex()) {
00603       // A duplicate relocate call. TODO: coalesce duplicates.
00604       continue;
00605     }
00606 
00607     Value *Base = ThisRelocate.basePtr();
00608     auto Derived = dyn_cast<GetElementPtrInst>(ThisRelocate.derivedPtr());
00609     if (!Derived || Derived->getPointerOperand() != Base)
00610       continue;
00611 
00612     SmallVector<Value *, 2> OffsetV;
00613     if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
00614       continue;
00615 
00616     // Create a Builder and replace the target callsite with a gep
00617     IRBuilder<> Builder(ToReplace);
00618     Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
00619     Value *Replacement =
00620         Builder.CreateGEP(RelocatedBase, makeArrayRef(OffsetV));
00621     Instruction *ReplacementInst = cast<Instruction>(Replacement);
00622     ReplacementInst->removeFromParent();
00623     ReplacementInst->insertAfter(RelocatedBase);
00624     Replacement->takeName(ToReplace);
00625     ToReplace->replaceAllUsesWith(Replacement);
00626     ToReplace->eraseFromParent();
00627 
00628     MadeChange = true;
00629   }
00630   return MadeChange;
00631 }
00632 
00633 // Turns this:
00634 //
00635 // %base = ...
00636 // %ptr = gep %base + 15
00637 // %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
00638 // %base' = relocate(%tok, i32 4, i32 4)
00639 // %ptr' = relocate(%tok, i32 4, i32 5)
00640 // %val = load %ptr'
00641 //
00642 // into this:
00643 //
00644 // %base = ...
00645 // %ptr = gep %base + 15
00646 // %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
00647 // %base' = gc.relocate(%tok, i32 4, i32 4)
00648 // %ptr' = gep %base' + 15
00649 // %val = load %ptr'
00650 bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) {
00651   bool MadeChange = false;
00652   SmallVector<User *, 2> AllRelocateCalls;
00653 
00654   for (auto *U : I.users())
00655     if (isGCRelocate(dyn_cast<Instruction>(U)))
00656       // Collect all the relocate calls associated with a statepoint
00657       AllRelocateCalls.push_back(U);
00658 
00659   // We need atleast one base pointer relocation + one derived pointer
00660   // relocation to mangle
00661   if (AllRelocateCalls.size() < 2)
00662     return false;
00663 
00664   // RelocateInstMap is a mapping from the base relocate instruction to the
00665   // corresponding derived relocate instructions
00666   DenseMap<IntrinsicInst *, SmallVector<IntrinsicInst *, 2>> RelocateInstMap;
00667   computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
00668   if (RelocateInstMap.empty())
00669     return false;
00670 
00671   for (auto &Item : RelocateInstMap)
00672     // Item.first is the RelocatedBase to offset against
00673     // Item.second is the vector of Targets to replace
00674     MadeChange = simplifyRelocatesOffABase(Item.first, Item.second);
00675   return MadeChange;
00676 }
00677 
00678 /// SinkCast - Sink the specified cast instruction into its user blocks
00679 static bool SinkCast(CastInst *CI) {
00680   BasicBlock *DefBB = CI->getParent();
00681 
00682   /// InsertedCasts - Only insert a cast in each block once.
00683   DenseMap<BasicBlock*, CastInst*> InsertedCasts;
00684 
00685   bool MadeChange = false;
00686   for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
00687        UI != E; ) {
00688     Use &TheUse = UI.getUse();
00689     Instruction *User = cast<Instruction>(*UI);
00690 
00691     // Figure out which BB this cast is used in.  For PHI's this is the
00692     // appropriate predecessor block.
00693     BasicBlock *UserBB = User->getParent();
00694     if (PHINode *PN = dyn_cast<PHINode>(User)) {
00695       UserBB = PN->getIncomingBlock(TheUse);
00696     }
00697 
00698     // Preincrement use iterator so we don't invalidate it.
00699     ++UI;
00700 
00701     // If this user is in the same block as the cast, don't change the cast.
00702     if (UserBB == DefBB) continue;
00703 
00704     // If we have already inserted a cast into this block, use it.
00705     CastInst *&InsertedCast = InsertedCasts[UserBB];
00706 
00707     if (!InsertedCast) {
00708       BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
00709       InsertedCast =
00710         CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
00711                          InsertPt);
00712       MadeChange = true;
00713     }
00714 
00715     // Replace a use of the cast with a use of the new cast.
00716     TheUse = InsertedCast;
00717     ++NumCastUses;
00718   }
00719 
00720   // If we removed all uses, nuke the cast.
00721   if (CI->use_empty()) {
00722     CI->eraseFromParent();
00723     MadeChange = true;
00724   }
00725 
00726   return MadeChange;
00727 }
00728 
00729 /// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
00730 /// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
00731 /// sink it into user blocks to reduce the number of virtual
00732 /// registers that must be created and coalesced.
00733 ///
00734 /// Return true if any changes are made.
00735 ///
00736 static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
00737   // If this is a noop copy,
00738   EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
00739   EVT DstVT = TLI.getValueType(CI->getType());
00740 
00741   // This is an fp<->int conversion?
00742   if (SrcVT.isInteger() != DstVT.isInteger())
00743     return false;
00744 
00745   // If this is an extension, it will be a zero or sign extension, which
00746   // isn't a noop.
00747   if (SrcVT.bitsLT(DstVT)) return false;
00748 
00749   // If these values will be promoted, find out what they will be promoted
00750   // to.  This helps us consider truncates on PPC as noop copies when they
00751   // are.
00752   if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
00753       TargetLowering::TypePromoteInteger)
00754     SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
00755   if (TLI.getTypeAction(CI->getContext(), DstVT) ==
00756       TargetLowering::TypePromoteInteger)
00757     DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
00758 
00759   // If, after promotion, these are the same types, this is a noop copy.
00760   if (SrcVT != DstVT)
00761     return false;
00762 
00763   return SinkCast(CI);
00764 }
00765 
00766 /// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce
00767 /// the number of virtual registers that must be created and coalesced.  This is
00768 /// a clear win except on targets with multiple condition code registers
00769 ///  (PowerPC), where it might lose; some adjustment may be wanted there.
00770 ///
00771 /// Return true if any changes are made.
00772 static bool OptimizeCmpExpression(CmpInst *CI) {
00773   BasicBlock *DefBB = CI->getParent();
00774 
00775   /// InsertedCmp - Only insert a cmp in each block once.
00776   DenseMap<BasicBlock*, CmpInst*> InsertedCmps;
00777 
00778   bool MadeChange = false;
00779   for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
00780        UI != E; ) {
00781     Use &TheUse = UI.getUse();
00782     Instruction *User = cast<Instruction>(*UI);
00783 
00784     // Preincrement use iterator so we don't invalidate it.
00785     ++UI;
00786 
00787     // Don't bother for PHI nodes.
00788     if (isa<PHINode>(User))
00789       continue;
00790 
00791     // Figure out which BB this cmp is used in.
00792     BasicBlock *UserBB = User->getParent();
00793 
00794     // If this user is in the same block as the cmp, don't change the cmp.
00795     if (UserBB == DefBB) continue;
00796 
00797     // If we have already inserted a cmp into this block, use it.
00798     CmpInst *&InsertedCmp = InsertedCmps[UserBB];
00799 
00800     if (!InsertedCmp) {
00801       BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
00802       InsertedCmp =
00803         CmpInst::Create(CI->getOpcode(),
00804                         CI->getPredicate(),  CI->getOperand(0),
00805                         CI->getOperand(1), "", InsertPt);
00806       MadeChange = true;
00807     }
00808 
00809     // Replace a use of the cmp with a use of the new cmp.
00810     TheUse = InsertedCmp;
00811     ++NumCmpUses;
00812   }
00813 
00814   // If we removed all uses, nuke the cmp.
00815   if (CI->use_empty())
00816     CI->eraseFromParent();
00817 
00818   return MadeChange;
00819 }
00820 
00821 /// isExtractBitsCandidateUse - Check if the candidates could
00822 /// be combined with shift instruction, which includes:
00823 /// 1. Truncate instruction
00824 /// 2. And instruction and the imm is a mask of the low bits:
00825 /// imm & (imm+1) == 0
00826 static bool isExtractBitsCandidateUse(Instruction *User) {
00827   if (!isa<TruncInst>(User)) {
00828     if (User->getOpcode() != Instruction::And ||
00829         !isa<ConstantInt>(User->getOperand(1)))
00830       return false;
00831 
00832     const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue();
00833 
00834     if ((Cimm & (Cimm + 1)).getBoolValue())
00835       return false;
00836   }
00837   return true;
00838 }
00839 
00840 /// SinkShiftAndTruncate - sink both shift and truncate instruction
00841 /// to the use of truncate's BB.
00842 static bool
00843 SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
00844                      DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
00845                      const TargetLowering &TLI) {
00846   BasicBlock *UserBB = User->getParent();
00847   DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
00848   TruncInst *TruncI = dyn_cast<TruncInst>(User);
00849   bool MadeChange = false;
00850 
00851   for (Value::user_iterator TruncUI = TruncI->user_begin(),
00852                             TruncE = TruncI->user_end();
00853        TruncUI != TruncE;) {
00854 
00855     Use &TruncTheUse = TruncUI.getUse();
00856     Instruction *TruncUser = cast<Instruction>(*TruncUI);
00857     // Preincrement use iterator so we don't invalidate it.
00858 
00859     ++TruncUI;
00860 
00861     int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
00862     if (!ISDOpcode)
00863       continue;
00864 
00865     // If the use is actually a legal node, there will not be an
00866     // implicit truncate.
00867     // FIXME: always querying the result type is just an
00868     // approximation; some nodes' legality is determined by the
00869     // operand or other means. There's no good way to find out though.
00870     if (TLI.isOperationLegalOrCustom(
00871             ISDOpcode, TLI.getValueType(TruncUser->getType(), true)))
00872       continue;
00873 
00874     // Don't bother for PHI nodes.
00875     if (isa<PHINode>(TruncUser))
00876       continue;
00877 
00878     BasicBlock *TruncUserBB = TruncUser->getParent();
00879 
00880     if (UserBB == TruncUserBB)
00881       continue;
00882 
00883     BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
00884     CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
00885 
00886     if (!InsertedShift && !InsertedTrunc) {
00887       BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
00888       // Sink the shift
00889       if (ShiftI->getOpcode() == Instruction::AShr)
00890         InsertedShift =
00891             BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt);
00892       else
00893         InsertedShift =
00894             BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt);
00895 
00896       // Sink the trunc
00897       BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
00898       TruncInsertPt++;
00899 
00900       InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
00901                                        TruncI->getType(), "", TruncInsertPt);
00902 
00903       MadeChange = true;
00904 
00905       TruncTheUse = InsertedTrunc;
00906     }
00907   }
00908   return MadeChange;
00909 }
00910 
00911 /// OptimizeExtractBits - sink the shift *right* instruction into user blocks if
00912 /// the uses could potentially be combined with this shift instruction and
00913 /// generate BitExtract instruction. It will only be applied if the architecture
00914 /// supports BitExtract instruction. Here is an example:
00915 /// BB1:
00916 ///   %x.extract.shift = lshr i64 %arg1, 32
00917 /// BB2:
00918 ///   %x.extract.trunc = trunc i64 %x.extract.shift to i16
00919 /// ==>
00920 ///
00921 /// BB2:
00922 ///   %x.extract.shift.1 = lshr i64 %arg1, 32
00923 ///   %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
00924 ///
00925 /// CodeGen will recoginze the pattern in BB2 and generate BitExtract
00926 /// instruction.
00927 /// Return true if any changes are made.
00928 static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
00929                                 const TargetLowering &TLI) {
00930   BasicBlock *DefBB = ShiftI->getParent();
00931 
00932   /// Only insert instructions in each block once.
00933   DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;
00934 
00935   bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(ShiftI->getType()));
00936 
00937   bool MadeChange = false;
00938   for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
00939        UI != E;) {
00940     Use &TheUse = UI.getUse();
00941     Instruction *User = cast<Instruction>(*UI);
00942     // Preincrement use iterator so we don't invalidate it.
00943     ++UI;
00944 
00945     // Don't bother for PHI nodes.
00946     if (isa<PHINode>(User))
00947       continue;
00948 
00949     if (!isExtractBitsCandidateUse(User))
00950       continue;
00951 
00952     BasicBlock *UserBB = User->getParent();
00953 
00954     if (UserBB == DefBB) {
00955       // If the shift and truncate instruction are in the same BB. The use of
00956       // the truncate(TruncUse) may still introduce another truncate if not
00957       // legal. In this case, we would like to sink both shift and truncate
00958       // instruction to the BB of TruncUse.
00959       // for example:
00960       // BB1:
00961       // i64 shift.result = lshr i64 opnd, imm
00962       // trunc.result = trunc shift.result to i16
00963       //
00964       // BB2:
00965       //   ----> We will have an implicit truncate here if the architecture does
00966       //   not have i16 compare.
00967       // cmp i16 trunc.result, opnd2
00968       //
00969       if (isa<TruncInst>(User) && shiftIsLegal
00970           // If the type of the truncate is legal, no trucate will be
00971           // introduced in other basic blocks.
00972           && (!TLI.isTypeLegal(TLI.getValueType(User->getType()))))
00973         MadeChange =
00974             SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI);
00975 
00976       continue;
00977     }
00978     // If we have already inserted a shift into this block, use it.
00979     BinaryOperator *&InsertedShift = InsertedShifts[UserBB];
00980 
00981     if (!InsertedShift) {
00982       BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
00983 
00984       if (ShiftI->getOpcode() == Instruction::AShr)
00985         InsertedShift =
00986             BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt);
00987       else
00988         InsertedShift =
00989             BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt);
00990 
00991       MadeChange = true;
00992     }
00993 
00994     // Replace a use of the shift with a use of the new shift.
00995     TheUse = InsertedShift;
00996   }
00997 
00998   // If we removed all uses, nuke the shift.
00999   if (ShiftI->use_empty())
01000     ShiftI->eraseFromParent();
01001 
01002   return MadeChange;
01003 }
01004 
01005 //  ScalarizeMaskedLoad() translates masked load intrinsic, like 
01006 // <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
01007 //                               <16 x i1> %mask, <16 x i32> %passthru)
01008 // to a chain of basic blocks, whith loading element one-by-one if
01009 // the appropriate mask bit is set
01010 // 
01011 //  %1 = bitcast i8* %addr to i32*
01012 //  %2 = extractelement <16 x i1> %mask, i32 0
01013 //  %3 = icmp eq i1 %2, true
01014 //  br i1 %3, label %cond.load, label %else
01015 //
01016 //cond.load:                                        ; preds = %0
01017 //  %4 = getelementptr i32* %1, i32 0
01018 //  %5 = load i32* %4
01019 //  %6 = insertelement <16 x i32> undef, i32 %5, i32 0
01020 //  br label %else
01021 //
01022 //else:                                             ; preds = %0, %cond.load
01023 //  %res.phi.else = phi <16 x i32> [ %6, %cond.load ], [ undef, %0 ]
01024 //  %7 = extractelement <16 x i1> %mask, i32 1
01025 //  %8 = icmp eq i1 %7, true
01026 //  br i1 %8, label %cond.load1, label %else2
01027 //
01028 //cond.load1:                                       ; preds = %else
01029 //  %9 = getelementptr i32* %1, i32 1
01030 //  %10 = load i32* %9
01031 //  %11 = insertelement <16 x i32> %res.phi.else, i32 %10, i32 1
01032 //  br label %else2
01033 //
01034 //else2:                                            ; preds = %else, %cond.load1
01035 //  %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
01036 //  %12 = extractelement <16 x i1> %mask, i32 2
01037 //  %13 = icmp eq i1 %12, true
01038 //  br i1 %13, label %cond.load4, label %else5
01039 //
01040 static void ScalarizeMaskedLoad(CallInst *CI) {
01041   Value *Ptr  = CI->getArgOperand(0);
01042   Value *Src0 = CI->getArgOperand(3);
01043   Value *Mask = CI->getArgOperand(2);
01044   VectorType *VecType = dyn_cast<VectorType>(CI->getType());
01045   Type *EltTy = VecType->getElementType();
01046 
01047   assert(VecType && "Unexpected return type of masked load intrinsic");
01048 
01049   IRBuilder<> Builder(CI->getContext());
01050   Instruction *InsertPt = CI;
01051   BasicBlock *IfBlock = CI->getParent();
01052   BasicBlock *CondBlock = nullptr;
01053   BasicBlock *PrevIfBlock = CI->getParent();
01054   Builder.SetInsertPoint(InsertPt);
01055 
01056   Builder.SetCurrentDebugLocation(CI->getDebugLoc());
01057 
01058   // Bitcast %addr fron i8* to EltTy*
01059   Type *NewPtrType =
01060     EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
01061   Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
01062   Value *UndefVal = UndefValue::get(VecType);
01063 
01064   // The result vector
01065   Value *VResult = UndefVal;
01066 
01067   PHINode *Phi = nullptr;
01068   Value *PrevPhi = UndefVal;
01069 
01070   unsigned VectorWidth = VecType->getNumElements();
01071   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
01072 
01073     // Fill the "else" block, created in the previous iteration
01074     //
01075     //  %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
01076     //  %mask_1 = extractelement <16 x i1> %mask, i32 Idx
01077     //  %to_load = icmp eq i1 %mask_1, true
01078     //  br i1 %to_load, label %cond.load, label %else
01079     //
01080     if (Idx > 0) {
01081       Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
01082       Phi->addIncoming(VResult, CondBlock);
01083       Phi->addIncoming(PrevPhi, PrevIfBlock);
01084       PrevPhi = Phi;
01085       VResult = Phi;
01086     }
01087 
01088     Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
01089     Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
01090                                     ConstantInt::get(Predicate->getType(), 1));
01091 
01092     // Create "cond" block
01093     //
01094     //  %EltAddr = getelementptr i32* %1, i32 0
01095     //  %Elt = load i32* %EltAddr
01096     //  VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
01097     //
01098     CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
01099     Builder.SetInsertPoint(InsertPt);
01100     
01101     Value* Gep = Builder.CreateInBoundsGEP(FirstEltPtr, Builder.getInt32(Idx));
01102     LoadInst* Load = Builder.CreateLoad(Gep, false);
01103     VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
01104 
01105     // Create "else" block, fill it in the next iteration
01106     BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
01107     Builder.SetInsertPoint(InsertPt);
01108     Instruction *OldBr = IfBlock->getTerminator();
01109     BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
01110     OldBr->eraseFromParent();
01111     PrevIfBlock = IfBlock;
01112     IfBlock = NewIfBlock;
01113   }
01114 
01115   Phi = Builder.CreatePHI(VecType, 2, "res.phi.select");
01116   Phi->addIncoming(VResult, CondBlock);
01117   Phi->addIncoming(PrevPhi, PrevIfBlock);
01118   Value *NewI = Builder.CreateSelect(Mask, Phi, Src0);
01119   CI->replaceAllUsesWith(NewI);
01120   CI->eraseFromParent();
01121 }
01122 
01123 //  ScalarizeMaskedStore() translates masked store intrinsic, like
01124 // void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
01125 //                               <16 x i1> %mask)
01126 // to a chain of basic blocks, that stores element one-by-one if
01127 // the appropriate mask bit is set
01128 //
01129 //   %1 = bitcast i8* %addr to i32*
01130 //   %2 = extractelement <16 x i1> %mask, i32 0
01131 //   %3 = icmp eq i1 %2, true
01132 //   br i1 %3, label %cond.store, label %else
01133 //
01134 // cond.store:                                       ; preds = %0
01135 //   %4 = extractelement <16 x i32> %val, i32 0
01136 //   %5 = getelementptr i32* %1, i32 0
01137 //   store i32 %4, i32* %5
01138 //   br label %else
01139 // 
01140 // else:                                             ; preds = %0, %cond.store
01141 //   %6 = extractelement <16 x i1> %mask, i32 1
01142 //   %7 = icmp eq i1 %6, true
01143 //   br i1 %7, label %cond.store1, label %else2
01144 // 
01145 // cond.store1:                                      ; preds = %else
01146 //   %8 = extractelement <16 x i32> %val, i32 1
01147 //   %9 = getelementptr i32* %1, i32 1
01148 //   store i32 %8, i32* %9
01149 //   br label %else2
01150 //   . . .
01151 static void ScalarizeMaskedStore(CallInst *CI) {
01152   Value *Ptr  = CI->getArgOperand(1);
01153   Value *Src = CI->getArgOperand(0);
01154   Value *Mask = CI->getArgOperand(3);
01155 
01156   VectorType *VecType = dyn_cast<VectorType>(Src->getType());
01157   Type *EltTy = VecType->getElementType();
01158 
01159   assert(VecType && "Unexpected data type in masked store intrinsic");
01160 
01161   IRBuilder<> Builder(CI->getContext());
01162   Instruction *InsertPt = CI;
01163   BasicBlock *IfBlock = CI->getParent();
01164   Builder.SetInsertPoint(InsertPt);
01165   Builder.SetCurrentDebugLocation(CI->getDebugLoc());
01166 
01167   // Bitcast %addr fron i8* to EltTy*
01168   Type *NewPtrType =
01169     EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
01170   Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
01171 
01172   unsigned VectorWidth = VecType->getNumElements();
01173   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
01174 
01175     // Fill the "else" block, created in the previous iteration
01176     //
01177     //  %mask_1 = extractelement <16 x i1> %mask, i32 Idx
01178     //  %to_store = icmp eq i1 %mask_1, true
01179     //  br i1 %to_load, label %cond.store, label %else
01180     //
01181     Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
01182     Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
01183                                     ConstantInt::get(Predicate->getType(), 1));
01184 
01185     // Create "cond" block
01186     //
01187     //  %OneElt = extractelement <16 x i32> %Src, i32 Idx
01188     //  %EltAddr = getelementptr i32* %1, i32 0
01189     //  %store i32 %OneElt, i32* %EltAddr
01190     //
01191     BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store");
01192     Builder.SetInsertPoint(InsertPt);
01193     
01194     Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
01195     Value* Gep = Builder.CreateInBoundsGEP(FirstEltPtr, Builder.getInt32(Idx));
01196     Builder.CreateStore(OneElt, Gep);
01197 
01198     // Create "else" block, fill it in the next iteration
01199     BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
01200     Builder.SetInsertPoint(InsertPt);
01201     Instruction *OldBr = IfBlock->getTerminator();
01202     BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
01203     OldBr->eraseFromParent();
01204     IfBlock = NewIfBlock;
01205   }
01206   CI->eraseFromParent();
01207 }
01208 
01209 bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
01210   BasicBlock *BB = CI->getParent();
01211 
01212   // Lower inline assembly if we can.
01213   // If we found an inline asm expession, and if the target knows how to
01214   // lower it to normal LLVM code, do so now.
01215   if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
01216     if (TLI->ExpandInlineAsm(CI)) {
01217       // Avoid invalidating the iterator.
01218       CurInstIterator = BB->begin();
01219       // Avoid processing instructions out of order, which could cause
01220       // reuse before a value is defined.
01221       SunkAddrs.clear();
01222       return true;
01223     }
01224     // Sink address computing for memory operands into the block.
01225     if (OptimizeInlineAsmInst(CI))
01226       return true;
01227   }
01228 
01229   IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
01230   if (II) {
01231     switch (II->getIntrinsicID()) {
01232     default: break;
01233     case Intrinsic::objectsize: {
01234       // Lower all uses of llvm.objectsize.*
01235       bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
01236       Type *ReturnTy = CI->getType();
01237       Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
01238 
01239       // Substituting this can cause recursive simplifications, which can
01240       // invalidate our iterator.  Use a WeakVH to hold onto it in case this
01241       // happens.
01242       WeakVH IterHandle(CurInstIterator);
01243 
01244       replaceAndRecursivelySimplify(CI, RetVal,
01245                                     TLI ? TLI->getDataLayout() : nullptr,
01246                                     TLInfo, ModifiedDT ? nullptr : DT);
01247 
01248       // If the iterator instruction was recursively deleted, start over at the
01249       // start of the block.
01250       if (IterHandle != CurInstIterator) {
01251         CurInstIterator = BB->begin();
01252         SunkAddrs.clear();
01253       }
01254       return true;
01255     }
01256     case Intrinsic::masked_load: {
01257       // Scalarize unsupported vector masked load
01258       if (!TTI->isLegalMaskedLoad(CI->getType(), 1)) {
01259         ScalarizeMaskedLoad(CI);
01260         ModifiedDT = true;
01261         return true;
01262       }
01263       return false;
01264     }
01265     case Intrinsic::masked_store: {
01266       if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType(), 1)) {
01267         ScalarizeMaskedStore(CI);
01268         ModifiedDT = true;
01269         return true;
01270       }
01271       return false;
01272     }
01273     }
01274 
01275     if (TLI) {
01276       SmallVector<Value*, 2> PtrOps;
01277       Type *AccessTy;
01278       if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy))
01279         while (!PtrOps.empty())
01280           if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy))
01281             return true;
01282     }
01283   }
01284 
01285   // From here on out we're working with named functions.
01286   if (!CI->getCalledFunction()) return false;
01287 
01288   // We'll need DataLayout from here on out.
01289   const DataLayout *TD = TLI ? TLI->getDataLayout() : nullptr;
01290   if (!TD) return false;
01291 
01292   // Lower all default uses of _chk calls.  This is very similar
01293   // to what InstCombineCalls does, but here we are only lowering calls
01294   // to fortified library functions (e.g. __memcpy_chk) that have the default
01295   // "don't know" as the objectsize.  Anything else should be left alone.
01296   FortifiedLibCallSimplifier Simplifier(TD, TLInfo, true);
01297   if (Value *V = Simplifier.optimizeCall(CI)) {
01298     CI->replaceAllUsesWith(V);
01299     CI->eraseFromParent();
01300     return true;
01301   }
01302   return false;
01303 }
01304 
01305 /// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return
01306 /// instructions to the predecessor to enable tail call optimizations. The
01307 /// case it is currently looking for is:
01308 /// @code
01309 /// bb0:
01310 ///   %tmp0 = tail call i32 @f0()
01311 ///   br label %return
01312 /// bb1:
01313 ///   %tmp1 = tail call i32 @f1()
01314 ///   br label %return
01315 /// bb2:
01316 ///   %tmp2 = tail call i32 @f2()
01317 ///   br label %return
01318 /// return:
01319 ///   %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
01320 ///   ret i32 %retval
01321 /// @endcode
01322 ///
01323 /// =>
01324 ///
01325 /// @code
01326 /// bb0:
01327 ///   %tmp0 = tail call i32 @f0()
01328 ///   ret i32 %tmp0
01329 /// bb1:
01330 ///   %tmp1 = tail call i32 @f1()
01331 ///   ret i32 %tmp1
01332 /// bb2:
01333 ///   %tmp2 = tail call i32 @f2()
01334 ///   ret i32 %tmp2
01335 /// @endcode
01336 bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) {
01337   if (!TLI)
01338     return false;
01339 
01340   ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
01341   if (!RI)
01342     return false;
01343 
01344   PHINode *PN = nullptr;
01345   BitCastInst *BCI = nullptr;
01346   Value *V = RI->getReturnValue();
01347   if (V) {
01348     BCI = dyn_cast<BitCastInst>(V);
01349     if (BCI)
01350       V = BCI->getOperand(0);
01351 
01352     PN = dyn_cast<PHINode>(V);
01353     if (!PN)
01354       return false;
01355   }
01356 
01357   if (PN && PN->getParent() != BB)
01358     return false;
01359 
01360   // It's not safe to eliminate the sign / zero extension of the return value.
01361   // See llvm::isInTailCallPosition().
01362   const Function *F = BB->getParent();
01363   AttributeSet CallerAttrs = F->getAttributes();
01364   if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) ||
01365       CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
01366     return false;
01367 
01368   // Make sure there are no instructions between the PHI and return, or that the
01369   // return is the first instruction in the block.
01370   if (PN) {
01371     BasicBlock::iterator BI = BB->begin();
01372     do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
01373     if (&*BI == BCI)
01374       // Also skip over the bitcast.
01375       ++BI;
01376     if (&*BI != RI)
01377       return false;
01378   } else {
01379     BasicBlock::iterator BI = BB->begin();
01380     while (isa<DbgInfoIntrinsic>(BI)) ++BI;
01381     if (&*BI != RI)
01382       return false;
01383   }
01384 
01385   /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
01386   /// call.
01387   SmallVector<CallInst*, 4> TailCalls;
01388   if (PN) {
01389     for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
01390       CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I));
01391       // Make sure the phi value is indeed produced by the tail call.
01392       if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) &&
01393           TLI->mayBeEmittedAsTailCall(CI))
01394         TailCalls.push_back(CI);
01395     }
01396   } else {
01397     SmallPtrSet<BasicBlock*, 4> VisitedBBs;
01398     for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
01399       if (!VisitedBBs.insert(*PI).second)
01400         continue;
01401 
01402       BasicBlock::InstListType &InstList = (*PI)->getInstList();
01403       BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin();
01404       BasicBlock::InstListType::reverse_iterator RE = InstList.rend();
01405       do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI));
01406       if (RI == RE)
01407         continue;
01408 
01409       CallInst *CI = dyn_cast<CallInst>(&*RI);
01410       if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI))
01411         TailCalls.push_back(CI);
01412     }
01413   }
01414 
01415   bool Changed = false;
01416   for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) {
01417     CallInst *CI = TailCalls[i];
01418     CallSite CS(CI);
01419 
01420     // Conservatively require the attributes of the call to match those of the
01421     // return. Ignore noalias because it doesn't affect the call sequence.
01422     AttributeSet CalleeAttrs = CS.getAttributes();
01423     if (AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
01424           removeAttribute(Attribute::NoAlias) !=
01425         AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
01426           removeAttribute(Attribute::NoAlias))
01427       continue;
01428 
01429     // Make sure the call instruction is followed by an unconditional branch to
01430     // the return block.
01431     BasicBlock *CallBB = CI->getParent();
01432     BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator());
01433     if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
01434       continue;
01435 
01436     // Duplicate the return into CallBB.
01437     (void)FoldReturnIntoUncondBranch(RI, BB, CallBB);
01438     ModifiedDT = Changed = true;
01439     ++NumRetsDup;
01440   }
01441 
01442   // If we eliminated all predecessors of the block, delete the block now.
01443   if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
01444     BB->eraseFromParent();
01445 
01446   return Changed;
01447 }
01448 
01449 //===----------------------------------------------------------------------===//
01450 // Memory Optimization
01451 //===----------------------------------------------------------------------===//
01452 
01453 namespace {
01454 
01455 /// ExtAddrMode - This is an extended version of TargetLowering::AddrMode
01456 /// which holds actual Value*'s for register values.
01457 struct ExtAddrMode : public TargetLowering::AddrMode {
01458   Value *BaseReg;
01459   Value *ScaledReg;
01460   ExtAddrMode() : BaseReg(nullptr), ScaledReg(nullptr) {}
01461   void print(raw_ostream &OS) const;
01462   void dump() const;
01463 
01464   bool operator==(const ExtAddrMode& O) const {
01465     return (BaseReg == O.BaseReg) && (ScaledReg == O.ScaledReg) &&
01466            (BaseGV == O.BaseGV) && (BaseOffs == O.BaseOffs) &&
01467            (HasBaseReg == O.HasBaseReg) && (Scale == O.Scale);
01468   }
01469 };
01470 
01471 #ifndef NDEBUG
01472 static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
01473   AM.print(OS);
01474   return OS;
01475 }
01476 #endif
01477 
01478 void ExtAddrMode::print(raw_ostream &OS) const {
01479   bool NeedPlus = false;
01480   OS << "[";
01481   if (BaseGV) {
01482     OS << (NeedPlus ? " + " : "")
01483        << "GV:";
01484     BaseGV->printAsOperand(OS, /*PrintType=*/false);
01485     NeedPlus = true;
01486   }
01487 
01488   if (BaseOffs) {
01489     OS << (NeedPlus ? " + " : "")
01490        << BaseOffs;
01491     NeedPlus = true;
01492   }
01493 
01494   if (BaseReg) {
01495     OS << (NeedPlus ? " + " : "")
01496        << "Base:";
01497     BaseReg->printAsOperand(OS, /*PrintType=*/false);
01498     NeedPlus = true;
01499   }
01500   if (Scale) {
01501     OS << (NeedPlus ? " + " : "")
01502        << Scale << "*";
01503     ScaledReg->printAsOperand(OS, /*PrintType=*/false);
01504   }
01505 
01506   OS << ']';
01507 }
01508 
01509 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
01510 void ExtAddrMode::dump() const {
01511   print(dbgs());
01512   dbgs() << '\n';
01513 }
01514 #endif
01515 
01516 /// \brief This class provides transaction based operation on the IR.
01517 /// Every change made through this class is recorded in the internal state and
01518 /// can be undone (rollback) until commit is called.
01519 class TypePromotionTransaction {
01520 
01521   /// \brief This represents the common interface of the individual transaction.
01522   /// Each class implements the logic for doing one specific modification on
01523   /// the IR via the TypePromotionTransaction.
01524   class TypePromotionAction {
01525   protected:
01526     /// The Instruction modified.
01527     Instruction *Inst;
01528 
01529   public:
01530     /// \brief Constructor of the action.
01531     /// The constructor performs the related action on the IR.
01532     TypePromotionAction(Instruction *Inst) : Inst(Inst) {}
01533 
01534     virtual ~TypePromotionAction() {}
01535 
01536     /// \brief Undo the modification done by this action.
01537     /// When this method is called, the IR must be in the same state as it was
01538     /// before this action was applied.
01539     /// \pre Undoing the action works if and only if the IR is in the exact same
01540     /// state as it was directly after this action was applied.
01541     virtual void undo() = 0;
01542 
01543     /// \brief Advocate every change made by this action.
01544     /// When the results on the IR of the action are to be kept, it is important
01545     /// to call this function, otherwise hidden information may be kept forever.
01546     virtual void commit() {
01547       // Nothing to be done, this action is not doing anything.
01548     }
01549   };
01550 
01551   /// \brief Utility to remember the position of an instruction.
01552   class InsertionHandler {
01553     /// Position of an instruction.
01554     /// Either an instruction:
01555     /// - Is the first in a basic block: BB is used.
01556     /// - Has a previous instructon: PrevInst is used.
01557     union {
01558       Instruction *PrevInst;
01559       BasicBlock *BB;
01560     } Point;
01561     /// Remember whether or not the instruction had a previous instruction.
01562     bool HasPrevInstruction;
01563 
01564   public:
01565     /// \brief Record the position of \p Inst.
01566     InsertionHandler(Instruction *Inst) {
01567       BasicBlock::iterator It = Inst;
01568       HasPrevInstruction = (It != (Inst->getParent()->begin()));
01569       if (HasPrevInstruction)
01570         Point.PrevInst = --It;
01571       else
01572         Point.BB = Inst->getParent();
01573     }
01574 
01575     /// \brief Insert \p Inst at the recorded position.
01576     void insert(Instruction *Inst) {
01577       if (HasPrevInstruction) {
01578         if (Inst->getParent())
01579           Inst->removeFromParent();
01580         Inst->insertAfter(Point.PrevInst);
01581       } else {
01582         Instruction *Position = Point.BB->getFirstInsertionPt();
01583         if (Inst->getParent())
01584           Inst->moveBefore(Position);
01585         else
01586           Inst->insertBefore(Position);
01587       }
01588     }
01589   };
01590 
01591   /// \brief Move an instruction before another.
01592   class InstructionMoveBefore : public TypePromotionAction {
01593     /// Original position of the instruction.
01594     InsertionHandler Position;
01595 
01596   public:
01597     /// \brief Move \p Inst before \p Before.
01598     InstructionMoveBefore(Instruction *Inst, Instruction *Before)
01599         : TypePromotionAction(Inst), Position(Inst) {
01600       DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before << "\n");
01601       Inst->moveBefore(Before);
01602     }
01603 
01604     /// \brief Move the instruction back to its original position.
01605     void undo() override {
01606       DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n");
01607       Position.insert(Inst);
01608     }
01609   };
01610 
01611   /// \brief Set the operand of an instruction with a new value.
01612   class OperandSetter : public TypePromotionAction {
01613     /// Original operand of the instruction.
01614     Value *Origin;
01615     /// Index of the modified instruction.
01616     unsigned Idx;
01617 
01618   public:
01619     /// \brief Set \p Idx operand of \p Inst with \p NewVal.
01620     OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal)
01621         : TypePromotionAction(Inst), Idx(Idx) {
01622       DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n"
01623                    << "for:" << *Inst << "\n"
01624                    << "with:" << *NewVal << "\n");
01625       Origin = Inst->getOperand(Idx);
01626       Inst->setOperand(Idx, NewVal);
01627     }
01628 
01629     /// \brief Restore the original value of the instruction.
01630     void undo() override {
01631       DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n"
01632                    << "for: " << *Inst << "\n"
01633                    << "with: " << *Origin << "\n");
01634       Inst->setOperand(Idx, Origin);
01635     }
01636   };
01637 
01638   /// \brief Hide the operands of an instruction.
01639   /// Do as if this instruction was not using any of its operands.
01640   class OperandsHider : public TypePromotionAction {
01641     /// The list of original operands.
01642     SmallVector<Value *, 4> OriginalValues;
01643 
01644   public:
01645     /// \brief Remove \p Inst from the uses of the operands of \p Inst.
01646     OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) {
01647       DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n");
01648       unsigned NumOpnds = Inst->getNumOperands();
01649       OriginalValues.reserve(NumOpnds);
01650       for (unsigned It = 0; It < NumOpnds; ++It) {
01651         // Save the current operand.
01652         Value *Val = Inst->getOperand(It);
01653         OriginalValues.push_back(Val);
01654         // Set a dummy one.
01655         // We could use OperandSetter here, but that would implied an overhead
01656         // that we are not willing to pay.
01657         Inst->setOperand(It, UndefValue::get(Val->getType()));
01658       }
01659     }
01660 
01661     /// \brief Restore the original list of uses.
01662     void undo() override {
01663       DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n");
01664       for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It)
01665         Inst->setOperand(It, OriginalValues[It]);
01666     }
01667   };
01668 
01669   /// \brief Build a truncate instruction.
01670   class TruncBuilder : public TypePromotionAction {
01671     Value *Val;
01672   public:
01673     /// \brief Build a truncate instruction of \p Opnd producing a \p Ty
01674     /// result.
01675     /// trunc Opnd to Ty.
01676     TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
01677       IRBuilder<> Builder(Opnd);
01678       Val = Builder.CreateTrunc(Opnd, Ty, "promoted");
01679       DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n");
01680     }
01681 
01682     /// \brief Get the built value.
01683     Value *getBuiltValue() { return Val; }
01684 
01685     /// \brief Remove the built instruction.
01686     void undo() override {
01687       DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n");
01688       if (Instruction *IVal = dyn_cast<Instruction>(Val))
01689         IVal->eraseFromParent();
01690     }
01691   };
01692 
01693   /// \brief Build a sign extension instruction.
01694   class SExtBuilder : public TypePromotionAction {
01695     Value *Val;
01696   public:
01697     /// \brief Build a sign extension instruction of \p Opnd producing a \p Ty
01698     /// result.
01699     /// sext Opnd to Ty.
01700     SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
01701         : TypePromotionAction(InsertPt) {
01702       IRBuilder<> Builder(InsertPt);
01703       Val = Builder.CreateSExt(Opnd, Ty, "promoted");
01704       DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n");
01705     }
01706 
01707     /// \brief Get the built value.
01708     Value *getBuiltValue() { return Val; }
01709 
01710     /// \brief Remove the built instruction.
01711     void undo() override {
01712       DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n");
01713       if (Instruction *IVal = dyn_cast<Instruction>(Val))
01714         IVal->eraseFromParent();
01715     }
01716   };
01717 
01718   /// \brief Build a zero extension instruction.
01719   class ZExtBuilder : public TypePromotionAction {
01720     Value *Val;
01721   public:
01722     /// \brief Build a zero extension instruction of \p Opnd producing a \p Ty
01723     /// result.
01724     /// zext Opnd to Ty.
01725     ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
01726         : TypePromotionAction(InsertPt) {
01727       IRBuilder<> Builder(InsertPt);
01728       Val = Builder.CreateZExt(Opnd, Ty, "promoted");
01729       DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n");
01730     }
01731 
01732     /// \brief Get the built value.
01733     Value *getBuiltValue() { return Val; }
01734 
01735     /// \brief Remove the built instruction.
01736     void undo() override {
01737       DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n");
01738       if (Instruction *IVal = dyn_cast<Instruction>(Val))
01739         IVal->eraseFromParent();
01740     }
01741   };
01742 
01743   /// \brief Mutate an instruction to another type.
01744   class TypeMutator : public TypePromotionAction {
01745     /// Record the original type.
01746     Type *OrigTy;
01747 
01748   public:
01749     /// \brief Mutate the type of \p Inst into \p NewTy.
01750     TypeMutator(Instruction *Inst, Type *NewTy)
01751         : TypePromotionAction(Inst), OrigTy(Inst->getType()) {
01752       DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy
01753                    << "\n");
01754       Inst->mutateType(NewTy);
01755     }
01756 
01757     /// \brief Mutate the instruction back to its original type.
01758     void undo() override {
01759       DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy
01760                    << "\n");
01761       Inst->mutateType(OrigTy);
01762     }
01763   };
01764 
01765   /// \brief Replace the uses of an instruction by another instruction.
01766   class UsesReplacer : public TypePromotionAction {
01767     /// Helper structure to keep track of the replaced uses.
01768     struct InstructionAndIdx {
01769       /// The instruction using the instruction.
01770       Instruction *Inst;
01771       /// The index where this instruction is used for Inst.
01772       unsigned Idx;
01773       InstructionAndIdx(Instruction *Inst, unsigned Idx)
01774           : Inst(Inst), Idx(Idx) {}
01775     };
01776 
01777     /// Keep track of the original uses (pair Instruction, Index).
01778     SmallVector<InstructionAndIdx, 4> OriginalUses;
01779     typedef SmallVectorImpl<InstructionAndIdx>::iterator use_iterator;
01780 
01781   public:
01782     /// \brief Replace all the use of \p Inst by \p New.
01783     UsesReplacer(Instruction *Inst, Value *New) : TypePromotionAction(Inst) {
01784       DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New
01785                    << "\n");
01786       // Record the original uses.
01787       for (Use &U : Inst->uses()) {
01788         Instruction *UserI = cast<Instruction>(U.getUser());
01789         OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo()));
01790       }
01791       // Now, we can replace the uses.
01792       Inst->replaceAllUsesWith(New);
01793     }
01794 
01795     /// \brief Reassign the original uses of Inst to Inst.
01796     void undo() override {
01797       DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
01798       for (use_iterator UseIt = OriginalUses.begin(),
01799                         EndIt = OriginalUses.end();
01800            UseIt != EndIt; ++UseIt) {
01801         UseIt->Inst->setOperand(UseIt->Idx, Inst);
01802       }
01803     }
01804   };
01805 
01806   /// \brief Remove an instruction from the IR.
01807   class InstructionRemover : public TypePromotionAction {
01808     /// Original position of the instruction.
01809     InsertionHandler Inserter;
01810     /// Helper structure to hide all the link to the instruction. In other
01811     /// words, this helps to do as if the instruction was removed.
01812     OperandsHider Hider;
01813     /// Keep track of the uses replaced, if any.
01814     UsesReplacer *Replacer;
01815 
01816   public:
01817     /// \brief Remove all reference of \p Inst and optinally replace all its
01818     /// uses with New.
01819     /// \pre If !Inst->use_empty(), then New != nullptr
01820     InstructionRemover(Instruction *Inst, Value *New = nullptr)
01821         : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
01822           Replacer(nullptr) {
01823       if (New)
01824         Replacer = new UsesReplacer(Inst, New);
01825       DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
01826       Inst->removeFromParent();
01827     }
01828 
01829     ~InstructionRemover() { delete Replacer; }
01830 
01831     /// \brief Really remove the instruction.
01832     void commit() override { delete Inst; }
01833 
01834     /// \brief Resurrect the instruction and reassign it to the proper uses if
01835     /// new value was provided when build this action.
01836     void undo() override {
01837       DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
01838       Inserter.insert(Inst);
01839       if (Replacer)
01840         Replacer->undo();
01841       Hider.undo();
01842     }
01843   };
01844 
01845 public:
01846   /// Restoration point.
01847   /// The restoration point is a pointer to an action instead of an iterator
01848   /// because the iterator may be invalidated but not the pointer.
01849   typedef const TypePromotionAction *ConstRestorationPt;
01850   /// Advocate every changes made in that transaction.
01851   void commit();
01852   /// Undo all the changes made after the given point.
01853   void rollback(ConstRestorationPt Point);
01854   /// Get the current restoration point.
01855   ConstRestorationPt getRestorationPoint() const;
01856 
01857   /// \name API for IR modification with state keeping to support rollback.
01858   /// @{
01859   /// Same as Instruction::setOperand.
01860   void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
01861   /// Same as Instruction::eraseFromParent.
01862   void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
01863   /// Same as Value::replaceAllUsesWith.
01864   void replaceAllUsesWith(Instruction *Inst, Value *New);
01865   /// Same as Value::mutateType.
01866   void mutateType(Instruction *Inst, Type *NewTy);
01867   /// Same as IRBuilder::createTrunc.
01868   Value *createTrunc(Instruction *Opnd, Type *Ty);
01869   /// Same as IRBuilder::createSExt.
01870   Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
01871   /// Same as IRBuilder::createZExt.
01872   Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
01873   /// Same as Instruction::moveBefore.
01874   void moveBefore(Instruction *Inst, Instruction *Before);
01875   /// @}
01876 
01877 private:
01878   /// The ordered list of actions made so far.
01879   SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
01880   typedef SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator CommitPt;
01881 };
01882 
01883 void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
01884                                           Value *NewVal) {
01885   Actions.push_back(
01886       make_unique<TypePromotionTransaction::OperandSetter>(Inst, Idx, NewVal));
01887 }
01888 
01889 void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
01890                                                 Value *NewVal) {
01891   Actions.push_back(
01892       make_unique<TypePromotionTransaction::InstructionRemover>(Inst, NewVal));
01893 }
01894 
01895 void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
01896                                                   Value *New) {
01897   Actions.push_back(make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
01898 }
01899 
01900 void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
01901   Actions.push_back(make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
01902 }
01903 
01904 Value *TypePromotionTransaction::createTrunc(Instruction *Opnd,
01905                                              Type *Ty) {
01906   std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
01907   Value *Val = Ptr->getBuiltValue();
01908   Actions.push_back(std::move(Ptr));
01909   return Val;
01910 }
01911 
01912 Value *TypePromotionTransaction::createSExt(Instruction *Inst,
01913                                             Value *Opnd, Type *Ty) {
01914   std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
01915   Value *Val = Ptr->getBuiltValue();
01916   Actions.push_back(std::move(Ptr));
01917   return Val;
01918 }
01919 
01920 Value *TypePromotionTransaction::createZExt(Instruction *Inst,
01921                                             Value *Opnd, Type *Ty) {
01922   std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty));
01923   Value *Val = Ptr->getBuiltValue();
01924   Actions.push_back(std::move(Ptr));
01925   return Val;
01926 }
01927 
01928 void TypePromotionTransaction::moveBefore(Instruction *Inst,
01929                                           Instruction *Before) {
01930   Actions.push_back(
01931       make_unique<TypePromotionTransaction::InstructionMoveBefore>(Inst, Before));
01932 }
01933 
01934 TypePromotionTransaction::ConstRestorationPt
01935 TypePromotionTransaction::getRestorationPoint() const {
01936   return !Actions.empty() ? Actions.back().get() : nullptr;
01937 }
01938 
01939 void TypePromotionTransaction::commit() {
01940   for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt;
01941        ++It)
01942     (*It)->commit();
01943   Actions.clear();
01944 }
01945 
01946 void TypePromotionTransaction::rollback(
01947     TypePromotionTransaction::ConstRestorationPt Point) {
01948   while (!Actions.empty() && Point != Actions.back().get()) {
01949     std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
01950     Curr->undo();
01951   }
01952 }
01953 
01954 /// \brief A helper class for matching addressing modes.
01955 ///
01956 /// This encapsulates the logic for matching the target-legal addressing modes.
01957 class AddressingModeMatcher {
01958   SmallVectorImpl<Instruction*> &AddrModeInsts;
01959   const TargetLowering &TLI;
01960 
01961   /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
01962   /// the memory instruction that we're computing this address for.
01963   Type *AccessTy;
01964   Instruction *MemoryInst;
01965 
01966   /// AddrMode - This is the addressing mode that we're building up.  This is
01967   /// part of the return value of this addressing mode matching stuff.
01968   ExtAddrMode &AddrMode;
01969 
01970   /// The truncate instruction inserted by other CodeGenPrepare optimizations.
01971   const SetOfInstrs &InsertedTruncs;
01972   /// A map from the instructions to their type before promotion.
01973   InstrToOrigTy &PromotedInsts;
01974   /// The ongoing transaction where every action should be registered.
01975   TypePromotionTransaction &TPT;
01976 
01977   /// IgnoreProfitability - This is set to true when we should not do
01978   /// profitability checks.  When true, IsProfitableToFoldIntoAddressingMode
01979   /// always returns true.
01980   bool IgnoreProfitability;
01981 
01982   AddressingModeMatcher(SmallVectorImpl<Instruction*> &AMI,
01983                         const TargetLowering &T, Type *AT,
01984                         Instruction *MI, ExtAddrMode &AM,
01985                         const SetOfInstrs &InsertedTruncs,
01986                         InstrToOrigTy &PromotedInsts,
01987                         TypePromotionTransaction &TPT)
01988       : AddrModeInsts(AMI), TLI(T), AccessTy(AT), MemoryInst(MI), AddrMode(AM),
01989         InsertedTruncs(InsertedTruncs), PromotedInsts(PromotedInsts), TPT(TPT) {
01990     IgnoreProfitability = false;
01991   }
01992 public:
01993 
01994   /// Match - Find the maximal addressing mode that a load/store of V can fold,
01995   /// give an access type of AccessTy.  This returns a list of involved
01996   /// instructions in AddrModeInsts.
01997   /// \p InsertedTruncs The truncate instruction inserted by other
01998   /// CodeGenPrepare
01999   /// optimizations.
02000   /// \p PromotedInsts maps the instructions to their type before promotion.
02001   /// \p The ongoing transaction where every action should be registered.
02002   static ExtAddrMode Match(Value *V, Type *AccessTy,
02003                            Instruction *MemoryInst,
02004                            SmallVectorImpl<Instruction*> &AddrModeInsts,
02005                            const TargetLowering &TLI,
02006                            const SetOfInstrs &InsertedTruncs,
02007                            InstrToOrigTy &PromotedInsts,
02008                            TypePromotionTransaction &TPT) {
02009     ExtAddrMode Result;
02010 
02011     bool Success = AddressingModeMatcher(AddrModeInsts, TLI, AccessTy,
02012                                          MemoryInst, Result, InsertedTruncs,
02013                                          PromotedInsts, TPT).MatchAddr(V, 0);
02014     (void)Success; assert(Success && "Couldn't select *anything*?");
02015     return Result;
02016   }
02017 private:
02018   bool MatchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
02019   bool MatchAddr(Value *V, unsigned Depth);
02020   bool MatchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth,
02021                           bool *MovedAway = nullptr);
02022   bool IsProfitableToFoldIntoAddressingMode(Instruction *I,
02023                                             ExtAddrMode &AMBefore,
02024                                             ExtAddrMode &AMAfter);
02025   bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
02026   bool IsPromotionProfitable(unsigned MatchedSize, unsigned SizeWithPromotion,
02027                              Value *PromotedOperand) const;
02028 };
02029 
02030 /// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode.
02031 /// Return true and update AddrMode if this addr mode is legal for the target,
02032 /// false if not.
02033 bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
02034                                              unsigned Depth) {
02035   // If Scale is 1, then this is the same as adding ScaleReg to the addressing
02036   // mode.  Just process that directly.
02037   if (Scale == 1)
02038     return MatchAddr(ScaleReg, Depth);
02039 
02040   // If the scale is 0, it takes nothing to add this.
02041   if (Scale == 0)
02042     return true;
02043 
02044   // If we already have a scale of this value, we can add to it, otherwise, we
02045   // need an available scale field.
02046   if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
02047     return false;
02048 
02049   ExtAddrMode TestAddrMode = AddrMode;
02050 
02051   // Add scale to turn X*4+X*3 -> X*7.  This could also do things like
02052   // [A+B + A*7] -> [B+A*8].
02053   TestAddrMode.Scale += Scale;
02054   TestAddrMode.ScaledReg = ScaleReg;
02055 
02056   // If the new address isn't legal, bail out.
02057   if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy))
02058     return false;
02059 
02060   // It was legal, so commit it.
02061   AddrMode = TestAddrMode;
02062 
02063   // Okay, we decided that we can add ScaleReg+Scale to AddrMode.  Check now
02064   // to see if ScaleReg is actually X+C.  If so, we can turn this into adding
02065   // X*Scale + C*Scale to addr mode.
02066   ConstantInt *CI = nullptr; Value *AddLHS = nullptr;
02067   if (isa<Instruction>(ScaleReg) &&  // not a constant expr.
02068       match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
02069     TestAddrMode.ScaledReg = AddLHS;
02070     TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
02071 
02072     // If this addressing mode is legal, commit it and remember that we folded
02073     // this instruction.
02074     if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) {
02075       AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
02076       AddrMode = TestAddrMode;
02077       return true;
02078     }
02079   }
02080 
02081   // Otherwise, not (x+c)*scale, just return what we have.
02082   return true;
02083 }
02084 
02085 /// MightBeFoldableInst - This is a little filter, which returns true if an
02086 /// addressing computation involving I might be folded into a load/store
02087 /// accessing it.  This doesn't need to be perfect, but needs to accept at least
02088 /// the set of instructions that MatchOperationAddr can.
02089 static bool MightBeFoldableInst(Instruction *I) {
02090   switch (I->getOpcode()) {
02091   case Instruction::BitCast:
02092   case Instruction::AddrSpaceCast:
02093     // Don't touch identity bitcasts.
02094     if (I->getType() == I->getOperand(0)->getType())
02095       return false;
02096     return I->getType()->isPointerTy() || I->getType()->isIntegerTy();
02097   case Instruction::PtrToInt:
02098     // PtrToInt is always a noop, as we know that the int type is pointer sized.
02099     return true;
02100   case Instruction::IntToPtr:
02101     // We know the input is intptr_t, so this is foldable.
02102     return true;
02103   case Instruction::Add:
02104     return true;
02105   case Instruction::Mul:
02106   case Instruction::Shl:
02107     // Can only handle X*C and X << C.
02108     return isa<ConstantInt>(I->getOperand(1));
02109   case Instruction::GetElementPtr:
02110     return true;
02111   default:
02112     return false;
02113   }
02114 }
02115 
02116 /// \brief Check whether or not \p Val is a legal instruction for \p TLI.
02117 /// \note \p Val is assumed to be the product of some type promotion.
02118 /// Therefore if \p Val has an undefined state in \p TLI, this is assumed
02119 /// to be legal, as the non-promoted value would have had the same state.
02120 static bool isPromotedInstructionLegal(const TargetLowering &TLI, Value *Val) {
02121   Instruction *PromotedInst = dyn_cast<Instruction>(Val);
02122   if (!PromotedInst)
02123     return false;
02124   int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
02125   // If the ISDOpcode is undefined, it was undefined before the promotion.
02126   if (!ISDOpcode)
02127     return true;
02128   // Otherwise, check if the promoted instruction is legal or not.
02129   return TLI.isOperationLegalOrCustom(
02130       ISDOpcode, TLI.getValueType(PromotedInst->getType()));
02131 }
02132 
02133 /// \brief Hepler class to perform type promotion.
02134 class TypePromotionHelper {
02135   /// \brief Utility function to check whether or not a sign or zero extension
02136   /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by
02137   /// either using the operands of \p Inst or promoting \p Inst.
02138   /// The type of the extension is defined by \p IsSExt.
02139   /// In other words, check if:
02140   /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType.
02141   /// #1 Promotion applies:
02142   /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...).
02143   /// #2 Operand reuses:
02144   /// ext opnd1 to ConsideredExtType.
02145   /// \p PromotedInsts maps the instructions to their type before promotion.
02146   static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType,
02147                             const InstrToOrigTy &PromotedInsts, bool IsSExt);
02148 
02149   /// \brief Utility function to determine if \p OpIdx should be promoted when
02150   /// promoting \p Inst.
02151   static bool shouldExtOperand(const Instruction *Inst, int OpIdx) {
02152     if (isa<SelectInst>(Inst) && OpIdx == 0)
02153       return false;
02154     return true;
02155   }
02156 
02157   /// \brief Utility function to promote the operand of \p Ext when this
02158   /// operand is a promotable trunc or sext or zext.
02159   /// \p PromotedInsts maps the instructions to their type before promotion.
02160   /// \p CreatedInsts[out] contains how many non-free instructions have been
02161   /// created to promote the operand of Ext.
02162   /// Newly added extensions are inserted in \p Exts.
02163   /// Newly added truncates are inserted in \p Truncs.
02164   /// Should never be called directly.
02165   /// \return The promoted value which is used instead of Ext.
02166   static Value *promoteOperandForTruncAndAnyExt(
02167       Instruction *Ext, TypePromotionTransaction &TPT,
02168       InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
02169       SmallVectorImpl<Instruction *> *Exts,
02170       SmallVectorImpl<Instruction *> *Truncs);
02171 
02172   /// \brief Utility function to promote the operand of \p Ext when this
02173   /// operand is promotable and is not a supported trunc or sext.
02174   /// \p PromotedInsts maps the instructions to their type before promotion.
02175   /// \p CreatedInsts[out] contains how many non-free instructions have been
02176   /// created to promote the operand of Ext.
02177   /// Newly added extensions are inserted in \p Exts.
02178   /// Newly added truncates are inserted in \p Truncs.
02179   /// Should never be called directly.
02180   /// \return The promoted value which is used instead of Ext.
02181   static Value *
02182   promoteOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT,
02183                          InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
02184                          SmallVectorImpl<Instruction *> *Exts,
02185                          SmallVectorImpl<Instruction *> *Truncs, bool IsSExt);
02186 
02187   /// \see promoteOperandForOther.
02188   static Value *
02189   signExtendOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT,
02190                             InstrToOrigTy &PromotedInsts,
02191                             unsigned &CreatedInsts,
02192                             SmallVectorImpl<Instruction *> *Exts,
02193                             SmallVectorImpl<Instruction *> *Truncs) {
02194     return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, Exts,
02195                                   Truncs, true);
02196   }
02197 
02198   /// \see promoteOperandForOther.
02199   static Value *
02200   zeroExtendOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT,
02201                             InstrToOrigTy &PromotedInsts,
02202                             unsigned &CreatedInsts,
02203                             SmallVectorImpl<Instruction *> *Exts,
02204                             SmallVectorImpl<Instruction *> *Truncs) {
02205     return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, Exts,
02206                                   Truncs, false);
02207   }
02208 
02209 public:
02210   /// Type for the utility function that promotes the operand of Ext.
02211   typedef Value *(*Action)(Instruction *Ext, TypePromotionTransaction &TPT,
02212                            InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
02213                            SmallVectorImpl<Instruction *> *Exts,
02214                            SmallVectorImpl<Instruction *> *Truncs);
02215   /// \brief Given a sign/zero extend instruction \p Ext, return the approriate
02216   /// action to promote the operand of \p Ext instead of using Ext.
02217   /// \return NULL if no promotable action is possible with the current
02218   /// sign extension.
02219   /// \p InsertedTruncs keeps track of all the truncate instructions inserted by
02220   /// the others CodeGenPrepare optimizations. This information is important
02221   /// because we do not want to promote these instructions as CodeGenPrepare
02222   /// will reinsert them later. Thus creating an infinite loop: create/remove.
02223   /// \p PromotedInsts maps the instructions to their type before promotion.
02224   static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedTruncs,
02225                           const TargetLowering &TLI,
02226                           const InstrToOrigTy &PromotedInsts);
02227 };
02228 
02229 bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
02230                                         Type *ConsideredExtType,
02231                                         const InstrToOrigTy &PromotedInsts,
02232                                         bool IsSExt) {
02233   // The promotion helper does not know how to deal with vector types yet.
02234   // To be able to fix that, we would need to fix the places where we
02235   // statically extend, e.g., constants and such.
02236   if (Inst->getType()->isVectorTy())
02237     return false;
02238 
02239   // We can always get through zext.
02240   if (isa<ZExtInst>(Inst))
02241     return true;
02242 
02243   // sext(sext) is ok too.
02244   if (IsSExt && isa<SExtInst>(Inst))
02245     return true;
02246 
02247   // We can get through binary operator, if it is legal. In other words, the
02248   // binary operator must have a nuw or nsw flag.
02249   const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
02250   if (BinOp && isa<OverflowingBinaryOperator>(BinOp) &&
02251       ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
02252        (IsSExt && BinOp->hasNoSignedWrap())))
02253     return true;
02254 
02255   // Check if we can do the following simplification.
02256   // ext(trunc(opnd)) --> ext(opnd)
02257   if (!isa<TruncInst>(Inst))
02258     return false;
02259 
02260   Value *OpndVal = Inst->getOperand(0);
02261   // Check if we can use this operand in the extension.
02262   // If the type is larger than the result type of the extension,
02263   // we cannot.
02264   if (!OpndVal->getType()->isIntegerTy() ||
02265       OpndVal->getType()->getIntegerBitWidth() >
02266           ConsideredExtType->getIntegerBitWidth())
02267     return false;
02268 
02269   // If the operand of the truncate is not an instruction, we will not have
02270   // any information on the dropped bits.
02271   // (Actually we could for constant but it is not worth the extra logic).
02272   Instruction *Opnd = dyn_cast<Instruction>(OpndVal);
02273   if (!Opnd)
02274     return false;
02275 
02276   // Check if the source of the type is narrow enough.
02277   // I.e., check that trunc just drops extended bits of the same kind of
02278   // the extension.
02279   // #1 get the type of the operand and check the kind of the extended bits.
02280   const Type *OpndType;
02281   InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
02282   if (It != PromotedInsts.end() && It->second.IsSExt == IsSExt)
02283     OpndType = It->second.Ty;
02284   else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
02285     OpndType = Opnd->getOperand(0)->getType();
02286   else
02287     return false;
02288 
02289   // #2 check that the truncate just drop extended bits.
02290   if (Inst->getType()->getIntegerBitWidth() >= OpndType->getIntegerBitWidth())
02291     return true;
02292 
02293   return false;
02294 }
02295 
02296 TypePromotionHelper::Action TypePromotionHelper::getAction(
02297     Instruction *Ext, const SetOfInstrs &InsertedTruncs,
02298     const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
02299   assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
02300          "Unexpected instruction type");
02301   Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0));
02302   Type *ExtTy = Ext->getType();
02303   bool IsSExt = isa<SExtInst>(Ext);
02304   // If the operand of the extension is not an instruction, we cannot
02305   // get through.
02306   // If it, check we can get through.
02307   if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt))
02308     return nullptr;
02309 
02310   // Do not promote if the operand has been added by codegenprepare.
02311   // Otherwise, it means we are undoing an optimization that is likely to be
02312   // redone, thus causing potential infinite loop.
02313   if (isa<TruncInst>(ExtOpnd) && InsertedTruncs.count(ExtOpnd))
02314     return nullptr;
02315 
02316   // SExt or Trunc instructions.
02317   // Return the related handler.
02318   if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) ||
02319       isa<ZExtInst>(ExtOpnd))
02320     return promoteOperandForTruncAndAnyExt;
02321 
02322   // Regular instruction.
02323   // Abort early if we will have to insert non-free instructions.
02324   if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType()))
02325     return nullptr;
02326   return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther;
02327 }
02328 
02329 Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
02330     llvm::Instruction *SExt, TypePromotionTransaction &TPT,
02331     InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
02332     SmallVectorImpl<Instruction *> *Exts,
02333     SmallVectorImpl<Instruction *> *Truncs) {
02334   // By construction, the operand of SExt is an instruction. Otherwise we cannot
02335   // get through it and this method should not be called.
02336   Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
02337   Value *ExtVal = SExt;
02338   if (isa<ZExtInst>(SExtOpnd)) {
02339     // Replace s|zext(zext(opnd))
02340     // => zext(opnd).
02341     Value *ZExt =
02342         TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
02343     TPT.replaceAllUsesWith(SExt, ZExt);
02344     TPT.eraseInstruction(SExt);
02345     ExtVal = ZExt;
02346   } else {
02347     // Replace z|sext(trunc(opnd)) or sext(sext(opnd))
02348     // => z|sext(opnd).
02349     TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
02350   }
02351   CreatedInsts = 0;
02352 
02353   // Remove dead code.
02354   if (SExtOpnd->use_empty())
02355     TPT.eraseInstruction(SExtOpnd);
02356 
02357   // Check if the extension is still needed.
02358   Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
02359   if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
02360     if (ExtInst && Exts)
02361       Exts->push_back(ExtInst);
02362     return ExtVal;
02363   }
02364 
02365   // At this point we have: ext ty opnd to ty.
02366   // Reassign the uses of ExtInst to the opnd and remove ExtInst.
02367   Value *NextVal = ExtInst->getOperand(0);
02368   TPT.eraseInstruction(ExtInst, NextVal);
02369   return NextVal;
02370 }
02371 
02372 Value *TypePromotionHelper::promoteOperandForOther(
02373     Instruction *Ext, TypePromotionTransaction &TPT,
02374     InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
02375     SmallVectorImpl<Instruction *> *Exts,
02376     SmallVectorImpl<Instruction *> *Truncs, bool IsSExt) {
02377   // By construction, the operand of Ext is an instruction. Otherwise we cannot
02378   // get through it and this method should not be called.
02379   Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
02380   CreatedInsts = 0;
02381   if (!ExtOpnd->hasOneUse()) {
02382     // ExtOpnd will be promoted.
02383     // All its uses, but Ext, will need to use a truncated value of the
02384     // promoted version.
02385     // Create the truncate now.
02386     Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType());
02387     if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
02388       ITrunc->removeFromParent();
02389       // Insert it just after the definition.
02390       ITrunc->insertAfter(ExtOpnd);
02391       if (Truncs)
02392         Truncs->push_back(ITrunc);
02393     }
02394 
02395     TPT.replaceAllUsesWith(ExtOpnd, Trunc);
02396     // Restore the operand of Ext (which has been replace by the previous call
02397     // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
02398     TPT.setOperand(Ext, 0, ExtOpnd);
02399   }
02400 
02401   // Get through the Instruction:
02402   // 1. Update its type.
02403   // 2. Replace the uses of Ext by Inst.
02404   // 3. Extend each operand that needs to be extended.
02405 
02406   // Remember the original type of the instruction before promotion.
02407   // This is useful to know that the high bits are sign extended bits.
02408   PromotedInsts.insert(std::pair<Instruction *, TypeIsSExt>(
02409       ExtOpnd, TypeIsSExt(ExtOpnd->getType(), IsSExt)));
02410   // Step #1.
02411   TPT.mutateType(ExtOpnd, Ext->getType());
02412   // Step #2.
02413   TPT.replaceAllUsesWith(Ext, ExtOpnd);
02414   // Step #3.
02415   Instruction *ExtForOpnd = Ext;
02416 
02417   DEBUG(dbgs() << "Propagate Ext to operands\n");
02418   for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
02419        ++OpIdx) {
02420     DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n');
02421     if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() ||
02422         !shouldExtOperand(ExtOpnd, OpIdx)) {
02423       DEBUG(dbgs() << "No need to propagate\n");
02424       continue;
02425     }
02426     // Check if we can statically extend the operand.
02427     Value *Opnd = ExtOpnd->getOperand(OpIdx);
02428     if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
02429       DEBUG(dbgs() << "Statically extend\n");
02430       unsigned BitWidth = Ext->getType()->getIntegerBitWidth();
02431       APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth)
02432                             : Cst->getValue().zext(BitWidth);
02433       TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal));
02434       continue;
02435     }
02436     // UndefValue are typed, so we have to statically sign extend them.
02437     if (isa<UndefValue>(Opnd)) {
02438       DEBUG(dbgs() << "Statically extend\n");
02439       TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType()));
02440       continue;
02441     }
02442 
02443     // Otherwise we have to explicity sign extend the operand.
02444     // Check if Ext was reused to extend an operand.
02445     if (!ExtForOpnd) {
02446       // If yes, create a new one.
02447       DEBUG(dbgs() << "More operands to ext\n");
02448       Value *ValForExtOpnd = IsSExt ? TPT.createSExt(Ext, Opnd, Ext->getType())
02449         : TPT.createZExt(Ext, Opnd, Ext->getType());
02450       if (!isa<Instruction>(ValForExtOpnd)) {
02451         TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
02452         continue;
02453       }
02454       ExtForOpnd = cast<Instruction>(ValForExtOpnd);
02455       ++CreatedInsts;
02456     }
02457     if (Exts)
02458       Exts->push_back(ExtForOpnd);
02459     TPT.setOperand(ExtForOpnd, 0, Opnd);
02460 
02461     // Move the sign extension before the insertion point.
02462     TPT.moveBefore(ExtForOpnd, ExtOpnd);
02463     TPT.setOperand(ExtOpnd, OpIdx, ExtForOpnd);
02464     // If more sext are required, new instructions will have to be created.
02465     ExtForOpnd = nullptr;
02466   }
02467   if (ExtForOpnd == Ext) {
02468     DEBUG(dbgs() << "Extension is useless now\n");
02469     TPT.eraseInstruction(Ext);
02470   }
02471   return ExtOpnd;
02472 }
02473 
02474 /// IsPromotionProfitable - Check whether or not promoting an instruction
02475 /// to a wider type was profitable.
02476 /// \p MatchedSize gives the number of instructions that have been matched
02477 /// in the addressing mode after the promotion was applied.
02478 /// \p SizeWithPromotion gives the number of created instructions for
02479 /// the promotion plus the number of instructions that have been
02480 /// matched in the addressing mode before the promotion.
02481 /// \p PromotedOperand is the value that has been promoted.
02482 /// \return True if the promotion is profitable, false otherwise.
02483 bool
02484 AddressingModeMatcher::IsPromotionProfitable(unsigned MatchedSize,
02485                                              unsigned SizeWithPromotion,
02486                                              Value *PromotedOperand) const {
02487   // We folded less instructions than what we created to promote the operand.
02488   // This is not profitable.
02489   if (MatchedSize < SizeWithPromotion)
02490     return false;
02491   if (MatchedSize > SizeWithPromotion)
02492     return true;
02493   // The promotion is neutral but it may help folding the sign extension in
02494   // loads for instance.
02495   // Check that we did not create an illegal instruction.
02496   return isPromotedInstructionLegal(TLI, PromotedOperand);
02497 }
02498 
02499 /// MatchOperationAddr - Given an instruction or constant expr, see if we can
02500 /// fold the operation into the addressing mode.  If so, update the addressing
02501 /// mode and return true, otherwise return false without modifying AddrMode.
02502 /// If \p MovedAway is not NULL, it contains the information of whether or
02503 /// not AddrInst has to be folded into the addressing mode on success.
02504 /// If \p MovedAway == true, \p AddrInst will not be part of the addressing
02505 /// because it has been moved away.
02506 /// Thus AddrInst must not be added in the matched instructions.
02507 /// This state can happen when AddrInst is a sext, since it may be moved away.
02508 /// Therefore, AddrInst may not be valid when MovedAway is true and it must
02509 /// not be referenced anymore.
02510 bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
02511                                                unsigned Depth,
02512                                                bool *MovedAway) {
02513   // Avoid exponential behavior on extremely deep expression trees.
02514   if (Depth >= 5) return false;
02515 
02516   // By default, all matched instructions stay in place.
02517   if (MovedAway)
02518     *MovedAway = false;
02519 
02520   switch (Opcode) {
02521   case Instruction::PtrToInt:
02522     // PtrToInt is always a noop, as we know that the int type is pointer sized.
02523     return MatchAddr(AddrInst->getOperand(0), Depth);
02524   case Instruction::IntToPtr:
02525     // This inttoptr is a no-op if the integer type is pointer sized.
02526     if (TLI.getValueType(AddrInst->getOperand(0)->getType()) ==
02527         TLI.getPointerTy(AddrInst->getType()->getPointerAddressSpace()))
02528       return MatchAddr(AddrInst->getOperand(0), Depth);
02529     return false;
02530   case Instruction::BitCast:
02531   case Instruction::AddrSpaceCast:
02532     // BitCast is always a noop, and we can handle it as long as it is
02533     // int->int or pointer->pointer (we don't want int<->fp or something).
02534     if ((AddrInst->getOperand(0)->getType()->isPointerTy() ||
02535          AddrInst->getOperand(0)->getType()->isIntegerTy()) &&
02536         // Don't touch identity bitcasts.  These were probably put here by LSR,
02537         // and we don't want to mess around with them.  Assume it knows what it
02538         // is doing.
02539         AddrInst->getOperand(0)->getType() != AddrInst->getType())
02540       return MatchAddr(AddrInst->getOperand(0), Depth);
02541     return false;
02542   case Instruction::Add: {
02543     // Check to see if we can merge in the RHS then the LHS.  If so, we win.
02544     ExtAddrMode BackupAddrMode = AddrMode;
02545     unsigned OldSize = AddrModeInsts.size();
02546     // Start a transaction at this point.
02547     // The LHS may match but not the RHS.
02548     // Therefore, we need a higher level restoration point to undo partially
02549     // matched operation.
02550     TypePromotionTransaction::ConstRestorationPt LastKnownGood =
02551         TPT.getRestorationPoint();
02552 
02553     if (MatchAddr(AddrInst->getOperand(1), Depth+1) &&
02554         MatchAddr(AddrInst->getOperand(0), Depth+1))
02555       return true;
02556 
02557     // Restore the old addr mode info.
02558     AddrMode = BackupAddrMode;
02559     AddrModeInsts.resize(OldSize);
02560     TPT.rollback(LastKnownGood);
02561 
02562     // Otherwise this was over-aggressive.  Try merging in the LHS then the RHS.
02563     if (MatchAddr(AddrInst->getOperand(0), Depth+1) &&
02564         MatchAddr(AddrInst->getOperand(1), Depth+1))
02565       return true;
02566 
02567     // Otherwise we definitely can't merge the ADD in.
02568     AddrMode = BackupAddrMode;
02569     AddrModeInsts.resize(OldSize);
02570     TPT.rollback(LastKnownGood);
02571     break;
02572   }
02573   //case Instruction::Or:
02574   // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
02575   //break;
02576   case Instruction::Mul:
02577   case Instruction::Shl: {
02578     // Can only handle X*C and X << C.
02579     ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
02580     if (!RHS)
02581       return false;
02582     int64_t Scale = RHS->getSExtValue();
02583     if (Opcode == Instruction::Shl)
02584       Scale = 1LL << Scale;
02585 
02586     return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth);
02587   }
02588   case Instruction::GetElementPtr: {
02589     // Scan the GEP.  We check it if it contains constant offsets and at most
02590     // one variable offset.
02591     int VariableOperand = -1;
02592     unsigned VariableScale = 0;
02593 
02594     int64_t ConstantOffset = 0;
02595     const DataLayout *TD = TLI.getDataLayout();
02596     gep_type_iterator GTI = gep_type_begin(AddrInst);
02597     for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
02598       if (StructType *STy = dyn_cast<StructType>(*GTI)) {
02599         const StructLayout *SL = TD->getStructLayout(STy);
02600         unsigned Idx =
02601           cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
02602         ConstantOffset += SL->getElementOffset(Idx);
02603       } else {
02604         uint64_t TypeSize = TD->getTypeAllocSize(GTI.getIndexedType());
02605         if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
02606           ConstantOffset += CI->getSExtValue()*TypeSize;
02607         } else if (TypeSize) {  // Scales of zero don't do anything.
02608           // We only allow one variable index at the moment.
02609           if (VariableOperand != -1)
02610             return false;
02611 
02612           // Remember the variable index.
02613           VariableOperand = i;
02614           VariableScale = TypeSize;
02615         }
02616       }
02617     }
02618 
02619     // A common case is for the GEP to only do a constant offset.  In this case,
02620     // just add it to the disp field and check validity.
02621     if (VariableOperand == -1) {
02622       AddrMode.BaseOffs += ConstantOffset;
02623       if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){
02624         // Check to see if we can fold the base pointer in too.
02625         if (MatchAddr(AddrInst->getOperand(0), Depth+1))
02626           return true;
02627       }
02628       AddrMode.BaseOffs -= ConstantOffset;
02629       return false;
02630     }
02631 
02632     // Save the valid addressing mode in case we can't match.
02633     ExtAddrMode BackupAddrMode = AddrMode;
02634     unsigned OldSize = AddrModeInsts.size();
02635 
02636     // See if the scale and offset amount is valid for this target.
02637     AddrMode.BaseOffs += ConstantOffset;
02638 
02639     // Match the base operand of the GEP.
02640     if (!MatchAddr(AddrInst->getOperand(0), Depth+1)) {
02641       // If it couldn't be matched, just stuff the value in a register.
02642       if (AddrMode.HasBaseReg) {
02643         AddrMode = BackupAddrMode;
02644         AddrModeInsts.resize(OldSize);
02645         return false;
02646       }
02647       AddrMode.HasBaseReg = true;
02648       AddrMode.BaseReg = AddrInst->getOperand(0);
02649     }
02650 
02651     // Match the remaining variable portion of the GEP.
02652     if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
02653                           Depth)) {
02654       // If it couldn't be matched, try stuffing the base into a register
02655       // instead of matching it, and retrying the match of the scale.
02656       AddrMode = BackupAddrMode;
02657       AddrModeInsts.resize(OldSize);
02658       if (AddrMode.HasBaseReg)
02659         return false;
02660       AddrMode.HasBaseReg = true;
02661       AddrMode.BaseReg = AddrInst->getOperand(0);
02662       AddrMode.BaseOffs += ConstantOffset;
02663       if (!MatchScaledValue(AddrInst->getOperand(VariableOperand),
02664                             VariableScale, Depth)) {
02665         // If even that didn't work, bail.
02666         AddrMode = BackupAddrMode;
02667         AddrModeInsts.resize(OldSize);
02668         return false;
02669       }
02670     }
02671 
02672     return true;
02673   }
02674   case Instruction::SExt:
02675   case Instruction::ZExt: {
02676     Instruction *Ext = dyn_cast<Instruction>(AddrInst);
02677     if (!Ext)
02678       return false;
02679 
02680     // Try to move this ext out of the way of the addressing mode.
02681     // Ask for a method for doing so.
02682     TypePromotionHelper::Action TPH =
02683         TypePromotionHelper::getAction(Ext, InsertedTruncs, TLI, PromotedInsts);
02684     if (!TPH)
02685       return false;
02686 
02687     TypePromotionTransaction::ConstRestorationPt LastKnownGood =
02688         TPT.getRestorationPoint();
02689     unsigned CreatedInsts = 0;
02690     Value *PromotedOperand =
02691         TPH(Ext, TPT, PromotedInsts, CreatedInsts, nullptr, nullptr);
02692     // SExt has been moved away.
02693     // Thus either it will be rematched later in the recursive calls or it is
02694     // gone. Anyway, we must not fold it into the addressing mode at this point.
02695     // E.g.,
02696     // op = add opnd, 1
02697     // idx = ext op
02698     // addr = gep base, idx
02699     // is now:
02700     // promotedOpnd = ext opnd            <- no match here
02701     // op = promoted_add promotedOpnd, 1  <- match (later in recursive calls)
02702     // addr = gep base, op                <- match
02703     if (MovedAway)
02704       *MovedAway = true;
02705 
02706     assert(PromotedOperand &&
02707            "TypePromotionHelper should have filtered out those cases");
02708 
02709     ExtAddrMode BackupAddrMode = AddrMode;
02710     unsigned OldSize = AddrModeInsts.size();
02711 
02712     if (!MatchAddr(PromotedOperand, Depth) ||
02713         !IsPromotionProfitable(AddrModeInsts.size(), OldSize + CreatedInsts,
02714                                PromotedOperand)) {
02715       AddrMode = BackupAddrMode;
02716       AddrModeInsts.resize(OldSize);
02717       DEBUG(dbgs() << "Sign extension does not pay off: rollback\n");
02718       TPT.rollback(LastKnownGood);
02719       return false;
02720     }
02721     return true;
02722   }
02723   }
02724   return false;
02725 }
02726 
02727 /// MatchAddr - If we can, try to add the value of 'Addr' into the current
02728 /// addressing mode.  If Addr can't be added to AddrMode this returns false and
02729 /// leaves AddrMode unmodified.  This assumes that Addr is either a pointer type
02730 /// or intptr_t for the target.
02731 ///
02732 bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
02733   // Start a transaction at this point that we will rollback if the matching
02734   // fails.
02735   TypePromotionTransaction::ConstRestorationPt LastKnownGood =
02736       TPT.getRestorationPoint();
02737   if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
02738     // Fold in immediates if legal for the target.
02739     AddrMode.BaseOffs += CI->getSExtValue();
02740     if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
02741       return true;
02742     AddrMode.BaseOffs -= CI->getSExtValue();
02743   } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
02744     // If this is a global variable, try to fold it into the addressing mode.
02745     if (!AddrMode.BaseGV) {
02746       AddrMode.BaseGV = GV;
02747       if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
02748         return true;
02749       AddrMode.BaseGV = nullptr;
02750     }
02751   } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
02752     ExtAddrMode BackupAddrMode = AddrMode;
02753     unsigned OldSize = AddrModeInsts.size();
02754 
02755     // Check to see if it is possible to fold this operation.
02756     bool MovedAway = false;
02757     if (MatchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
02758       // This instruction may have been move away. If so, there is nothing
02759       // to check here.
02760       if (MovedAway)
02761         return true;
02762       // Okay, it's possible to fold this.  Check to see if it is actually
02763       // *profitable* to do so.  We use a simple cost model to avoid increasing
02764       // register pressure too much.
02765       if (I->hasOneUse() ||
02766           IsProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
02767         AddrModeInsts.push_back(I);
02768         return true;
02769       }
02770 
02771       // It isn't profitable to do this, roll back.
02772       //cerr << "NOT FOLDING: " << *I;
02773       AddrMode = BackupAddrMode;
02774       AddrModeInsts.resize(OldSize);
02775       TPT.rollback(LastKnownGood);
02776     }
02777   } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
02778     if (MatchOperationAddr(CE, CE->getOpcode(), Depth))
02779       return true;
02780     TPT.rollback(LastKnownGood);
02781   } else if (isa<ConstantPointerNull>(Addr)) {
02782     // Null pointer gets folded without affecting the addressing mode.
02783     return true;
02784   }
02785 
02786   // Worse case, the target should support [reg] addressing modes. :)
02787   if (!AddrMode.HasBaseReg) {
02788     AddrMode.HasBaseReg = true;
02789     AddrMode.BaseReg = Addr;
02790     // Still check for legality in case the target supports [imm] but not [i+r].
02791     if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
02792       return true;
02793     AddrMode.HasBaseReg = false;
02794     AddrMode.BaseReg = nullptr;
02795   }
02796 
02797   // If the base register is already taken, see if we can do [r+r].
02798   if (AddrMode.Scale == 0) {
02799     AddrMode.Scale = 1;
02800     AddrMode.ScaledReg = Addr;
02801     if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
02802       return true;
02803     AddrMode.Scale = 0;
02804     AddrMode.ScaledReg = nullptr;
02805   }
02806   // Couldn't match.
02807   TPT.rollback(LastKnownGood);
02808   return false;
02809 }
02810 
02811 /// IsOperandAMemoryOperand - Check to see if all uses of OpVal by the specified
02812 /// inline asm call are due to memory operands.  If so, return true, otherwise
02813 /// return false.
02814 static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
02815                                     const TargetLowering &TLI) {
02816   TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(ImmutableCallSite(CI));
02817   for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
02818     TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
02819 
02820     // Compute the constraint code and ConstraintType to use.
02821     TLI.ComputeConstraintToUse(OpInfo, SDValue());
02822 
02823     // If this asm operand is our Value*, and if it isn't an indirect memory
02824     // operand, we can't fold it!
02825     if (OpInfo.CallOperandVal == OpVal &&
02826         (OpInfo.ConstraintType != TargetLowering::C_Memory ||
02827          !OpInfo.isIndirect))
02828       return false;
02829   }
02830 
02831   return true;
02832 }
02833 
02834 /// FindAllMemoryUses - Recursively walk all the uses of I until we find a
02835 /// memory use.  If we find an obviously non-foldable instruction, return true.
02836 /// Add the ultimately found memory instructions to MemoryUses.
02837 static bool FindAllMemoryUses(Instruction *I,
02838                 SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses,
02839                               SmallPtrSetImpl<Instruction*> &ConsideredInsts,
02840                               const TargetLowering &TLI) {
02841   // If we already considered this instruction, we're done.
02842   if (!ConsideredInsts.insert(I).second)
02843     return false;
02844 
02845   // If this is an obviously unfoldable instruction, bail out.
02846   if (!MightBeFoldableInst(I))
02847     return true;
02848 
02849   // Loop over all the uses, recursively processing them.
02850   for (Use &U : I->uses()) {
02851     Instruction *UserI = cast<Instruction>(U.getUser());
02852 
02853     if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
02854       MemoryUses.push_back(std::make_pair(LI, U.getOperandNo()));
02855       continue;
02856     }
02857 
02858     if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
02859       unsigned opNo = U.getOperandNo();
02860       if (opNo == 0) return true; // Storing addr, not into addr.
02861       MemoryUses.push_back(std::make_pair(SI, opNo));
02862       continue;
02863     }
02864 
02865     if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
02866       InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
02867       if (!IA) return true;
02868 
02869       // If this is a memory operand, we're cool, otherwise bail out.
02870       if (!IsOperandAMemoryOperand(CI, IA, I, TLI))
02871         return true;
02872       continue;
02873     }
02874 
02875     if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI))
02876       return true;
02877   }
02878 
02879   return false;
02880 }
02881 
02882 /// ValueAlreadyLiveAtInst - Retrn true if Val is already known to be live at
02883 /// the use site that we're folding it into.  If so, there is no cost to
02884 /// include it in the addressing mode.  KnownLive1 and KnownLive2 are two values
02885 /// that we know are live at the instruction already.
02886 bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
02887                                                    Value *KnownLive2) {
02888   // If Val is either of the known-live values, we know it is live!
02889   if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2)
02890     return true;
02891 
02892   // All values other than instructions and arguments (e.g. constants) are live.
02893   if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;
02894 
02895   // If Val is a constant sized alloca in the entry block, it is live, this is
02896   // true because it is just a reference to the stack/frame pointer, which is
02897   // live for the whole function.
02898   if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
02899     if (AI->isStaticAlloca())
02900       return true;
02901 
02902   // Check to see if this value is already used in the memory instruction's
02903   // block.  If so, it's already live into the block at the very least, so we
02904   // can reasonably fold it.
02905   return Val->isUsedInBasicBlock(MemoryInst->getParent());
02906 }
02907 
02908 /// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing
02909 /// mode of the machine to fold the specified instruction into a load or store
02910 /// that ultimately uses it.  However, the specified instruction has multiple
02911 /// uses.  Given this, it may actually increase register pressure to fold it
02912 /// into the load.  For example, consider this code:
02913 ///
02914 ///     X = ...
02915 ///     Y = X+1
02916 ///     use(Y)   -> nonload/store
02917 ///     Z = Y+1
02918 ///     load Z
02919 ///
02920 /// In this case, Y has multiple uses, and can be folded into the load of Z
02921 /// (yielding load [X+2]).  However, doing this will cause both "X" and "X+1" to
02922 /// be live at the use(Y) line.  If we don't fold Y into load Z, we use one
02923 /// fewer register.  Since Y can't be folded into "use(Y)" we don't increase the
02924 /// number of computations either.
02925 ///
02926 /// Note that this (like most of CodeGenPrepare) is just a rough heuristic.  If
02927 /// X was live across 'load Z' for other reasons, we actually *would* want to
02928 /// fold the addressing mode in the Z case.  This would make Y die earlier.
02929 bool AddressingModeMatcher::
02930 IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
02931                                      ExtAddrMode &AMAfter) {
02932   if (IgnoreProfitability) return true;
02933 
02934   // AMBefore is the addressing mode before this instruction was folded into it,
02935   // and AMAfter is the addressing mode after the instruction was folded.  Get
02936   // the set of registers referenced by AMAfter and subtract out those
02937   // referenced by AMBefore: this is the set of values which folding in this
02938   // address extends the lifetime of.
02939   //
02940   // Note that there are only two potential values being referenced here,
02941   // BaseReg and ScaleReg (global addresses are always available, as are any
02942   // folded immediates).
02943   Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
02944 
02945   // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
02946   // lifetime wasn't extended by adding this instruction.
02947   if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
02948     BaseReg = nullptr;
02949   if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
02950     ScaledReg = nullptr;
02951 
02952   // If folding this instruction (and it's subexprs) didn't extend any live
02953   // ranges, we're ok with it.
02954   if (!BaseReg && !ScaledReg)
02955     return true;
02956 
02957   // If all uses of this instruction are ultimately load/store/inlineasm's,
02958   // check to see if their addressing modes will include this instruction.  If
02959   // so, we can fold it into all uses, so it doesn't matter if it has multiple
02960   // uses.
02961   SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
02962   SmallPtrSet<Instruction*, 16> ConsideredInsts;
02963   if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI))
02964     return false;  // Has a non-memory, non-foldable use!
02965 
02966   // Now that we know that all uses of this instruction are part of a chain of
02967   // computation involving only operations that could theoretically be folded
02968   // into a memory use, loop over each of these uses and see if they could
02969   // *actually* fold the instruction.
02970   SmallVector<Instruction*, 32> MatchedAddrModeInsts;
02971   for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
02972     Instruction *User = MemoryUses[i].first;
02973     unsigned OpNo = MemoryUses[i].second;
02974 
02975     // Get the access type of this use.  If the use isn't a pointer, we don't
02976     // know what it accesses.
02977     Value *Address = User->getOperand(OpNo);
02978     if (!Address->getType()->isPointerTy())
02979       return false;
02980     Type *AddressAccessTy = Address->getType()->getPointerElementType();
02981 
02982     // Do a match against the root of this address, ignoring profitability. This
02983     // will tell us if the addressing mode for the memory operation will
02984     // *actually* cover the shared instruction.
02985     ExtAddrMode Result;
02986     TypePromotionTransaction::ConstRestorationPt LastKnownGood =
02987         TPT.getRestorationPoint();
02988     AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy,
02989                                   MemoryInst, Result, InsertedTruncs,
02990                                   PromotedInsts, TPT);
02991     Matcher.IgnoreProfitability = true;
02992     bool Success = Matcher.MatchAddr(Address, 0);
02993     (void)Success; assert(Success && "Couldn't select *anything*?");
02994 
02995     // The match was to check the profitability, the changes made are not
02996     // part of the original matcher. Therefore, they should be dropped
02997     // otherwise the original matcher will not present the right state.
02998     TPT.rollback(LastKnownGood);
02999 
03000     // If the match didn't cover I, then it won't be shared by it.
03001     if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(),
03002                   I) == MatchedAddrModeInsts.end())
03003       return false;
03004 
03005     MatchedAddrModeInsts.clear();
03006   }
03007 
03008   return true;
03009 }
03010 
03011 } // end anonymous namespace
03012 
03013 /// IsNonLocalValue - Return true if the specified values are defined in a
03014 /// different basic block than BB.
03015 static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
03016   if (Instruction *I = dyn_cast<Instruction>(V))
03017     return I->getParent() != BB;
03018   return false;
03019 }
03020 
03021 /// OptimizeMemoryInst - Load and Store Instructions often have
03022 /// addressing modes that can do significant amounts of computation.  As such,
03023 /// instruction selection will try to get the load or store to do as much
03024 /// computation as possible for the program.  The problem is that isel can only
03025 /// see within a single block.  As such, we sink as much legal addressing mode
03026 /// stuff into the block as possible.
03027 ///
03028 /// This method is used to optimize both load/store and inline asms with memory
03029 /// operands.
03030 bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
03031                                         Type *AccessTy) {
03032   Value *Repl = Addr;
03033 
03034   // Try to collapse single-value PHI nodes.  This is necessary to undo
03035   // unprofitable PRE transformations.
03036   SmallVector<Value*, 8> worklist;
03037   SmallPtrSet<Value*, 16> Visited;
03038   worklist.push_back(Addr);
03039 
03040   // Use a worklist to iteratively look through PHI nodes, and ensure that
03041   // the addressing mode obtained from the non-PHI roots of the graph
03042   // are equivalent.
03043   Value *Consensus = nullptr;
03044   unsigned NumUsesConsensus = 0;
03045   bool IsNumUsesConsensusValid = false;
03046   SmallVector<Instruction*, 16> AddrModeInsts;
03047   ExtAddrMode AddrMode;
03048   TypePromotionTransaction TPT;
03049   TypePromotionTransaction::ConstRestorationPt LastKnownGood =
03050       TPT.getRestorationPoint();
03051   while (!worklist.empty()) {
03052     Value *V = worklist.back();
03053     worklist.pop_back();
03054 
03055     // Break use-def graph loops.
03056     if (!Visited.insert(V).second) {
03057       Consensus = nullptr;
03058       break;
03059     }
03060 
03061     // For a PHI node, push all of its incoming values.
03062     if (PHINode *P = dyn_cast<PHINode>(V)) {
03063       for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i)
03064         worklist.push_back(P->getIncomingValue(i));
03065       continue;
03066     }
03067 
03068     // For non-PHIs, determine the addressing mode being computed.
03069     SmallVector<Instruction*, 16> NewAddrModeInsts;
03070     ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
03071         V, AccessTy, MemoryInst, NewAddrModeInsts, *TLI, InsertedTruncsSet,
03072         PromotedInsts, TPT);
03073 
03074     // This check is broken into two cases with very similar code to avoid using
03075     // getNumUses() as much as possible. Some values have a lot of uses, so
03076     // calling getNumUses() unconditionally caused a significant compile-time
03077     // regression.
03078     if (!Consensus) {
03079       Consensus = V;
03080       AddrMode = NewAddrMode;
03081       AddrModeInsts = NewAddrModeInsts;
03082       continue;
03083     } else if (NewAddrMode == AddrMode) {
03084       if (!IsNumUsesConsensusValid) {
03085         NumUsesConsensus = Consensus->getNumUses();
03086         IsNumUsesConsensusValid = true;
03087       }
03088 
03089       // Ensure that the obtained addressing mode is equivalent to that obtained
03090       // for all other roots of the PHI traversal.  Also, when choosing one
03091       // such root as representative, select the one with the most uses in order
03092       // to keep the cost modeling heuristics in AddressingModeMatcher
03093       // applicable.
03094       unsigned NumUses = V->getNumUses();
03095       if (NumUses > NumUsesConsensus) {
03096         Consensus = V;
03097         NumUsesConsensus = NumUses;
03098         AddrModeInsts = NewAddrModeInsts;
03099       }
03100       continue;
03101     }
03102 
03103     Consensus = nullptr;
03104     break;
03105   }
03106 
03107   // If the addressing mode couldn't be determined, or if multiple different
03108   // ones were determined, bail out now.
03109   if (!Consensus) {
03110     TPT.rollback(LastKnownGood);
03111     return false;
03112   }
03113   TPT.commit();
03114 
03115   // Check to see if any of the instructions supersumed by this addr mode are
03116   // non-local to I's BB.
03117   bool AnyNonLocal = false;
03118   for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) {
03119     if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) {
03120       AnyNonLocal = true;
03121       break;
03122     }
03123   }
03124 
03125   // If all the instructions matched are already in this BB, don't do anything.
03126   if (!AnyNonLocal) {
03127     DEBUG(dbgs() << "CGP: Found      local addrmode: " << AddrMode << "\n");
03128     return false;
03129   }
03130 
03131   // Insert this computation right after this user.  Since our caller is
03132   // scanning from the top of the BB to the bottom, reuse of the expr are
03133   // guaranteed to happen later.
03134   IRBuilder<> Builder(MemoryInst);
03135 
03136   // Now that we determined the addressing expression we want to use and know
03137   // that we have to sink it into this block.  Check to see if we have already
03138   // done this for some other load/store instr in this block.  If so, reuse the
03139   // computation.
03140   Value *&SunkAddr = SunkAddrs[Addr];
03141   if (SunkAddr) {
03142     DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
03143                  << *MemoryInst << "\n");
03144     if (SunkAddr->getType() != Addr->getType())
03145       SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
03146   } else if (AddrSinkUsingGEPs ||
03147              (!AddrSinkUsingGEPs.getNumOccurrences() && TM &&
03148               TM->getSubtargetImpl(*MemoryInst->getParent()->getParent())
03149                   ->useAA())) {
03150     // By default, we use the GEP-based method when AA is used later. This
03151     // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
03152     DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
03153                  << *MemoryInst << "\n");
03154     Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType());
03155     Value *ResultPtr = nullptr, *ResultIndex = nullptr;
03156 
03157     // First, find the pointer.
03158     if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) {
03159       ResultPtr = AddrMode.BaseReg;
03160       AddrMode.BaseReg = nullptr;
03161     }
03162 
03163     if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) {
03164       // We can't add more than one pointer together, nor can we scale a
03165       // pointer (both of which seem meaningless).
03166       if (ResultPtr || AddrMode.Scale != 1)
03167         return false;
03168 
03169       ResultPtr = AddrMode.ScaledReg;
03170       AddrMode.Scale = 0;
03171     }
03172 
03173     if (AddrMode.BaseGV) {
03174       if (ResultPtr)
03175         return false;
03176 
03177       ResultPtr = AddrMode.BaseGV;
03178     }
03179 
03180     // If the real base value actually came from an inttoptr, then the matcher
03181     // will look through it and provide only the integer value. In that case,
03182     // use it here.
03183     if (!ResultPtr && AddrMode.BaseReg) {
03184       ResultPtr =
03185         Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(), "sunkaddr");
03186       AddrMode.BaseReg = nullptr;
03187     } else if (!ResultPtr && AddrMode.Scale == 1) {
03188       ResultPtr =
03189         Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(), "sunkaddr");
03190       AddrMode.Scale = 0;
03191     }
03192 
03193     if (!ResultPtr &&
03194         !AddrMode.BaseReg && !AddrMode.Scale && !AddrMode.BaseOffs) {
03195       SunkAddr = Constant::getNullValue(Addr->getType());
03196     } else if (!ResultPtr) {
03197       return false;
03198     } else {
03199       Type *I8PtrTy =
03200         Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace());
03201 
03202       // Start with the base register. Do this first so that subsequent address
03203       // matching finds it last, which will prevent it from trying to match it
03204       // as the scaled value in case it happens to be a mul. That would be
03205       // problematic if we've sunk a different mul for the scale, because then
03206       // we'd end up sinking both muls.
03207       if (AddrMode.BaseReg) {
03208         Value *V = AddrMode.BaseReg;
03209         if (V->getType() != IntPtrTy)
03210           V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
03211 
03212         ResultIndex = V;
03213       }
03214 
03215       // Add the scale value.
03216       if (AddrMode.Scale) {
03217         Value *V = AddrMode.ScaledReg;
03218         if (V->getType() == IntPtrTy) {
03219           // done.
03220         } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
03221                    cast<IntegerType>(V->getType())->getBitWidth()) {
03222           V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
03223         } else {
03224           // It is only safe to sign extend the BaseReg if we know that the math
03225           // required to create it did not overflow before we extend it. Since
03226           // the original IR value was tossed in favor of a constant back when
03227           // the AddrMode was created we need to bail out gracefully if widths
03228           // do not match instead of extending it.
03229           Instruction *I = dyn_cast_or_null<Instruction>(ResultIndex);
03230           if (I && (ResultIndex != AddrMode.BaseReg))
03231             I->eraseFromParent();
03232           return false;
03233         }
03234 
03235         if (AddrMode.Scale != 1)
03236           V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
03237                                 "sunkaddr");
03238         if (ResultIndex)
03239           ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr");
03240         else
03241           ResultIndex = V;
03242       }
03243 
03244       // Add in the Base Offset if present.
03245       if (AddrMode.BaseOffs) {
03246         Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
03247         if (ResultIndex) {
03248           // We need to add this separately from the scale above to help with
03249           // SDAG consecutive load/store merging.
03250           if (ResultPtr->getType() != I8PtrTy)
03251             ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
03252           ResultPtr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr");
03253         }
03254 
03255         ResultIndex = V;
03256       }
03257 
03258       if (!ResultIndex) {
03259         SunkAddr = ResultPtr;
03260       } else {
03261         if (ResultPtr->getType() != I8PtrTy)
03262           ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
03263         SunkAddr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr");
03264       }
03265 
03266       if (SunkAddr->getType() != Addr->getType())
03267         SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
03268     }
03269   } else {
03270     DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
03271                  << *MemoryInst << "\n");
03272     Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType());
03273     Value *Result = nullptr;
03274 
03275     // Start with the base register. Do this first so that subsequent address
03276     // matching finds it last, which will prevent it from trying to match it
03277     // as the scaled value in case it happens to be a mul. That would be
03278     // problematic if we've sunk a different mul for the scale, because then
03279     // we'd end up sinking both muls.
03280     if (AddrMode.BaseReg) {
03281       Value *V = AddrMode.BaseReg;
03282       if (V->getType()->isPointerTy())
03283         V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
03284       if (V->getType() != IntPtrTy)
03285         V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
03286       Result = V;
03287     }
03288 
03289     // Add the scale value.
03290     if (AddrMode.Scale) {
03291       Value *V = AddrMode.ScaledReg;
03292       if (V->getType() == IntPtrTy) {
03293         // done.
03294       } else if (V->getType()->isPointerTy()) {
03295         V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
03296       } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
03297                  cast<IntegerType>(V->getType())->getBitWidth()) {
03298         V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
03299       } else {
03300         // It is only safe to sign extend the BaseReg if we know that the math
03301         // required to create it did not overflow before we extend it. Since
03302         // the original IR value was tossed in favor of a constant back when
03303         // the AddrMode was created we need to bail out gracefully if widths
03304         // do not match instead of extending it.
03305         Instruction *I = dyn_cast_or_null<Instruction>(Result);
03306         if (I && (Result != AddrMode.BaseReg))
03307           I->eraseFromParent();
03308         return false;
03309       }
03310       if (AddrMode.Scale != 1)
03311         V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
03312                               "sunkaddr");
03313       if (Result)
03314         Result = Builder.CreateAdd(Result, V, "sunkaddr");
03315       else
03316         Result = V;
03317     }
03318 
03319     // Add in the BaseGV if present.
03320     if (AddrMode.BaseGV) {
03321       Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr");
03322       if (Result)
03323         Result = Builder.CreateAdd(Result, V, "sunkaddr");
03324       else
03325         Result = V;
03326     }
03327 
03328     // Add in the Base Offset if present.
03329     if (AddrMode.BaseOffs) {
03330       Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
03331       if (Result)
03332         Result = Builder.CreateAdd(Result, V, "sunkaddr");
03333       else
03334         Result = V;
03335     }
03336 
03337     if (!Result)
03338       SunkAddr = Constant::getNullValue(Addr->getType());
03339     else
03340       SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
03341   }
03342 
03343   MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
03344 
03345   // If we have no uses, recursively delete the value and all dead instructions
03346   // using it.
03347   if (Repl->use_empty()) {
03348     // This can cause recursive deletion, which can invalidate our iterator.
03349     // Use a WeakVH to hold onto it in case this happens.
03350     WeakVH IterHandle(CurInstIterator);
03351     BasicBlock *BB = CurInstIterator->getParent();
03352 
03353     RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo);
03354 
03355     if (IterHandle != CurInstIterator) {
03356       // If the iterator instruction was recursively deleted, start over at the
03357       // start of the block.
03358       CurInstIterator = BB->begin();
03359       SunkAddrs.clear();
03360     }
03361   }
03362   ++NumMemoryInsts;
03363   return true;
03364 }
03365 
03366 /// OptimizeInlineAsmInst - If there are any memory operands, use
03367 /// OptimizeMemoryInst to sink their address computing into the block when
03368 /// possible / profitable.
03369 bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
03370   bool MadeChange = false;
03371 
03372   TargetLowering::AsmOperandInfoVector
03373     TargetConstraints = TLI->ParseConstraints(CS);
03374   unsigned ArgNo = 0;
03375   for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
03376     TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
03377 
03378     // Compute the constraint code and ConstraintType to use.
03379     TLI->ComputeConstraintToUse(OpInfo, SDValue());
03380 
03381     if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
03382         OpInfo.isIndirect) {
03383       Value *OpVal = CS->getArgOperand(ArgNo++);
03384       MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType());
03385     } else if (OpInfo.Type == InlineAsm::isInput)
03386       ArgNo++;
03387   }
03388 
03389   return MadeChange;
03390 }
03391 
03392 /// \brief Check if all the uses of \p Inst are equivalent (or free) zero or
03393 /// sign extensions.
03394 static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) {
03395   assert(!Inst->use_empty() && "Input must have at least one use");
03396   const Instruction *FirstUser = cast<Instruction>(*Inst->user_begin());
03397   bool IsSExt = isa<SExtInst>(FirstUser);
03398   Type *ExtTy = FirstUser->getType();
03399   for (const User *U : Inst->users()) {
03400     const Instruction *UI = cast<Instruction>(U);
03401     if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
03402       return false;
03403     Type *CurTy = UI->getType();
03404     // Same input and output types: Same instruction after CSE.
03405     if (CurTy == ExtTy)
03406       continue;
03407 
03408     // If IsSExt is true, we are in this situation:
03409     // a = Inst
03410     // b = sext ty1 a to ty2
03411     // c = sext ty1 a to ty3
03412     // Assuming ty2 is shorter than ty3, this could be turned into:
03413     // a = Inst
03414     // b = sext ty1 a to ty2
03415     // c = sext ty2 b to ty3
03416     // However, the last sext is not free.
03417     if (IsSExt)
03418       return false;
03419 
03420     // This is a ZExt, maybe this is free to extend from one type to another.
03421     // In that case, we would not account for a different use.
03422     Type *NarrowTy;
03423     Type *LargeTy;
03424     if (ExtTy->getScalarType()->getIntegerBitWidth() >
03425         CurTy->getScalarType()->getIntegerBitWidth()) {
03426       NarrowTy = CurTy;
03427       LargeTy = ExtTy;
03428     } else {
03429       NarrowTy = ExtTy;
03430       LargeTy = CurTy;
03431     }
03432 
03433     if (!TLI.isZExtFree(NarrowTy, LargeTy))
03434       return false;
03435   }
03436   // All uses are the same or can be derived from one another for free.
03437   return true;
03438 }
03439 
03440 /// \brief Try to form ExtLd by promoting \p Exts until they reach a
03441 /// load instruction.
03442 /// If an ext(load) can be formed, it is returned via \p LI for the load
03443 /// and \p Inst for the extension.
03444 /// Otherwise LI == nullptr and Inst == nullptr.
03445 /// When some promotion happened, \p TPT contains the proper state to
03446 /// revert them.
03447 ///
03448 /// \return true when promoting was necessary to expose the ext(load)
03449 /// opportunity, false otherwise.
03450 ///
03451 /// Example:
03452 /// \code
03453 /// %ld = load i32* %addr
03454 /// %add = add nuw i32 %ld, 4
03455 /// %zext = zext i32 %add to i64
03456 /// \endcode
03457 /// =>
03458 /// \code
03459 /// %ld = load i32* %addr
03460 /// %zext = zext i32 %ld to i64
03461 /// %add = add nuw i64 %zext, 4
03462 /// \encode
03463 /// Thanks to the promotion, we can match zext(load i32*) to i64.
03464 bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT,
03465                                     LoadInst *&LI, Instruction *&Inst,
03466                                     const SmallVectorImpl<Instruction *> &Exts,
03467                                     unsigned CreatedInsts = 0) {
03468   // Iterate over all the extensions to see if one form an ext(load).
03469   for (auto I : Exts) {
03470     // Check if we directly have ext(load).
03471     if ((LI = dyn_cast<LoadInst>(I->getOperand(0)))) {
03472       Inst = I;
03473       // No promotion happened here.
03474       return false;
03475     }
03476     // Check whether or not we want to do any promotion.
03477     if (!TLI || !TLI->enableExtLdPromotion() || DisableExtLdPromotion)
03478       continue;
03479     // Get the action to perform the promotion.
03480     TypePromotionHelper::Action TPH = TypePromotionHelper::getAction(
03481         I, InsertedTruncsSet, *TLI, PromotedInsts);
03482     // Check if we can promote.
03483     if (!TPH)
03484       continue;
03485     // Save the current state.
03486     TypePromotionTransaction::ConstRestorationPt LastKnownGood =
03487         TPT.getRestorationPoint();
03488     SmallVector<Instruction *, 4> NewExts;
03489     unsigned NewCreatedInsts = 0;
03490     // Promote.
03491     Value *PromotedVal =
03492         TPH(I, TPT, PromotedInsts, NewCreatedInsts, &NewExts, nullptr);
03493     assert(PromotedVal &&
03494            "TypePromotionHelper should have filtered out those cases");
03495 
03496     // We would be able to merge only one extension in a load.
03497     // Therefore, if we have more than 1 new extension we heuristically
03498     // cut this search path, because it means we degrade the code quality.
03499     // With exactly 2, the transformation is neutral, because we will merge
03500     // one extension but leave one. However, we optimistically keep going,
03501     // because the new extension may be removed too.
03502     unsigned TotalCreatedInsts = CreatedInsts + NewCreatedInsts;
03503     if (!StressExtLdPromotion &&
03504         (TotalCreatedInsts > 1 ||
03505          !isPromotedInstructionLegal(*TLI, PromotedVal))) {
03506       // The promotion is not profitable, rollback to the previous state.
03507       TPT.rollback(LastKnownGood);
03508       continue;
03509     }
03510     // The promotion is profitable.
03511     // Check if it exposes an ext(load).
03512     (void)ExtLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInsts);
03513     if (LI && (StressExtLdPromotion || NewCreatedInsts == 0 ||
03514                // If we have created a new extension, i.e., now we have two
03515                // extensions. We must make sure one of them is merged with
03516                // the load, otherwise we may degrade the code quality.
03517                (LI->hasOneUse() || hasSameExtUse(LI, *TLI))))
03518       // Promotion happened.
03519       return true;
03520     // If this does not help to expose an ext(load) then, rollback.
03521     TPT.rollback(LastKnownGood);
03522   }
03523   // None of the extension can form an ext(load).
03524   LI = nullptr;
03525   Inst = nullptr;
03526   return false;
03527 }
03528 
03529 /// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same
03530 /// basic block as the load, unless conditions are unfavorable. This allows
03531 /// SelectionDAG to fold the extend into the load.
03532 /// \p I[in/out] the extension may be modified during the process if some
03533 /// promotions apply.
03534 ///
03535 bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *&I) {
03536   // Try to promote a chain of computation if it allows to form
03537   // an extended load.
03538   TypePromotionTransaction TPT;
03539   TypePromotionTransaction::ConstRestorationPt LastKnownGood =
03540     TPT.getRestorationPoint();
03541   SmallVector<Instruction *, 1> Exts;
03542   Exts.push_back(I);
03543   // Look for a load being extended.
03544   LoadInst *LI = nullptr;
03545   Instruction *OldExt = I;
03546   bool HasPromoted = ExtLdPromotion(TPT, LI, I, Exts);
03547   if (!LI || !I) {
03548     assert(!HasPromoted && !LI && "If we did not match any load instruction "
03549                                   "the code must remain the same");
03550     I = OldExt;
03551     return false;
03552   }
03553 
03554   // If they're already in the same block, there's nothing to do.
03555   // Make the cheap checks first if we did not promote.
03556   // If we promoted, we need to check if it is indeed profitable.
03557   if (!HasPromoted && LI->getParent() == I->getParent())
03558     return false;
03559 
03560   EVT VT = TLI->getValueType(I->getType());
03561   EVT LoadVT = TLI->getValueType(LI->getType());
03562 
03563   // If the load has other users and the truncate is not free, this probably
03564   // isn't worthwhile.
03565   if (!LI->hasOneUse() && TLI &&
03566       (TLI->isTypeLegal(LoadVT) || !TLI->isTypeLegal(VT)) &&
03567       !TLI->isTruncateFree(I->getType(), LI->getType())) {
03568     I = OldExt;
03569     TPT.rollback(LastKnownGood);
03570     return false;
03571   }
03572 
03573   // Check whether the target supports casts folded into loads.
03574   unsigned LType;
03575   if (isa<ZExtInst>(I))
03576     LType = ISD::ZEXTLOAD;
03577   else {
03578     assert(isa<SExtInst>(I) && "Unexpected ext type!");
03579     LType = ISD::SEXTLOAD;
03580   }
03581   if (TLI && !TLI->isLoadExtLegal(LType, VT, LoadVT)) {
03582     I = OldExt;
03583     TPT.rollback(LastKnownGood);
03584     return false;
03585   }
03586 
03587   // Move the extend into the same block as the load, so that SelectionDAG
03588   // can fold it.
03589   TPT.commit();
03590   I->removeFromParent();
03591   I->insertAfter(LI);
03592   ++NumExtsMoved;
03593   return true;
03594 }
03595 
03596 bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
03597   BasicBlock *DefBB = I->getParent();
03598 
03599   // If the result of a {s|z}ext and its source are both live out, rewrite all
03600   // other uses of the source with result of extension.
03601   Value *Src = I->getOperand(0);
03602   if (Src->hasOneUse())
03603     return false;
03604 
03605   // Only do this xform if truncating is free.
03606   if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
03607     return false;
03608 
03609   // Only safe to perform the optimization if the source is also defined in
03610   // this block.
03611   if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
03612     return false;
03613 
03614   bool DefIsLiveOut = false;
03615   for (User *U : I->users()) {
03616     Instruction *UI = cast<Instruction>(U);
03617 
03618     // Figure out which BB this ext is used in.
03619     BasicBlock *UserBB = UI->getParent();
03620     if (UserBB == DefBB) continue;
03621     DefIsLiveOut = true;
03622     break;
03623   }
03624   if (!DefIsLiveOut)
03625     return false;
03626 
03627   // Make sure none of the uses are PHI nodes.
03628   for (User *U : Src->users()) {
03629     Instruction *UI = cast<Instruction>(U);
03630     BasicBlock *UserBB = UI->getParent();
03631     if (UserBB == DefBB) continue;
03632     // Be conservative. We don't want this xform to end up introducing
03633     // reloads just before load / store instructions.
03634     if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI))
03635       return false;
03636   }
03637 
03638   // InsertedTruncs - Only insert one trunc in each block once.
03639   DenseMap<BasicBlock*, Instruction*> InsertedTruncs;
03640 
03641   bool MadeChange = false;
03642   for (Use &U : Src->uses()) {
03643     Instruction *User = cast<Instruction>(U.getUser());
03644 
03645     // Figure out which BB this ext is used in.
03646     BasicBlock *UserBB = User->getParent();
03647     if (UserBB == DefBB) continue;
03648 
03649     // Both src and def are live in this block. Rewrite the use.
03650     Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
03651 
03652     if (!InsertedTrunc) {
03653       BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
03654       InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
03655       InsertedTruncsSet.insert(InsertedTrunc);
03656     }
03657 
03658     // Replace a use of the {s|z}ext source with a use of the result.
03659     U = InsertedTrunc;
03660     ++NumExtUses;
03661     MadeChange = true;
03662   }
03663 
03664   return MadeChange;
03665 }
03666 
03667 /// isFormingBranchFromSelectProfitable - Returns true if a SelectInst should be
03668 /// turned into an explicit branch.
03669 static bool isFormingBranchFromSelectProfitable(SelectInst *SI) {
03670   // FIXME: This should use the same heuristics as IfConversion to determine
03671   // whether a select is better represented as a branch.  This requires that
03672   // branch probability metadata is preserved for the select, which is not the
03673   // case currently.
03674 
03675   CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
03676 
03677   // If the branch is predicted right, an out of order CPU can avoid blocking on
03678   // the compare.  Emit cmovs on compares with a memory operand as branches to
03679   // avoid stalls on the load from memory.  If the compare has more than one use
03680   // there's probably another cmov or setcc around so it's not worth emitting a
03681   // branch.
03682   if (!Cmp)
03683     return false;
03684 
03685   Value *CmpOp0 = Cmp->getOperand(0);
03686   Value *CmpOp1 = Cmp->getOperand(1);
03687 
03688   // We check that the memory operand has one use to avoid uses of the loaded
03689   // value directly after the compare, making branches unprofitable.
03690   return Cmp->hasOneUse() &&
03691          ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) ||
03692           (isa<LoadInst>(CmpOp1) && CmpOp1->hasOneUse()));
03693 }
03694 
03695 
03696 /// If we have a SelectInst that will likely profit from branch prediction,
03697 /// turn it into a branch.
03698 bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) {
03699   bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
03700 
03701   // Can we convert the 'select' to CF ?
03702   if (DisableSelectToBranch || OptSize || !TLI || VectorCond)
03703     return false;
03704 
03705   TargetLowering::SelectSupportKind SelectKind;
03706   if (VectorCond)
03707     SelectKind = TargetLowering::VectorMaskSelect;
03708   else if (SI->getType()->isVectorTy())
03709     SelectKind = TargetLowering::ScalarCondVectorVal;
03710   else
03711     SelectKind = TargetLowering::ScalarValSelect;
03712 
03713   // Do we have efficient codegen support for this kind of 'selects' ?
03714   if (TLI->isSelectSupported(SelectKind)) {
03715     // We have efficient codegen support for the select instruction.
03716     // Check if it is profitable to keep this 'select'.
03717     if (!TLI->isPredictableSelectExpensive() ||
03718         !isFormingBranchFromSelectProfitable(SI))
03719       return false;
03720   }
03721 
03722   ModifiedDT = true;
03723 
03724   // First, we split the block containing the select into 2 blocks.
03725   BasicBlock *StartBlock = SI->getParent();
03726   BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(SI));
03727   BasicBlock *NextBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
03728 
03729   // Create a new block serving as the landing pad for the branch.
03730   BasicBlock *SmallBlock = BasicBlock::Create(SI->getContext(), "select.mid",
03731                                              NextBlock->getParent(), NextBlock);
03732 
03733   // Move the unconditional branch from the block with the select in it into our
03734   // landing pad block.
03735   StartBlock->getTerminator()->eraseFromParent();
03736   BranchInst::Create(NextBlock, SmallBlock);
03737 
03738   // Insert the real conditional branch based on the original condition.
03739   BranchInst::Create(NextBlock, SmallBlock, SI->getCondition(), SI);
03740 
03741   // The select itself is replaced with a PHI Node.
03742   PHINode *PN = PHINode::Create(SI->getType(), 2, "", NextBlock->begin());
03743   PN->takeName(SI);
03744   PN->addIncoming(SI->getTrueValue(), StartBlock);
03745   PN->addIncoming(SI->getFalseValue(), SmallBlock);
03746   SI->replaceAllUsesWith(PN);
03747   SI->eraseFromParent();
03748 
03749   // Instruct OptimizeBlock to skip to the next block.
03750   CurInstIterator = StartBlock->end();
03751   ++NumSelectsExpanded;
03752   return true;
03753 }
03754 
03755 static bool isBroadcastShuffle(ShuffleVectorInst *SVI) {
03756   SmallVector<int, 16> Mask(SVI->getShuffleMask());
03757   int SplatElem = -1;
03758   for (unsigned i = 0; i < Mask.size(); ++i) {
03759     if (SplatElem != -1 && Mask[i] != -1 && Mask[i] != SplatElem)
03760       return false;
03761     SplatElem = Mask[i];
03762   }
03763 
03764   return true;
03765 }
03766 
03767 /// Some targets have expensive vector shifts if the lanes aren't all the same
03768 /// (e.g. x86 only introduced "vpsllvd" and friends with AVX2). In these cases
03769 /// it's often worth sinking a shufflevector splat down to its use so that
03770 /// codegen can spot all lanes are identical.
03771 bool CodeGenPrepare::OptimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
03772   BasicBlock *DefBB = SVI->getParent();
03773 
03774   // Only do this xform if variable vector shifts are particularly expensive.
03775   if (!TLI || !TLI->isVectorShiftByScalarCheap(SVI->getType()))
03776     return false;
03777 
03778   // We only expect better codegen by sinking a shuffle if we can recognise a
03779   // constant splat.
03780   if (!isBroadcastShuffle(SVI))
03781     return false;
03782 
03783   // InsertedShuffles - Only insert a shuffle in each block once.
03784   DenseMap<BasicBlock*, Instruction*> InsertedShuffles;
03785 
03786   bool MadeChange = false;
03787   for (User *U : SVI->users()) {
03788     Instruction *UI = cast<Instruction>(U);
03789 
03790     // Figure out which BB this ext is used in.
03791     BasicBlock *UserBB = UI->getParent();
03792     if (UserBB == DefBB) continue;
03793 
03794     // For now only apply this when the splat is used by a shift instruction.
03795     if (!UI->isShift()) continue;
03796 
03797     // Everything checks out, sink the shuffle if the user's block doesn't
03798     // already have a copy.
03799     Instruction *&InsertedShuffle = InsertedShuffles[UserBB];
03800 
03801     if (!InsertedShuffle) {
03802       BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
03803       InsertedShuffle = new ShuffleVectorInst(SVI->getOperand(0),
03804                                               SVI->getOperand(1),
03805                                               SVI->getOperand(2), "", InsertPt);
03806     }
03807 
03808     UI->replaceUsesOfWith(SVI, InsertedShuffle);
03809     MadeChange = true;
03810   }
03811 
03812   // If we removed all uses, nuke the shuffle.
03813   if (SVI->use_empty()) {
03814     SVI->eraseFromParent();
03815     MadeChange = true;
03816   }
03817 
03818   return MadeChange;
03819 }
03820 
03821 namespace {
03822 /// \brief Helper class to promote a scalar operation to a vector one.
03823 /// This class is used to move downward extractelement transition.
03824 /// E.g.,
03825 /// a = vector_op <2 x i32>
03826 /// b = extractelement <2 x i32> a, i32 0
03827 /// c = scalar_op b
03828 /// store c
03829 ///
03830 /// =>
03831 /// a = vector_op <2 x i32>
03832 /// c = vector_op a (equivalent to scalar_op on the related lane)
03833 /// * d = extractelement <2 x i32> c, i32 0
03834 /// * store d
03835 /// Assuming both extractelement and store can be combine, we get rid of the
03836 /// transition.
03837 class VectorPromoteHelper {
03838   /// Used to perform some checks on the legality of vector operations.
03839   const TargetLowering &TLI;
03840 
03841   /// Used to estimated the cost of the promoted chain.
03842   const TargetTransformInfo &TTI;
03843 
03844   /// The transition being moved downwards.
03845   Instruction *Transition;
03846   /// The sequence of instructions to be promoted.
03847   SmallVector<Instruction *, 4> InstsToBePromoted;
03848   /// Cost of combining a store and an extract.
03849   unsigned StoreExtractCombineCost;
03850   /// Instruction that will be combined with the transition.
03851   Instruction *CombineInst;
03852 
03853   /// \brief The instruction that represents the current end of the transition.
03854   /// Since we are faking the promotion until we reach the end of the chain
03855   /// of computation, we need a way to get the current end of the transition.
03856   Instruction *getEndOfTransition() const {
03857     if (InstsToBePromoted.empty())
03858       return Transition;
03859     return InstsToBePromoted.back();
03860   }
03861 
03862   /// \brief Return the index of the original value in the transition.
03863   /// E.g., for "extractelement <2 x i32> c, i32 1" the original value,
03864   /// c, is at index 0.
03865   unsigned getTransitionOriginalValueIdx() const {
03866     assert(isa<ExtractElementInst>(Transition) &&
03867            "Other kind of transitions are not supported yet");
03868     return 0;
03869   }
03870 
03871   /// \brief Return the index of the index in the transition.
03872   /// E.g., for "extractelement <2 x i32> c, i32 0" the index
03873   /// is at index 1.
03874   unsigned getTransitionIdx() const {
03875     assert(isa<ExtractElementInst>(Transition) &&
03876            "Other kind of transitions are not supported yet");
03877     return 1;
03878   }
03879 
03880   /// \brief Get the type of the transition.
03881   /// This is the type of the original value.
03882   /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the
03883   /// transition is <2 x i32>.
03884   Type *getTransitionType() const {
03885     return Transition->getOperand(getTransitionOriginalValueIdx())->getType();
03886   }
03887 
03888   /// \brief Promote \p ToBePromoted by moving \p Def downward through.
03889   /// I.e., we have the following sequence:
03890   /// Def = Transition <ty1> a to <ty2>
03891   /// b = ToBePromoted <ty2> Def, ...
03892   /// =>
03893   /// b = ToBePromoted <ty1> a, ...
03894   /// Def = Transition <ty1> ToBePromoted to <ty2>
03895   void promoteImpl(Instruction *ToBePromoted);
03896 
03897   /// \brief Check whether or not it is profitable to promote all the
03898   /// instructions enqueued to be promoted.
03899   bool isProfitableToPromote() {
03900     Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx());
03901     unsigned Index = isa<ConstantInt>(ValIdx)
03902                          ? cast<ConstantInt>(ValIdx)->getZExtValue()
03903                          : -1;
03904     Type *PromotedType = getTransitionType();
03905 
03906     StoreInst *ST = cast<StoreInst>(CombineInst);
03907     unsigned AS = ST->getPointerAddressSpace();
03908     unsigned Align = ST->getAlignment();
03909     // Check if this store is supported.
03910     if (!TLI.allowsMisalignedMemoryAccesses(
03911             TLI.getValueType(ST->getValueOperand()->getType()), AS, Align)) {
03912       // If this is not supported, there is no way we can combine
03913       // the extract with the store.
03914       return false;
03915     }
03916 
03917     // The scalar chain of computation has to pay for the transition
03918     // scalar to vector.
03919     // The vector chain has to account for the combining cost.
03920     uint64_t ScalarCost =
03921         TTI.getVectorInstrCost(Transition->getOpcode(), PromotedType, Index);
03922     uint64_t VectorCost = StoreExtractCombineCost;
03923     for (const auto &Inst : InstsToBePromoted) {
03924       // Compute the cost.
03925       // By construction, all instructions being promoted are arithmetic ones.
03926       // Moreover, one argument is a constant that can be viewed as a splat
03927       // constant.
03928       Value *Arg0 = Inst->getOperand(0);
03929       bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) ||
03930                             isa<ConstantFP>(Arg0);
03931       TargetTransformInfo::OperandValueKind Arg0OVK =
03932           IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue
03933                          : TargetTransformInfo::OK_AnyValue;
03934       TargetTransformInfo::OperandValueKind Arg1OVK =
03935           !IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue
03936                           : TargetTransformInfo::OK_AnyValue;
03937       ScalarCost += TTI.getArithmeticInstrCost(
03938           Inst->getOpcode(), Inst->getType(), Arg0OVK, Arg1OVK);
03939       VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType,
03940                                                Arg0OVK, Arg1OVK);
03941     }
03942     DEBUG(dbgs() << "Estimated cost of computation to be promoted:\nScalar: "
03943                  << ScalarCost << "\nVector: " << VectorCost << '\n');
03944     return ScalarCost > VectorCost;
03945   }
03946 
03947   /// \brief Generate a constant vector with \p Val with the same
03948   /// number of elements as the transition.
03949   /// \p UseSplat defines whether or not \p Val should be replicated
03950   /// accross the whole vector.
03951   /// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>,
03952   /// otherwise we generate a vector with as many undef as possible:
03953   /// <undef, ..., undef, Val, undef, ..., undef> where \p Val is only
03954   /// used at the index of the extract.
03955   Value *getConstantVector(Constant *Val, bool UseSplat) const {
03956     unsigned ExtractIdx = UINT_MAX;
03957     if (!UseSplat) {
03958       // If we cannot determine where the constant must be, we have to
03959       // use a splat constant.
03960       Value *ValExtractIdx = Transition->getOperand(getTransitionIdx());
03961       if (ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx))
03962         ExtractIdx = CstVal->getSExtValue();
03963       else
03964         UseSplat = true;
03965     }
03966 
03967     unsigned End = getTransitionType()->getVectorNumElements();
03968     if (UseSplat)
03969       return ConstantVector::getSplat(End, Val);
03970 
03971     SmallVector<Constant *, 4> ConstVec;
03972     UndefValue *UndefVal = UndefValue::get(Val->getType());
03973     for (unsigned Idx = 0; Idx != End; ++Idx) {
03974       if (Idx == ExtractIdx)
03975         ConstVec.push_back(Val);
03976       else
03977         ConstVec.push_back(UndefVal);
03978     }
03979     return ConstantVector::get(ConstVec);
03980   }
03981 
03982   /// \brief Check if promoting to a vector type an operand at \p OperandIdx
03983   /// in \p Use can trigger undefined behavior.
03984   static bool canCauseUndefinedBehavior(const Instruction *Use,
03985                                         unsigned OperandIdx) {
03986     // This is not safe to introduce undef when the operand is on
03987     // the right hand side of a division-like instruction.
03988     if (OperandIdx != 1)
03989       return false;
03990     switch (Use->getOpcode()) {
03991     default:
03992       return false;
03993     case Instruction::SDiv:
03994     case Instruction::UDiv:
03995     case Instruction::SRem:
03996     case Instruction::URem:
03997       return true;
03998     case Instruction::FDiv:
03999     case Instruction::FRem:
04000       return !Use->hasNoNaNs();
04001     }
04002     llvm_unreachable(nullptr);
04003   }
04004 
04005 public:
04006   VectorPromoteHelper(const TargetLowering &TLI, const TargetTransformInfo &TTI,
04007                       Instruction *Transition, unsigned CombineCost)
04008       : TLI(TLI), TTI(TTI), Transition(Transition),
04009         StoreExtractCombineCost(CombineCost), CombineInst(nullptr) {
04010     assert(Transition && "Do not know how to promote null");
04011   }
04012 
04013   /// \brief Check if we can promote \p ToBePromoted to \p Type.
04014   bool canPromote(const Instruction *ToBePromoted) const {
04015     // We could support CastInst too.
04016     return isa<BinaryOperator>(ToBePromoted);
04017   }
04018 
04019   /// \brief Check if it is profitable to promote \p ToBePromoted
04020   /// by moving downward the transition through.
04021   bool shouldPromote(const Instruction *ToBePromoted) const {
04022     // Promote only if all the operands can be statically expanded.
04023     // Indeed, we do not want to introduce any new kind of transitions.
04024     for (const Use &U : ToBePromoted->operands()) {
04025       const Value *Val = U.get();
04026       if (Val == getEndOfTransition()) {
04027         // If the use is a division and the transition is on the rhs,
04028         // we cannot promote the operation, otherwise we may create a
04029         // division by zero.
04030         if (canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()))
04031           return false;
04032         continue;
04033       }
04034       if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) &&
04035           !isa<ConstantFP>(Val))
04036         return false;
04037     }
04038     // Check that the resulting operation is legal.
04039     int ISDOpcode = TLI.InstructionOpcodeToISD(ToBePromoted->getOpcode());
04040     if (!ISDOpcode)
04041       return false;
04042     return StressStoreExtract ||
04043            TLI.isOperationLegalOrCustom(
04044                ISDOpcode, TLI.getValueType(getTransitionType(), true));
04045   }
04046 
04047   /// \brief Check whether or not \p Use can be combined
04048   /// with the transition.
04049   /// I.e., is it possible to do Use(Transition) => AnotherUse?
04050   bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); }
04051 
04052   /// \brief Record \p ToBePromoted as part of the chain to be promoted.
04053   void enqueueForPromotion(Instruction *ToBePromoted) {
04054     InstsToBePromoted.push_back(ToBePromoted);
04055   }
04056 
04057   /// \brief Set the instruction that will be combined with the transition.
04058   void recordCombineInstruction(Instruction *ToBeCombined) {
04059     assert(canCombine(ToBeCombined) && "Unsupported instruction to combine");
04060     CombineInst = ToBeCombined;
04061   }
04062 
04063   /// \brief Promote all the instructions enqueued for promotion if it is
04064   /// is profitable.
04065   /// \return True if the promotion happened, false otherwise.
04066   bool promote() {
04067     // Check if there is something to promote.
04068     // Right now, if we do not have anything to combine with,
04069     // we assume the promotion is not profitable.
04070     if (InstsToBePromoted.empty() || !CombineInst)
04071       return false;
04072 
04073     // Check cost.
04074     if (!StressStoreExtract && !isProfitableToPromote())
04075       return false;
04076 
04077     // Promote.
04078     for (auto &ToBePromoted : InstsToBePromoted)
04079       promoteImpl(ToBePromoted);
04080     InstsToBePromoted.clear();
04081     return true;
04082   }
04083 };
04084 } // End of anonymous namespace.
04085 
04086 void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
04087   // At this point, we know that all the operands of ToBePromoted but Def
04088   // can be statically promoted.
04089   // For Def, we need to use its parameter in ToBePromoted:
04090   // b = ToBePromoted ty1 a
04091   // Def = Transition ty1 b to ty2
04092   // Move the transition down.
04093   // 1. Replace all uses of the promoted operation by the transition.
04094   // = ... b => = ... Def.
04095   assert(ToBePromoted->getType() == Transition->getType() &&
04096          "The type of the result of the transition does not match "
04097          "the final type");
04098   ToBePromoted->replaceAllUsesWith(Transition);
04099   // 2. Update the type of the uses.
04100   // b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def.
04101   Type *TransitionTy = getTransitionType();
04102   ToBePromoted->mutateType(TransitionTy);
04103   // 3. Update all the operands of the promoted operation with promoted
04104   // operands.
04105   // b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a.
04106   for (Use &U : ToBePromoted->operands()) {
04107     Value *Val = U.get();
04108     Value *NewVal = nullptr;
04109     if (Val == Transition)
04110       NewVal = Transition->getOperand(getTransitionOriginalValueIdx());
04111     else if (isa<UndefValue>(Val) || isa<ConstantInt>(Val) ||
04112              isa<ConstantFP>(Val)) {
04113       // Use a splat constant if it is not safe to use undef.
04114       NewVal = getConstantVector(
04115           cast<Constant>(Val),
04116           isa<UndefValue>(Val) ||
04117               canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()));
04118     } else
04119       llvm_unreachable("Did you modified shouldPromote and forgot to update "
04120                        "this?");
04121     ToBePromoted->setOperand(U.getOperandNo(), NewVal);
04122   }
04123   Transition->removeFromParent();
04124   Transition->insertAfter(ToBePromoted);
04125   Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted);
04126 }
04127 
04128 // See if we can speculate calls to intrinsic cttz/ctlz.
04129 //
04130 // Example:
04131 // entry:
04132 //   ...
04133 //   %cmp = icmp eq i64 %val, 0
04134 //   br i1 %cmp, label %end.bb, label %then.bb
04135 //
04136 // then.bb:
04137 //   %c = tail call i64 @llvm.cttz.i64(i64 %val, i1 true)
04138 //   br label %EndBB
04139 //
04140 // end.bb:
04141 //   %cond = phi i64 [ %c, %then.bb ], [ 64, %entry ]
04142 //
04143 // ==>
04144 //
04145 // entry:
04146 //   ...
04147 //   %c = tail call i64 @llvm.cttz.i64(i64 %val, i1 false)
04148 //
04149 static bool OptimizeBranchInst(BranchInst *BrInst, const TargetLowering &TLI) {
04150   assert(BrInst->isConditional() && "Expected a conditional branch!");
04151   BasicBlock *ThenBB = BrInst->getSuccessor(1);
04152   BasicBlock *EndBB = BrInst->getSuccessor(0);
04153 
04154   // See if ThenBB contains only one instruction (excluding the
04155   // terminator and DbgInfoIntrinsic calls).
04156   IntrinsicInst *II = nullptr;
04157   CastInst *CI = nullptr;
04158   for (BasicBlock::iterator I = ThenBB->begin(),
04159                             E = std::prev(ThenBB->end()); I != E; ++I) {
04160     // Skip debug info.
04161     if (isa<DbgInfoIntrinsic>(I))
04162       continue;
04163 
04164     // Check if this is a zero extension or a truncate of a previously
04165     // matched call to intrinsic cttz/ctlz.
04166     if (II) {
04167       // Early exit if we already found a "free" zero extend/truncate.
04168       if (CI)
04169         return false;
04170 
04171       Type *SrcTy = II->getType();
04172       Type *DestTy = I->getType();
04173       Value *V;
04174  
04175       if (match(cast<Instruction>(I), m_ZExt(m_Value(V))) && V == II) {
04176         // Speculate this zero extend only if it is "free" for the target.
04177         if (TLI.isZExtFree(SrcTy, DestTy)) {
04178           CI = cast<CastInst>(I);
04179           continue;
04180         }
04181       } else if (match(cast<Instruction>(I), m_Trunc(m_Value(V))) && V == II) {
04182         // Speculate this truncate only if it is "free" for the target.
04183         if (TLI.isTruncateFree(SrcTy, DestTy)) {
04184           CI = cast<CastInst>(I);
04185           continue;
04186         }
04187       } else {
04188         // Avoid speculating more than one instruction.
04189         return false;
04190       }
04191     }
04192 
04193     // See if this is a call to intrinsic cttz/ctlz.
04194     if (match(cast<Instruction>(I), m_Intrinsic<Intrinsic::cttz>())) {
04195       // Avoid speculating expensive intrinsic calls.
04196       if (!TLI.isCheapToSpeculateCttz())
04197         return false;
04198     }
04199     else if (match(cast<Instruction>(I), m_Intrinsic<Intrinsic::ctlz>())) {
04200       // Avoid speculating expensive intrinsic calls.
04201       if (!TLI.isCheapToSpeculateCtlz())
04202         return false;
04203     } else
04204       return false;
04205     
04206     II = cast<IntrinsicInst>(I);
04207   }
04208 
04209   // Look for PHI nodes with 'II' as the incoming value from 'ThenBB'.
04210   BasicBlock *EntryBB = BrInst->getParent();
04211   for (BasicBlock::iterator I = EndBB->begin();
04212        PHINode *PN = dyn_cast<PHINode>(I); ++I) {
04213     Value *ThenV = PN->getIncomingValueForBlock(ThenBB);
04214     Value *OrigV = PN->getIncomingValueForBlock(EntryBB);
04215 
04216     if (!OrigV)
04217       return false;
04218 
04219     if (ThenV != II && (!CI || ThenV != CI))
04220       return false;
04221     
04222     if (ConstantInt *CInt = dyn_cast<ConstantInt>(OrigV)) {
04223       unsigned BitWidth = II->getType()->getIntegerBitWidth();
04224 
04225       // Don't try to simplify this phi node if 'ThenV' is a cttz/ctlz
04226       // intrinsic call, but 'OrigV' is not equal to the 'size-of' in bits
04227       // of the value in input to the cttz/ctlz.
04228       if (CInt->getValue() != BitWidth)
04229         return false;
04230 
04231       // Hoist the call to cttz/ctlz from ThenBB into EntryBB.
04232       EntryBB->getInstList().splice(BrInst, ThenBB->getInstList(),
04233                                     ThenBB->begin(), std::prev(ThenBB->end()));
04234  
04235       // Update PN setting ThenV as the incoming value from both 'EntryBB'
04236       // and 'ThenBB'. Eventually, method 'OptimizeInst' will fold this
04237       // phi node if all the incoming values are the same.
04238       PN->setIncomingValue(PN->getBasicBlockIndex(EntryBB), ThenV);
04239       PN->setIncomingValue(PN->getBasicBlockIndex(ThenBB), ThenV);
04240 
04241       // Clear the 'undef on zero' flag of the cttz/ctlz intrinsic call.
04242       if (cast<ConstantInt>(II->getArgOperand(1))->isOne()) {
04243         Type *Ty = II->getArgOperand(0)->getType();
04244         Value *Args[] = { II->getArgOperand(0),
04245                           ConstantInt::getFalse(II->getContext()) };
04246         Module *M = EntryBB->getParent()->getParent();
04247         Value *IF = Intrinsic::getDeclaration(M, II->getIntrinsicID(), Ty);
04248         IRBuilder<> Builder(II);
04249         Instruction *NewI = Builder.CreateCall(IF, Args);
04250 
04251         // Replace the old call to cttz/ctlz.
04252         II->replaceAllUsesWith(NewI);
04253         II->eraseFromParent();
04254       }
04255  
04256       // Update BrInst condition so that the branch to EndBB is always taken.
04257       // Later on, method 'ConstantFoldTerminator' will simplify this branch
04258       // replacing it with a direct branch to 'EndBB'.
04259       // As a side effect, CodeGenPrepare will attempt to simplify the control
04260       // flow graph by deleting basic block 'ThenBB' and merging 'EntryBB' into
04261       // 'EndBB' (calling method 'EliminateFallThrough').
04262       BrInst->setCondition(ConstantInt::getTrue(BrInst->getContext()));
04263       return true;
04264     }
04265   }
04266 
04267   return false;
04268 }
04269 
04270 /// Some targets can do store(extractelement) with one instruction.
04271 /// Try to push the extractelement towards the stores when the target
04272 /// has this feature and this is profitable.
04273 bool CodeGenPrepare::OptimizeExtractElementInst(Instruction *Inst) {
04274   unsigned CombineCost = UINT_MAX;
04275   if (DisableStoreExtract || !TLI ||
04276       (!StressStoreExtract &&
04277        !TLI->canCombineStoreAndExtract(Inst->getOperand(0)->getType(),
04278                                        Inst->getOperand(1), CombineCost)))
04279     return false;
04280 
04281   // At this point we know that Inst is a vector to scalar transition.
04282   // Try to move it down the def-use chain, until:
04283   // - We can combine the transition with its single use
04284   //   => we got rid of the transition.
04285   // - We escape the current basic block
04286   //   => we would need to check that we are moving it at a cheaper place and
04287   //      we do not do that for now.
04288   BasicBlock *Parent = Inst->getParent();
04289   DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n');
04290   VectorPromoteHelper VPH(*TLI, *TTI, Inst, CombineCost);
04291   // If the transition has more than one use, assume this is not going to be
04292   // beneficial.
04293   while (Inst->hasOneUse()) {
04294     Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin());
04295     DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n');
04296 
04297     if (ToBePromoted->getParent() != Parent) {
04298       DEBUG(dbgs() << "Instruction to promote is in a different block ("
04299                    << ToBePromoted->getParent()->getName()
04300                    << ") than the transition (" << Parent->getName() << ").\n");
04301       return false;
04302     }
04303 
04304     if (VPH.canCombine(ToBePromoted)) {
04305       DEBUG(dbgs() << "Assume " << *Inst << '\n'
04306                    << "will be combined with: " << *ToBePromoted << '\n');
04307       VPH.recordCombineInstruction(ToBePromoted);
04308       bool Changed = VPH.promote();
04309       NumStoreExtractExposed += Changed;
04310       return Changed;
04311     }
04312 
04313     DEBUG(dbgs() << "Try promoting.\n");
04314     if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted))
04315       return false;
04316 
04317     DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n");
04318 
04319     VPH.enqueueForPromotion(ToBePromoted);
04320     Inst = ToBePromoted;
04321   }
04322   return false;
04323 }
04324 
04325 bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) {
04326   if (PHINode *P = dyn_cast<PHINode>(I)) {
04327     // It is possible for very late stage optimizations (such as SimplifyCFG)
04328     // to introduce PHI nodes too late to be cleaned up.  If we detect such a
04329     // trivial PHI, go ahead and zap it here.
04330     if (Value *V = SimplifyInstruction(P, TLI ? TLI->getDataLayout() : nullptr,
04331                                        TLInfo, DT)) {
04332       P->replaceAllUsesWith(V);
04333       P->eraseFromParent();
04334       ++NumPHIsElim;
04335       return true;
04336     }
04337     return false;
04338   }
04339 
04340   if (CastInst *CI = dyn_cast<CastInst>(I)) {
04341     // If the source of the cast is a constant, then this should have
04342     // already been constant folded.  The only reason NOT to constant fold
04343     // it is if something (e.g. LSR) was careful to place the constant
04344     // evaluation in a block other than then one that uses it (e.g. to hoist
04345     // the address of globals out of a loop).  If this is the case, we don't
04346     // want to forward-subst the cast.
04347     if (isa<Constant>(CI->getOperand(0)))
04348       return false;
04349 
04350     if (TLI && OptimizeNoopCopyExpression(CI, *TLI))
04351       return true;
04352 
04353     if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
04354       /// Sink a zext or sext into its user blocks if the target type doesn't
04355       /// fit in one register
04356       if (TLI && TLI->getTypeAction(CI->getContext(),
04357                                     TLI->getValueType(CI->getType())) ==
04358                      TargetLowering::TypeExpandInteger) {
04359         return SinkCast(CI);
04360       } else {
04361         bool MadeChange = MoveExtToFormExtLoad(I);
04362         return MadeChange | OptimizeExtUses(I);
04363       }
04364     }
04365     return false;
04366   }
04367 
04368   if (CmpInst *CI = dyn_cast<CmpInst>(I))
04369     if (!TLI || !TLI->hasMultipleConditionRegisters())
04370       return OptimizeCmpExpression(CI);
04371 
04372   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
04373     if (TLI)
04374       return OptimizeMemoryInst(I, I->getOperand(0), LI->getType());
04375     return false;
04376   }
04377 
04378   if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
04379     if (TLI)
04380       return OptimizeMemoryInst(I, SI->getOperand(1),
04381                                 SI->getOperand(0)->getType());
04382     return false;
04383   }
04384 
04385   BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
04386 
04387   if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
04388                 BinOp->getOpcode() == Instruction::LShr)) {
04389     ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
04390     if (TLI && CI && TLI->hasExtractBitsInsn())
04391       return OptimizeExtractBits(BinOp, CI, *TLI);
04392 
04393     return false;
04394   }
04395 
04396   if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
04397     if (GEPI->hasAllZeroIndices()) {
04398       /// The GEP operand must be a pointer, so must its result -> BitCast
04399       Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
04400                                         GEPI->getName(), GEPI);
04401       GEPI->replaceAllUsesWith(NC);
04402       GEPI->eraseFromParent();
04403       ++NumGEPsElim;
04404       OptimizeInst(NC, ModifiedDT);
04405       return true;
04406     }
04407     return false;
04408   }
04409 
04410   if (CallInst *CI = dyn_cast<CallInst>(I))
04411     return OptimizeCallInst(CI, ModifiedDT);
04412 
04413   if (SelectInst *SI = dyn_cast<SelectInst>(I))
04414     return OptimizeSelectInst(SI);
04415 
04416   if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
04417     return OptimizeShuffleVectorInst(SVI);
04418 
04419   if (isa<ExtractElementInst>(I))
04420     return OptimizeExtractElementInst(I);
04421 
04422   if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
04423     if (TLI && BI->isConditional() && BI->getCondition()->hasOneUse()) {
04424       // Check if the branch condition compares a value agaist zero.
04425       if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
04426         if (ICI->getPredicate() == ICmpInst::ICMP_EQ &&
04427             match(ICI->getOperand(1), m_Zero())) {
04428           BasicBlock *ThenBB = BI->getSuccessor(1);
04429           BasicBlock *EndBB = BI->getSuccessor(0);
04430 
04431           // Check if ThenBB is only reachable from this basic block; also,
04432           // check if EndBB has more than one predecessor.
04433           if (ThenBB->getSinglePredecessor() &&
04434               !EndBB->getSinglePredecessor()) {
04435             TerminatorInst *TI = ThenBB->getTerminator();
04436 
04437             if (TI->getNumSuccessors() == 1 && TI->getSuccessor(0) == EndBB &&
04438                 // Try to speculate calls to intrinsic cttz/ctlz from 'ThenBB'.
04439                 OptimizeBranchInst(BI, *TLI)) {
04440               ModifiedDT = true;
04441               return true;
04442             }
04443           }
04444         }
04445       }
04446     }
04447     return false;
04448   }
04449 
04450   return false;
04451 }
04452 
04453 // In this pass we look for GEP and cast instructions that are used
04454 // across basic blocks and rewrite them to improve basic-block-at-a-time
04455 // selection.
04456 bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB, bool& ModifiedDT) {
04457   SunkAddrs.clear();
04458   bool MadeChange = false;
04459 
04460   CurInstIterator = BB.begin();
04461   while (CurInstIterator != BB.end()) {
04462     MadeChange |= OptimizeInst(CurInstIterator++, ModifiedDT);
04463     if (ModifiedDT)
04464       return true;
04465   }
04466   MadeChange |= DupRetToEnableTailCallOpts(&BB);
04467 
04468   return MadeChange;
04469 }
04470 
04471 // llvm.dbg.value is far away from the value then iSel may not be able
04472 // handle it properly. iSel will drop llvm.dbg.value if it can not
04473 // find a node corresponding to the value.
04474 bool CodeGenPrepare::PlaceDbgValues(Function &F) {
04475   bool MadeChange = false;
04476   for (BasicBlock &BB : F) {
04477     Instruction *PrevNonDbgInst = nullptr;
04478     for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
04479       Instruction *Insn = BI++;
04480       DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
04481       // Leave dbg.values that refer to an alloca alone. These
04482       // instrinsics describe the address of a variable (= the alloca)
04483       // being taken.  They should not be moved next to the alloca
04484       // (and to the beginning of the scope), but rather stay close to
04485       // where said address is used.
04486       if (!DVI || (DVI->getValue() && isa<AllocaInst>(DVI->getValue()))) {
04487         PrevNonDbgInst = Insn;
04488         continue;
04489       }
04490 
04491       Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue());
04492       if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) {
04493         DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI);
04494         DVI->removeFromParent();
04495         if (isa<PHINode>(VI))
04496           DVI->insertBefore(VI->getParent()->getFirstInsertionPt());
04497         else
04498           DVI->insertAfter(VI);
04499         MadeChange = true;
04500         ++NumDbgValueMoved;
04501       }
04502     }
04503   }
04504   return MadeChange;
04505 }
04506 
04507 // If there is a sequence that branches based on comparing a single bit
04508 // against zero that can be combined into a single instruction, and the
04509 // target supports folding these into a single instruction, sink the
04510 // mask and compare into the branch uses. Do this before OptimizeBlock ->
04511 // OptimizeInst -> OptimizeCmpExpression, which perturbs the pattern being
04512 // searched for.
04513 bool CodeGenPrepare::sinkAndCmp(Function &F) {
04514   if (!EnableAndCmpSinking)
04515     return false;
04516   if (!TLI || !TLI->isMaskAndBranchFoldingLegal())
04517     return false;
04518   bool MadeChange = false;
04519   for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
04520     BasicBlock *BB = I++;
04521 
04522     // Does this BB end with the following?
04523     //   %andVal = and %val, #single-bit-set
04524     //   %icmpVal = icmp %andResult, 0
04525     //   br i1 %cmpVal label %dest1, label %dest2"
04526     BranchInst *Brcc = dyn_cast<BranchInst>(BB->getTerminator());
04527     if (!Brcc || !Brcc->isConditional())
04528       continue;
04529     ICmpInst *Cmp = dyn_cast<ICmpInst>(Brcc->getOperand(0));
04530     if (!Cmp || Cmp->getParent() != BB)
04531       continue;
04532     ConstantInt *Zero = dyn_cast<ConstantInt>(Cmp->getOperand(1));
04533     if (!Zero || !Zero->isZero())
04534       continue;
04535     Instruction *And = dyn_cast<Instruction>(Cmp->getOperand(0));
04536     if (!And || And->getOpcode() != Instruction::And || And->getParent() != BB)
04537       continue;
04538     ConstantInt* Mask = dyn_cast<ConstantInt>(And->getOperand(1));
04539     if (!Mask || !Mask->getUniqueInteger().isPowerOf2())
04540       continue;
04541     DEBUG(dbgs() << "found and; icmp ?,0; brcc\n"); DEBUG(BB->dump());
04542 
04543     // Push the "and; icmp" for any users that are conditional branches.
04544     // Since there can only be one branch use per BB, we don't need to keep
04545     // track of which BBs we insert into.
04546     for (Value::use_iterator UI = Cmp->use_begin(), E = Cmp->use_end();
04547          UI != E; ) {
04548       Use &TheUse = *UI;
04549       // Find brcc use.
04550       BranchInst *BrccUser = dyn_cast<BranchInst>(*UI);
04551       ++UI;
04552       if (!BrccUser || !BrccUser->isConditional())
04553         continue;
04554       BasicBlock *UserBB = BrccUser->getParent();
04555       if (UserBB == BB) continue;
04556       DEBUG(dbgs() << "found Brcc use\n");
04557 
04558       // Sink the "and; icmp" to use.
04559       MadeChange = true;
04560       BinaryOperator *NewAnd =
04561         BinaryOperator::CreateAnd(And->getOperand(0), And->getOperand(1), "",
04562                                   BrccUser);
04563       CmpInst *NewCmp =
04564         CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(), NewAnd, Zero,
04565                         "", BrccUser);
04566       TheUse = NewCmp;
04567       ++NumAndCmpsMoved;
04568       DEBUG(BrccUser->getParent()->dump());
04569     }
04570   }
04571   return MadeChange;
04572 }
04573 
04574 /// \brief Retrieve the probabilities of a conditional branch. Returns true on
04575 /// success, or returns false if no or invalid metadata was found.
04576 static bool extractBranchMetadata(BranchInst *BI,
04577                                   uint64_t &ProbTrue, uint64_t &ProbFalse) {
04578   assert(BI->isConditional() &&
04579          "Looking for probabilities on unconditional branch?");
04580   auto *ProfileData = BI->getMetadata(LLVMContext::MD_prof);
04581   if (!ProfileData || ProfileData->getNumOperands() != 3)
04582     return false;
04583 
04584   const auto *CITrue =
04585       mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(1));
04586   const auto *CIFalse =
04587       mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(2));
04588   if (!CITrue || !CIFalse)
04589     return false;
04590 
04591   ProbTrue = CITrue->getValue().getZExtValue();
04592   ProbFalse = CIFalse->getValue().getZExtValue();
04593 
04594   return true;
04595 }
04596 
04597 /// \brief Scale down both weights to fit into uint32_t.
04598 static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
04599   uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
04600   uint32_t Scale = (NewMax / UINT32_MAX) + 1;
04601   NewTrue = NewTrue / Scale;
04602   NewFalse = NewFalse / Scale;
04603 }
04604 
04605 /// \brief Some targets prefer to split a conditional branch like:
04606 /// \code
04607 ///   %0 = icmp ne i32 %a, 0
04608 ///   %1 = icmp ne i32 %b, 0
04609 ///   %or.cond = or i1 %0, %1
04610 ///   br i1 %or.cond, label %TrueBB, label %FalseBB
04611 /// \endcode
04612 /// into multiple branch instructions like:
04613 /// \code
04614 ///   bb1:
04615 ///     %0 = icmp ne i32 %a, 0
04616 ///     br i1 %0, label %TrueBB, label %bb2
04617 ///   bb2:
04618 ///     %1 = icmp ne i32 %b, 0
04619 ///     br i1 %1, label %TrueBB, label %FalseBB
04620 /// \endcode
04621 /// This usually allows instruction selection to do even further optimizations
04622 /// and combine the compare with the branch instruction. Currently this is
04623 /// applied for targets which have "cheap" jump instructions.
04624 ///
04625 /// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
04626 ///
04627 bool CodeGenPrepare::splitBranchCondition(Function &F) {
04628   if (!TM || TM->Options.EnableFastISel != true ||
04629       !TLI || TLI->isJumpExpensive())
04630     return false;
04631 
04632   bool MadeChange = false;
04633   for (auto &BB : F) {
04634     // Does this BB end with the following?
04635     //   %cond1 = icmp|fcmp|binary instruction ...
04636     //   %cond2 = icmp|fcmp|binary instruction ...
04637     //   %cond.or = or|and i1 %cond1, cond2
04638     //   br i1 %cond.or label %dest1, label %dest2"
04639     BinaryOperator *LogicOp;
04640     BasicBlock *TBB, *FBB;
04641     if (!match(BB.getTerminator(), m_Br(m_OneUse(m_BinOp(LogicOp)), TBB, FBB)))
04642       continue;
04643 
04644     unsigned Opc;
04645     Value *Cond1, *Cond2;
04646     if (match(LogicOp, m_And(m_OneUse(m_Value(Cond1)),
04647                              m_OneUse(m_Value(Cond2)))))
04648       Opc = Instruction::And;
04649     else if (match(LogicOp, m_Or(m_OneUse(m_Value(Cond1)),
04650                                  m_OneUse(m_Value(Cond2)))))
04651       Opc = Instruction::Or;
04652     else
04653       continue;
04654 
04655     if (!match(Cond1, m_CombineOr(m_Cmp(), m_BinOp())) ||
04656         !match(Cond2, m_CombineOr(m_Cmp(), m_BinOp()))   )
04657       continue;
04658 
04659     DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());
04660 
04661     // Create a new BB.
04662     auto *InsertBefore = std::next(Function::iterator(BB))
04663         .getNodePtrUnchecked();
04664     auto TmpBB = BasicBlock::Create(BB.getContext(),
04665                                     BB.getName() + ".cond.split",
04666                                     BB.getParent(), InsertBefore);
04667 
04668     // Update original basic block by using the first condition directly by the
04669     // branch instruction and removing the no longer needed and/or instruction.
04670     auto *Br1 = cast<BranchInst>(BB.getTerminator());
04671     Br1->setCondition(Cond1);
04672     LogicOp->eraseFromParent();
04673 
04674     // Depending on the conditon we have to either replace the true or the false
04675     // successor of the original branch instruction.
04676     if (Opc == Instruction::And)
04677       Br1->setSuccessor(0, TmpBB);
04678     else
04679       Br1->setSuccessor(1, TmpBB);
04680 
04681     // Fill in the new basic block.
04682     auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond2, TBB, FBB);
04683     if (auto *I = dyn_cast<Instruction>(Cond2)) {
04684       I->removeFromParent();
04685       I->insertBefore(Br2);
04686     }
04687 
04688     // Update PHI nodes in both successors. The original BB needs to be
04689     // replaced in one succesor's PHI nodes, because the branch comes now from
04690     // the newly generated BB (NewBB). In the other successor we need to add one
04691     // incoming edge to the PHI nodes, because both branch instructions target
04692     // now the same successor. Depending on the original branch condition
04693     // (and/or) we have to swap the successors (TrueDest, FalseDest), so that
04694     // we perfrom the correct update for the PHI nodes.
04695     // This doesn't change the successor order of the just created branch
04696     // instruction (or any other instruction).
04697     if (Opc == Instruction::Or)
04698       std::swap(TBB, FBB);
04699 
04700     // Replace the old BB with the new BB.
04701     for (auto &I : *TBB) {
04702       PHINode *PN = dyn_cast<PHINode>(&I);
04703       if (!PN)
04704         break;
04705       int i;
04706       while ((i = PN->getBasicBlockIndex(&BB)) >= 0)
04707         PN->setIncomingBlock(i, TmpBB);
04708     }
04709 
04710     // Add another incoming edge form the new BB.
04711     for (auto &I : *FBB) {
04712       PHINode *PN = dyn_cast<PHINode>(&I);
04713       if (!PN)
04714         break;
04715       auto *Val = PN->getIncomingValueForBlock(&BB);
04716       PN->addIncoming(Val, TmpBB);
04717     }
04718 
04719     // Update the branch weights (from SelectionDAGBuilder::
04720     // FindMergedConditions).
04721     if (Opc == Instruction::Or) {
04722       // Codegen X | Y as:
04723       // BB1:
04724       //   jmp_if_X TBB
04725       //   jmp TmpBB
04726       // TmpBB:
04727       //   jmp_if_Y TBB
04728       //   jmp FBB
04729       //
04730 
04731       // We have flexibility in setting Prob for BB1 and Prob for NewBB.
04732       // The requirement is that
04733       //   TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
04734       //     = TrueProb for orignal BB.
04735       // Assuming the orignal weights are A and B, one choice is to set BB1's
04736       // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
04737       // assumes that
04738       //   TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
04739       // Another choice is to assume TrueProb for BB1 equals to TrueProb for
04740       // TmpBB, but the math is more complicated.
04741       uint64_t TrueWeight, FalseWeight;
04742       if (extractBranchMetadata(Br1, TrueWeight, FalseWeight)) {
04743         uint64_t NewTrueWeight = TrueWeight;
04744         uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight;
04745         scaleWeights(NewTrueWeight, NewFalseWeight);
04746         Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext())
04747                          .createBranchWeights(TrueWeight, FalseWeight));
04748 
04749         NewTrueWeight = TrueWeight;
04750         NewFalseWeight = 2 * FalseWeight;
04751         scaleWeights(NewTrueWeight, NewFalseWeight);
04752         Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext())
04753                          .createBranchWeights(TrueWeight, FalseWeight));
04754       }
04755     } else {
04756       // Codegen X & Y as:
04757       // BB1:
04758       //   jmp_if_X TmpBB
04759       //   jmp FBB
04760       // TmpBB:
04761       //   jmp_if_Y TBB
04762       //   jmp FBB
04763       //
04764       //  This requires creation of TmpBB after CurBB.
04765 
04766       // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
04767       // The requirement is that
04768       //   FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
04769       //     = FalseProb for orignal BB.
04770       // Assuming the orignal weights are A and B, one choice is to set BB1's
04771       // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
04772       // assumes that
04773       //   FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
04774       uint64_t TrueWeight, FalseWeight;
04775       if (extractBranchMetadata(Br1, TrueWeight, FalseWeight)) {
04776         uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight;
04777         uint64_t NewFalseWeight = FalseWeight;
04778         scaleWeights(NewTrueWeight, NewFalseWeight);
04779         Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext())
04780                          .createBranchWeights(TrueWeight, FalseWeight));
04781 
04782         NewTrueWeight = 2 * TrueWeight;
04783         NewFalseWeight = FalseWeight;
04784         scaleWeights(NewTrueWeight, NewFalseWeight);
04785         Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext())
04786                          .createBranchWeights(TrueWeight, FalseWeight));
04787       }
04788     }
04789 
04790     // Request DOM Tree update.
04791     // Note: No point in getting fancy here, since the DT info is never
04792     // available to CodeGenPrepare and the existing update code is broken
04793     // anyways.
04794     ModifiedDT = true;
04795 
04796     MadeChange = true;
04797 
04798     DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();
04799           TmpBB->dump());
04800   }
04801   return MadeChange;
04802 }