CodeGenPrepare.cpp
00001 //===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This pass munges the code in the input function to better prepare it for
00011 // SelectionDAG-based code generation. This works around limitations in its
00012 // basic-block-at-a-time approach. It should eventually be removed.
00013 //
00014 //===----------------------------------------------------------------------===//
00015 
00016 #include "llvm/CodeGen/Passes.h"
00017 #include "llvm/ADT/DenseMap.h"
00018 #include "llvm/ADT/SmallSet.h"
00019 #include "llvm/ADT/Statistic.h"
00020 #include "llvm/Analysis/InstructionSimplify.h"
00021 #include "llvm/Analysis/TargetTransformInfo.h"
00022 #include "llvm/IR/CallSite.h"
00023 #include "llvm/IR/Constants.h"
00024 #include "llvm/IR/DataLayout.h"
00025 #include "llvm/IR/DerivedTypes.h"
00026 #include "llvm/IR/Dominators.h"
00027 #include "llvm/IR/Function.h"
00028 #include "llvm/IR/GetElementPtrTypeIterator.h"
00029 #include "llvm/IR/IRBuilder.h"
00030 #include "llvm/IR/InlineAsm.h"
00031 #include "llvm/IR/Instructions.h"
00032 #include "llvm/IR/IntrinsicInst.h"
00033 #include "llvm/IR/MDBuilder.h"
00034 #include "llvm/IR/PatternMatch.h"
00035 #include "llvm/IR/ValueHandle.h"
00036 #include "llvm/IR/ValueMap.h"
00037 #include "llvm/Pass.h"
00038 #include "llvm/Support/CommandLine.h"
00039 #include "llvm/Support/Debug.h"
00040 #include "llvm/Support/raw_ostream.h"
00041 #include "llvm/Target/TargetLibraryInfo.h"
00042 #include "llvm/Target/TargetLowering.h"
00043 #include "llvm/Target/TargetSubtargetInfo.h"
00044 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
00045 #include "llvm/Transforms/Utils/BuildLibCalls.h"
00046 #include "llvm/Transforms/Utils/BypassSlowDivision.h"
00047 #include "llvm/Transforms/Utils/Local.h"
00048 using namespace llvm;
00049 using namespace llvm::PatternMatch;
00050 
00051 #define DEBUG_TYPE "codegenprepare"
00052 
00053 STATISTIC(NumBlocksElim, "Number of blocks eliminated");
00054 STATISTIC(NumPHIsElim,   "Number of trivial PHIs eliminated");
00055 STATISTIC(NumGEPsElim,   "Number of GEPs converted to casts");
00056 STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
00057                       "sunken Cmps");
00058 STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
00059                        "of sunken Casts");
00060 STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
00061                           "computations were sunk");
00062 STATISTIC(NumExtsMoved,  "Number of [s|z]ext instructions combined with loads");
00063 STATISTIC(NumExtUses,    "Number of uses of [s|z]ext instructions optimized");
00064 STATISTIC(NumRetsDup,    "Number of return instructions duplicated");
00065 STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
00066 STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
00067 STATISTIC(NumAndCmpsMoved, "Number of and/cmp's pushed into branches");
00068 STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
00069 
00070 static cl::opt<bool> DisableBranchOpts(
00071   "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
00072   cl::desc("Disable branch optimizations in CodeGenPrepare"));
00073 
00074 static cl::opt<bool> DisableSelectToBranch(
00075   "disable-cgp-select2branch", cl::Hidden, cl::init(false),
00076   cl::desc("Disable select to branch conversion."));
00077 
00078 static cl::opt<bool> AddrSinkUsingGEPs(
00079   "addr-sink-using-gep", cl::Hidden, cl::init(false),
00080   cl::desc("Address sinking in CGP using GEPs."));
00081 
00082 static cl::opt<bool> EnableAndCmpSinking(
00083    "enable-andcmp-sinking", cl::Hidden, cl::init(true),
00084    cl::desc("Enable sinking and/cmp into branches."));
00085 
00086 static cl::opt<bool> DisableStoreExtract(
00087     "disable-cgp-store-extract", cl::Hidden, cl::init(false),
00088     cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));
00089 
00090 static cl::opt<bool> StressStoreExtract(
00091     "stress-cgp-store-extract", cl::Hidden, cl::init(false),
00092     cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));
00093 
00094 static cl::opt<bool> DisableExtLdPromotion(
00095     "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
00096     cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
00097              "CodeGenPrepare"));
00098 
00099 static cl::opt<bool> StressExtLdPromotion(
00100     "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
00101     cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
00102              "optimization in CodeGenPrepare"));
00103 
00104 namespace {
00105 typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
00106 struct TypeIsSExt {
00107   Type *Ty;
00108   bool IsSExt;
00109   TypeIsSExt(Type *Ty, bool IsSExt) : Ty(Ty), IsSExt(IsSExt) {}
00110 };
00111 typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy;
00112 class TypePromotionTransaction;
00113 
00114   class CodeGenPrepare : public FunctionPass {
00115     /// TLI - Keep a pointer to a TargetLowering to consult for determining
00116     /// transformation profitability.
00117     const TargetMachine *TM;
00118     const TargetLowering *TLI;
00119     const TargetTransformInfo *TTI;
00120     const TargetLibraryInfo *TLInfo;
00121     DominatorTree *DT;
00122 
00123     /// CurInstIterator - As we scan instructions optimizing them, this is the
00124     /// next instruction to optimize.  Xforms that can invalidate this should
00125     /// update it.
00126     BasicBlock::iterator CurInstIterator;
00127 
00128     /// Keeps track of non-local addresses that have been sunk into a block.
00129     /// This allows us to avoid inserting duplicate code for blocks with
00130     /// multiple load/stores of the same address.
00131     ValueMap<Value*, Value*> SunkAddrs;
00132 
00133     /// Keeps track of all truncates inserted for the current function.
00134     SetOfInstrs InsertedTruncsSet;
00135     /// Keeps track of the original type of each instruction before its
00136     /// promotion for the current function.
00137     InstrToOrigTy PromotedInsts;
00138 
00139     /// ModifiedDT - If the CFG is modified in any way, the dominator tree may
00140     /// need to be updated.
00141     bool ModifiedDT;
00142 
00143     /// OptSize - True if optimizing for size.
00144     bool OptSize;
00145 
00146   public:
00147     static char ID; // Pass identification, replacement for typeid
00148     explicit CodeGenPrepare(const TargetMachine *TM = nullptr)
00149         : FunctionPass(ID), TM(TM), TLI(nullptr), TTI(nullptr) {
00150         initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
00151       }
00152     bool runOnFunction(Function &F) override;
00153 
00154     const char *getPassName() const override { return "CodeGen Prepare"; }
00155 
00156     void getAnalysisUsage(AnalysisUsage &AU) const override {
00157       AU.addPreserved<DominatorTreeWrapperPass>();
00158       AU.addRequired<TargetLibraryInfo>();
00159       AU.addRequired<TargetTransformInfo>();
00160     }
00161 
00162   private:
00163     bool EliminateFallThrough(Function &F);
00164     bool EliminateMostlyEmptyBlocks(Function &F);
00165     bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
00166     void EliminateMostlyEmptyBlock(BasicBlock *BB);
00167     bool OptimizeBlock(BasicBlock &BB);
00168     bool OptimizeInst(Instruction *I);
00169     bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy);
00170     bool OptimizeInlineAsmInst(CallInst *CS);
00171     bool OptimizeCallInst(CallInst *CI);
00172     bool MoveExtToFormExtLoad(Instruction *&I);
00173     bool OptimizeExtUses(Instruction *I);
00174     bool OptimizeSelectInst(SelectInst *SI);
00175     bool OptimizeShuffleVectorInst(ShuffleVectorInst *SI);
00176     bool OptimizeExtractElementInst(Instruction *Inst);
00177     bool DupRetToEnableTailCallOpts(BasicBlock *BB);
00178     bool PlaceDbgValues(Function &F);
00179     bool sinkAndCmp(Function &F);
00180     bool ExtLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI,
00181                         Instruction *&Inst,
00182                         const SmallVectorImpl<Instruction *> &Exts,
00183                         unsigned CreatedInst);
00184     bool splitBranchCondition(Function &F);
00185   };
00186 }
00187 
00188 char CodeGenPrepare::ID = 0;
00189 INITIALIZE_TM_PASS(CodeGenPrepare, "codegenprepare",
00190                    "Optimize for code generation", false, false)
00191 
00192 FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) {
00193   return new CodeGenPrepare(TM);
00194 }
00195 
00196 bool CodeGenPrepare::runOnFunction(Function &F) {
00197   if (skipOptnoneFunction(F))
00198     return false;
00199 
00200   bool EverMadeChange = false;
00201   // Clear per function information.
00202   InsertedTruncsSet.clear();
00203   PromotedInsts.clear();
00204 
00205   ModifiedDT = false;
00206   if (TM)
00207     TLI = TM->getSubtargetImpl()->getTargetLowering();
00208   TLInfo = &getAnalysis<TargetLibraryInfo>();
00209   TTI = &getAnalysis<TargetTransformInfo>();
00210   DominatorTreeWrapperPass *DTWP =
00211       getAnalysisIfAvailable<DominatorTreeWrapperPass>();
00212   DT = DTWP ? &DTWP->getDomTree() : nullptr;
00213   OptSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
00214                                            Attribute::OptimizeForSize);
00215 
00216   /// This optimization identifies DIV instructions that can be
00217   /// profitably bypassed and carried out with a shorter, faster divide.
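  // Illustrative sketch (hypothetical IR, not from this file): on a target
  // where 64-bit division is slow, a division such as
  //   %q = udiv i64 %a, %b
  // is guarded by a runtime check, and when both operands fit in the narrower
  // bypass width a faster 32-bit divide is used instead.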
00218   if (!OptSize && TLI && TLI->isSlowDivBypassed()) {
00219     const DenseMap<unsigned int, unsigned int> &BypassWidths =
00220        TLI->getBypassSlowDivWidths();
00221     for (Function::iterator I = F.begin(); I != F.end(); I++)
00222       EverMadeChange |= bypassSlowDivision(F, I, BypassWidths);
00223   }
00224 
00225   // Eliminate blocks that contain only PHI nodes and an
00226   // unconditional branch.
00227   EverMadeChange |= EliminateMostlyEmptyBlocks(F);
00228 
00229   // If llvm.dbg.value is far away from the value, then ISel may not be able
00230   // to handle it properly. ISel will drop llvm.dbg.value if it cannot
00231   // find a node corresponding to the value.
00232   EverMadeChange |= PlaceDbgValues(F);
00233 
00234   // If there is a mask, compare against zero, and branch that can be combined
00235   // into a single target instruction, push the mask and compare into branch
00236   // users. Do this before OptimizeBlock -> OptimizeInst ->
00237   // OptimizeCmpExpression, which perturbs the pattern being searched for.
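  // Illustrative sketch (hypothetical IR): a pattern such as
  //   %m = and i32 %x, 8
  //   %c = icmp eq i32 %m, 0
  //   br i1 %c, label %t, label %f
  // is duplicated into the blocks that branch on %c, so targets with a
  // combined test-and-branch instruction can match the whole sequence.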
00238   if (!DisableBranchOpts) {
00239     EverMadeChange |= sinkAndCmp(F);
00240     EverMadeChange |= splitBranchCondition(F);
00241   }
00242 
00243   bool MadeChange = true;
00244   while (MadeChange) {
00245     MadeChange = false;
00246     for (Function::iterator I = F.begin(); I != F.end(); ) {
00247       BasicBlock *BB = I++;
00248       MadeChange |= OptimizeBlock(*BB);
00249     }
00250     EverMadeChange |= MadeChange;
00251   }
00252 
00253   SunkAddrs.clear();
00254 
00255   if (!DisableBranchOpts) {
00256     MadeChange = false;
00257     SmallPtrSet<BasicBlock*, 8> WorkList;
00258     for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
00259       SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
00260       MadeChange |= ConstantFoldTerminator(BB, true);
00261       if (!MadeChange) continue;
00262 
00263       for (SmallVectorImpl<BasicBlock*>::iterator
00264              II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
00265         if (pred_begin(*II) == pred_end(*II))
00266           WorkList.insert(*II);
00267     }
00268 
00269     // Delete the dead blocks and any of their dead successors.
00270     MadeChange |= !WorkList.empty();
00271     while (!WorkList.empty()) {
00272       BasicBlock *BB = *WorkList.begin();
00273       WorkList.erase(BB);
00274       SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
00275 
00276       DeleteDeadBlock(BB);
00277 
00278       for (SmallVectorImpl<BasicBlock*>::iterator
00279              II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
00280         if (pred_begin(*II) == pred_end(*II))
00281           WorkList.insert(*II);
00282     }
00283 
00284     // Merge pairs of basic blocks with unconditional branches, connected by
00285     // a single edge.
00286     if (EverMadeChange || MadeChange)
00287       MadeChange |= EliminateFallThrough(F);
00288 
00289     if (MadeChange)
00290       ModifiedDT = true;
00291     EverMadeChange |= MadeChange;
00292   }
00293 
00294   if (ModifiedDT && DT)
00295     DT->recalculate(F);
00296 
00297   return EverMadeChange;
00298 }
00299 
00300 /// EliminateFallThrough - Merge basic blocks which are connected
00301 /// by a single edge, where one of the basic blocks has a single successor
00302 /// pointing to the other basic block, which has a single predecessor.
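/// Illustrative sketch (hypothetical IR):
///   bb1:
///     br label %bb2
///   bb2:                                ; preds = %bb1
///     ...
/// Since %bb2 has %bb1 as its only predecessor and the branch is
/// unconditional, the two blocks are merged into one.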
00303 bool CodeGenPrepare::EliminateFallThrough(Function &F) {
00304   bool Changed = false;
00305   // Scan all of the blocks in the function, except for the entry block.
00306   for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) {
00307     BasicBlock *BB = I++;
00308     // If the destination block has a single pred, then this is a trivial
00309     // edge, just collapse it.
00310     BasicBlock *SinglePred = BB->getSinglePredecessor();
00311 
00312     // Don't merge if BB's address is taken.
00313     if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue;
00314 
00315     BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
00316     if (Term && !Term->isConditional()) {
00317       Changed = true;
00318       DEBUG(dbgs() << "To merge:\n"<< *SinglePred << "\n\n\n");
00319       // Remember if SinglePred was the entry block of the function.
00320       // If so, we will need to move BB back to the entry position.
00321       bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
00322       MergeBasicBlockIntoOnlyPred(BB, this);
00323 
00324       if (isEntry && BB != &BB->getParent()->getEntryBlock())
00325         BB->moveBefore(&BB->getParent()->getEntryBlock());
00326 
00327       // We have erased a block. Update the iterator.
00328       I = BB;
00329     }
00330   }
00331   return Changed;
00332 }
00333 
00334 /// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes,
00335 /// debug info directives, and an unconditional branch.  Passes before isel
00336 /// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for
00337 /// isel.  Start by eliminating these blocks so we can split them the way we
00338 /// want them.
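/// Illustrative sketch (hypothetical IR): a block such as
///   bb:
///     %v = phi i32 [ 0, %a ], [ 1, %b ]
///     br label %dest
/// is a candidate; after elimination the PHIs in %dest receive the incoming
/// values directly from %a and %b, and %bb is removed.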
00339 bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
00340   bool MadeChange = false;
00341   // Note that this intentionally skips the entry block.
00342   for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) {
00343     BasicBlock *BB = I++;
00344 
00345     // If this block doesn't end with an uncond branch, ignore it.
00346     BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
00347     if (!BI || !BI->isUnconditional())
00348       continue;
00349 
00350     // If the instruction before the branch (skipping debug info) isn't a phi
00351     // node, then other stuff is happening here.
00352     BasicBlock::iterator BBI = BI;
00353     if (BBI != BB->begin()) {
00354       --BBI;
00355       while (isa<DbgInfoIntrinsic>(BBI)) {
00356         if (BBI == BB->begin())
00357           break;
00358         --BBI;
00359       }
00360       if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
00361         continue;
00362     }
00363 
00364     // Do not break infinite loops.
00365     BasicBlock *DestBB = BI->getSuccessor(0);
00366     if (DestBB == BB)
00367       continue;
00368 
00369     if (!CanMergeBlocks(BB, DestBB))
00370       continue;
00371 
00372     EliminateMostlyEmptyBlock(BB);
00373     MadeChange = true;
00374   }
00375   return MadeChange;
00376 }
00377 
00378 /// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a
00379 /// single uncond branch between them, and BB contains no other non-phi
00380 /// instructions.
00381 bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB,
00382                                     const BasicBlock *DestBB) const {
00383   // We only want to eliminate blocks whose phi nodes are used by phi nodes in
00384   // the successor.  If there are more complex conditions (e.g. preheaders),
00385   // don't mess around with them.
00386   BasicBlock::const_iterator BBI = BB->begin();
00387   while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
00388     for (const User *U : PN->users()) {
00389       const Instruction *UI = cast<Instruction>(U);
00390       if (UI->getParent() != DestBB || !isa<PHINode>(UI))
00391         return false;
00392       // If the User is inside DestBB and is a PHINode, then check its
00393       // incoming value. If the incoming value is not from BB, then this is
00394       // a complex condition (e.g. preheaders) that we want to avoid here.
00395       if (UI->getParent() == DestBB) {
00396         if (const PHINode *UPN = dyn_cast<PHINode>(UI))
00397           for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
00398             Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
00399             if (Insn && Insn->getParent() == BB &&
00400                 Insn->getParent() != UPN->getIncomingBlock(I))
00401               return false;
00402           }
00403       }
00404     }
00405   }
00406 
00407   // If BB and DestBB contain any common predecessors, then the phi nodes in BB
00408   // and DestBB may have conflicting incoming values for the block.  If so, we
00409   // can't merge the block.
00410   const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
00411   if (!DestBBPN) return true;  // no conflict.
00412 
00413   // Collect the preds of BB.
00414   SmallPtrSet<const BasicBlock*, 16> BBPreds;
00415   if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
00416     // It is faster to get preds from a PHI than with pred_iterator.
00417     for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
00418       BBPreds.insert(BBPN->getIncomingBlock(i));
00419   } else {
00420     BBPreds.insert(pred_begin(BB), pred_end(BB));
00421   }
00422 
00423   // Walk the preds of DestBB.
00424   for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
00425     BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
00426     if (BBPreds.count(Pred)) {   // Common predecessor?
00427       BBI = DestBB->begin();
00428       while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
00429         const Value *V1 = PN->getIncomingValueForBlock(Pred);
00430         const Value *V2 = PN->getIncomingValueForBlock(BB);
00431 
00432         // If V2 is a phi node in BB, look up what the mapped value will be.
00433         if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
00434           if (V2PN->getParent() == BB)
00435             V2 = V2PN->getIncomingValueForBlock(Pred);
00436 
00437         // If there is a conflict, bail out.
00438         if (V1 != V2) return false;
00439       }
00440     }
00441   }
00442 
00443   return true;
00444 }
00445 
00446 
00447 /// EliminateMostlyEmptyBlock - Eliminate a basic block that has only PHIs and
00448 /// an unconditional branch in it.
00449 void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
00450   BranchInst *BI = cast<BranchInst>(BB->getTerminator());
00451   BasicBlock *DestBB = BI->getSuccessor(0);
00452 
00453   DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB);
00454 
00455   // If the destination block has a single pred, then this is a trivial edge,
00456   // just collapse it.
00457   if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
00458     if (SinglePred != DestBB) {
00459       // Remember if SinglePred was the entry block of the function.  If so, we
00460       // will need to move BB back to the entry position.
00461       bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
00462       MergeBasicBlockIntoOnlyPred(DestBB, this);
00463 
00464       if (isEntry && BB != &BB->getParent()->getEntryBlock())
00465         BB->moveBefore(&BB->getParent()->getEntryBlock());
00466 
00467       DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
00468       return;
00469     }
00470   }
00471 
00472   // Otherwise, we have multiple predecessors of BB.  Update the PHIs in DestBB
00473   // to handle the new incoming edges it is about to have.
00474   PHINode *PN;
00475   for (BasicBlock::iterator BBI = DestBB->begin();
00476        (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
00477     // Remove the incoming value for BB, and remember it.
00478     Value *InVal = PN->removeIncomingValue(BB, false);
00479 
00480     // Two options: either the InVal is a phi node defined in BB or it is some
00481     // value that dominates BB.
00482     PHINode *InValPhi = dyn_cast<PHINode>(InVal);
00483     if (InValPhi && InValPhi->getParent() == BB) {
00484       // Add all of the input values of the input PHI as inputs of this phi.
00485       for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
00486         PN->addIncoming(InValPhi->getIncomingValue(i),
00487                         InValPhi->getIncomingBlock(i));
00488     } else {
00489       // Otherwise, add one instance of the dominating value for each edge that
00490       // we will be adding.
00491       if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
00492         for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
00493           PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
00494       } else {
00495         for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
00496           PN->addIncoming(InVal, *PI);
00497       }
00498     }
00499   }
00500 
00501   // The PHIs are now updated, change everything that refers to BB to use
00502   // DestBB and remove BB.
00503   BB->replaceAllUsesWith(DestBB);
00504   if (DT && !ModifiedDT) {
00505     BasicBlock *BBIDom  = DT->getNode(BB)->getIDom()->getBlock();
00506     BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock();
00507     BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom);
00508     DT->changeImmediateDominator(DestBB, NewIDom);
00509     DT->eraseNode(BB);
00510   }
00511   BB->eraseFromParent();
00512   ++NumBlocksElim;
00513 
00514   DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
00515 }
00516 
00517 /// SinkCast - Sink the specified cast instruction into its user blocks
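/// Illustrative sketch (hypothetical IR): a cast such as
///   %c = zext i32 %x to i64
/// that is defined in one block but used in others is re-created in each
/// user block (or, for a PHI use, in the matching predecessor), and the uses
/// are rewritten to the local copy.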
00518 static bool SinkCast(CastInst *CI) {
00519   BasicBlock *DefBB = CI->getParent();
00520 
00521   /// InsertedCasts - Only insert a cast in each block once.
00522   DenseMap<BasicBlock*, CastInst*> InsertedCasts;
00523 
00524   bool MadeChange = false;
00525   for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
00526        UI != E; ) {
00527     Use &TheUse = UI.getUse();
00528     Instruction *User = cast<Instruction>(*UI);
00529 
00530     // Figure out which BB this cast is used in.  For PHI's this is the
00531     // appropriate predecessor block.
00532     BasicBlock *UserBB = User->getParent();
00533     if (PHINode *PN = dyn_cast<PHINode>(User)) {
00534       UserBB = PN->getIncomingBlock(TheUse);
00535     }
00536 
00537     // Preincrement use iterator so we don't invalidate it.
00538     ++UI;
00539 
00540     // If this user is in the same block as the cast, don't change the cast.
00541     if (UserBB == DefBB) continue;
00542 
00543     // If we have already inserted a cast into this block, use it.
00544     CastInst *&InsertedCast = InsertedCasts[UserBB];
00545 
00546     if (!InsertedCast) {
00547       BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
00548       InsertedCast =
00549         CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
00550                          InsertPt);
00551       MadeChange = true;
00552     }
00553 
00554     // Replace a use of the cast with a use of the new cast.
00555     TheUse = InsertedCast;
00556     ++NumCastUses;
00557   }
00558 
00559   // If we removed all uses, nuke the cast.
00560   if (CI->use_empty()) {
00561     CI->eraseFromParent();
00562     MadeChange = true;
00563   }
00564 
00565   return MadeChange;
00566 }
00567 
00568 /// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
00569 /// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
00570 /// sink it into user blocks to reduce the number of virtual
00571 /// registers that must be created and coalesced.
00572 ///
00573 /// Return true if any changes are made.
00574 ///
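/// Illustrative sketch (hedged): on a target that promotes i8 to i32, a cast
/// such as
///   %t = trunc i32 %x to i8
/// is a plain register copy after type promotion, so it is handed to SinkCast
/// above and re-created in each block that uses %t.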
00575 static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
00576   // If this is a noop copy,
00577   EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
00578   EVT DstVT = TLI.getValueType(CI->getType());
00579 
00580   // An fp<->int conversion is not a noop copy.
00581   if (SrcVT.isInteger() != DstVT.isInteger())
00582     return false;
00583 
00584   // If this is an extension, it will be a zero or sign extension, which
00585   // isn't a noop.
00586   if (SrcVT.bitsLT(DstVT)) return false;
00587 
00588   // If these values will be promoted, find out what they will be promoted
00589   // to.  This helps us consider truncates on PPC as noop copies when they
00590   // are.
00591   if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
00592       TargetLowering::TypePromoteInteger)
00593     SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
00594   if (TLI.getTypeAction(CI->getContext(), DstVT) ==
00595       TargetLowering::TypePromoteInteger)
00596     DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
00597 
00598   // If, after promotion, these are the same types, this is a noop copy.
00599   if (SrcVT != DstVT)
00600     return false;
00601 
00602   return SinkCast(CI);
00603 }
00604 
00605 /// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce
00606 /// the number of virtual registers that must be created and coalesced.  This is
00607 /// a clear win except on targets with multiple condition code registers
00608 ///  (PowerPC), where it might lose; some adjustment may be wanted there.
00609 ///
00610 /// Return true if any changes are made.
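/// Illustrative sketch (hypothetical IR): if a compare
///   %c = icmp ult i32 %a, %b
/// is defined in one block but only branched on in another, a copy of the
/// icmp is created in the branching block so instruction selection can fold
/// the compare directly into the branch.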
00611 static bool OptimizeCmpExpression(CmpInst *CI) {
00612   BasicBlock *DefBB = CI->getParent();
00613 
00614   /// InsertedCmp - Only insert a cmp in each block once.
00615   DenseMap<BasicBlock*, CmpInst*> InsertedCmps;
00616 
00617   bool MadeChange = false;
00618   for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
00619        UI != E; ) {
00620     Use &TheUse = UI.getUse();
00621     Instruction *User = cast<Instruction>(*UI);
00622 
00623     // Preincrement use iterator so we don't invalidate it.
00624     ++UI;
00625 
00626     // Don't bother for PHI nodes.
00627     if (isa<PHINode>(User))
00628       continue;
00629 
00630     // Figure out which BB this cmp is used in.
00631     BasicBlock *UserBB = User->getParent();
00632 
00633     // If this user is in the same block as the cmp, don't change the cmp.
00634     if (UserBB == DefBB) continue;
00635 
00636     // If we have already inserted a cmp into this block, use it.
00637     CmpInst *&InsertedCmp = InsertedCmps[UserBB];
00638 
00639     if (!InsertedCmp) {
00640       BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
00641       InsertedCmp =
00642         CmpInst::Create(CI->getOpcode(),
00643                         CI->getPredicate(),  CI->getOperand(0),
00644                         CI->getOperand(1), "", InsertPt);
00645       MadeChange = true;
00646     }
00647 
00648     // Replace a use of the cmp with a use of the new cmp.
00649     TheUse = InsertedCmp;
00650     ++NumCmpUses;
00651   }
00652 
00653   // If we removed all uses, nuke the cmp.
00654   if (CI->use_empty())
00655     CI->eraseFromParent();
00656 
00657   return MadeChange;
00658 }
00659 
00660 /// isExtractBitsCandidateUse - Check if the candidate use could
00661 /// be combined with the shift instruction. Candidates are:
00662 /// 1. A truncate instruction
00663 /// 2. An 'and' instruction whose immediate is a mask of the low bits:
00664 ///    imm & (imm+1) == 0
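/// For example (illustrative values): 0x0000ffff qualifies since
/// 0xffff & 0x10000 == 0, while 0x0000ff00 does not since
/// 0xff00 & 0xff01 == 0xff00 != 0.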
00665 static bool isExtractBitsCandidateUse(Instruction *User) {
00666   if (!isa<TruncInst>(User)) {
00667     if (User->getOpcode() != Instruction::And ||
00668         !isa<ConstantInt>(User->getOperand(1)))
00669       return false;
00670 
00671     const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue();
00672 
00673     if ((Cimm & (Cimm + 1)).getBoolValue())
00674       return false;
00675   }
00676   return true;
00677 }
00678 
00679 /// SinkShiftAndTruncate - sink both the shift and truncate instructions
00680 /// into the BB that uses the truncate.
00681 static bool
00682 SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
00683                      DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
00684                      const TargetLowering &TLI) {
00685   BasicBlock *UserBB = User->getParent();
00686   DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
00687   TruncInst *TruncI = dyn_cast<TruncInst>(User);
00688   bool MadeChange = false;
00689 
00690   for (Value::user_iterator TruncUI = TruncI->user_begin(),
00691                             TruncE = TruncI->user_end();
00692        TruncUI != TruncE;) {
00693 
00694     Use &TruncTheUse = TruncUI.getUse();
00695     Instruction *TruncUser = cast<Instruction>(*TruncUI);
00696     // Preincrement use iterator so we don't invalidate it.
00697 
00698     ++TruncUI;
00699 
00700     int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
00701     if (!ISDOpcode)
00702       continue;
00703 
00704     // If the use is actually a legal node, there will not be an
00705     // implicit truncate.
00706     // FIXME: always querying the result type is just an
00707     // approximation; some nodes' legality is determined by the
00708     // operand or other means. There's no good way to find out though.
00709     if (TLI.isOperationLegalOrCustom(
00710             ISDOpcode, TLI.getValueType(TruncUser->getType(), true)))
00711       continue;
00712 
00713     // Don't bother for PHI nodes.
00714     if (isa<PHINode>(TruncUser))
00715       continue;
00716 
00717     BasicBlock *TruncUserBB = TruncUser->getParent();
00718 
00719     if (UserBB == TruncUserBB)
00720       continue;
00721 
00722     BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
00723     CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
00724 
00725     if (!InsertedShift && !InsertedTrunc) {
00726       BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
00727       // Sink the shift
00728       if (ShiftI->getOpcode() == Instruction::AShr)
00729         InsertedShift =
00730             BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt);
00731       else
00732         InsertedShift =
00733             BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt);
00734 
00735       // Sink the trunc
00736       BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
00737       TruncInsertPt++;
00738 
00739       InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
00740                                        TruncI->getType(), "", TruncInsertPt);
00741 
00742       MadeChange = true;
00743 
00744       TruncTheUse = InsertedTrunc;
00745     }
00746   }
00747   return MadeChange;
00748 }
00749 
00750 /// OptimizeExtractBits - sink the shift *right* instruction into user blocks if
00751 /// the uses could potentially be combined with this shift instruction and
00752 /// generate a BitExtract instruction. It will only be applied if the
00753 /// architecture supports the BitExtract instruction. Here is an example:
00754 /// BB1:
00755 ///   %x.extract.shift = lshr i64 %arg1, 32
00756 /// BB2:
00757 ///   %x.extract.trunc = trunc i64 %x.extract.shift to i16
00758 /// ==>
00759 ///
00760 /// BB2:
00761 ///   %x.extract.shift.1 = lshr i64 %arg1, 32
00762 ///   %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
00763 ///
00764 /// CodeGen will recognize the pattern in BB2 and generate a BitExtract
00765 /// instruction.
00766 /// Return true if any changes are made.
00767 static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
00768                                 const TargetLowering &TLI) {
00769   BasicBlock *DefBB = ShiftI->getParent();
00770 
00771   /// Only insert instructions in each block once.
00772   DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;
00773 
00774   bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(ShiftI->getType()));
00775 
00776   bool MadeChange = false;
00777   for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
00778        UI != E;) {
00779     Use &TheUse = UI.getUse();
00780     Instruction *User = cast<Instruction>(*UI);
00781     // Preincrement use iterator so we don't invalidate it.
00782     ++UI;
00783 
00784     // Don't bother for PHI nodes.
00785     if (isa<PHINode>(User))
00786       continue;
00787 
00788     if (!isExtractBitsCandidateUse(User))
00789       continue;
00790 
00791     BasicBlock *UserBB = User->getParent();
00792 
00793     if (UserBB == DefBB) {
00794       // If the shift and truncate instructions are in the same BB, the use of
00795       // the truncate (TruncUse) may still introduce another truncate if its
00796       // type is not legal. In this case, we would like to sink both the shift
00797       // and the truncate into the BB of TruncUse.
00798       // for example:
00799       // BB1:
00800       // i64 shift.result = lshr i64 opnd, imm
00801       // trunc.result = trunc shift.result to i16
00802       //
00803       // BB2:
00804       //   ----> We will have an implicit truncate here if the architecture does
00805       //   not have i16 compare.
00806       // cmp i16 trunc.result, opnd2
00807       //
00808       if (isa<TruncInst>(User) && shiftIsLegal
00809           // If the type of the truncate is legal, no truncate will be
00810           // introduced in other basic blocks.
00811           && (!TLI.isTypeLegal(TLI.getValueType(User->getType()))))
00812         MadeChange =
00813             SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI);
00814 
00815       continue;
00816     }
00817     // If we have already inserted a shift into this block, use it.
00818     BinaryOperator *&InsertedShift = InsertedShifts[UserBB];
00819 
00820     if (!InsertedShift) {
00821       BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
00822 
00823       if (ShiftI->getOpcode() == Instruction::AShr)
00824         InsertedShift =
00825             BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt);
00826       else
00827         InsertedShift =
00828             BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt);
00829 
00830       MadeChange = true;
00831     }
00832 
00833     // Replace a use of the shift with a use of the new shift.
00834     TheUse = InsertedShift;
00835   }
00836 
00837   // If we removed all uses, nuke the shift.
00838   if (ShiftI->use_empty())
00839     ShiftI->eraseFromParent();
00840 
00841   return MadeChange;
00842 }
00843 
00844 namespace {
00845 class CodeGenPrepareFortifiedLibCalls : public SimplifyFortifiedLibCalls {
00846 protected:
00847   void replaceCall(Value *With) override {
00848     CI->replaceAllUsesWith(With);
00849     CI->eraseFromParent();
00850   }
00851   bool isFoldable(unsigned SizeCIOp, unsigned, bool) const override {
00852       if (ConstantInt *SizeCI =
00853                              dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp)))
00854         return SizeCI->isAllOnesValue();
00855     return false;
00856   }
00857 };
00858 } // end anonymous namespace
00859 
00860 bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
00861   BasicBlock *BB = CI->getParent();
00862 
00863   // Lower inline assembly if we can.
00864   // If we found an inline asm expression, and if the target knows how to
00865   // lower it to normal LLVM code, do so now.
00866   if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
00867     if (TLI->ExpandInlineAsm(CI)) {
00868       // Avoid invalidating the iterator.
00869       CurInstIterator = BB->begin();
00870       // Avoid processing instructions out of order, which could cause
00871       // reuse before a value is defined.
00872       SunkAddrs.clear();
00873       return true;
00874     }
00875     // Sink address computing for memory operands into the block.
00876     if (OptimizeInlineAsmInst(CI))
00877       return true;
00878   }
00879 
00880   // Lower all uses of llvm.objectsize.*
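  // For example (illustrative): any remaining call to @llvm.objectsize is
  // folded to -1 ("unknown maximum size") when its second argument is false
  // and to 0 ("unknown minimum size") when it is true, matching the
  // Min ? 0 : -1ULL selection below.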
00881   IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
00882   if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
00883     bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
00884     Type *ReturnTy = CI->getType();
00885     Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
00886 
00887     // Substituting this can cause recursive simplifications, which can
00888     // invalidate our iterator.  Use a WeakVH to hold onto it in case this
00889     // happens.
00890     WeakVH IterHandle(CurInstIterator);
00891 
00892     replaceAndRecursivelySimplify(CI, RetVal,
00893                                   TLI ? TLI->getDataLayout() : nullptr,
00894                                   TLInfo, ModifiedDT ? nullptr : DT);
00895 
00896     // If the iterator instruction was recursively deleted, start over at the
00897     // start of the block.
00898     if (IterHandle != CurInstIterator) {
00899       CurInstIterator = BB->begin();
00900       SunkAddrs.clear();
00901     }
00902     return true;
00903   }
00904 
00905   if (II && TLI) {
00906     SmallVector<Value*, 2> PtrOps;
00907     Type *AccessTy;
00908     if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy))
00909       while (!PtrOps.empty())
00910         if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy))
00911           return true;
00912   }
00913 
00914   // From here on out we're working with named functions.
00915   if (!CI->getCalledFunction()) return false;
00916 
00917   // We'll need DataLayout from here on out.
00918   const DataLayout *TD = TLI ? TLI->getDataLayout() : nullptr;
00919   if (!TD) return false;
00920 
00921   // Lower all default uses of _chk calls.  This is very similar
00922   // to what InstCombineCalls does, but here we are only lowering calls
00923   // that have the default "don't know" as the objectsize.  Anything else
00924   // should be left alone.
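  // For example (illustrative): a call like __memcpy_chk(dst, src, n, -1),
  // whose object-size argument is the all-ones "don't know" value checked in
  // isFoldable above, is simplified to a plain memcpy(dst, src, n); calls
  // with a real known size are left alone.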
00925   CodeGenPrepareFortifiedLibCalls Simplifier;
00926   return Simplifier.fold(CI, TD, TLInfo);
00927 }
00928 
00929 /// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return
00930 /// instructions to the predecessor to enable tail call optimizations. The
00931 /// case it is currently looking for is:
00932 /// @code
00933 /// bb0:
00934 ///   %tmp0 = tail call i32 @f0()
00935 ///   br label %return
00936 /// bb1:
00937 ///   %tmp1 = tail call i32 @f1()
00938 ///   br label %return
00939 /// bb2:
00940 ///   %tmp2 = tail call i32 @f2()
00941 ///   br label %return
00942 /// return:
00943 ///   %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
00944 ///   ret i32 %retval
00945 /// @endcode
00946 ///
00947 /// =>
00948 ///
00949 /// @code
00950 /// bb0:
00951 ///   %tmp0 = tail call i32 @f0()
00952 ///   ret i32 %tmp0
00953 /// bb1:
00954 ///   %tmp1 = tail call i32 @f1()
00955 ///   ret i32 %tmp1
00956 /// bb2:
00957 ///   %tmp2 = tail call i32 @f2()
00958 ///   ret i32 %tmp2
00959 /// @endcode
00960 bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) {
00961   if (!TLI)
00962     return false;
00963 
00964   ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
00965   if (!RI)
00966     return false;
00967 
00968   PHINode *PN = nullptr;
00969   BitCastInst *BCI = nullptr;
00970   Value *V = RI->getReturnValue();
00971   if (V) {
00972     BCI = dyn_cast<BitCastInst>(V);
00973     if (BCI)
00974       V = BCI->getOperand(0);
00975 
00976     PN = dyn_cast<PHINode>(V);
00977     if (!PN)
00978       return false;
00979   }
00980 
00981   if (PN && PN->getParent() != BB)
00982     return false;
00983 
00984   // It's not safe to eliminate the sign / zero extension of the return value.
00985   // See llvm::isInTailCallPosition().
00986   const Function *F = BB->getParent();
00987   AttributeSet CallerAttrs = F->getAttributes();
00988   if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) ||
00989       CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
00990     return false;
00991 
00992   // Make sure there are no instructions between the PHI and return, or that the
00993   // return is the first instruction in the block.
00994   if (PN) {
00995     BasicBlock::iterator BI = BB->begin();
00996     do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
00997     if (&*BI == BCI)
00998       // Also skip over the bitcast.
00999       ++BI;
01000     if (&*BI != RI)
01001       return false;
01002   } else {
01003     BasicBlock::iterator BI = BB->begin();
01004     while (isa<DbgInfoIntrinsic>(BI)) ++BI;
01005     if (&*BI != RI)
01006       return false;
01007   }
01008 
01009   /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
01010   /// call.
01011   SmallVector<CallInst*, 4> TailCalls;
01012   if (PN) {
01013     for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
01014       CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I));
01015       // Make sure the phi value is indeed produced by the tail call.
01016       if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) &&
01017           TLI->mayBeEmittedAsTailCall(CI))
01018         TailCalls.push_back(CI);
01019     }
01020   } else {
01021     SmallPtrSet<BasicBlock*, 4> VisitedBBs;
01022     for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
01023       if (!VisitedBBs.insert(*PI).second)
01024         continue;
01025 
01026       BasicBlock::InstListType &InstList = (*PI)->getInstList();
01027       BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin();
01028       BasicBlock::InstListType::reverse_iterator RE = InstList.rend();
01029       do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI));
01030       if (RI == RE)
01031         continue;
01032 
01033       CallInst *CI = dyn_cast<CallInst>(&*RI);
01034       if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI))
01035         TailCalls.push_back(CI);
01036     }
01037   }
01038 
01039   bool Changed = false;
01040   for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) {
01041     CallInst *CI = TailCalls[i];
01042     CallSite CS(CI);
01043 
01044     // Conservatively require the attributes of the call to match those of the
01045     // return. Ignore noalias because it doesn't affect the call sequence.
01046     AttributeSet CalleeAttrs = CS.getAttributes();
01047     if (AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
01048           removeAttribute(Attribute::NoAlias) !=
01049         AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
01050           removeAttribute(Attribute::NoAlias))
01051       continue;
01052 
01053     // Make sure the call instruction is followed by an unconditional branch to
01054     // the return block.
01055     BasicBlock *CallBB = CI->getParent();
01056     BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator());
01057     if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
01058       continue;
01059 
01060     // Duplicate the return into CallBB.
01061     (void)FoldReturnIntoUncondBranch(RI, BB, CallBB);
01062     ModifiedDT = Changed = true;
01063     ++NumRetsDup;
01064   }
01065 
01066   // If we eliminated all predecessors of the block, delete the block now.
01067   if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
01068     BB->eraseFromParent();
01069 
01070   return Changed;
01071 }
01072 
01073 //===----------------------------------------------------------------------===//
01074 // Memory Optimization
01075 //===----------------------------------------------------------------------===//
01076 
01077 namespace {
01078 
01079 /// ExtAddrMode - This is an extended version of TargetLowering::AddrMode
01080 /// which holds actual Value*'s for register values.
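/// Conceptually this describes an address of the form
///   BaseGV + BaseOffs + BaseReg + Scale * ScaledReg
/// which print() below renders as, e.g., [GV:g + 16 + Base:%p + 4*%i]
/// (operand names here are illustrative).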
01081 struct ExtAddrMode : public TargetLowering::AddrMode {
01082   Value *BaseReg;
01083   Value *ScaledReg;
01084   ExtAddrMode() : BaseReg(nullptr), ScaledReg(nullptr) {}
01085   void print(raw_ostream &OS) const;
01086   void dump() const;
01087 
01088   bool operator==(const ExtAddrMode& O) const {
01089     return (BaseReg == O.BaseReg) && (ScaledReg == O.ScaledReg) &&
01090            (BaseGV == O.BaseGV) && (BaseOffs == O.BaseOffs) &&
01091            (HasBaseReg == O.HasBaseReg) && (Scale == O.Scale);
01092   }
01093 };
01094 
01095 #ifndef NDEBUG
01096 static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
01097   AM.print(OS);
01098   return OS;
01099 }
01100 #endif
01101 
01102 void ExtAddrMode::print(raw_ostream &OS) const {
01103   bool NeedPlus = false;
01104   OS << "[";
01105   if (BaseGV) {
01106     OS << (NeedPlus ? " + " : "")
01107        << "GV:";
01108     BaseGV->printAsOperand(OS, /*PrintType=*/false);
01109     NeedPlus = true;
01110   }
01111 
01112   if (BaseOffs) {
01113     OS << (NeedPlus ? " + " : "")
01114        << BaseOffs;
01115     NeedPlus = true;
01116   }
01117 
01118   if (BaseReg) {
01119     OS << (NeedPlus ? " + " : "")
01120        << "Base:";
01121     BaseReg->printAsOperand(OS, /*PrintType=*/false);
01122     NeedPlus = true;
01123   }
01124   if (Scale) {
01125     OS << (NeedPlus ? " + " : "")
01126        << Scale << "*";
01127     ScaledReg->printAsOperand(OS, /*PrintType=*/false);
01128   }
01129 
01130   OS << ']';
01131 }
01132 
01133 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
01134 void ExtAddrMode::dump() const {
01135   print(dbgs());
01136   dbgs() << '\n';
01137 }
01138 #endif
01139 
01140 /// \brief This class provides transaction based operation on the IR.
01141 /// Every change made through this class is recorded in the internal state and
01142 /// can be undone (rollback) until commit is called.
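/// Illustrative usage sketch (hypothetical; only the commit/rollback notion
/// comes from the comment above):
/// \code
///   TypePromotionTransaction TPT;
///   // ... record speculative type promotions through TPT ...
///   // If the promotion turns out to be profitable, commit the transaction;
///   // otherwise the recorded actions are undone in reverse order.
/// \endcode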
01143 class TypePromotionTransaction {
01144 
01145   /// \brief This represents the common interface of the individual transaction.
01146   /// Each class implements the logic for doing one specific modification on
01147   /// the IR via the TypePromotionTransaction.
01148   class TypePromotionAction {
01149   protected:
01150     /// The Instruction modified.
01151     Instruction *Inst;
01152 
01153   public:
01154     /// \brief Constructor of the action.
01155     /// The constructor performs the related action on the IR.
01156     TypePromotionAction(Instruction *Inst) : Inst(Inst) {}
01157 
01158     virtual ~TypePromotionAction() {}
01159 
01160     /// \brief Undo the modification done by this action.
01161     /// When this method is called, the IR must be in the same state as it was
01162     /// before this action was applied.
01163     /// \pre Undoing the action works if and only if the IR is in the exact same
01164     /// state as it was directly after this action was applied.
01165     virtual void undo() = 0;
01166 
01167     /// \brief Commit every change made by this action.
01168     /// When the results on the IR of the action are to be kept, it is important
01169     /// to call this function, otherwise hidden information may be kept forever.
01170     virtual void commit() {
01171       // Nothing to be done, this action is not doing anything.
01172     }
01173   };
01174 
01175   /// \brief Utility to remember the position of an instruction.
01176   class InsertionHandler {
01177     /// Position of an instruction.
01178     /// Either an instruction:
01179     /// - Is the first in a basic block: BB is used.
01180     /// - Has a previous instruction: PrevInst is used.
01181     union {
01182       Instruction *PrevInst;
01183       BasicBlock *BB;
01184     } Point;
01185     /// Remember whether or not the instruction had a previous instruction.
01186     bool HasPrevInstruction;
01187 
01188   public:
01189     /// \brief Record the position of \p Inst.
01190     InsertionHandler(Instruction *Inst) {
01191       BasicBlock::iterator It = Inst;
01192       HasPrevInstruction = (It != (Inst->getParent()->begin()));
01193       if (HasPrevInstruction)
01194         Point.PrevInst = --It;
01195       else
01196         Point.BB = Inst->getParent();
01197     }
01198 
01199     /// \brief Insert \p Inst at the recorded position.
01200     void insert(Instruction *Inst) {
01201       if (HasPrevInstruction) {
01202         if (Inst->getParent())
01203           Inst->removeFromParent();
01204         Inst->insertAfter(Point.PrevInst);
01205       } else {
01206         Instruction *Position = Point.BB->getFirstInsertionPt();
01207         if (Inst->getParent())
01208           Inst->moveBefore(Position);
01209         else
01210           Inst->insertBefore(Position);
01211       }
01212     }
01213   };
01214 
01215   /// \brief Move an instruction before another.
01216   class InstructionMoveBefore : public TypePromotionAction {
01217     /// Original position of the instruction.
01218     InsertionHandler Position;
01219 
01220   public:
01221     /// \brief Move \p Inst before \p Before.
01222     InstructionMoveBefore(Instruction *Inst, Instruction *Before)
01223         : TypePromotionAction(Inst), Position(Inst) {
01224       DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before << "\n");
01225       Inst->moveBefore(Before);
01226     }
01227 
01228     /// \brief Move the instruction back to its original position.
01229     void undo() override {
01230       DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n");
01231       Position.insert(Inst);
01232     }
01233   };
01234 
01235   /// \brief Set the operand of an instruction with a new value.
01236   class OperandSetter : public TypePromotionAction {
01237     /// Original operand of the instruction.
01238     Value *Origin;
01239     /// Index of the modified instruction.
01240     unsigned Idx;
01241 
01242   public:
01243     /// \brief Set \p Idx operand of \p Inst with \p NewVal.
01244     OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal)
01245         : TypePromotionAction(Inst), Idx(Idx) {
01246       DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n"
01247                    << "for:" << *Inst << "\n"
01248                    << "with:" << *NewVal << "\n");
01249       Origin = Inst->getOperand(Idx);
01250       Inst->setOperand(Idx, NewVal);
01251     }
01252 
01253     /// \brief Restore the original value of the instruction.
01254     void undo() override {
01255       DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n"
01256                    << "for: " << *Inst << "\n"
01257                    << "with: " << *Origin << "\n");
01258       Inst->setOperand(Idx, Origin);
01259     }
01260   };
01261 
01262   /// \brief Hide the operands of an instruction.
01263   /// Act as if this instruction was not using any of its operands.
01264   class OperandsHider : public TypePromotionAction {
01265     /// The list of original operands.
01266     SmallVector<Value *, 4> OriginalValues;
01267 
01268   public:
01269     /// \brief Remove \p Inst from the uses of the operands of \p Inst.
01270     OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) {
01271       DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n");
01272       unsigned NumOpnds = Inst->getNumOperands();
01273       OriginalValues.reserve(NumOpnds);
01274       for (unsigned It = 0; It < NumOpnds; ++It) {
01275         // Save the current operand.
01276         Value *Val = Inst->getOperand(It);
01277         OriginalValues.push_back(Val);
01278         // Set a dummy one.
01279         // We could use OperandSetter here, but that would imply an overhead
01280         // that we are not willing to pay.
01281         Inst->setOperand(It, UndefValue::get(Val->getType()));
01282       }
01283     }
01284 
01285     /// \brief Restore the original list of uses.
01286     void undo() override {
01287       DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n");
01288       for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It)
01289         Inst->setOperand(It, OriginalValues[It]);
01290     }
01291   };
01292 
01293   /// \brief Build a truncate instruction.
01294   class TruncBuilder : public TypePromotionAction {
01295     Value *Val;
01296   public:
01297     /// \brief Build a truncate instruction of \p Opnd producing a \p Ty
01298     /// result.
01299     /// trunc Opnd to Ty.
01300     TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
01301       IRBuilder<> Builder(Opnd);
01302       Val = Builder.CreateTrunc(Opnd, Ty, "promoted");
01303       DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n");
01304     }
01305 
01306     /// \brief Get the built value.
01307     Value *getBuiltValue() { return Val; }
01308 
01309     /// \brief Remove the built instruction.
01310     void undo() override {
01311       DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n");
01312       if (Instruction *IVal = dyn_cast<Instruction>(Val))
01313         IVal->eraseFromParent();
01314     }
01315   };
01316 
01317   /// \brief Build a sign extension instruction.
01318   class SExtBuilder : public TypePromotionAction {
01319     Value *Val;
01320   public:
01321     /// \brief Build a sign extension instruction of \p Opnd producing a \p Ty
01322     /// result.
01323     /// sext Opnd to Ty.
01324     SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
01325         : TypePromotionAction(InsertPt) {
01326       IRBuilder<> Builder(InsertPt);
01327       Val = Builder.CreateSExt(Opnd, Ty, "promoted");
01328       DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n");
01329     }
01330 
01331     /// \brief Get the built value.
01332     Value *getBuiltValue() { return Val; }
01333 
01334     /// \brief Remove the built instruction.
01335     void undo() override {
01336       DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n");
01337       if (Instruction *IVal = dyn_cast<Instruction>(Val))
01338         IVal->eraseFromParent();
01339     }
01340   };
01341 
01342   /// \brief Build a zero extension instruction.
01343   class ZExtBuilder : public TypePromotionAction {
01344     Value *Val;
01345   public:
01346     /// \brief Build a zero extension instruction of \p Opnd producing a \p Ty
01347     /// result.
01348     /// zext Opnd to Ty.
01349     ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
01350         : TypePromotionAction(InsertPt) {
01351       IRBuilder<> Builder(InsertPt);
01352       Val = Builder.CreateZExt(Opnd, Ty, "promoted");
01353       DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n");
01354     }
01355 
01356     /// \brief Get the built value.
01357     Value *getBuiltValue() { return Val; }
01358 
01359     /// \brief Remove the built instruction.
01360     void undo() override {
01361       DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n");
01362       if (Instruction *IVal = dyn_cast<Instruction>(Val))
01363         IVal->eraseFromParent();
01364     }
01365   };
01366 
01367   /// \brief Mutate an instruction to another type.
01368   class TypeMutator : public TypePromotionAction {
01369     /// Record the original type.
01370     Type *OrigTy;
01371 
01372   public:
01373     /// \brief Mutate the type of \p Inst into \p NewTy.
01374     TypeMutator(Instruction *Inst, Type *NewTy)
01375         : TypePromotionAction(Inst), OrigTy(Inst->getType()) {
01376       DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy
01377                    << "\n");
01378       Inst->mutateType(NewTy);
01379     }
01380 
01381     /// \brief Mutate the instruction back to its original type.
01382     void undo() override {
01383       DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy
01384                    << "\n");
01385       Inst->mutateType(OrigTy);
01386     }
01387   };
01388 
01389   /// \brief Replace the uses of an instruction by another instruction.
01390   class UsesReplacer : public TypePromotionAction {
01391     /// Helper structure to keep track of the replaced uses.
01392     struct InstructionAndIdx {
01393       /// The instruction that uses the replaced instruction.
01394       Instruction *Inst;
01395       /// The operand index at which the replaced instruction is used by Inst.
01396       unsigned Idx;
01397       InstructionAndIdx(Instruction *Inst, unsigned Idx)
01398           : Inst(Inst), Idx(Idx) {}
01399     };
01400 
01401     /// Keep track of the original uses (pair Instruction, Index).
01402     SmallVector<InstructionAndIdx, 4> OriginalUses;
01403     typedef SmallVectorImpl<InstructionAndIdx>::iterator use_iterator;
01404 
01405   public:
01406     /// \brief Replace all the uses of \p Inst with \p New.
01407     UsesReplacer(Instruction *Inst, Value *New) : TypePromotionAction(Inst) {
01408       DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New
01409                    << "\n");
01410       // Record the original uses.
01411       for (Use &U : Inst->uses()) {
01412         Instruction *UserI = cast<Instruction>(U.getUser());
01413         OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo()));
01414       }
01415       // Now, we can replace the uses.
01416       Inst->replaceAllUsesWith(New);
01417     }
01418 
01419     /// \brief Reassign the original uses of \p Inst back to \p Inst.
01420     void undo() override {
01421       DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
01422       for (use_iterator UseIt = OriginalUses.begin(),
01423                         EndIt = OriginalUses.end();
01424            UseIt != EndIt; ++UseIt) {
01425         UseIt->Inst->setOperand(UseIt->Idx, Inst);
01426       }
01427     }
01428   };
01429 
01430   /// \brief Remove an instruction from the IR.
01431   class InstructionRemover : public TypePromotionAction {
01432     /// Original position of the instruction.
01433     InsertionHandler Inserter;
01434     /// Helper structure to hide all the links to the instruction. In other
01435     /// words, this makes it look as if the instruction had been removed.
01436     OperandsHider Hider;
01437     /// Keep track of the uses replaced, if any.
01438     UsesReplacer *Replacer;
01439 
01440   public:
01441     /// \brief Remove all references to \p Inst and optionally replace all its
01442     /// uses with \p New.
01443     /// \pre If !Inst->use_empty(), then New != nullptr
01444     InstructionRemover(Instruction *Inst, Value *New = nullptr)
01445         : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
01446           Replacer(nullptr) {
01447       if (New)
01448         Replacer = new UsesReplacer(Inst, New);
01449       DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
01450       Inst->removeFromParent();
01451     }
01452 
01453     ~InstructionRemover() { delete Replacer; }
01454 
01455     /// \brief Really remove the instruction.
01456     void commit() override { delete Inst; }
01457 
01458     /// \brief Resurrect the instruction and reassign it to the proper uses if
01459     /// a new value was provided when this action was built.
01460     void undo() override {
01461       DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
01462       Inserter.insert(Inst);
01463       if (Replacer)
01464         Replacer->undo();
01465       Hider.undo();
01466     }
01467   };
01468 
01469 public:
01470   /// Restoration point.
01471   /// The restoration point is a pointer to an action instead of an iterator
01472   /// because the iterator may be invalidated but not the pointer.
01473   typedef const TypePromotionAction *ConstRestorationPt;
01474   /// Commit all the changes made in this transaction.
01475   void commit();
01476   /// Undo all the changes made after the given point.
01477   void rollback(ConstRestorationPt Point);
01478   /// Get the current restoration point.
01479   ConstRestorationPt getRestorationPoint() const;
01480 
01481   /// \name API for IR modification with state keeping to support rollback.
01482   /// @{
01483   /// Same as Instruction::setOperand.
01484   void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
01485   /// Same as Instruction::eraseFromParent.
01486   void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
01487   /// Same as Value::replaceAllUsesWith.
01488   void replaceAllUsesWith(Instruction *Inst, Value *New);
01489   /// Same as Value::mutateType.
01490   void mutateType(Instruction *Inst, Type *NewTy);
01491   /// Same as IRBuilder::CreateTrunc.
01492   Value *createTrunc(Instruction *Opnd, Type *Ty);
01493   /// Same as IRBuilder::CreateSExt.
01494   Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
01495   /// Same as IRBuilder::CreateZExt.
01496   Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
01497   /// Same as Instruction::moveBefore.
01498   void moveBefore(Instruction *Inst, Instruction *Before);
01499   /// @}
01500 
01501 private:
01502   /// The ordered list of actions made so far.
01503   SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
01504   typedef SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator CommitPt;
01505 };
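
// Illustrative usage sketch of the transaction API declared above (the real
// call sites appear further down in this file); "InsertPt", "WiderTy" and
// "NotProfitable" are placeholder names for whatever the caller uses:
//
//   TypePromotionTransaction::ConstRestorationPt LastKnownGood =
//       TPT.getRestorationPoint();
//   TPT.mutateType(Inst, WiderTy);                        // speculative change
//   Value *Promoted = TPT.createSExt(InsertPt, Opnd, WiderTy);
//   if (NotProfitable)
//     TPT.rollback(LastKnownGood);   // undo everything made after the point
//   else
//     TPT.commit();                  // make every recorded change definitive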
01506 
01507 void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
01508                                           Value *NewVal) {
01509   Actions.push_back(
01510       make_unique<TypePromotionTransaction::OperandSetter>(Inst, Idx, NewVal));
01511 }
01512 
01513 void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
01514                                                 Value *NewVal) {
01515   Actions.push_back(
01516       make_unique<TypePromotionTransaction::InstructionRemover>(Inst, NewVal));
01517 }
01518 
01519 void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
01520                                                   Value *New) {
01521   Actions.push_back(make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
01522 }
01523 
01524 void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
01525   Actions.push_back(make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
01526 }
01527 
01528 Value *TypePromotionTransaction::createTrunc(Instruction *Opnd,
01529                                              Type *Ty) {
01530   std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
01531   Value *Val = Ptr->getBuiltValue();
01532   Actions.push_back(std::move(Ptr));
01533   return Val;
01534 }
01535 
01536 Value *TypePromotionTransaction::createSExt(Instruction *Inst,
01537                                             Value *Opnd, Type *Ty) {
01538   std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
01539   Value *Val = Ptr->getBuiltValue();
01540   Actions.push_back(std::move(Ptr));
01541   return Val;
01542 }
01543 
01544 Value *TypePromotionTransaction::createZExt(Instruction *Inst,
01545                                             Value *Opnd, Type *Ty) {
01546   std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty));
01547   Value *Val = Ptr->getBuiltValue();
01548   Actions.push_back(std::move(Ptr));
01549   return Val;
01550 }
01551 
01552 void TypePromotionTransaction::moveBefore(Instruction *Inst,
01553                                           Instruction *Before) {
01554   Actions.push_back(
01555       make_unique<TypePromotionTransaction::InstructionMoveBefore>(Inst, Before));
01556 }
01557 
01558 TypePromotionTransaction::ConstRestorationPt
01559 TypePromotionTransaction::getRestorationPoint() const {
01560   return !Actions.empty() ? Actions.back().get() : nullptr;
01561 }
01562 
01563 void TypePromotionTransaction::commit() {
01564   for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt;
01565        ++It)
01566     (*It)->commit();
01567   Actions.clear();
01568 }
01569 
01570 void TypePromotionTransaction::rollback(
01571     TypePromotionTransaction::ConstRestorationPt Point) {
01572   while (!Actions.empty() && Point != Actions.back().get()) {
01573     std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
01574     Curr->undo();
01575   }
01576 }
01577 
01578 /// \brief A helper class for matching addressing modes.
01579 ///
01580 /// This encapsulates the logic for matching the target-legal addressing modes.
01581 class AddressingModeMatcher {
01582   SmallVectorImpl<Instruction*> &AddrModeInsts;
01583   const TargetLowering &TLI;
01584 
01585   /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
01586   /// the memory instruction that we're computing this address for.
01587   Type *AccessTy;
01588   Instruction *MemoryInst;
01589 
01590   /// AddrMode - This is the addressing mode that we're building up.  This is
01591   /// part of the return value of this addressing mode matching stuff.
01592   ExtAddrMode &AddrMode;
01593 
01594   /// The truncate instructions inserted by other CodeGenPrepare optimizations.
01595   const SetOfInstrs &InsertedTruncs;
01596   /// A map from the instructions to their type before promotion.
01597   InstrToOrigTy &PromotedInsts;
01598   /// The ongoing transaction where every action should be registered.
01599   TypePromotionTransaction &TPT;
01600 
01601   /// IgnoreProfitability - This is set to true when we should not do
01602   /// profitability checks.  When true, IsProfitableToFoldIntoAddressingMode
01603   /// always returns true.
01604   bool IgnoreProfitability;
01605 
01606   AddressingModeMatcher(SmallVectorImpl<Instruction*> &AMI,
01607                         const TargetLowering &T, Type *AT,
01608                         Instruction *MI, ExtAddrMode &AM,
01609                         const SetOfInstrs &InsertedTruncs,
01610                         InstrToOrigTy &PromotedInsts,
01611                         TypePromotionTransaction &TPT)
01612       : AddrModeInsts(AMI), TLI(T), AccessTy(AT), MemoryInst(MI), AddrMode(AM),
01613         InsertedTruncs(InsertedTruncs), PromotedInsts(PromotedInsts), TPT(TPT) {
01614     IgnoreProfitability = false;
01615   }
01616 public:
01617 
01618   /// Match - Find the maximal addressing mode that a load/store of V can fold,
01619   /// given an access type of AccessTy.  This returns a list of involved
01620   /// instructions in AddrModeInsts.
01621   /// \p InsertedTruncs The truncate instructions inserted by other
01622   /// CodeGenPrepare optimizations.
01623   ///
01624   /// \p PromotedInsts maps the instructions to their type before promotion.
01625   /// \p TPT The ongoing transaction where every action should be registered.
01626   static ExtAddrMode Match(Value *V, Type *AccessTy,
01627                            Instruction *MemoryInst,
01628                            SmallVectorImpl<Instruction*> &AddrModeInsts,
01629                            const TargetLowering &TLI,
01630                            const SetOfInstrs &InsertedTruncs,
01631                            InstrToOrigTy &PromotedInsts,
01632                            TypePromotionTransaction &TPT) {
01633     ExtAddrMode Result;
01634 
01635     bool Success = AddressingModeMatcher(AddrModeInsts, TLI, AccessTy,
01636                                          MemoryInst, Result, InsertedTruncs,
01637                                          PromotedInsts, TPT).MatchAddr(V, 0);
01638     (void)Success; assert(Success && "Couldn't select *anything*?");
01639     return Result;
01640   }
01641 private:
01642   bool MatchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
01643   bool MatchAddr(Value *V, unsigned Depth);
01644   bool MatchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth,
01645                           bool *MovedAway = nullptr);
01646   bool IsProfitableToFoldIntoAddressingMode(Instruction *I,
01647                                             ExtAddrMode &AMBefore,
01648                                             ExtAddrMode &AMAfter);
01649   bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
01650   bool IsPromotionProfitable(unsigned MatchedSize, unsigned SizeWithPromotion,
01651                              Value *PromotedOperand) const;
01652 };
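
// Rough sketch of how a caller drives the matcher (illustrative only; the
// actual call site lives in OptimizeMemoryInst below):
//
//   SmallVector<Instruction *, 16> NewAddrModeInsts;
//   ExtAddrMode AM = AddressingModeMatcher::Match(
//       Addr, AccessTy, MemoryInst, NewAddrModeInsts, TLI, InsertedTruncs,
//       PromotedInsts, TPT);
//   // AM now describes the BaseGV/BaseReg/BaseOffs/Scale combination the
//   // target can fold, and NewAddrModeInsts lists the instructions involved.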
01653 
01654 /// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode.
01655 /// Return true and update AddrMode if this addr mode is legal for the target,
01656 /// false if not.
01657 bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
01658                                              unsigned Depth) {
01659   // If Scale is 1, then this is the same as adding ScaleReg to the addressing
01660   // mode.  Just process that directly.
01661   if (Scale == 1)
01662     return MatchAddr(ScaleReg, Depth);
01663 
01664   // If the scale is 0, it takes nothing to add this.
01665   if (Scale == 0)
01666     return true;
01667 
01668   // If we already have a scale of this value, we can add to it; otherwise we
01669   // need an available scale field.
01670   if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
01671     return false;
01672 
01673   ExtAddrMode TestAddrMode = AddrMode;
01674 
01675   // Add scale to turn X*4+X*3 -> X*7.  This could also do things like
01676   // [A+B + A*7] -> [B+A*8].
01677   TestAddrMode.Scale += Scale;
01678   TestAddrMode.ScaledReg = ScaleReg;
01679 
01680   // If the new address isn't legal, bail out.
01681   if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy))
01682     return false;
01683 
01684   // It was legal, so commit it.
01685   AddrMode = TestAddrMode;
01686 
01687   // Okay, we decided that we can add ScaleReg+Scale to AddrMode.  Check now
01688   // to see if ScaleReg is actually X+C.  If so, we can turn this into adding
01689   // X*Scale + C*Scale to addr mode.
01690   ConstantInt *CI = nullptr; Value *AddLHS = nullptr;
01691   if (isa<Instruction>(ScaleReg) &&  // not a constant expr.
01692       match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
01693     TestAddrMode.ScaledReg = AddLHS;
01694     TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
01695 
01696     // If this addressing mode is legal, commit it and remember that we folded
01697     // this instruction.
01698     if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) {
01699       AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
01700       AddrMode = TestAddrMode;
01701       return true;
01702     }
01703   }
01704 
01705   // Otherwise, not (x+c)*scale, just return what we have.
01706   return true;
01707 }
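
// Illustrative example of the (x + c) * scale refinement above: asked to fold
// ScaleReg = %t with Scale = 4, where
//
//   %t = add i64 %x, 3
//
// the matcher first commits [... + 4*%t], then notices the add-of-constant and
// also tries the equivalent [... + 4*%x + 12] (ScaledReg = %x, BaseOffs += 12),
// keeping that form if the target reports it as a legal addressing mode.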
01708 
01709 /// MightBeFoldableInst - This is a little filter, which returns true if an
01710 /// addressing computation involving I might be folded into a load/store
01711 /// accessing it.  This doesn't need to be perfect, but needs to accept at least
01712 /// the set of instructions that MatchOperationAddr can.
01713 static bool MightBeFoldableInst(Instruction *I) {
01714   switch (I->getOpcode()) {
01715   case Instruction::BitCast:
01716   case Instruction::AddrSpaceCast:
01717     // Don't touch identity bitcasts.
01718     if (I->getType() == I->getOperand(0)->getType())
01719       return false;
01720     return I->getType()->isPointerTy() || I->getType()->isIntegerTy();
01721   case Instruction::PtrToInt:
01722     // PtrToInt is always a noop, as we know that the int type is pointer sized.
01723     return true;
01724   case Instruction::IntToPtr:
01725     // We know the input is intptr_t, so this is foldable.
01726     return true;
01727   case Instruction::Add:
01728     return true;
01729   case Instruction::Mul:
01730   case Instruction::Shl:
01731     // Can only handle X*C and X << C.
01732     return isa<ConstantInt>(I->getOperand(1));
01733   case Instruction::GetElementPtr:
01734     return true;
01735   default:
01736     return false;
01737   }
01738 }
01739 
01740 /// \brief Check whether or not \p Val is a legal instruction for \p TLI.
01741 /// \note \p Val is assumed to be the product of some type promotion.
01742 /// Therefore if \p Val has an undefined state in \p TLI, this is assumed
01743 /// to be legal, as the non-promoted value would have had the same state.
01744 static bool isPromotedInstructionLegal(const TargetLowering &TLI, Value *Val) {
01745   Instruction *PromotedInst = dyn_cast<Instruction>(Val);
01746   if (!PromotedInst)
01747     return false;
01748   int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
01749   // If the ISDOpcode is undefined, it was undefined before the promotion.
01750   if (!ISDOpcode)
01751     return true;
01752   // Otherwise, check if the promoted instruction is legal or not.
01753   return TLI.isOperationLegalOrCustom(
01754       ISDOpcode, TLI.getValueType(PromotedInst->getType()));
01755 }
01756 
01757 /// \brief Helper class to perform type promotion.
01758 class TypePromotionHelper {
01759   /// \brief Utility function to check whether or not a sign or zero extension
01760   /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by
01761   /// either using the operands of \p Inst or promoting \p Inst.
01762   /// The type of the extension is defined by \p IsSExt.
01763   /// In other words, check if:
01764   /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType.
01765   /// #1 Promotion applies:
01766   /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...).
01767   /// #2 Operand reuses:
01768   /// ext opnd1 to ConsideredExtType.
01769   /// \p PromotedInsts maps the instructions to their type before promotion.
01770   static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType,
01771                             const InstrToOrigTy &PromotedInsts, bool IsSExt);
01772 
01773   /// \brief Utility function to determine if \p OpIdx should be promoted when
01774   /// promoting \p Inst.
01775   static bool shouldExtOperand(const Instruction *Inst, int OpIdx) {
01776     if (isa<SelectInst>(Inst) && OpIdx == 0)
01777       return false;
01778     return true;
01779   }
01780 
01781   /// \brief Utility function to promote the operand of \p Ext when this
01782   /// operand is a promotable trunc or sext or zext.
01783   /// \p PromotedInsts maps the instructions to their type before promotion.
01784   /// \p CreatedInsts[out] contains how many non-free instructions have been
01785   /// created to promote the operand of Ext.
01786   /// Newly added extensions are inserted in \p Exts.
01787   /// Newly added truncates are inserted in \p Truncs.
01788   /// Should never be called directly.
01789   /// \return The promoted value which is used instead of Ext.
01790   static Value *promoteOperandForTruncAndAnyExt(
01791       Instruction *Ext, TypePromotionTransaction &TPT,
01792       InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
01793       SmallVectorImpl<Instruction *> *Exts,
01794       SmallVectorImpl<Instruction *> *Truncs);
01795 
01796   /// \brief Utility function to promote the operand of \p Ext when this
01797   /// operand is promotable and is not a supported trunc or sext.
01798   /// \p PromotedInsts maps the instructions to their type before promotion.
01799   /// \p CreatedInsts[out] contains how many non-free instructions have been
01800   /// created to promote the operand of Ext.
01801   /// Newly added extensions are inserted in \p Exts.
01802   /// Newly added truncates are inserted in \p Truncs.
01803   /// Should never be called directly.
01804   /// \return The promoted value which is used instead of Ext.
01805   static Value *
01806   promoteOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT,
01807                          InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
01808                          SmallVectorImpl<Instruction *> *Exts,
01809                          SmallVectorImpl<Instruction *> *Truncs, bool IsSExt);
01810 
01811   /// \see promoteOperandForOther.
01812   static Value *
01813   signExtendOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT,
01814                             InstrToOrigTy &PromotedInsts,
01815                             unsigned &CreatedInsts,
01816                             SmallVectorImpl<Instruction *> *Exts,
01817                             SmallVectorImpl<Instruction *> *Truncs) {
01818     return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, Exts,
01819                                   Truncs, true);
01820   }
01821 
01822   /// \see promoteOperandForOther.
01823   static Value *
01824   zeroExtendOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT,
01825                             InstrToOrigTy &PromotedInsts,
01826                             unsigned &CreatedInsts,
01827                             SmallVectorImpl<Instruction *> *Exts,
01828                             SmallVectorImpl<Instruction *> *Truncs) {
01829     return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, Exts,
01830                                   Truncs, false);
01831   }
01832 
01833 public:
01834   /// Type for the utility function that promotes the operand of Ext.
01835   typedef Value *(*Action)(Instruction *Ext, TypePromotionTransaction &TPT,
01836                            InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
01837                            SmallVectorImpl<Instruction *> *Exts,
01838                            SmallVectorImpl<Instruction *> *Truncs);
01839   /// \brief Given a sign/zero extend instruction \p Ext, return the appropriate
01840   /// action to promote the operand of \p Ext instead of using Ext.
01841   /// \return NULL if no promotable action is possible with the current
01842   /// sign extension.
01843   /// \p InsertedTruncs keeps track of all the truncate instructions inserted by
01844   /// the other CodeGenPrepare optimizations. This information is important
01845   /// because we do not want to promote these instructions: CodeGenPrepare
01846   /// would reinsert them later, creating an infinite create/remove loop.
01847   /// \p PromotedInsts maps the instructions to their type before promotion.
01848   static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedTruncs,
01849                           const TargetLowering &TLI,
01850                           const InstrToOrigTy &PromotedInsts);
01851 };
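
// Rough usage sketch (illustrative; it mirrors the ext handling later in
// MatchOperationAddr): ask for a promotion action and, if one exists, apply it
// through the transaction so the work can be rolled back if it does not pay
// off.
//
//   TypePromotionHelper::Action TPH =
//       TypePromotionHelper::getAction(Ext, InsertedTruncs, TLI, PromotedInsts);
//   if (TPH) {
//     unsigned CreatedInsts = 0;
//     Value *Promoted =
//         TPH(Ext, TPT, PromotedInsts, CreatedInsts, nullptr, nullptr);
//     // CreatedInsts reports how many non-free instructions were created.
//   }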
01852 
01853 bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
01854                                         Type *ConsideredExtType,
01855                                         const InstrToOrigTy &PromotedInsts,
01856                                         bool IsSExt) {
01857   // The promotion helper does not know how to deal with vector types yet.
01858   // To be able to fix that, we would need to fix the places where we
01859   // statically extend, e.g., constants and such.
01860   if (Inst->getType()->isVectorTy())
01861     return false;
01862 
01863   // We can always get through zext.
01864   if (isa<ZExtInst>(Inst))
01865     return true;
01866 
01867   // sext(sext) is ok too.
01868   if (IsSExt && isa<SExtInst>(Inst))
01869     return true;
01870 
01871   // We can get through binary operator, if it is legal. In other words, the
01872   // binary operator must have a nuw or nsw flag.
01873   const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
01874   if (BinOp && isa<OverflowingBinaryOperator>(BinOp) &&
01875       ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
01876        (IsSExt && BinOp->hasNoSignedWrap())))
01877     return true;
01878 
01879   // Check if we can do the following simplification.
01880   // ext(trunc(opnd)) --> ext(opnd)
01881   if (!isa<TruncInst>(Inst))
01882     return false;
01883 
01884   Value *OpndVal = Inst->getOperand(0);
01885   // Check if we can use this operand in the extension.
01886   // If the type is larger than the result type of the extension,
01887   // we cannot.
01888   if (!OpndVal->getType()->isIntegerTy() ||
01889       OpndVal->getType()->getIntegerBitWidth() >
01890           ConsideredExtType->getIntegerBitWidth())
01891     return false;
01892 
01893   // If the operand of the truncate is not an instruction, we will not have
01894   // any information on the dropped bits.
01895   // (Actually we could for constants but it is not worth the extra logic).
01896   Instruction *Opnd = dyn_cast<Instruction>(OpndVal);
01897   if (!Opnd)
01898     return false;
01899 
01900   // Check if the source of the truncate is narrow enough.
01901   // I.e., check that the trunc just drops extended bits of the same kind as
01902   // the extension.
01903   // #1 get the type of the operand and check the kind of the extended bits.
01904   const Type *OpndType;
01905   InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
01906   if (It != PromotedInsts.end() && It->second.IsSExt == IsSExt)
01907     OpndType = It->second.Ty;
01908   else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
01909     OpndType = Opnd->getOperand(0)->getType();
01910   else
01911     return false;
01912 
01913   // #2 check that the truncate just drops extended bits.
01914   if (Inst->getType()->getIntegerBitWidth() >= OpndType->getIntegerBitWidth())
01915     return true;
01916 
01917   return false;
01918 }
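
// Illustrative example of the ext(trunc(opnd)) case handled above: the trunc
// only drops bits that were produced by an extension of the same kind, so the
// outer extension can safely look through it.
//
//   %o = sext i8 %a to i32      ; OpndType is i8: the high bits are sign bits
//   %t = trunc i32 %o to i16    ; drops only (some of) those sign bits
//   %e = sext i16 %t to i64     ; can therefore be treated as a sext of %a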
01919 
01920 TypePromotionHelper::Action TypePromotionHelper::getAction(
01921     Instruction *Ext, const SetOfInstrs &InsertedTruncs,
01922     const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
01923   assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
01924          "Unexpected instruction type");
01925   Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0));
01926   Type *ExtTy = Ext->getType();
01927   bool IsSExt = isa<SExtInst>(Ext);
01928   // If the operand of the extension is not an instruction, we cannot
01929   // get through.
01930   // If it is, check whether we can get through it.
01931   if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt))
01932     return nullptr;
01933 
01934   // Do not promote if the operand has been added by codegenprepare.
01935   // Otherwise, it means we are undoing an optimization that is likely to be
01936   // redone, thus causing a potential infinite loop.
01937   if (isa<TruncInst>(ExtOpnd) && InsertedTruncs.count(ExtOpnd))
01938     return nullptr;
01939 
01940   // SExt, ZExt, or Trunc instructions.
01941   // Return the related handler.
01942   if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) ||
01943       isa<ZExtInst>(ExtOpnd))
01944     return promoteOperandForTruncAndAnyExt;
01945 
01946   // Regular instruction.
01947   // Abort early if we will have to insert non-free instructions.
01948   if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType()))
01949     return nullptr;
01950   return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther;
01951 }
01952 
01953 Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
01954     llvm::Instruction *SExt, TypePromotionTransaction &TPT,
01955     InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
01956     SmallVectorImpl<Instruction *> *Exts,
01957     SmallVectorImpl<Instruction *> *Truncs) {
01958   // By construction, the operand of SExt is an instruction. Otherwise we cannot
01959   // get through it and this method should not be called.
01960   Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
01961   Value *ExtVal = SExt;
01962   if (isa<ZExtInst>(SExtOpnd)) {
01963     // Replace s|zext(zext(opnd))
01964     // => zext(opnd).
01965     Value *ZExt =
01966         TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
01967     TPT.replaceAllUsesWith(SExt, ZExt);
01968     TPT.eraseInstruction(SExt);
01969     ExtVal = ZExt;
01970   } else {
01971     // Replace z|sext(trunc(opnd)) or sext(sext(opnd))
01972     // => z|sext(opnd).
01973     TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
01974   }
01975   CreatedInsts = 0;
01976 
01977   // Remove dead code.
01978   if (SExtOpnd->use_empty())
01979     TPT.eraseInstruction(SExtOpnd);
01980 
01981   // Check if the extension is still needed.
01982   Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
01983   if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
01984     if (ExtInst && Exts)
01985       Exts->push_back(ExtInst);
01986     return ExtVal;
01987   }
01988 
01989   // At this point we have: ext ty opnd to ty.
01990   // Reassign the uses of ExtInst to the opnd and remove ExtInst.
01991   Value *NextVal = ExtInst->getOperand(0);
01992   TPT.eraseInstruction(ExtInst, NextVal);
01993   return NextVal;
01994 }
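
// Illustrative sketches of the rewrites performed above:
//
//   %z = zext i8 %a to i16             ; SExtOpnd is a zext
//   %s = sext i16 %z to i32            ; SExt
//   =>  %p = zext i8 %a to i32         ; s|zext(zext(opnd)) --> zext(opnd)
//
//   %t = trunc i32 %b to i16           ; SExtOpnd is a trunc
//   %s = sext i16 %t to i32            ; SExt
//   =>  the ext's operand is rewritten to %b; the ext is now i32 -> i32, so it
//       is erased and %b is used directly (the trunc dies if it has no other
//       uses).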
01995 
01996 Value *TypePromotionHelper::promoteOperandForOther(
01997     Instruction *Ext, TypePromotionTransaction &TPT,
01998     InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
01999     SmallVectorImpl<Instruction *> *Exts,
02000     SmallVectorImpl<Instruction *> *Truncs, bool IsSExt) {
02001   // By construction, the operand of Ext is an instruction. Otherwise we cannot
02002   // get through it and this method should not be called.
02003   Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
02004   CreatedInsts = 0;
02005   if (!ExtOpnd->hasOneUse()) {
02006     // ExtOpnd will be promoted.
02007     // All its uses, but Ext, will need to use a truncated value of the
02008     // promoted version.
02009     // Create the truncate now.
02010     Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType());
02011     if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
02012       ITrunc->removeFromParent();
02013       // Insert it just after the definition.
02014       ITrunc->insertAfter(ExtOpnd);
02015       if (Truncs)
02016         Truncs->push_back(ITrunc);
02017     }
02018 
02019     TPT.replaceAllUsesWith(ExtOpnd, Trunc);
02020     // Restore the operand of Ext (which has been replaced by the previous call
02021     // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
02022     TPT.setOperand(Ext, 0, ExtOpnd);
02023   }
02024 
02025   // Get through the Instruction:
02026   // 1. Update its type.
02027   // 2. Replace the uses of Ext by Inst.
02028   // 3. Extend each operand that needs to be extended.
02029 
02030   // Remember the original type of the instruction before promotion.
02031   // This is useful to know whether the high bits are sign or zero extended bits.
02032   PromotedInsts.insert(std::pair<Instruction *, TypeIsSExt>(
02033       ExtOpnd, TypeIsSExt(ExtOpnd->getType(), IsSExt)));
02034   // Step #1.
02035   TPT.mutateType(ExtOpnd, Ext->getType());
02036   // Step #2.
02037   TPT.replaceAllUsesWith(Ext, ExtOpnd);
02038   // Step #3.
02039   Instruction *ExtForOpnd = Ext;
02040 
02041   DEBUG(dbgs() << "Propagate Ext to operands\n");
02042   for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
02043        ++OpIdx) {
02044     DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n');
02045     if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() ||
02046         !shouldExtOperand(ExtOpnd, OpIdx)) {
02047       DEBUG(dbgs() << "No need to propagate\n");
02048       continue;
02049     }
02050     // Check if we can statically extend the operand.
02051     Value *Opnd = ExtOpnd->getOperand(OpIdx);
02052     if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
02053       DEBUG(dbgs() << "Statically extend\n");
02054       unsigned BitWidth = Ext->getType()->getIntegerBitWidth();
02055       APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth)
02056                             : Cst->getValue().zext(BitWidth);
02057       TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal));
02058       continue;
02059     }
02060     // UndefValues are typed, so we have to statically extend them.
02061     if (isa<UndefValue>(Opnd)) {
02062       DEBUG(dbgs() << "Statically extend\n");
02063       TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType()));
02064       continue;
02065     }
02066 
02067     // Otherwise we have to explicitly extend the operand.
02068     // Check if Ext was reused to extend an operand.
02069     if (!ExtForOpnd) {
02070       // If yes, create a new one.
02071       DEBUG(dbgs() << "More operands to ext\n");
02072       ExtForOpnd =
02073           cast<Instruction>(IsSExt ? TPT.createSExt(Ext, Opnd, Ext->getType())
02074                                    : TPT.createZExt(Ext, Opnd, Ext->getType()));
02075       ++CreatedInsts;
02076     }
02077     if (Exts)
02078       Exts->push_back(ExtForOpnd);
02079     TPT.setOperand(ExtForOpnd, 0, Opnd);
02080 
02081     // Move the sign extension before the insertion point.
02082     TPT.moveBefore(ExtForOpnd, ExtOpnd);
02083     TPT.setOperand(ExtOpnd, OpIdx, ExtForOpnd);
02084     // If more extensions are required, new instructions will have to be created.
02085     ExtForOpnd = nullptr;
02086   }
02087   if (ExtForOpnd == Ext) {
02088     DEBUG(dbgs() << "Extension is useless now\n");
02089     TPT.eraseInstruction(Ext);
02090   }
02091   return ExtOpnd;
02092 }
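
// Illustrative sketch of a full promotion (names and types are made up):
//
//   %a = add nsw i16 %x, %y            ; ExtOpnd, also used elsewhere
//   %s = sext i16 %a to i32            ; Ext
//   %u = ... i16 %a ...                ; other use of ExtOpnd
//
// conceptually becomes:
//
//   %xp = sext i16 %x to i32           ; step #3: operands get extended
//   %yp = sext i16 %y to i32
//   %a  = add nsw i32 %xp, %yp         ; step #1: type mutated in place
//                                      ; step #2: former users of %s now use %a
//   %t  = trunc i32 %a to i16          ; inserted because %a had other uses
//   %u  = ... i16 %t ...
//
// The original Ext is reused for the first operand that needs an explicit
// extension, so CreatedInsts only counts the additional non-free extensions.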
02093 
02094 /// IsPromotionProfitable - Check whether or not promoting an instruction
02095 /// to a wider type was profitable.
02096 /// \p MatchedSize gives the number of instructions that have been matched
02097 /// in the addressing mode after the promotion was applied.
02098 /// \p SizeWithPromotion gives the number of created instructions for
02099 /// the promotion plus the number of instructions that have been
02100 /// matched in the addressing mode before the promotion.
02101 /// \p PromotedOperand is the value that has been promoted.
02102 /// \return True if the promotion is profitable, false otherwise.
02103 bool
02104 AddressingModeMatcher::IsPromotionProfitable(unsigned MatchedSize,
02105                                              unsigned SizeWithPromotion,
02106                                              Value *PromotedOperand) const {
02107   // We folded fewer instructions than we created to promote the operand.
02108   // This is not profitable.
02109   if (MatchedSize < SizeWithPromotion)
02110     return false;
02111   if (MatchedSize > SizeWithPromotion)
02112     return true;
02113   // The promotion is neutral but it may help folding the sign extension in
02114   // loads for instance.
02115   // Check that we did not create an illegal instruction.
02116   return isPromotedInstructionLegal(TLI, PromotedOperand);
02117 }
02118 
02119 /// MatchOperationAddr - Given an instruction or constant expr, see if we can
02120 /// fold the operation into the addressing mode.  If so, update the addressing
02121 /// mode and return true, otherwise return false without modifying AddrMode.
02122 /// If \p MovedAway is not NULL, it reports whether or not AddrInst has been
02123 /// moved away instead of being folded into the addressing mode on success.
02124 /// If \p MovedAway == true, \p AddrInst is not part of the addressing mode
02125 /// because it has been moved away.
02126 /// Thus AddrInst must not be added to the matched instructions.
02127 /// This can happen when AddrInst is a sext or zext, since it may be moved away.
02128 /// Therefore, AddrInst may not be valid when MovedAway is true and it must
02129 /// not be referenced anymore.
02130 bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
02131                                                unsigned Depth,
02132                                                bool *MovedAway) {
02133   // Avoid exponential behavior on extremely deep expression trees.
02134   if (Depth >= 5) return false;
02135 
02136   // By default, all matched instructions stay in place.
02137   if (MovedAway)
02138     *MovedAway = false;
02139 
02140   switch (Opcode) {
02141   case Instruction::PtrToInt:
02142     // PtrToInt is always a noop, as we know that the int type is pointer sized.
02143     return MatchAddr(AddrInst->getOperand(0), Depth);
02144   case Instruction::IntToPtr:
02145     // This inttoptr is a no-op if the integer type is pointer sized.
02146     if (TLI.getValueType(AddrInst->getOperand(0)->getType()) ==
02147         TLI.getPointerTy(AddrInst->getType()->getPointerAddressSpace()))
02148       return MatchAddr(AddrInst->getOperand(0), Depth);
02149     return false;
02150   case Instruction::BitCast:
02151   case Instruction::AddrSpaceCast:
02152     // BitCast is always a noop, and we can handle it as long as it is
02153     // int->int or pointer->pointer (we don't want int<->fp or something).
02154     if ((AddrInst->getOperand(0)->getType()->isPointerTy() ||
02155          AddrInst->getOperand(0)->getType()->isIntegerTy()) &&
02156         // Don't touch identity bitcasts.  These were probably put here by LSR,
02157         // and we don't want to mess around with them.  Assume it knows what it
02158         // is doing.
02159         AddrInst->getOperand(0)->getType() != AddrInst->getType())
02160       return MatchAddr(AddrInst->getOperand(0), Depth);
02161     return false;
02162   case Instruction::Add: {
02163     // Check to see if we can merge in the RHS then the LHS.  If so, we win.
02164     ExtAddrMode BackupAddrMode = AddrMode;
02165     unsigned OldSize = AddrModeInsts.size();
02166     // Start a transaction at this point.
02167     // The LHS may match but not the RHS.
02168     // Therefore, we need a higher level restoration point to undo partially
02169     // matched operation.
02170     TypePromotionTransaction::ConstRestorationPt LastKnownGood =
02171         TPT.getRestorationPoint();
02172 
02173     if (MatchAddr(AddrInst->getOperand(1), Depth+1) &&
02174         MatchAddr(AddrInst->getOperand(0), Depth+1))
02175       return true;
02176 
02177     // Restore the old addr mode info.
02178     AddrMode = BackupAddrMode;
02179     AddrModeInsts.resize(OldSize);
02180     TPT.rollback(LastKnownGood);
02181 
02182     // Otherwise this was over-aggressive.  Try merging in the LHS then the RHS.
02183     if (MatchAddr(AddrInst->getOperand(0), Depth+1) &&
02184         MatchAddr(AddrInst->getOperand(1), Depth+1))
02185       return true;
02186 
02187     // Otherwise we definitely can't merge the ADD in.
02188     AddrMode = BackupAddrMode;
02189     AddrModeInsts.resize(OldSize);
02190     TPT.rollback(LastKnownGood);
02191     break;
02192   }
02193   //case Instruction::Or:
02194   // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
02195   //break;
02196   case Instruction::Mul:
02197   case Instruction::Shl: {
02198     // Can only handle X*C and X << C.
02199     ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
02200     if (!RHS)
02201       return false;
02202     int64_t Scale = RHS->getSExtValue();
02203     if (Opcode == Instruction::Shl)
02204       Scale = 1LL << Scale;
02205 
02206     return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth);
02207   }
02208   case Instruction::GetElementPtr: {
02209     // Scan the GEP.  We check whether it contains constant offsets and at most
02210     // one variable offset.
02211     int VariableOperand = -1;
02212     unsigned VariableScale = 0;
02213 
02214     int64_t ConstantOffset = 0;
02215     const DataLayout *TD = TLI.getDataLayout();
02216     gep_type_iterator GTI = gep_type_begin(AddrInst);
02217     for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
02218       if (StructType *STy = dyn_cast<StructType>(*GTI)) {
02219         const StructLayout *SL = TD->getStructLayout(STy);
02220         unsigned Idx =
02221           cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
02222         ConstantOffset += SL->getElementOffset(Idx);
02223       } else {
02224         uint64_t TypeSize = TD->getTypeAllocSize(GTI.getIndexedType());
02225         if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
02226           ConstantOffset += CI->getSExtValue()*TypeSize;
02227         } else if (TypeSize) {  // Scales of zero don't do anything.
02228           // We only allow one variable index at the moment.
02229           if (VariableOperand != -1)
02230             return false;
02231 
02232           // Remember the variable index.
02233           VariableOperand = i;
02234           VariableScale = TypeSize;
02235         }
02236       }
02237     }
02238 
02239     // A common case is for the GEP to only do a constant offset.  In this case,
02240     // just add it to the disp field and check validity.
02241     if (VariableOperand == -1) {
02242       AddrMode.BaseOffs += ConstantOffset;
02243       if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){
02244         // Check to see if we can fold the base pointer in too.
02245         if (MatchAddr(AddrInst->getOperand(0), Depth+1))
02246           return true;
02247       }
02248       AddrMode.BaseOffs -= ConstantOffset;
02249       return false;
02250     }
02251 
02252     // Save the valid addressing mode in case we can't match.
02253     ExtAddrMode BackupAddrMode = AddrMode;
02254     unsigned OldSize = AddrModeInsts.size();
02255 
02256     // See if the scale and offset amount is valid for this target.
02257     AddrMode.BaseOffs += ConstantOffset;
02258 
02259     // Match the base operand of the GEP.
02260     if (!MatchAddr(AddrInst->getOperand(0), Depth+1)) {
02261       // If it couldn't be matched, just stuff the value in a register.
02262       if (AddrMode.HasBaseReg) {
02263         AddrMode = BackupAddrMode;
02264         AddrModeInsts.resize(OldSize);
02265         return false;
02266       }
02267       AddrMode.HasBaseReg = true;
02268       AddrMode.BaseReg = AddrInst->getOperand(0);
02269     }
02270 
02271     // Match the remaining variable portion of the GEP.
02272     if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
02273                           Depth)) {
02274       // If it couldn't be matched, try stuffing the base into a register
02275       // instead of matching it, and retrying the match of the scale.
02276       AddrMode = BackupAddrMode;
02277       AddrModeInsts.resize(OldSize);
02278       if (AddrMode.HasBaseReg)
02279         return false;
02280       AddrMode.HasBaseReg = true;
02281       AddrMode.BaseReg = AddrInst->getOperand(0);
02282       AddrMode.BaseOffs += ConstantOffset;
02283       if (!MatchScaledValue(AddrInst->getOperand(VariableOperand),
02284                             VariableScale, Depth)) {
02285         // If even that didn't work, bail.
02286         AddrMode = BackupAddrMode;
02287         AddrModeInsts.resize(OldSize);
02288         return false;
02289       }
02290     }
02291 
02292     return true;
02293   }
02294   case Instruction::SExt:
02295   case Instruction::ZExt: {
02296     Instruction *Ext = dyn_cast<Instruction>(AddrInst);
02297     if (!Ext)
02298       return false;
02299 
02300     // Try to move this ext out of the way of the addressing mode.
02301     // Ask for a method for doing so.
02302     TypePromotionHelper::Action TPH =
02303         TypePromotionHelper::getAction(Ext, InsertedTruncs, TLI, PromotedInsts);
02304     if (!TPH)
02305       return false;
02306 
02307     TypePromotionTransaction::ConstRestorationPt LastKnownGood =
02308         TPT.getRestorationPoint();
02309     unsigned CreatedInsts = 0;
02310     Value *PromotedOperand =
02311         TPH(Ext, TPT, PromotedInsts, CreatedInsts, nullptr, nullptr);
02312     // SExt has been moved away.
02313     // Thus either it will be rematched later in the recursive calls or it is
02314     // gone. Anyway, we must not fold it into the addressing mode at this point.
02315     // E.g.,
02316     // op = add opnd, 1
02317     // idx = ext op
02318     // addr = gep base, idx
02319     // is now:
02320     // promotedOpnd = ext opnd            <- no match here
02321     // op = promoted_add promotedOpnd, 1  <- match (later in recursive calls)
02322     // addr = gep base, op                <- match
02323     if (MovedAway)
02324       *MovedAway = true;
02325 
02326     assert(PromotedOperand &&
02327            "TypePromotionHelper should have filtered out those cases");
02328 
02329     ExtAddrMode BackupAddrMode = AddrMode;
02330     unsigned OldSize = AddrModeInsts.size();
02331 
02332     if (!MatchAddr(PromotedOperand, Depth) ||
02333         !IsPromotionProfitable(AddrModeInsts.size(), OldSize + CreatedInsts,
02334                                PromotedOperand)) {
02335       AddrMode = BackupAddrMode;
02336       AddrModeInsts.resize(OldSize);
02337       DEBUG(dbgs() << "Sign extension does not pay off: rollback\n");
02338       TPT.rollback(LastKnownGood);
02339       return false;
02340     }
02341     return true;
02342   }
02343   }
02344   return false;
02345 }
02346 
02347 /// MatchAddr - If we can, try to add the value of 'Addr' into the current
02348 /// addressing mode.  If Addr can't be added to AddrMode this returns false and
02349 /// leaves AddrMode unmodified.  This assumes that Addr is either a pointer type
02350 /// or intptr_t for the target.
02351 ///
02352 bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
02353   // Start a transaction at this point that we will rollback if the matching
02354   // fails.
02355   TypePromotionTransaction::ConstRestorationPt LastKnownGood =
02356       TPT.getRestorationPoint();
02357   if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
02358     // Fold in immediates if legal for the target.
02359     AddrMode.BaseOffs += CI->getSExtValue();
02360     if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
02361       return true;
02362     AddrMode.BaseOffs -= CI->getSExtValue();
02363   } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
02364     // If this is a global variable, try to fold it into the addressing mode.
02365     if (!AddrMode.BaseGV) {
02366       AddrMode.BaseGV = GV;
02367       if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
02368         return true;
02369       AddrMode.BaseGV = nullptr;
02370     }
02371   } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
02372     ExtAddrMode BackupAddrMode = AddrMode;
02373     unsigned OldSize = AddrModeInsts.size();
02374 
02375     // Check to see if it is possible to fold this operation.
02376     bool MovedAway = false;
02377     if (MatchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
02378       // This instruction may have been moved away. If so, there is nothing
02379       // to check here.
02380       if (MovedAway)
02381         return true;
02382       // Okay, it's possible to fold this.  Check to see if it is actually
02383       // *profitable* to do so.  We use a simple cost model to avoid increasing
02384       // register pressure too much.
02385       if (I->hasOneUse() ||
02386           IsProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
02387         AddrModeInsts.push_back(I);
02388         return true;
02389       }
02390 
02391       // It isn't profitable to do this, roll back.
02392       //cerr << "NOT FOLDING: " << *I;
02393       AddrMode = BackupAddrMode;
02394       AddrModeInsts.resize(OldSize);
02395       TPT.rollback(LastKnownGood);
02396     }
02397   } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
02398     if (MatchOperationAddr(CE, CE->getOpcode(), Depth))
02399       return true;
02400     TPT.rollback(LastKnownGood);
02401   } else if (isa<ConstantPointerNull>(Addr)) {
02402     // Null pointer gets folded without affecting the addressing mode.
02403     return true;
02404   }
02405 
02406   // Worst case, the target should support [reg] addressing modes. :)
02407   if (!AddrMode.HasBaseReg) {
02408     AddrMode.HasBaseReg = true;
02409     AddrMode.BaseReg = Addr;
02410     // Still check for legality in case the target supports [imm] but not [i+r].
02411     if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
02412       return true;
02413     AddrMode.HasBaseReg = false;
02414     AddrMode.BaseReg = nullptr;
02415   }
02416 
02417   // If the base register is already taken, see if we can do [r+r].
02418   if (AddrMode.Scale == 0) {
02419     AddrMode.Scale = 1;
02420     AddrMode.ScaledReg = Addr;
02421     if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
02422       return true;
02423     AddrMode.Scale = 0;
02424     AddrMode.ScaledReg = nullptr;
02425   }
02426   // Couldn't match.
02427   TPT.rollback(LastKnownGood);
02428   return false;
02429 }
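
// In short, MatchAddr tries to fold Addr as an immediate offset, a global, or
// a recursively matched instruction/constant expression; failing that it falls
// back to plain [reg] by using Addr as the base register, then to [r+r] by
// using it as a scaled register with Scale = 1, and finally rolls the
// transaction back and reports failure.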
02430 
02431 /// IsOperandAMemoryOperand - Check to see if all uses of OpVal by the specified
02432 /// inline asm call are due to memory operands.  If so, return true, otherwise
02433 /// return false.
02434 static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
02435                                     const TargetLowering &TLI) {
02436   TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(ImmutableCallSite(CI));
02437   for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
02438     TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
02439 
02440     // Compute the constraint code and ConstraintType to use.
02441     TLI.ComputeConstraintToUse(OpInfo, SDValue());
02442 
02443     // If this asm operand is our Value*, and if it isn't an indirect memory
02444     // operand, we can't fold it!
02445     if (OpInfo.CallOperandVal == OpVal &&
02446         (OpInfo.ConstraintType != TargetLowering::C_Memory ||
02447          !OpInfo.isIndirect))
02448       return false;
02449   }
02450 
02451   return true;
02452 }
02453 
02454 /// FindAllMemoryUses - Recursively walk all the uses of I until we find a
02455 /// memory use.  If we find an obviously non-foldable instruction, return true.
02456 /// Add the ultimately found memory instructions to MemoryUses.
02457 static bool FindAllMemoryUses(Instruction *I,
02458                 SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses,
02459                               SmallPtrSetImpl<Instruction*> &ConsideredInsts,
02460                               const TargetLowering &TLI) {
02461   // If we already considered this instruction, we're done.
02462   if (!ConsideredInsts.insert(I).second)
02463     return false;
02464 
02465   // If this is an obviously unfoldable instruction, bail out.
02466   if (!MightBeFoldableInst(I))
02467     return true;
02468 
02469   // Loop over all the uses, recursively processing them.
02470   for (Use &U : I->uses()) {
02471     Instruction *UserI = cast<Instruction>(U.getUser());
02472 
02473     if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
02474       MemoryUses.push_back(std::make_pair(LI, U.getOperandNo()));
02475       continue;
02476     }
02477 
02478     if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
02479       unsigned opNo = U.getOperandNo();
02480       if (opNo == 0) return true; // Storing addr, not into addr.
02481       MemoryUses.push_back(std::make_pair(SI, opNo));
02482       continue;
02483     }
02484 
02485     if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
02486       InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
02487       if (!IA) return true;
02488 
02489       // If this is a memory operand, we're cool, otherwise bail out.
02490       if (!IsOperandAMemoryOperand(CI, IA, I, TLI))
02491         return true;
02492       continue;
02493     }
02494 
02495     if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI))
02496       return true;
02497   }
02498 
02499   return false;
02500 }
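
// Illustrative example (using the typed-pointer IR syntax of this release):
//
//   %a = add i64 %base, 8
//   %p = inttoptr i64 %a to i32*
//   %v = load i32* %p
//   store i32 0, i32* %p
//
// FindAllMemoryUses(%a, ...) looks through the inttoptr and records the load
// (address is operand #0) and the store (address is operand #1) in MemoryUses,
// returning false because every transitive use is foldable.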
02501 
02502 /// ValueAlreadyLiveAtInst - Return true if Val is already known to be live at
02503 /// the use site that we're folding it into.  If so, there is no cost to
02504 /// include it in the addressing mode.  KnownLive1 and KnownLive2 are two values
02505 /// that we know are live at the instruction already.
02506 bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
02507                                                    Value *KnownLive2) {
02508   // If Val is either of the known-live values, we know it is live!
02509   if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2)
02510     return true;
02511 
02512   // All values other than instructions and arguments (e.g. constants) are live.
02513   if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;
02514 
02515   // If Val is a constant sized alloca in the entry block, it is live; this is
02516   // true because it is just a reference to the stack/frame pointer, which is
02517   // live for the whole function.
02518   if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
02519     if (AI->isStaticAlloca())
02520       return true;
02521 
02522   // Check to see if this value is already used in the memory instruction's
02523   // block.  If so, it's already live into the block at the very least, so we
02524   // can reasonably fold it.
02525   return Val->isUsedInBasicBlock(MemoryInst->getParent());
02526 }
02527 
02528 /// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing
02529 /// mode of the machine to fold the specified instruction into a load or store
02530 /// that ultimately uses it.  However, the specified instruction has multiple
02531 /// uses.  Given this, it may actually increase register pressure to fold it
02532 /// into the load.  For example, consider this code:
02533 ///
02534 ///     X = ...
02535 ///     Y = X+1
02536 ///     use(Y)   -> nonload/store
02537 ///     Z = Y+1
02538 ///     load Z
02539 ///
02540 /// In this case, Y has multiple uses, and can be folded into the load of Z
02541 /// (yielding load [X+2]).  However, doing this will cause both "X" and "X+1" to
02542 /// be live at the use(Y) line.  If we don't fold Y into load Z, we use one
02543 /// fewer register.  Since Y can't be folded into "use(Y)" we don't increase the
02544 /// number of computations either.
02545 ///
02546 /// Note that this (like most of CodeGenPrepare) is just a rough heuristic.  If
02547 /// X was live across 'load Z' for other reasons, we actually *would* want to
02548 /// fold the addressing mode in the Z case.  This would make Y die earlier.
02549 bool AddressingModeMatcher::
02550 IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
02551                                      ExtAddrMode &AMAfter) {
02552   if (IgnoreProfitability) return true;
02553 
02554   // AMBefore is the addressing mode before this instruction was folded into it,
02555   // and AMAfter is the addressing mode after the instruction was folded.  Get
02556   // the set of registers referenced by AMAfter and subtract out those
02557   // referenced by AMBefore: this is the set of values which folding in this
02558   // address extends the lifetime of.
02559   //
02560   // Note that there are only two potential values being referenced here,
02561   // BaseReg and ScaleReg (global addresses are always available, as are any
02562   // folded immediates).
02563   Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
02564 
02565   // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
02566   // lifetime wasn't extended by adding this instruction.
02567   if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
02568     BaseReg = nullptr;
02569   if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
02570     ScaledReg = nullptr;
02571 
02572   // If folding this instruction (and its subexprs) didn't extend any live
02573   // ranges, we're ok with it.
02574   if (!BaseReg && !ScaledReg)
02575     return true;
02576 
02577   // If all uses of this instruction are ultimately load/store/inlineasm's,
02578   // check to see if their addressing modes will include this instruction.  If
02579   // so, we can fold it into all uses, so it doesn't matter if it has multiple
02580   // uses.
02581   SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
02582   SmallPtrSet<Instruction*, 16> ConsideredInsts;
02583   if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI))
02584     return false;  // Has a non-memory, non-foldable use!
02585 
02586   // Now that we know that all uses of this instruction are part of a chain of
02587   // computation involving only operations that could theoretically be folded
02588   // into a memory use, loop over each of these uses and see if they could
02589   // *actually* fold the instruction.
02590   SmallVector<Instruction*, 32> MatchedAddrModeInsts;
02591   for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
02592     Instruction *User = MemoryUses[i].first;
02593     unsigned OpNo = MemoryUses[i].second;
02594 
02595     // Get the access type of this use.  If the use isn't a pointer, we don't
02596     // know what it accesses.
02597     Value *Address = User->getOperand(OpNo);
02598     if (!Address->getType()->isPointerTy())
02599       return false;
02600     Type *AddressAccessTy = Address->getType()->getPointerElementType();
02601 
02602     // Do a match against the root of this address, ignoring profitability. This
02603     // will tell us if the addressing mode for the memory operation will
02604     // *actually* cover the shared instruction.
02605     ExtAddrMode Result;
02606     TypePromotionTransaction::ConstRestorationPt LastKnownGood =
02607         TPT.getRestorationPoint();
02608     AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy,
02609                                   MemoryInst, Result, InsertedTruncs,
02610                                   PromotedInsts, TPT);
02611     Matcher.IgnoreProfitability = true;
02612     bool Success = Matcher.MatchAddr(Address, 0);
02613     (void)Success; assert(Success && "Couldn't select *anything*?");
02614 
02615     // The match was only performed to check profitability; the changes it made
02616     // are not part of the original matcher. Therefore, they should be dropped,
02617     // otherwise the original matcher will not reflect the right state.
02618     TPT.rollback(LastKnownGood);
02619 
02620     // If the match didn't cover I, then it won't be shared by it.
02621     if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(),
02622                   I) == MatchedAddrModeInsts.end())
02623       return false;
02624 
02625     MatchedAddrModeInsts.clear();
02626   }
02627 
02628   return true;
02629 }
02630 
02631 } // end anonymous namespace
02632 
02633 /// IsNonLocalValue - Return true if the specified value is defined in a
02634 /// different basic block than BB.
02635 static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
02636   if (Instruction *I = dyn_cast<Instruction>(V))
02637     return I->getParent() != BB;
02638   return false;
02639 }
02640 
02641 /// OptimizeMemoryInst - Load and Store Instructions often have
02642 /// addressing modes that can do significant amounts of computation.  As such,
02643 /// instruction selection will try to get the load or store to do as much
02644 /// computation as possible for the program.  The problem is that isel can only
02645 /// see within a single block.  As such, we sink as much legal addressing mode
02646 /// stuff into the block as possible.
02647 ///
02648 /// This method is used to optimize both load/store and inline asms with memory
02649 /// operands.
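///
/// A minimal illustrative sketch (hand-written IR, not taken from this file;
/// block and value names are invented): an address computed in one block and
/// used by a load in another block
/// \code
///   bb1:
///     %addr = getelementptr i32* %base, i64 %idx
///     br label %bb2
///   bb2:
///     %v = load i32* %addr
/// \endcode
/// is conceptually rewritten so that an equivalent "sunkaddr" computation is
/// materialized next to the load in bb2, allowing instruction selection to
/// fold it into the memory operation's addressing mode.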
02650 bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
02651                                         Type *AccessTy) {
02652   Value *Repl = Addr;
02653 
02654   // Try to collapse single-value PHI nodes.  This is necessary to undo
02655   // unprofitable PRE transformations.
02656   SmallVector<Value*, 8> worklist;
02657   SmallPtrSet<Value*, 16> Visited;
02658   worklist.push_back(Addr);
02659 
02660   // Use a worklist to iteratively look through PHI nodes, and ensure that
02661   // the addressing modes obtained from the non-PHI roots of the graph
02662   // are equivalent.
02663   Value *Consensus = nullptr;
02664   unsigned NumUsesConsensus = 0;
02665   bool IsNumUsesConsensusValid = false;
02666   SmallVector<Instruction*, 16> AddrModeInsts;
02667   ExtAddrMode AddrMode;
02668   TypePromotionTransaction TPT;
02669   TypePromotionTransaction::ConstRestorationPt LastKnownGood =
02670       TPT.getRestorationPoint();
02671   while (!worklist.empty()) {
02672     Value *V = worklist.back();
02673     worklist.pop_back();
02674 
02675     // Break use-def graph loops.
02676     if (!Visited.insert(V).second) {
02677       Consensus = nullptr;
02678       break;
02679     }
02680 
02681     // For a PHI node, push all of its incoming values.
02682     if (PHINode *P = dyn_cast<PHINode>(V)) {
02683       for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i)
02684         worklist.push_back(P->getIncomingValue(i));
02685       continue;
02686     }
02687 
02688     // For non-PHIs, determine the addressing mode being computed.
02689     SmallVector<Instruction*, 16> NewAddrModeInsts;
02690     ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
02691         V, AccessTy, MemoryInst, NewAddrModeInsts, *TLI, InsertedTruncsSet,
02692         PromotedInsts, TPT);
02693 
02694     // This check is broken into two cases with very similar code to avoid using
02695     // getNumUses() as much as possible. Some values have a lot of uses, so
02696     // calling getNumUses() unconditionally caused a significant compile-time
02697     // regression.
02698     if (!Consensus) {
02699       Consensus = V;
02700       AddrMode = NewAddrMode;
02701       AddrModeInsts = NewAddrModeInsts;
02702       continue;
02703     } else if (NewAddrMode == AddrMode) {
02704       if (!IsNumUsesConsensusValid) {
02705         NumUsesConsensus = Consensus->getNumUses();
02706         IsNumUsesConsensusValid = true;
02707       }
02708 
02709       // Ensure that the obtained addressing mode is equivalent to that obtained
02710       // for all other roots of the PHI traversal.  Also, when choosing one
02711       // such root as representative, select the one with the most uses in order
02712       // to keep the cost modeling heuristics in AddressingModeMatcher
02713       // applicable.
02714       unsigned NumUses = V->getNumUses();
02715       if (NumUses > NumUsesConsensus) {
02716         Consensus = V;
02717         NumUsesConsensus = NumUses;
02718         AddrModeInsts = NewAddrModeInsts;
02719       }
02720       continue;
02721     }
02722 
02723     Consensus = nullptr;
02724     break;
02725   }
02726 
02727   // If the addressing mode couldn't be determined, or if multiple different
02728   // ones were determined, bail out now.
02729   if (!Consensus) {
02730     TPT.rollback(LastKnownGood);
02731     return false;
02732   }
02733   TPT.commit();
02734 
02735   // Check to see if any of the instructions subsumed by this addr mode are
02736   // non-local to MemoryInst's BB.
02737   bool AnyNonLocal = false;
02738   for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) {
02739     if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) {
02740       AnyNonLocal = true;
02741       break;
02742     }
02743   }
02744 
02745   // If all the instructions matched are already in this BB, don't do anything.
02746   if (!AnyNonLocal) {
02747     DEBUG(dbgs() << "CGP: Found      local addrmode: " << AddrMode << "\n");
02748     return false;
02749   }
02750 
02751   // Insert this computation right after this user.  Since our caller is
02752   // scanning from the top of the BB to the bottom, reuses of the expression
02753   // are guaranteed to happen later.
02754   IRBuilder<> Builder(MemoryInst);
02755 
02756   // Now that we have determined the addressing expression we want to use and
02757   // know that we have to sink it into this block, check to see if we have
02758   // already done this for some other load/store instruction in this block.  If
02759   // so, reuse the computation.
02760   Value *&SunkAddr = SunkAddrs[Addr];
02761   if (SunkAddr) {
02762     DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
02763                  << *MemoryInst << "\n");
02764     if (SunkAddr->getType() != Addr->getType())
02765       SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
02766   } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() &&
02767                TM && TM->getSubtarget<TargetSubtargetInfo>().useAA())) {
02768     // By default, we use the GEP-based method when AA is used later. This
02769     // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
02770     DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
02771                  << *MemoryInst << "\n");
02772     Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType());
02773     Value *ResultPtr = nullptr, *ResultIndex = nullptr;
02774 
02775     // First, find the pointer.
02776     if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) {
02777       ResultPtr = AddrMode.BaseReg;
02778       AddrMode.BaseReg = nullptr;
02779     }
02780 
02781     if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) {
02782       // We can't add more than one pointer together, nor can we scale a
02783       // pointer (both of which seem meaningless).
02784       if (ResultPtr || AddrMode.Scale != 1)
02785         return false;
02786 
02787       ResultPtr = AddrMode.ScaledReg;
02788       AddrMode.Scale = 0;
02789     }
02790 
02791     if (AddrMode.BaseGV) {
02792       if (ResultPtr)
02793         return false;
02794 
02795       ResultPtr = AddrMode.BaseGV;
02796     }
02797 
02798     // If the real base value actually came from an inttoptr, then the matcher
02799     // will look through it and provide only the integer value. In that case,
02800     // use it here.
02801     if (!ResultPtr && AddrMode.BaseReg) {
02802       ResultPtr =
02803         Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(), "sunkaddr");
02804       AddrMode.BaseReg = nullptr;
02805     } else if (!ResultPtr && AddrMode.Scale == 1) {
02806       ResultPtr =
02807         Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(), "sunkaddr");
02808       AddrMode.Scale = 0;
02809     }
02810 
02811     if (!ResultPtr &&
02812         !AddrMode.BaseReg && !AddrMode.Scale && !AddrMode.BaseOffs) {
02813       SunkAddr = Constant::getNullValue(Addr->getType());
02814     } else if (!ResultPtr) {
02815       return false;
02816     } else {
02817       Type *I8PtrTy =
02818         Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace());
02819 
02820       // Start with the base register. Do this first so that subsequent address
02821       // matching finds it last, which will prevent it from trying to match it
02822       // as the scaled value in case it happens to be a mul. That would be
02823       // problematic if we've sunk a different mul for the scale, because then
02824       // we'd end up sinking both muls.
02825       if (AddrMode.BaseReg) {
02826         Value *V = AddrMode.BaseReg;
02827         if (V->getType() != IntPtrTy)
02828           V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
02829 
02830         ResultIndex = V;
02831       }
02832 
02833       // Add the scale value.
02834       if (AddrMode.Scale) {
02835         Value *V = AddrMode.ScaledReg;
02836         if (V->getType() == IntPtrTy) {
02837           // done.
02838         } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
02839                    cast<IntegerType>(V->getType())->getBitWidth()) {
02840           V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
02841         } else {
02842           // It is only safe to sign extend the BaseReg if we know that the math
02843           // required to create it did not overflow before we extend it. Since
02844           // the original IR value was tossed in favor of a constant back when
02845           // the AddrMode was created we need to bail out gracefully if widths
02846           // do not match instead of extending it.
02847           Instruction *I = dyn_cast_or_null<Instruction>(ResultIndex);
02848           if (I && (ResultIndex != AddrMode.BaseReg))
02849             I->eraseFromParent();
02850           return false;
02851         }
02852 
02853         if (AddrMode.Scale != 1)
02854           V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
02855                                 "sunkaddr");
02856         if (ResultIndex)
02857           ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr");
02858         else
02859           ResultIndex = V;
02860       }
02861 
02862       // Add in the Base Offset if present.
02863       if (AddrMode.BaseOffs) {
02864         Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
02865         if (ResultIndex) {
02866           // We need to add this separately from the scale above to help with
02867           // SDAG consecutive load/store merging.
02868           if (ResultPtr->getType() != I8PtrTy)
02869             ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
02870           ResultPtr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr");
02871         }
02872 
02873         ResultIndex = V;
02874       }
02875 
02876       if (!ResultIndex) {
02877         SunkAddr = ResultPtr;
02878       } else {
02879         if (ResultPtr->getType() != I8PtrTy)
02880           ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
02881         SunkAddr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr");
02882       }
02883 
02884       if (SunkAddr->getType() != Addr->getType())
02885         SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
02886     }
02887   } else {
02888     DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
02889                  << *MemoryInst << "\n");
02890     Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType());
02891     Value *Result = nullptr;
02892 
02893     // Start with the base register. Do this first so that subsequent address
02894     // matching finds it last, which will prevent it from trying to match it
02895     // as the scaled value in case it happens to be a mul. That would be
02896     // problematic if we've sunk a different mul for the scale, because then
02897     // we'd end up sinking both muls.
02898     if (AddrMode.BaseReg) {
02899       Value *V = AddrMode.BaseReg;
02900       if (V->getType()->isPointerTy())
02901         V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
02902       if (V->getType() != IntPtrTy)
02903         V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
02904       Result = V;
02905     }
02906 
02907     // Add the scale value.
02908     if (AddrMode.Scale) {
02909       Value *V = AddrMode.ScaledReg;
02910       if (V->getType() == IntPtrTy) {
02911         // done.
02912       } else if (V->getType()->isPointerTy()) {
02913         V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
02914       } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
02915                  cast<IntegerType>(V->getType())->getBitWidth()) {
02916         V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
02917       } else {
02918         // It is only safe to sign extend the BaseReg if we know that the math
02919         // required to create it did not overflow before we extend it. Since
02920         // the original IR value was tossed in favor of a constant back when
02921         // the AddrMode was created we need to bail out gracefully if widths
02922         // do not match instead of extending it.
02923         Instruction *I = dyn_cast_or_null<Instruction>(Result);
02924         if (I && (Result != AddrMode.BaseReg))
02925           I->eraseFromParent();
02926         return false;
02927       }
02928       if (AddrMode.Scale != 1)
02929         V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
02930                               "sunkaddr");
02931       if (Result)
02932         Result = Builder.CreateAdd(Result, V, "sunkaddr");
02933       else
02934         Result = V;
02935     }
02936 
02937     // Add in the BaseGV if present.
02938     if (AddrMode.BaseGV) {
02939       Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr");
02940       if (Result)
02941         Result = Builder.CreateAdd(Result, V, "sunkaddr");
02942       else
02943         Result = V;
02944     }
02945 
02946     // Add in the Base Offset if present.
02947     if (AddrMode.BaseOffs) {
02948       Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
02949       if (Result)
02950         Result = Builder.CreateAdd(Result, V, "sunkaddr");
02951       else
02952         Result = V;
02953     }
02954 
02955     if (!Result)
02956       SunkAddr = Constant::getNullValue(Addr->getType());
02957     else
02958       SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
02959   }
02960 
02961   MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
02962 
02963   // If we have no uses, recursively delete the value and all dead instructions
02964   // using it.
02965   if (Repl->use_empty()) {
02966     // This can cause recursive deletion, which can invalidate our iterator.
02967     // Use a WeakVH to hold onto it in case this happens.
02968     WeakVH IterHandle(CurInstIterator);
02969     BasicBlock *BB = CurInstIterator->getParent();
02970 
02971     RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo);
02972 
02973     if (IterHandle != CurInstIterator) {
02974       // If the iterator instruction was recursively deleted, start over at the
02975       // start of the block.
02976       CurInstIterator = BB->begin();
02977       SunkAddrs.clear();
02978     }
02979   }
02980   ++NumMemoryInsts;
02981   return true;
02982 }
02983 
02984 /// OptimizeInlineAsmInst - If there are any memory operands, use
02985 /// OptimizeMemoryInst to sink their address computations into the block when
02986 /// possible / profitable.
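///
/// For instance (an illustrative, hand-written example; not from this file),
/// the indirect memory operand of
/// \code
///   call void asm sideeffect "prefetcht0 $0", "*m"(i8* %addr)
/// \endcode
/// is treated like a load/store address, and the computation of %addr may be
/// sunk into the call's block.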
02987 bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
02988   bool MadeChange = false;
02989 
02990   TargetLowering::AsmOperandInfoVector
02991     TargetConstraints = TLI->ParseConstraints(CS);
02992   unsigned ArgNo = 0;
02993   for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
02994     TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
02995 
02996     // Compute the constraint code and ConstraintType to use.
02997     TLI->ComputeConstraintToUse(OpInfo, SDValue());
02998 
02999     if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
03000         OpInfo.isIndirect) {
03001       Value *OpVal = CS->getArgOperand(ArgNo++);
03002       MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType());
03003     } else if (OpInfo.Type == InlineAsm::isInput)
03004       ArgNo++;
03005   }
03006 
03007   return MadeChange;
03008 }
03009 
03010 /// \brief Check if all the uses of \p Inst are equivalent (or free) zero or
03011 /// sign extensions.
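///
/// For example (illustrative IR; names are invented), if %ld has the two uses
/// \code
///   %a = zext i8 %ld to i32
///   %b = zext i8 %ld to i64
/// \endcode
/// they are considered equivalent when the target reports that zero extending
/// from i32 to i64 is free (TLI.isZExtFree).  Two sign extensions to different
/// types are not, because the extra sext would not be free.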
03012 static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) {
03013   assert(!Inst->use_empty() && "Input must have at least one use");
03014   const Instruction *FirstUser = cast<Instruction>(*Inst->user_begin());
03015   bool IsSExt = isa<SExtInst>(FirstUser);
03016   Type *ExtTy = FirstUser->getType();
03017   for (const User *U : Inst->users()) {
03018     const Instruction *UI = cast<Instruction>(U);
03019     if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
03020       return false;
03021     Type *CurTy = UI->getType();
03022     // Same input and output types: Same instruction after CSE.
03023     if (CurTy == ExtTy)
03024       continue;
03025 
03026     // If IsSExt is true, we are in this situation:
03027     // a = Inst
03028     // b = sext ty1 a to ty2
03029     // c = sext ty1 a to ty3
03030     // Assuming ty2 is shorter than ty3, this could be turned into:
03031     // a = Inst
03032     // b = sext ty1 a to ty2
03033     // c = sext ty2 b to ty3
03034     // However, the last sext is not free.
03035     if (IsSExt)
03036       return false;
03037 
03038     // This is a ZExt; extending from one type to the other may be free.
03039     // In that case, we do not need to account for a different use.
03040     Type *NarrowTy;
03041     Type *LargeTy;
03042     if (ExtTy->getScalarType()->getIntegerBitWidth() >
03043         CurTy->getScalarType()->getIntegerBitWidth()) {
03044       NarrowTy = CurTy;
03045       LargeTy = ExtTy;
03046     } else {
03047       NarrowTy = ExtTy;
03048       LargeTy = CurTy;
03049     }
03050 
03051     if (!TLI.isZExtFree(NarrowTy, LargeTy))
03052       return false;
03053   }
03054   // All uses are the same or can be derived from one another for free.
03055   return true;
03056 }
03057 
03058 /// \brief Try to form ExtLd by promoting \p Exts until they reach a
03059 /// load instruction.
03060 /// If an ext(load) can be formed, it is returned via \p LI for the load
03061 /// and \p Inst for the extension.
03062 /// Otherwise LI == nullptr and Inst == nullptr.
03063 /// When some promotion happened, \p TPT contains the proper state to
03064 /// revert them.
03065 ///
03066 /// \return true when promoting was necessary to expose the ext(load)
03067 /// opportunity, false otherwise.
03068 ///
03069 /// Example:
03070 /// \code
03071 /// %ld = load i32* %addr
03072 /// %add = add nuw i32 %ld, 4
03073 /// %zext = zext i32 %add to i64
03074 /// \endcode
03075 /// =>
03076 /// \code
03077 /// %ld = load i32* %addr
03078 /// %zext = zext i32 %ld to i64
03079 /// %add = add nuw i64 %zext, 4
03080 /// \endcode
03081 /// Thanks to the promotion, we can match zext(load i32*) to i64.
03082 bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT,
03083                                     LoadInst *&LI, Instruction *&Inst,
03084                                     const SmallVectorImpl<Instruction *> &Exts,
03085                                     unsigned CreatedInsts = 0) {
03086   // Iterate over all the extensions to see if one forms an ext(load).
03087   for (auto I : Exts) {
03088     // Check if we directly have ext(load).
03089     if ((LI = dyn_cast<LoadInst>(I->getOperand(0)))) {
03090       Inst = I;
03091       // No promotion happened here.
03092       return false;
03093     }
03094     // Check whether or not we want to do any promotion.
03095     if (!TLI || !TLI->enableExtLdPromotion() || DisableExtLdPromotion)
03096       continue;
03097     // Get the action to perform the promotion.
03098     TypePromotionHelper::Action TPH = TypePromotionHelper::getAction(
03099         I, InsertedTruncsSet, *TLI, PromotedInsts);
03100     // Check if we can promote.
03101     if (!TPH)
03102       continue;
03103     // Save the current state.
03104     TypePromotionTransaction::ConstRestorationPt LastKnownGood =
03105         TPT.getRestorationPoint();
03106     SmallVector<Instruction *, 4> NewExts;
03107     unsigned NewCreatedInsts = 0;
03108     // Promote.
03109     Value *PromotedVal =
03110         TPH(I, TPT, PromotedInsts, NewCreatedInsts, &NewExts, nullptr);
03111     assert(PromotedVal &&
03112            "TypePromotionHelper should have filtered out those cases");
03113 
03114     // We can merge only one extension into a load.
03115     // Therefore, if we have more than one new extension, we heuristically
03116     // cut this search path, because it means we degrade the code quality.
03117     // With exactly two, the transformation is neutral, because we will merge
03118     // one extension but leave one. However, we optimistically keep going,
03119     // because the new extension may be removed too.
03120     unsigned TotalCreatedInsts = CreatedInsts + NewCreatedInsts;
03121     if (!StressExtLdPromotion &&
03122         (TotalCreatedInsts > 1 ||
03123          !isPromotedInstructionLegal(*TLI, PromotedVal))) {
03124       // The promotion is not profitable, rollback to the previous state.
03125       TPT.rollback(LastKnownGood);
03126       continue;
03127     }
03128     // The promotion is profitable.
03129     // Check if it exposes an ext(load).
03130     (void)ExtLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInsts);
03131     if (LI && (StressExtLdPromotion || NewCreatedInsts == 0 ||
03132                // If we have created a new extension, i.e., now we have two
03133                // extensions, we must make sure one of them is merged with
03134                // the load; otherwise we may degrade the code quality.
03135                (LI->hasOneUse() || hasSameExtUse(LI, *TLI))))
03136       // Promotion happened.
03137       return true;
03138     // If this does not help to expose an ext(load) then, rollback.
03139     TPT.rollback(LastKnownGood);
03140   }
03141   // None of the extensions can form an ext(load).
03142   LI = nullptr;
03143   Inst = nullptr;
03144   return false;
03145 }
03146 
03147 /// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same
03148 /// basic block as the load, unless conditions are unfavorable. This allows
03149 /// SelectionDAG to fold the extend into the load.
03150 /// \p I[in/out] the extension may be modified during the process if some
03151 /// promotions apply.
03152 ///
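/// A minimal sketch of the basic (no-promotion) case, with invented names:
/// \code
///   bb1:
///     %ld = load i16* %addr
///     br label %bb2
///   bb2:
///     %ext = sext i16 %ld to i32
/// \endcode
/// becomes
/// \code
///   bb1:
///     %ld = load i16* %addr
///     %ext = sext i16 %ld to i32
///     br label %bb2
/// \endcode
/// so that SelectionDAG can select a single sign-extending load.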
03153 bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *&I) {
03154   // Try to promote a chain of computation if it allows us to form
03155   // an extended load.
03156   TypePromotionTransaction TPT;
03157   TypePromotionTransaction::ConstRestorationPt LastKnownGood =
03158     TPT.getRestorationPoint();
03159   SmallVector<Instruction *, 1> Exts;
03160   Exts.push_back(I);
03161   // Look for a load being extended.
03162   LoadInst *LI = nullptr;
03163   Instruction *OldExt = I;
03164   bool HasPromoted = ExtLdPromotion(TPT, LI, I, Exts);
03165   if (!LI || !I) {
03166     assert(!HasPromoted && !LI && "If we did not match any load instruction "
03167                                   "the code must remain the same");
03168     I = OldExt;
03169     return false;
03170   }
03171 
03172   // If they're already in the same block, there's nothing to do.
03173   // Make the cheap checks first if we did not promote.
03174   // If we promoted, we need to check if it is indeed profitable.
03175   if (!HasPromoted && LI->getParent() == I->getParent())
03176     return false;
03177 
03178   EVT VT = TLI->getValueType(I->getType());
03179   EVT LoadVT = TLI->getValueType(LI->getType());
03180 
03181   // If the load has other users and the truncate is not free, this probably
03182   // isn't worthwhile.
03183   if (!LI->hasOneUse() && TLI &&
03184       (TLI->isTypeLegal(LoadVT) || !TLI->isTypeLegal(VT)) &&
03185       !TLI->isTruncateFree(I->getType(), LI->getType())) {
03186     I = OldExt;
03187     TPT.rollback(LastKnownGood);
03188     return false;
03189   }
03190 
03191   // Check whether the target supports casts folded into loads.
03192   unsigned LType;
03193   if (isa<ZExtInst>(I))
03194     LType = ISD::ZEXTLOAD;
03195   else {
03196     assert(isa<SExtInst>(I) && "Unexpected ext type!");
03197     LType = ISD::SEXTLOAD;
03198   }
03199   if (TLI && !TLI->isLoadExtLegal(LType, LoadVT)) {
03200     I = OldExt;
03201     TPT.rollback(LastKnownGood);
03202     return false;
03203   }
03204 
03205   // Move the extend into the same block as the load, so that SelectionDAG
03206   // can fold it.
03207   TPT.commit();
03208   I->removeFromParent();
03209   I->insertAfter(LI);
03210   ++NumExtsMoved;
03211   return true;
03212 }
03213 
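/// OptimizeExtUses - If both the result of a {s|z}ext and its source value are
/// live out of the defining block, rewrite the out-of-block uses of the source
/// with a truncate of the extension result, so the source itself no longer
/// needs to be live out.  A rough sketch (illustrative IR, invented names):
/// \code
///   bb1:
///     %src = ...                        ; i16
///     %ext = zext i16 %src to i32
///     br label %bb2
///   bb2:
///     use(%src)
/// \endcode
/// becomes
/// \code
///   bb2:
///     %trunc = trunc i32 %ext to i16
///     use(%trunc)
/// \endcode
/// and is only done when the truncate is free on the target.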
03214 bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
03215   BasicBlock *DefBB = I->getParent();
03216 
03217   // If the result of a {s|z}ext and its source are both live out, rewrite all
03218   // other uses of the source with result of extension.
03219   Value *Src = I->getOperand(0);
03220   if (Src->hasOneUse())
03221     return false;
03222 
03223   // Only do this xform if truncating is free.
03224   if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
03225     return false;
03226 
03227   // Only safe to perform the optimization if the source is also defined in
03228   // this block.
03229   if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
03230     return false;
03231 
03232   bool DefIsLiveOut = false;
03233   for (User *U : I->users()) {
03234     Instruction *UI = cast<Instruction>(U);
03235 
03236     // Figure out which BB this ext is used in.
03237     BasicBlock *UserBB = UI->getParent();
03238     if (UserBB == DefBB) continue;
03239     DefIsLiveOut = true;
03240     break;
03241   }
03242   if (!DefIsLiveOut)
03243     return false;
03244 
03245   // Make sure none of the uses are PHI nodes.
03246   for (User *U : Src->users()) {
03247     Instruction *UI = cast<Instruction>(U);
03248     BasicBlock *UserBB = UI->getParent();
03249     if (UserBB == DefBB) continue;
03250     // Be conservative. We don't want this xform to end up introducing
03251     // reloads just before load / store instructions.
03252     if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI))
03253       return false;
03254   }
03255 
03256   // InsertedTruncs - Only insert one trunc in each block.
03257   DenseMap<BasicBlock*, Instruction*> InsertedTruncs;
03258 
03259   bool MadeChange = false;
03260   for (Use &U : Src->uses()) {
03261     Instruction *User = cast<Instruction>(U.getUser());
03262 
03263     // Figure out which BB this use of the source is in.
03264     BasicBlock *UserBB = User->getParent();
03265     if (UserBB == DefBB) continue;
03266 
03267     // Both src and def are live in this block. Rewrite the use.
03268     Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
03269 
03270     if (!InsertedTrunc) {
03271       BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
03272       InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
03273       InsertedTruncsSet.insert(InsertedTrunc);
03274     }
03275 
03276     // Replace a use of the {s|z}ext source with a use of the result.
03277     U = InsertedTrunc;
03278     ++NumExtUses;
03279     MadeChange = true;
03280   }
03281 
03282   return MadeChange;
03283 }
03284 
03285 /// isFormingBranchFromSelectProfitable - Returns true if a SelectInst should be
03286 /// turned into an explicit branch.
03287 static bool isFormingBranchFromSelectProfitable(SelectInst *SI) {
03288   // FIXME: This should use the same heuristics as IfConversion to determine
03289   // whether a select is better represented as a branch.  This requires that
03290   // branch probability metadata is preserved for the select, which is not the
03291   // case currently.
03292 
03293   CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
03294 
03295   // If the branch is predicted right, an out of order CPU can avoid blocking on
03296   // the compare.  Emit cmovs on compares with a memory operand as branches to
03297   // avoid stalls on the load from memory.  If the compare has more than one use
03298   // there's probably another cmov or setcc around so it's not worth emitting a
03299   // branch.
03300   if (!Cmp)
03301     return false;
03302 
03303   Value *CmpOp0 = Cmp->getOperand(0);
03304   Value *CmpOp1 = Cmp->getOperand(1);
03305 
03306   // We check that the memory operand has one use to avoid uses of the loaded
03307   // value directly after the compare, making branches unprofitable.
03308   return Cmp->hasOneUse() &&
03309          ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) ||
03310           (isa<LoadInst>(CmpOp1) && CmpOp1->hasOneUse()));
03311 }
03312 
03313 
03314 /// If we have a SelectInst that will likely profit from branch prediction,
03315 /// turn it into a branch.
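/// A rough sketch of the rewrite (illustrative IR, invented names):
/// \code
///   entry:
///     %sel = select i1 %cmp, i32 %a, i32 %b
/// \endcode
/// becomes
/// \code
///   entry:
///     br i1 %cmp, label %select.end, label %select.mid
///   select.mid:
///     br label %select.end
///   select.end:
///     %sel = phi i32 [ %a, %entry ], [ %b, %select.mid ]
/// \endcode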
03316 bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) {
03317   bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
03318 
03319   // Can we convert the 'select' into control flow?
03320   if (DisableSelectToBranch || OptSize || !TLI || VectorCond)
03321     return false;
03322 
03323   TargetLowering::SelectSupportKind SelectKind;
03324   if (VectorCond)
03325     SelectKind = TargetLowering::VectorMaskSelect;
03326   else if (SI->getType()->isVectorTy())
03327     SelectKind = TargetLowering::ScalarCondVectorVal;
03328   else
03329     SelectKind = TargetLowering::ScalarValSelect;
03330 
03331   // Do we have efficient codegen support for this kind of 'select'?
03332   if (TLI->isSelectSupported(SelectKind)) {
03333     // We have efficient codegen support for the select instruction.
03334     // Check if it is profitable to keep this 'select'.
03335     if (!TLI->isPredictableSelectExpensive() ||
03336         !isFormingBranchFromSelectProfitable(SI))
03337       return false;
03338   }
03339 
03340   ModifiedDT = true;
03341 
03342   // First, we split the block containing the select into 2 blocks.
03343   BasicBlock *StartBlock = SI->getParent();
03344   BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(SI));
03345   BasicBlock *NextBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
03346 
03347   // Create a new block serving as the landing pad for the branch.
03348   BasicBlock *SmallBlock = BasicBlock::Create(SI->getContext(), "select.mid",
03349                                              NextBlock->getParent(), NextBlock);
03350 
03351   // Move the unconditional branch from the block with the select in it into our
03352   // landing pad block.
03353   StartBlock->getTerminator()->eraseFromParent();
03354   BranchInst::Create(NextBlock, SmallBlock);
03355 
03356   // Insert the real conditional branch based on the original condition.
03357   BranchInst::Create(NextBlock, SmallBlock, SI->getCondition(), SI);
03358 
03359   // The select itself is replaced with a PHI Node.
03360   PHINode *PN = PHINode::Create(SI->getType(), 2, "", NextBlock->begin());
03361   PN->takeName(SI);
03362   PN->addIncoming(SI->getTrueValue(), StartBlock);
03363   PN->addIncoming(SI->getFalseValue(), SmallBlock);
03364   SI->replaceAllUsesWith(PN);
03365   SI->eraseFromParent();
03366 
03367   // Instruct OptimizeBlock to skip to the next block.
03368   CurInstIterator = StartBlock->end();
03369   ++NumSelectsExpanded;
03370   return true;
03371 }
03372 
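/// isBroadcastShuffle - Return true if the shuffle mask reads a single source
/// lane (possibly mixed with undef elements), i.e. the shuffle is a
/// splat/broadcast.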
03373 static bool isBroadcastShuffle(ShuffleVectorInst *SVI) {
03374   SmallVector<int, 16> Mask(SVI->getShuffleMask());
03375   int SplatElem = -1;
03376   for (unsigned i = 0; i < Mask.size(); ++i) {
03377     if (SplatElem != -1 && Mask[i] != -1 && Mask[i] != SplatElem)
03378       return false;
03379     SplatElem = Mask[i];
03380   }
03381 
03382   return true;
03383 }
03384 
03385 /// Some targets have expensive vector shifts if the lanes aren't all the same
03386 /// (e.g. x86 only introduced "vpsllvd" and friends with AVX2). In these cases
03387 /// it's often worth sinking a shufflevector splat down to its use so that
03388 /// codegen can spot all lanes are identical.
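///
/// A rough sketch (illustrative IR, invented names): a splat defined in one
/// block and used by a shift in another
/// \code
///   bb1:
///     %splat = shufflevector <4 x i32> %v, <4 x i32> undef,
///                            <4 x i32> zeroinitializer
///     br label %bb2
///   bb2:
///     %shl = shl <4 x i32> %x, %splat
/// \endcode
/// gets a copy of the shufflevector inserted in bb2, so the shift and the
/// splat end up in the same block for instruction selection.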
03389 bool CodeGenPrepare::OptimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
03390   BasicBlock *DefBB = SVI->getParent();
03391 
03392   // Only do this xform if variable vector shifts are particularly expensive.
03393   if (!TLI || !TLI->isVectorShiftByScalarCheap(SVI->getType()))
03394     return false;
03395 
03396   // We only expect better codegen by sinking a shuffle if we can recognise a
03397   // constant splat.
03398   if (!isBroadcastShuffle(SVI))
03399     return false;
03400 
03401   // InsertedShuffles - Only insert a shuffle in each block once.
03402   DenseMap<BasicBlock*, Instruction*> InsertedShuffles;
03403 
03404   bool MadeChange = false;
03405   for (User *U : SVI->users()) {
03406     Instruction *UI = cast<Instruction>(U);
03407 
03408     // Figure out which BB this shuffle is used in.
03409     BasicBlock *UserBB = UI->getParent();
03410     if (UserBB == DefBB) continue;
03411 
03412     // For now only apply this when the splat is used by a shift instruction.
03413     if (!UI->isShift()) continue;
03414 
03415     // Everything checks out, sink the shuffle if the user's block doesn't
03416     // already have a copy.
03417     Instruction *&InsertedShuffle = InsertedShuffles[UserBB];
03418 
03419     if (!InsertedShuffle) {
03420       BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
03421       InsertedShuffle = new ShuffleVectorInst(SVI->getOperand(0),
03422                                               SVI->getOperand(1),
03423                                               SVI->getOperand(2), "", InsertPt);
03424     }
03425 
03426     UI->replaceUsesOfWith(SVI, InsertedShuffle);
03427     MadeChange = true;
03428   }
03429 
03430   // If we removed all uses, nuke the shuffle.
03431   if (SVI->use_empty()) {
03432     SVI->eraseFromParent();
03433     MadeChange = true;
03434   }
03435 
03436   return MadeChange;
03437 }
03438 
03439 namespace {
03440 /// \brief Helper class to promote a scalar operation to a vector one.
03441 /// This class is used to move an extractelement transition downward.
03442 /// E.g.,
03443 /// a = vector_op <2 x i32>
03444 /// b = extractelement <2 x i32> a, i32 0
03445 /// c = scalar_op b
03446 /// store c
03447 ///
03448 /// =>
03449 /// a = vector_op <2 x i32>
03450 /// c = vector_op a (equivalent to scalar_op on the related lane)
03451 /// * d = extractelement <2 x i32> c, i32 0
03452 /// * store d
03453 /// Assuming both extractelement and store can be combined, we get rid of the
03454 /// transition.
03455 class VectorPromoteHelper {
03456   /// Used to perform some checks on the legality of vector operations.
03457   const TargetLowering &TLI;
03458 
03459   /// Used to estimate the cost of the promoted chain.
03460   const TargetTransformInfo &TTI;
03461 
03462   /// The transition being moved downwards.
03463   Instruction *Transition;
03464   /// The sequence of instructions to be promoted.
03465   SmallVector<Instruction *, 4> InstsToBePromoted;
03466   /// Cost of combining a store and an extract.
03467   unsigned StoreExtractCombineCost;
03468   /// Instruction that will be combined with the transition.
03469   Instruction *CombineInst;
03470 
03471   /// \brief The instruction that represents the current end of the transition.
03472   /// Since we are faking the promotion until we reach the end of the chain
03473   /// of computation, we need a way to get the current end of the transition.
03474   Instruction *getEndOfTransition() const {
03475     if (InstsToBePromoted.empty())
03476       return Transition;
03477     return InstsToBePromoted.back();
03478   }
03479 
03480   /// \brief Return the index of the original value in the transition.
03481   /// E.g., for "extractelement <2 x i32> c, i32 1" the original value,
03482   /// c, is at index 0.
03483   unsigned getTransitionOriginalValueIdx() const {
03484     assert(isa<ExtractElementInst>(Transition) &&
03485            "Other kind of transitions are not supported yet");
03486     return 0;
03487   }
03488 
03489   /// \brief Return the index of the index in the transition.
03490   /// E.g., for "extractelement <2 x i32> c, i32 0" the index
03491   /// is at index 1.
03492   unsigned getTransitionIdx() const {
03493     assert(isa<ExtractElementInst>(Transition) &&
03494            "Other kind of transitions are not supported yet");
03495     return 1;
03496   }
03497 
03498   /// \brief Get the type of the transition.
03499   /// This is the type of the original value.
03500   /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the
03501   /// transition is <2 x i32>.
03502   Type *getTransitionType() const {
03503     return Transition->getOperand(getTransitionOriginalValueIdx())->getType();
03504   }
03505 
03506   /// \brief Promote \p ToBePromoted by moving \p Def downward through it.
03507   /// I.e., we have the following sequence:
03508   /// Def = Transition <ty1> a to <ty2>
03509   /// b = ToBePromoted <ty2> Def, ...
03510   /// =>
03511   /// b = ToBePromoted <ty1> a, ...
03512   /// Def = Transition <ty1> ToBePromoted to <ty2>
03513   void promoteImpl(Instruction *ToBePromoted);
03514 
03515   /// \brief Check whether or not it is profitable to promote all the
03516   /// instructions enqueued to be promoted.
03517   bool isProfitableToPromote() {
03518     Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx());
03519     unsigned Index = isa<ConstantInt>(ValIdx)
03520                          ? cast<ConstantInt>(ValIdx)->getZExtValue()
03521                          : -1;
03522     Type *PromotedType = getTransitionType();
03523 
03524     StoreInst *ST = cast<StoreInst>(CombineInst);
03525     unsigned AS = ST->getPointerAddressSpace();
03526     unsigned Align = ST->getAlignment();
03527     // Check if this store is supported.
03528     if (!TLI.allowsMisalignedMemoryAccesses(
03529             TLI.getValueType(ST->getValueOperand()->getType()), AS, Align)) {
03530       // If this is not supported, there is no way we can combine
03531       // the extract with the store.
03532       return false;
03533     }
03534 
03535     // The scalar chain of computation has to pay for the transition
03536     // scalar to vector.
03537     // The vector chain has to account for the combining cost.
03538     uint64_t ScalarCost =
03539         TTI.getVectorInstrCost(Transition->getOpcode(), PromotedType, Index);
03540     uint64_t VectorCost = StoreExtractCombineCost;
03541     for (const auto &Inst : InstsToBePromoted) {
03542       // Compute the cost.
03543       // By construction, all instructions being promoted are arithmetic ones.
03544       // Moreover, one argument is a constant that can be viewed as a splat
03545       // constant.
03546       Value *Arg0 = Inst->getOperand(0);
03547       bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) ||
03548                             isa<ConstantFP>(Arg0);
03549       TargetTransformInfo::OperandValueKind Arg0OVK =
03550           IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue
03551                          : TargetTransformInfo::OK_AnyValue;
03552       TargetTransformInfo::OperandValueKind Arg1OVK =
03553           !IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue
03554                           : TargetTransformInfo::OK_AnyValue;
03555       ScalarCost += TTI.getArithmeticInstrCost(
03556           Inst->getOpcode(), Inst->getType(), Arg0OVK, Arg1OVK);
03557       VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType,
03558                                                Arg0OVK, Arg1OVK);
03559     }
03560     DEBUG(dbgs() << "Estimated cost of computation to be promoted:\nScalar: "
03561                  << ScalarCost << "\nVector: " << VectorCost << '\n');
03562     return ScalarCost > VectorCost;
03563   }
03564 
03565   /// \brief Generate a constant vector with \p Val with the same
03566   /// number of elements as the transition.
03567   /// \p UseSplat defines whether or not \p Val should be replicated
03568   /// across the whole vector.
03569   /// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>,
03570   /// otherwise we generate a vector with as many undef as possible:
03571   /// <undef, ..., undef, Val, undef, ..., undef> where \p Val is only
03572   /// used at the index of the extract.
03573   Value *getConstantVector(Constant *Val, bool UseSplat) const {
03574     unsigned ExtractIdx = UINT_MAX;
03575     if (!UseSplat) {
03576       // If we cannot determine where the constant must be, we have to
03577       // use a splat constant.
03578       Value *ValExtractIdx = Transition->getOperand(getTransitionIdx());
03579       if (ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx))
03580         ExtractIdx = CstVal->getSExtValue();
03581       else
03582         UseSplat = true;
03583     }
03584 
03585     unsigned End = getTransitionType()->getVectorNumElements();
03586     if (UseSplat)
03587       return ConstantVector::getSplat(End, Val);
03588 
03589     SmallVector<Constant *, 4> ConstVec;
03590     UndefValue *UndefVal = UndefValue::get(Val->getType());
03591     for (unsigned Idx = 0; Idx != End; ++Idx) {
03592       if (Idx == ExtractIdx)
03593         ConstVec.push_back(Val);
03594       else
03595         ConstVec.push_back(UndefVal);
03596     }
03597     return ConstantVector::get(ConstVec);
03598   }
03599 
03600   /// \brief Check if promoting the operand at \p OperandIdx in \p Use to a
03601   /// vector type can trigger undefined behavior.
03602   static bool canCauseUndefinedBehavior(const Instruction *Use,
03603                                         unsigned OperandIdx) {
03604     // It is not safe to introduce undef when the operand is on
03605     // the right-hand side of a division-like instruction.
03606     if (OperandIdx != 1)
03607       return false;
03608     switch (Use->getOpcode()) {
03609     default:
03610       return false;
03611     case Instruction::SDiv:
03612     case Instruction::UDiv:
03613     case Instruction::SRem:
03614     case Instruction::URem:
03615       return true;
03616     case Instruction::FDiv:
03617     case Instruction::FRem:
03618       return !Use->hasNoNaNs();
03619     }
03620     llvm_unreachable(nullptr);
03621   }
03622 
03623 public:
03624   VectorPromoteHelper(const TargetLowering &TLI, const TargetTransformInfo &TTI,
03625                       Instruction *Transition, unsigned CombineCost)
03626       : TLI(TLI), TTI(TTI), Transition(Transition),
03627         StoreExtractCombineCost(CombineCost), CombineInst(nullptr) {
03628     assert(Transition && "Do not know how to promote null");
03629   }
03630 
03631   /// \brief Check if we can promote \p ToBePromoted to \p Type.
03632   bool canPromote(const Instruction *ToBePromoted) const {
03633     // We could support CastInst too.
03634     return isa<BinaryOperator>(ToBePromoted);
03635   }
03636 
03637   /// \brief Check if it is profitable to promote \p ToBePromoted
03638   /// by moving the transition downward through it.
03639   bool shouldPromote(const Instruction *ToBePromoted) const {
03640     // Promote only if all the operands can be statically expanded.
03641     // Indeed, we do not want to introduce any new kind of transitions.
03642     for (const Use &U : ToBePromoted->operands()) {
03643       const Value *Val = U.get();
03644       if (Val == getEndOfTransition()) {
03645         // If the use is a division and the transition is on the rhs,
03646         // we cannot promote the operation, otherwise we may create a
03647         // division by zero.
03648         if (canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()))
03649           return false;
03650         continue;
03651       }
03652       if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) &&
03653           !isa<ConstantFP>(Val))
03654         return false;
03655     }
03656     // Check that the resulting operation is legal.
03657     int ISDOpcode = TLI.InstructionOpcodeToISD(ToBePromoted->getOpcode());
03658     if (!ISDOpcode)
03659       return false;
03660     return StressStoreExtract ||
03661            TLI.isOperationLegalOrCustom(
03662                ISDOpcode, TLI.getValueType(getTransitionType(), true));
03663   }
03664 
03665   /// \brief Check whether or not \p Use can be combined
03666   /// with the transition.
03667   /// I.e., is it possible to do Use(Transition) => AnotherUse?
03668   bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); }
03669 
03670   /// \brief Record \p ToBePromoted as part of the chain to be promoted.
03671   void enqueueForPromotion(Instruction *ToBePromoted) {
03672     InstsToBePromoted.push_back(ToBePromoted);
03673   }
03674 
03675   /// \brief Set the instruction that will be combined with the transition.
03676   void recordCombineInstruction(Instruction *ToBeCombined) {
03677     assert(canCombine(ToBeCombined) && "Unsupported instruction to combine");
03678     CombineInst = ToBeCombined;
03679   }
03680 
03681   /// \brief Promote all the instructions enqueued for promotion if it
03682   /// is profitable.
03683   /// \return True if the promotion happened, false otherwise.
03684   bool promote() {
03685     // Check if there is something to promote.
03686     // Right now, if we do not have anything to combine with,
03687     // we assume the promotion is not profitable.
03688     if (InstsToBePromoted.empty() || !CombineInst)
03689       return false;
03690 
03691     // Check cost.
03692     if (!StressStoreExtract && !isProfitableToPromote())
03693       return false;
03694 
03695     // Promote.
03696     for (auto &ToBePromoted : InstsToBePromoted)
03697       promoteImpl(ToBePromoted);
03698     InstsToBePromoted.clear();
03699     return true;
03700   }
03701 };
03702 } // End of anonymous namespace.
03703 
03704 void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
03705   // At this point, we know that all the operands of ToBePromoted but Def
03706   // can be statically promoted.
03707   // For Def, we need to use its parameter in ToBePromoted:
03708   // b = ToBePromoted ty1 a
03709   // Def = Transition ty1 b to ty2
03710   // Move the transition down.
03711   // 1. Replace all uses of the promoted operation by the transition.
03712   // = ... b => = ... Def.
03713   assert(ToBePromoted->getType() == Transition->getType() &&
03714          "The type of the result of the transition does not match "
03715          "the final type");
03716   ToBePromoted->replaceAllUsesWith(Transition);
03717   // 2. Update the type of the uses.
03718   // b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def.
03719   Type *TransitionTy = getTransitionType();
03720   ToBePromoted->mutateType(TransitionTy);
03721   // 3. Update all the operands of the promoted operation with promoted
03722   // operands.
03723   // b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a.
03724   for (Use &U : ToBePromoted->operands()) {
03725     Value *Val = U.get();
03726     Value *NewVal = nullptr;
03727     if (Val == Transition)
03728       NewVal = Transition->getOperand(getTransitionOriginalValueIdx());
03729     else if (isa<UndefValue>(Val) || isa<ConstantInt>(Val) ||
03730              isa<ConstantFP>(Val)) {
03731       // Use a splat constant if it is not safe to use undef.
03732       NewVal = getConstantVector(
03733           cast<Constant>(Val),
03734           isa<UndefValue>(Val) ||
03735               canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()));
03736     } else
03737       assert(0 && "Did you modify shouldPromote and forget to update this?");
03738     ToBePromoted->setOperand(U.getOperandNo(), NewVal);
03739   }
03740   Transition->removeFromParent();
03741   Transition->insertAfter(ToBePromoted);
03742   Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted);
03743 }
03744 
03745 /// Some targets can do store(extractelement) with one instruction.
03746 /// Try to push the extractelement towards the stores when the target
03747 /// has this feature and this is profitable.
03748 bool CodeGenPrepare::OptimizeExtractElementInst(Instruction *Inst) {
03749   unsigned CombineCost = UINT_MAX;
03750   if (DisableStoreExtract || !TLI ||
03751       (!StressStoreExtract &&
03752        !TLI->canCombineStoreAndExtract(Inst->getOperand(0)->getType(),
03753                                        Inst->getOperand(1), CombineCost)))
03754     return false;
03755 
03756   // At this point we know that Inst is a vector to scalar transition.
03757   // Try to move it down the def-use chain, until:
03758   // - We can combine the transition with its single use
03759   //   => we got rid of the transition.
03760   // - We escape the current basic block
03761   //   => we would need to check that we are moving it to a cheaper place and
03762   //      we do not do that for now.
03763   BasicBlock *Parent = Inst->getParent();
03764   DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n');
03765   VectorPromoteHelper VPH(*TLI, *TTI, Inst, CombineCost);
03766   // If the transition has more than one use, assume this is not going to be
03767   // beneficial.
03768   while (Inst->hasOneUse()) {
03769     Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin());
03770     DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n');
03771 
03772     if (ToBePromoted->getParent() != Parent) {
03773       DEBUG(dbgs() << "Instruction to promote is in a different block ("
03774                    << ToBePromoted->getParent()->getName()
03775                    << ") than the transition (" << Parent->getName() << ").\n");
03776       return false;
03777     }
03778 
03779     if (VPH.canCombine(ToBePromoted)) {
03780       DEBUG(dbgs() << "Assume " << *Inst << '\n'
03781                    << "will be combined with: " << *ToBePromoted << '\n');
03782       VPH.recordCombineInstruction(ToBePromoted);
03783       bool Changed = VPH.promote();
03784       NumStoreExtractExposed += Changed;
03785       return Changed;
03786     }
03787 
03788     DEBUG(dbgs() << "Try promoting.\n");
03789     if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted))
03790       return false;
03791 
03792     DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n");
03793 
03794     VPH.enqueueForPromotion(ToBePromoted);
03795     Inst = ToBePromoted;
03796   }
03797   return false;
03798 }
03799 
03800 bool CodeGenPrepare::OptimizeInst(Instruction *I) {
03801   if (PHINode *P = dyn_cast<PHINode>(I)) {
03802     // It is possible for very late stage optimizations (such as SimplifyCFG)
03803     // to introduce PHI nodes too late to be cleaned up.  If we detect such a
03804     // trivial PHI, go ahead and zap it here.
03805     if (Value *V = SimplifyInstruction(P, TLI ? TLI->getDataLayout() : nullptr,
03806                                        TLInfo, DT)) {
03807       P->replaceAllUsesWith(V);
03808       P->eraseFromParent();
03809       ++NumPHIsElim;
03810       return true;
03811     }
03812     return false;
03813   }
03814 
03815   if (CastInst *CI = dyn_cast<CastInst>(I)) {
03816     // If the source of the cast is a constant, then this should have
03817     // already been constant folded.  The only reason NOT to constant fold
03818     // it is if something (e.g. LSR) was careful to place the constant
03819     // evaluation in a block other than the one that uses it (e.g. to hoist
03820     // the address of globals out of a loop).  If this is the case, we don't
03821     // want to forward-subst the cast.
03822     if (isa<Constant>(CI->getOperand(0)))
03823       return false;
03824 
03825     if (TLI && OptimizeNoopCopyExpression(CI, *TLI))
03826       return true;
03827 
03828     if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
03829       /// Sink a zext or sext into its user blocks if the target type doesn't
03830       /// fit in one register
03831       if (TLI && TLI->getTypeAction(CI->getContext(),
03832                                     TLI->getValueType(CI->getType())) ==
03833                      TargetLowering::TypeExpandInteger) {
03834         return SinkCast(CI);
03835       } else {
03836         bool MadeChange = MoveExtToFormExtLoad(I);
03837         return MadeChange | OptimizeExtUses(I);
03838       }
03839     }
03840     return false;
03841   }
03842 
03843   if (CmpInst *CI = dyn_cast<CmpInst>(I))
03844     if (!TLI || !TLI->hasMultipleConditionRegisters())
03845       return OptimizeCmpExpression(CI);
03846 
03847   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
03848     if (TLI)
03849       return OptimizeMemoryInst(I, I->getOperand(0), LI->getType());
03850     return false;
03851   }
03852 
03853   if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
03854     if (TLI)
03855       return OptimizeMemoryInst(I, SI->getOperand(1),
03856                                 SI->getOperand(0)->getType());
03857     return false;
03858   }
03859 
03860   BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
03861 
03862   if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
03863                 BinOp->getOpcode() == Instruction::LShr)) {
03864     ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
03865     if (TLI && CI && TLI->hasExtractBitsInsn())
03866       return OptimizeExtractBits(BinOp, CI, *TLI);
03867 
03868     return false;
03869   }
03870 
03871   if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
03872     if (GEPI->hasAllZeroIndices()) {
03873       /// The GEP operand must be a pointer, so must its result -> BitCast
03874       Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
03875                                         GEPI->getName(), GEPI);
03876       GEPI->replaceAllUsesWith(NC);
03877       GEPI->eraseFromParent();
03878       ++NumGEPsElim;
03879       OptimizeInst(NC);
03880       return true;
03881     }
03882     return false;
03883   }
03884 
03885   if (CallInst *CI = dyn_cast<CallInst>(I))
03886     return OptimizeCallInst(CI);
03887 
03888   if (SelectInst *SI = dyn_cast<SelectInst>(I))
03889     return OptimizeSelectInst(SI);
03890 
03891   if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
03892     return OptimizeShuffleVectorInst(SVI);
03893 
03894   if (isa<ExtractElementInst>(I))
03895     return OptimizeExtractElementInst(I);
03896 
03897   return false;
03898 }
03899 
03900 // In this pass we look for GEP and cast instructions that are used
03901 // across basic blocks and rewrite them to improve basic-block-at-a-time
03902 // selection.
03903 bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
03904   SunkAddrs.clear();
03905   bool MadeChange = false;
03906 
03907   CurInstIterator = BB.begin();
03908   while (CurInstIterator != BB.end())
03909     MadeChange |= OptimizeInst(CurInstIterator++);
03910 
03911   MadeChange |= DupRetToEnableTailCallOpts(&BB);
03912 
03913   return MadeChange;
03914 }
03915 
03916 // If llvm.dbg.value is far away from the value, then ISel may not be able to
03917 // handle it properly. ISel will drop llvm.dbg.value if it cannot
03918 // find a node corresponding to the value.
03919 bool CodeGenPrepare::PlaceDbgValues(Function &F) {
03920   bool MadeChange = false;
03921   for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
03922     Instruction *PrevNonDbgInst = nullptr;
03923     for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) {
03924       Instruction *Insn = BI; ++BI;
03925       DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
03926       // Leave dbg.values that refer to an alloca alone. These
03927       // intrinsics describe the address of a variable (= the alloca)
03928       // being taken.  They should not be moved next to the alloca
03929       // (and to the beginning of the scope), but rather stay close to
03930       // where said address is used.
03931       if (!DVI || (DVI->getValue() && isa<AllocaInst>(DVI->getValue()))) {
03932         PrevNonDbgInst = Insn;
03933         continue;
03934       }
03935 
03936       Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue());
03937       if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) {
03938         DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI);
03939         DVI->removeFromParent();
03940         if (isa<PHINode>(VI))
03941           DVI->insertBefore(VI->getParent()->getFirstInsertionPt());
03942         else
03943           DVI->insertAfter(VI);
03944         MadeChange = true;
03945         ++NumDbgValueMoved;
03946       }
03947     }
03948   }
03949   return MadeChange;
03950 }
03951 
03952 // If a block ends by branching on a comparison of a single masked bit
03953 // against zero, and the target can fold the mask, compare, and branch into
03954 // a single instruction, sink the mask and compare into the blocks that
03955 // branch on the comparison. Do this before OptimizeBlock ->
03956 // OptimizeInst -> OptimizeCmpExpression, which perturbs the pattern being
03957 // searched for.
03958 bool CodeGenPrepare::sinkAndCmp(Function &F) {
03959   if (!EnableAndCmpSinking)
03960     return false;
03961   if (!TLI || !TLI->isMaskAndBranchFoldingLegal())
03962     return false;
03963   bool MadeChange = false;
03964   for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
03965     BasicBlock *BB = I++;
03966 
03967     // Does this BB end with the following?
03968     //   %andVal = and %val, #single-bit-set
03969     //   %icmpVal = icmp %andVal, 0
03970     //   br i1 %icmpVal, label %dest1, label %dest2
03971     BranchInst *Brcc = dyn_cast<BranchInst>(BB->getTerminator());
03972     if (!Brcc || !Brcc->isConditional())
03973       continue;
03974     ICmpInst *Cmp = dyn_cast<ICmpInst>(Brcc->getOperand(0));
03975     if (!Cmp || Cmp->getParent() != BB)
03976       continue;
03977     ConstantInt *Zero = dyn_cast<ConstantInt>(Cmp->getOperand(1));
03978     if (!Zero || !Zero->isZero())
03979       continue;
03980     Instruction *And = dyn_cast<Instruction>(Cmp->getOperand(0));
03981     if (!And || And->getOpcode() != Instruction::And || And->getParent() != BB)
03982       continue;
03983     ConstantInt *Mask = dyn_cast<ConstantInt>(And->getOperand(1));
03984     if (!Mask || !Mask->getUniqueInteger().isPowerOf2())
03985       continue;
03986     DEBUG(dbgs() << "found and; icmp ?,0; brcc\n"); DEBUG(BB->dump());
03987 
03988     // Push the "and; icmp" pair down to any users that are conditional branches.
03989     // Since there can only be one branch use per BB, we don't need to keep
03990     // track of which BBs we insert into.
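          // For example (an illustrative sketch): if %icmpVal is also branched on
          // in another block, that block receives its own copy of the pair:
          //   %andVal.copy  = and %val, #single-bit-set
          //   %icmpVal.copy = icmp %andVal.copy, 0
          //   br i1 %icmpVal.copy, label %destA, label %destB
          // which lets the target fold the mask, compare, and branch into one
          // test-bit-and-branch instruction. (The ".copy" names are illustrative;
          // the sunk instructions are created unnamed.)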
03991     for (Value::use_iterator UI = Cmp->use_begin(), E = Cmp->use_end();
03992          UI != E; ) {
03993       Use &TheUse = *UI;
03994       // Find brcc use.
03995       BranchInst *BrccUser = dyn_cast<BranchInst>(*UI);
03996       ++UI;
03997       if (!BrccUser || !BrccUser->isConditional())
03998         continue;
03999       BasicBlock *UserBB = BrccUser->getParent();
04000       if (UserBB == BB) continue;
04001       DEBUG(dbgs() << "found Brcc use\n");
04002 
04003       // Sink the "and; icmp" to use.
04004       MadeChange = true;
04005       BinaryOperator *NewAnd =
04006         BinaryOperator::CreateAnd(And->getOperand(0), And->getOperand(1), "",
04007                                   BrccUser);
04008       CmpInst *NewCmp =
04009         CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(), NewAnd, Zero,
04010                         "", BrccUser);
04011       TheUse = NewCmp;
04012       ++NumAndCmpsMoved;
04013       DEBUG(BrccUser->getParent()->dump());
04014     }
04015   }
04016   return MadeChange;
04017 }
04018 
04019 /// \brief Retrieve the probabilities of a conditional branch. Returns true on
04020 /// success, or false if the branch weight metadata is missing or invalid.
04021 static bool extractBranchMetadata(BranchInst *BI,
04022                                   uint64_t &ProbTrue, uint64_t &ProbFalse) {
04023   assert(BI->isConditional() &&
04024          "Looking for probabilities on unconditional branch?");
04025   auto *ProfileData = BI->getMetadata(LLVMContext::MD_prof);
04026   if (!ProfileData || ProfileData->getNumOperands() != 3)
04027     return false;
04028 
04029   const auto *CITrue =
04030       mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(1));
04031   const auto *CIFalse =
04032       mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(2));
04033   if (!CITrue || !CIFalse)
04034     return false;
04035 
04036   ProbTrue = CITrue->getValue().getZExtValue();
04037   ProbFalse = CIFalse->getValue().getZExtValue();
04038 
04039   return true;
04040 }
04041 
04042 /// \brief Scale down both weights to fit into uint32_t.
04043 static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
04044   uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
04045   uint32_t Scale = (NewMax / UINT32_MAX) + 1;
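        // Worked example (illustrative): with NewTrue = 2^33 and NewFalse = 2^32,
        // NewMax = 2^33 and Scale = 2^33 / UINT32_MAX + 1 = 3, so the weights
        // become roughly 2^33 / 3 and 2^32 / 3, both of which fit in a uint32_t
        // while approximately preserving their ratio.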
04046   NewTrue = NewTrue / Scale;
04047   NewFalse = NewFalse / Scale;
04048 }
04049 
04050 /// \brief Some targets prefer to split a conditional branch like:
04051 /// \code
04052 ///   %0 = icmp ne i32 %a, 0
04053 ///   %1 = icmp ne i32 %b, 0
04054 ///   %or.cond = or i1 %0, %1
04055 ///   br i1 %or.cond, label %TrueBB, label %FalseBB
04056 /// \endcode
04057 /// into multiple branch instructions like:
04058 /// \code
04059 ///   bb1:
04060 ///     %0 = icmp ne i32 %a, 0
04061 ///     br i1 %0, label %TrueBB, label %bb2
04062 ///   bb2:
04063 ///     %1 = icmp ne i32 %b, 0
04064 ///     br i1 %1, label %TrueBB, label %FalseBB
04065 /// \endcode
04066 /// This usually allows instruction selection to do even further optimizations
04067 /// and combine the compare with the branch instruction. Currently this is
04068 /// applied for targets which have "cheap" jump instructions.
04069 ///
04070 /// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
04071 ///
04072 bool CodeGenPrepare::splitBranchCondition(Function &F) {
04073   if (!TM || !TM->Options.EnableFastISel ||
04074       !TLI || TLI->isJumpExpensive())
04075     return false;
04076 
04077   bool MadeChange = false;
04078   for (auto &BB : F) {
04079     // Does this BB end with the following?
04080     //   %cond1 = icmp|fcmp|binary instruction ...
04081     //   %cond2 = icmp|fcmp|binary instruction ...
04082     //   %cond.or = or|and i1 %cond1, %cond2
04083     //   br i1 %cond.or, label %dest1, label %dest2
04084     BinaryOperator *LogicOp;
04085     BasicBlock *TBB, *FBB;
04086     if (!match(BB.getTerminator(), m_Br(m_OneUse(m_BinOp(LogicOp)), TBB, FBB)))
04087       continue;
04088 
04089     unsigned Opc;
04090     Value *Cond1, *Cond2;
04091     if (match(LogicOp, m_And(m_OneUse(m_Value(Cond1)),
04092                              m_OneUse(m_Value(Cond2)))))
04093       Opc = Instruction::And;
04094     else if (match(LogicOp, m_Or(m_OneUse(m_Value(Cond1)),
04095                                  m_OneUse(m_Value(Cond2)))))
04096       Opc = Instruction::Or;
04097     else
04098       continue;
04099 
04100     if (!match(Cond1, m_CombineOr(m_Cmp(), m_BinOp())) ||
04101         !match(Cond2, m_CombineOr(m_Cmp(), m_BinOp()))   )
04102       continue;
04103 
04104     DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());
04105 
04106     // Create a new BB.
04107     auto *InsertBefore = std::next(Function::iterator(BB))
04108         .getNodePtrUnchecked();
04109     auto TmpBB = BasicBlock::Create(BB.getContext(),
04110                                     BB.getName() + ".cond.split",
04111                                     BB.getParent(), InsertBefore);
04112 
04113     // Update the original basic block: make the branch instruction use the
04114     // first condition directly and remove the no-longer-needed and/or instruction.
04115     auto *Br1 = cast<BranchInst>(BB.getTerminator());
04116     Br1->setCondition(Cond1);
04117     LogicOp->eraseFromParent();
04118 
04119     // Depending on the condition we have to replace either the true or the false
04120     // successor of the original branch instruction.
04121     if (Opc == Instruction::And)
04122       Br1->setSuccessor(0, TmpBB);
04123     else
04124       Br1->setSuccessor(1, TmpBB);
04125 
04126     // Fill in the new basic block.
04127     auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond2, TBB, FBB);
04128     if (auto *I = dyn_cast<Instruction>(Cond2)) {
04129       I->removeFromParent();
04130       I->insertBefore(Br2);
04131     }
04132 
04133     // Update PHI nodes in both successors. The original BB needs to be
04134     // replaced in one successor's PHI nodes, because the branch now comes from
04135     // the newly generated BB (TmpBB). In the other successor we need to add one
04136     // incoming edge to the PHI nodes, because both branch instructions now
04137     // target the same successor. Depending on the original branch condition
04138     // (and/or) we have to swap the successors (TrueDest, FalseDest), so that
04139     // we perform the correct update for the PHI nodes.
04140     // This doesn't change the successor order of the just created branch
04141     // instruction (or any other instruction).
04142     if (Opc == Instruction::Or)
04143       std::swap(TBB, FBB);
04144 
04145     // Replace the old BB with the new BB in this successor's PHI nodes.
04146     for (auto &I : *TBB) {
04147       PHINode *PN = dyn_cast<PHINode>(&I);
04148       if (!PN)
04149         break;
04150       int i;
04151       while ((i = PN->getBasicBlockIndex(&BB)) >= 0)
04152         PN->setIncomingBlock(i, TmpBB);
04153     }
04154 
04155     // Add another incoming edge from the new BB.
04156     for (auto &I : *FBB) {
04157       PHINode *PN = dyn_cast<PHINode>(&I);
04158       if (!PN)
04159         break;
04160       auto *Val = PN->getIncomingValueForBlock(&BB);
04161       PN->addIncoming(Val, TmpBB);
04162     }
04163 
04164     // Update the branch weights (from SelectionDAGBuilder::
04165     // FindMergedConditions).
04166     if (Opc == Instruction::Or) {
04167       // Codegen X | Y as:
04168       // BB1:
04169       //   jmp_if_X TBB
04170       //   jmp TmpBB
04171       // TmpBB:
04172       //   jmp_if_Y TBB
04173       //   jmp FBB
04174       //
04175 
04176       // We have flexibility in setting Prob for BB1 and Prob for NewBB.
04177       // The requirement is that
04178       //   TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
04179       //     = TrueProb for original BB.
04180       // Assuming the original weights are A and B, one choice is to set BB1's
04181       // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
04182       // assumes that
04183       //   TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
04184       // Another choice is to assume TrueProb for BB1 equals TrueProb for
04185       // TmpBB, but the math is more complicated.
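            // Worked example (illustrative numbers): with original weights
            // A = 48 and B = 16, BB1 gets weights (48, 80) and TmpBB gets
            // (48, 32). Then
            //   TrueProb(BB1) + FalseProb(BB1) * TrueProb(TmpBB)
            //     = 48/128 + (80/128) * (48/80) = 0.375 + 0.375 = 0.75,
            // which equals TrueProb of the original BB, 48/64 = 0.75.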
04186       uint64_t TrueWeight, FalseWeight;
04187       if (extractBranchMetadata(Br1, TrueWeight, FalseWeight)) {
04188         uint64_t NewTrueWeight = TrueWeight;
04189         uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight;
04190         scaleWeights(NewTrueWeight, NewFalseWeight);
04191         Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext())
04192                          .createBranchWeights(NewTrueWeight, NewFalseWeight));
04193 
04194         NewTrueWeight = TrueWeight;
04195         NewFalseWeight = 2 * FalseWeight;
04196         scaleWeights(NewTrueWeight, NewFalseWeight);
04197         Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext())
04198                          .createBranchWeights(NewTrueWeight, NewFalseWeight));
04199       }
04200     } else {
04201       // Codegen X & Y as:
04202       // BB1:
04203       //   jmp_if_X TmpBB
04204       //   jmp FBB
04205       // TmpBB:
04206       //   jmp_if_Y TBB
04207       //   jmp FBB
04208       //
04209       // This requires creation of TmpBB after the original BB.
04210 
04211       // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
04212       // The requirement is that
04213       //   FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
04214       //     = FalseProb for original BB.
04215       // Assuming the original weights are A and B, one choice is to set BB1's
04216       // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
04217       // assumes that
04218       //   FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
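            // Worked example (illustrative numbers): with original weights
            // A = 48 and B = 16, BB1 gets weights (112, 16) and TmpBB gets
            // (96, 16). Then
            //   FalseProb(BB1) + TrueProb(BB1) * FalseProb(TmpBB)
            //     = 16/128 + (112/128) * (16/112) = 0.125 + 0.125 = 0.25,
            // which equals FalseProb of the original BB, 16/64 = 0.25.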
04219       uint64_t TrueWeight, FalseWeight;
04220       if (extractBranchMetadata(Br1, TrueWeight, FalseWeight)) {
04221         uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight;
04222         uint64_t NewFalseWeight = FalseWeight;
04223         scaleWeights(NewTrueWeight, NewFalseWeight);
04224         Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext())
04225                          .createBranchWeights(NewTrueWeight, NewFalseWeight));
04226 
04227         NewTrueWeight = 2 * TrueWeight;
04228         NewFalseWeight = FalseWeight;
04229         scaleWeights(NewTrueWeight, NewFalseWeight);
04230         Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext())
04231                          .createBranchWeights(NewTrueWeight, NewFalseWeight));
04232       }
04233     }
04234 
04235     // Request DOM Tree update.
04236     // Note: No point in getting fancy here, since the DT info is never
04237     // available to CodeGenPrepare and the existing update code is broken
04238     // anyway.
04239     ModifiedDT = true;
04240 
04241     MadeChange = true;
04242 
04243     DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();
04244           TmpBB->dump());
04245   }
04246   return MadeChange;
04247 }