LLVM API Documentation

CodeGenPrepare.cpp
00001 //===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This pass munges the code in the input function to better prepare it for
00011 // SelectionDAG-based code generation. This works around limitations in its
00012 // basic-block-at-a-time approach. It should eventually be removed.
00013 //
00014 //===----------------------------------------------------------------------===//
00015 
00016 #include "llvm/CodeGen/Passes.h"
00017 #include "llvm/ADT/DenseMap.h"
00018 #include "llvm/ADT/SmallSet.h"
00019 #include "llvm/ADT/Statistic.h"
00020 #include "llvm/Analysis/InstructionSimplify.h"
00021 #include "llvm/IR/CallSite.h"
00022 #include "llvm/IR/Constants.h"
00023 #include "llvm/IR/DataLayout.h"
00024 #include "llvm/IR/DerivedTypes.h"
00025 #include "llvm/IR/Dominators.h"
00026 #include "llvm/IR/Function.h"
00027 #include "llvm/IR/GetElementPtrTypeIterator.h"
00028 #include "llvm/IR/IRBuilder.h"
00029 #include "llvm/IR/InlineAsm.h"
00030 #include "llvm/IR/Instructions.h"
00031 #include "llvm/IR/IntrinsicInst.h"
00032 #include "llvm/IR/PatternMatch.h"
00033 #include "llvm/IR/ValueHandle.h"
00034 #include "llvm/IR/ValueMap.h"
00035 #include "llvm/Pass.h"
00036 #include "llvm/Support/CommandLine.h"
00037 #include "llvm/Support/Debug.h"
00038 #include "llvm/Support/raw_ostream.h"
00039 #include "llvm/Target/TargetLibraryInfo.h"
00040 #include "llvm/Target/TargetLowering.h"
00041 #include "llvm/Target/TargetSubtargetInfo.h"
00042 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
00043 #include "llvm/Transforms/Utils/BuildLibCalls.h"
00044 #include "llvm/Transforms/Utils/BypassSlowDivision.h"
00045 #include "llvm/Transforms/Utils/Local.h"
00046 using namespace llvm;
00047 using namespace llvm::PatternMatch;
00048 
00049 #define DEBUG_TYPE "codegenprepare"
00050 
00051 STATISTIC(NumBlocksElim, "Number of blocks eliminated");
00052 STATISTIC(NumPHIsElim,   "Number of trivial PHIs eliminated");
00053 STATISTIC(NumGEPsElim,   "Number of GEPs converted to casts");
00054 STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
00055                       "sunken Cmps");
00056 STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
00057                        "of sunken Casts");
00058 STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
00059                           "computations were sunk");
00060 STATISTIC(NumExtsMoved,  "Number of [s|z]ext instructions combined with loads");
00061 STATISTIC(NumExtUses,    "Number of uses of [s|z]ext instructions optimized");
00062 STATISTIC(NumRetsDup,    "Number of return instructions duplicated");
00063 STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
00064 STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
00065 STATISTIC(NumAndCmpsMoved, "Number of and/cmp's pushed into branches");
00066 
00067 static cl::opt<bool> DisableBranchOpts(
00068   "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
00069   cl::desc("Disable branch optimizations in CodeGenPrepare"));
00070 
00071 static cl::opt<bool> DisableSelectToBranch(
00072   "disable-cgp-select2branch", cl::Hidden, cl::init(false),
00073   cl::desc("Disable select to branch conversion."));
00074 
00075 static cl::opt<bool> AddrSinkUsingGEPs(
00076   "addr-sink-using-gep", cl::Hidden, cl::init(false),
00077   cl::desc("Address sinking in CGP using GEPs."));
00078 
00079 static cl::opt<bool> EnableAndCmpSinking(
00080    "enable-andcmp-sinking", cl::Hidden, cl::init(true),
00081    cl::desc("Enable sinking and/cmp into branches."));
00082 
00083 namespace {
00084 typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
00085 typedef DenseMap<Instruction *, Type *> InstrToOrigTy;
00086 
00087   class CodeGenPrepare : public FunctionPass {
00088     /// TLI - Keep a pointer to a TargetLowering to consult for determining
00089     /// transformation profitability.
00090     const TargetMachine *TM;
00091     const TargetLowering *TLI;
00092     const TargetLibraryInfo *TLInfo;
00093     DominatorTree *DT;
00094 
00095     /// CurInstIterator - As we scan instructions optimizing them, this is the
00096     /// next instruction to optimize.  Xforms that can invalidate this should
00097     /// update it.
00098     BasicBlock::iterator CurInstIterator;
00099 
00100     /// Keeps track of non-local addresses that have been sunk into a block.
00101     /// This allows us to avoid inserting duplicate code for blocks with
00102     /// multiple load/stores of the same address.
00103     ValueMap<Value*, Value*> SunkAddrs;
00104 
00105     /// Keeps track of all truncates inserted for the current function.
00106     SetOfInstrs InsertedTruncsSet;
00107     /// Keeps track of the types of the related instructions before their
00108     /// promotion for the current function.
00109     InstrToOrigTy PromotedInsts;
00110 
00111     /// ModifiedDT - If CFG is modified in any way, dominator tree may need to
00112     /// be updated.
00113     bool ModifiedDT;
00114 
00115     /// OptSize - True if optimizing for size.
00116     bool OptSize;
00117 
00118   public:
00119     static char ID; // Pass identification, replacement for typeid
00120     explicit CodeGenPrepare(const TargetMachine *TM = nullptr)
00121       : FunctionPass(ID), TM(TM), TLI(nullptr) {
00122         initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
00123       }
00124     bool runOnFunction(Function &F) override;
00125 
00126     const char *getPassName() const override { return "CodeGen Prepare"; }
00127 
00128     void getAnalysisUsage(AnalysisUsage &AU) const override {
00129       AU.addPreserved<DominatorTreeWrapperPass>();
00130       AU.addRequired<TargetLibraryInfo>();
00131     }
00132 
00133   private:
00134     bool EliminateFallThrough(Function &F);
00135     bool EliminateMostlyEmptyBlocks(Function &F);
00136     bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
00137     void EliminateMostlyEmptyBlock(BasicBlock *BB);
00138     bool OptimizeBlock(BasicBlock &BB);
00139     bool OptimizeInst(Instruction *I);
00140     bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy);
00141     bool OptimizeInlineAsmInst(CallInst *CS);
00142     bool OptimizeCallInst(CallInst *CI);
00143     bool MoveExtToFormExtLoad(Instruction *I);
00144     bool OptimizeExtUses(Instruction *I);
00145     bool OptimizeSelectInst(SelectInst *SI);
00146     bool OptimizeShuffleVectorInst(ShuffleVectorInst *SI);
00147     bool DupRetToEnableTailCallOpts(BasicBlock *BB);
00148     bool PlaceDbgValues(Function &F);
00149     bool sinkAndCmp(Function &F);
00150   };
00151 }
00152 
00153 char CodeGenPrepare::ID = 0;
00154 INITIALIZE_TM_PASS(CodeGenPrepare, "codegenprepare",
00155                    "Optimize for code generation", false, false)
00156 
00157 FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) {
00158   return new CodeGenPrepare(TM);
00159 }
00160 
00161 bool CodeGenPrepare::runOnFunction(Function &F) {
00162   if (skipOptnoneFunction(F))
00163     return false;
00164 
00165   bool EverMadeChange = false;
00166   // Clear per function information.
00167   InsertedTruncsSet.clear();
00168   PromotedInsts.clear();
00169 
00170   ModifiedDT = false;
00171   if (TM) TLI = TM->getTargetLowering();
00172   TLInfo = &getAnalysis<TargetLibraryInfo>();
00173   DominatorTreeWrapperPass *DTWP =
00174       getAnalysisIfAvailable<DominatorTreeWrapperPass>();
00175   DT = DTWP ? &DTWP->getDomTree() : nullptr;
00176   OptSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
00177                                            Attribute::OptimizeForSize);
00178 
00179   /// This optimization identifies DIV instructions that can be
00180   /// profitably bypassed and carried out with a shorter, faster divide.
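  // For example (illustrative), on a target that reports a 64-bit -> 32-bit
  // bypass width, a 64-bit udiv whose operands happen to fit in 32 bits can
  // be guarded by a runtime check and carried out with the narrower, faster
  // 32-bit divide; bypassSlowDivision inserts that check and the fast path.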
00181   if (!OptSize && TLI && TLI->isSlowDivBypassed()) {
00182     const DenseMap<unsigned int, unsigned int> &BypassWidths =
00183        TLI->getBypassSlowDivWidths();
00184     for (Function::iterator I = F.begin(); I != F.end(); I++)
00185       EverMadeChange |= bypassSlowDivision(F, I, BypassWidths);
00186   }
00187 
00188   // Eliminate blocks that contain only PHI nodes and an
00189   // unconditional branch.
00190   EverMadeChange |= EliminateMostlyEmptyBlocks(F);
00191 
00192   // If llvm.dbg.value is far away from the value, then ISel may not be able
00193   // to handle it properly. ISel will drop llvm.dbg.value if it cannot
00194   // find a node corresponding to the value.
00195   EverMadeChange |= PlaceDbgValues(F);
00196 
00197   // If there is a mask, compare against zero, and branch that can be combined
00198   // into a single target instruction, push the mask and compare into branch
00199   // users. Do this before OptimizeBlock -> OptimizeInst ->
00200   // OptimizeCmpExpression, which perturbs the pattern being searched for.
00201   if (!DisableBranchOpts)
00202     EverMadeChange |= sinkAndCmp(F);
00203 
00204   bool MadeChange = true;
00205   while (MadeChange) {
00206     MadeChange = false;
00207     for (Function::iterator I = F.begin(); I != F.end(); ) {
00208       BasicBlock *BB = I++;
00209       MadeChange |= OptimizeBlock(*BB);
00210     }
00211     EverMadeChange |= MadeChange;
00212   }
00213 
00214   SunkAddrs.clear();
00215 
00216   if (!DisableBranchOpts) {
00217     MadeChange = false;
00218     SmallPtrSet<BasicBlock*, 8> WorkList;
00219     for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
00220       SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
00221       MadeChange |= ConstantFoldTerminator(BB, true);
00222       if (!MadeChange) continue;
00223 
00224       for (SmallVectorImpl<BasicBlock*>::iterator
00225              II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
00226         if (pred_begin(*II) == pred_end(*II))
00227           WorkList.insert(*II);
00228     }
00229 
00230     // Delete the dead blocks and any of their dead successors.
00231     MadeChange |= !WorkList.empty();
00232     while (!WorkList.empty()) {
00233       BasicBlock *BB = *WorkList.begin();
00234       WorkList.erase(BB);
00235       SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
00236 
00237       DeleteDeadBlock(BB);
00238 
00239       for (SmallVectorImpl<BasicBlock*>::iterator
00240              II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
00241         if (pred_begin(*II) == pred_end(*II))
00242           WorkList.insert(*II);
00243     }
00244 
00245     // Merge pairs of basic blocks with unconditional branches, connected by
00246     // a single edge.
00247     if (EverMadeChange || MadeChange)
00248       MadeChange |= EliminateFallThrough(F);
00249 
00250     if (MadeChange)
00251       ModifiedDT = true;
00252     EverMadeChange |= MadeChange;
00253   }
00254 
00255   if (ModifiedDT && DT)
00256     DT->recalculate(F);
00257 
00258   return EverMadeChange;
00259 }
00260 
00261 /// EliminateFallThrough - Merge basic blocks which are connected
00262 /// by a single edge, where one of the basic blocks has a single successor
00263 /// pointing to the other basic block, which has a single predecessor.
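/// For illustration (hypothetical IR), a fall-through pair such as
/// @code
/// bb1:
///   ...
///   br label %bb2
/// bb2:                                  ; preds = %bb1 (only predecessor)
///   ...
/// @endcode
/// is collapsed into a single block.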
00264 bool CodeGenPrepare::EliminateFallThrough(Function &F) {
00265   bool Changed = false;
00266   // Scan all of the blocks in the function, except for the entry block.
00267   for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) {
00268     BasicBlock *BB = I++;
00269     // If the destination block has a single pred, then this is a trivial
00270     // edge, just collapse it.
00271     BasicBlock *SinglePred = BB->getSinglePredecessor();
00272 
00273     // Don't merge if BB's address is taken.
00274     if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue;
00275 
00276     BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
00277     if (Term && !Term->isConditional()) {
00278       Changed = true;
00279       DEBUG(dbgs() << "To merge:\n"<< *SinglePred << "\n\n\n");
00280       // Remember if SinglePred was the entry block of the function.
00281       // If so, we will need to move BB back to the entry position.
00282       bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
00283       MergeBasicBlockIntoOnlyPred(BB, this);
00284 
00285       if (isEntry && BB != &BB->getParent()->getEntryBlock())
00286         BB->moveBefore(&BB->getParent()->getEntryBlock());
00287 
00288       // We have erased a block. Update the iterator.
00289       I = BB;
00290     }
00291   }
00292   return Changed;
00293 }
00294 
00295 /// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes,
00296 /// debug info directives, and an unconditional branch.  Passes before isel
00297 /// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for
00298 /// isel.  Start by eliminating these blocks so we can split them the way we
00299 /// want them.
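/// A hypothetical example of a block this removes:
/// @code
/// bb:                                   ; preds = %entry
///   %p = phi i32 [ 0, %entry ]
///   br label %dest
/// @endcode
/// where %p is only used by phi nodes in %dest (see CanMergeBlocks).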
00300 bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
00301   bool MadeChange = false;
00302   // Note that this intentionally skips the entry block.
00303   for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) {
00304     BasicBlock *BB = I++;
00305 
00306     // If this block doesn't end with an uncond branch, ignore it.
00307     BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
00308     if (!BI || !BI->isUnconditional())
00309       continue;
00310 
00311     // If the instruction before the branch (skipping debug info) isn't a phi
00312     // node, then other stuff is happening here.
00313     BasicBlock::iterator BBI = BI;
00314     if (BBI != BB->begin()) {
00315       --BBI;
00316       while (isa<DbgInfoIntrinsic>(BBI)) {
00317         if (BBI == BB->begin())
00318           break;
00319         --BBI;
00320       }
00321       if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
00322         continue;
00323     }
00324 
00325     // Do not break infinite loops.
00326     BasicBlock *DestBB = BI->getSuccessor(0);
00327     if (DestBB == BB)
00328       continue;
00329 
00330     if (!CanMergeBlocks(BB, DestBB))
00331       continue;
00332 
00333     EliminateMostlyEmptyBlock(BB);
00334     MadeChange = true;
00335   }
00336   return MadeChange;
00337 }
00338 
00339 /// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a
00340 /// single uncond branch between them, and BB contains no other non-phi
00341 /// instructions.
00342 bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB,
00343                                     const BasicBlock *DestBB) const {
00344   // We only want to eliminate blocks whose phi nodes are used by phi nodes in
00345   // the successor.  If there are more complex conditions (e.g. preheaders),
00346   // don't mess around with them.
00347   BasicBlock::const_iterator BBI = BB->begin();
00348   while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
00349     for (const User *U : PN->users()) {
00350       const Instruction *UI = cast<Instruction>(U);
00351       if (UI->getParent() != DestBB || !isa<PHINode>(UI))
00352         return false;
00353       // If User is inside DestBB block and it is a PHINode then check
00354       // incoming value. If incoming value is not from BB then this is
00355       // a complex condition (e.g. preheaders) we want to avoid here.
00356       if (UI->getParent() == DestBB) {
00357         if (const PHINode *UPN = dyn_cast<PHINode>(UI))
00358           for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
00359             Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
00360             if (Insn && Insn->getParent() == BB &&
00361                 Insn->getParent() != UPN->getIncomingBlock(I))
00362               return false;
00363           }
00364       }
00365     }
00366   }
00367 
00368   // If BB and DestBB contain any common predecessors, then the phi nodes in BB
00369   // and DestBB may have conflicting incoming values for the block.  If so, we
00370   // can't merge the block.
00371   const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
00372   if (!DestBBPN) return true;  // no conflict.
00373 
00374   // Collect the preds of BB.
00375   SmallPtrSet<const BasicBlock*, 16> BBPreds;
00376   if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
00377     // It is faster to get preds from a PHI than with pred_iterator.
00378     for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
00379       BBPreds.insert(BBPN->getIncomingBlock(i));
00380   } else {
00381     BBPreds.insert(pred_begin(BB), pred_end(BB));
00382   }
00383 
00384   // Walk the preds of DestBB.
00385   for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
00386     BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
00387     if (BBPreds.count(Pred)) {   // Common predecessor?
00388       BBI = DestBB->begin();
00389       while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
00390         const Value *V1 = PN->getIncomingValueForBlock(Pred);
00391         const Value *V2 = PN->getIncomingValueForBlock(BB);
00392 
00393         // If V2 is a phi node in BB, look up what the mapped value will be.
00394         if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
00395           if (V2PN->getParent() == BB)
00396             V2 = V2PN->getIncomingValueForBlock(Pred);
00397 
00398         // If there is a conflict, bail out.
00399         if (V1 != V2) return false;
00400       }
00401     }
00402   }
00403 
00404   return true;
00405 }
00406 
00407 
00408 /// EliminateMostlyEmptyBlock - Eliminate a basic block that has only phi's and
00409 /// an unconditional branch in it.
00410 void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
00411   BranchInst *BI = cast<BranchInst>(BB->getTerminator());
00412   BasicBlock *DestBB = BI->getSuccessor(0);
00413 
00414   DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB);
00415 
00416   // If the destination block has a single pred, then this is a trivial edge,
00417   // just collapse it.
00418   if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
00419     if (SinglePred != DestBB) {
00420       // Remember if SinglePred was the entry block of the function.  If so, we
00421       // will need to move BB back to the entry position.
00422       bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
00423       MergeBasicBlockIntoOnlyPred(DestBB, this);
00424 
00425       if (isEntry && BB != &BB->getParent()->getEntryBlock())
00426         BB->moveBefore(&BB->getParent()->getEntryBlock());
00427 
00428       DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
00429       return;
00430     }
00431   }
00432 
00433   // Otherwise, we have multiple predecessors of BB.  Update the PHIs in DestBB
00434   // to handle the new incoming edges it is about to have.
00435   PHINode *PN;
00436   for (BasicBlock::iterator BBI = DestBB->begin();
00437        (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
00438     // Remove the incoming value for BB, and remember it.
00439     Value *InVal = PN->removeIncomingValue(BB, false);
00440 
00441     // Two options: either the InVal is a phi node defined in BB or it is some
00442     // value that dominates BB.
00443     PHINode *InValPhi = dyn_cast<PHINode>(InVal);
00444     if (InValPhi && InValPhi->getParent() == BB) {
00445       // Add all of the input values of the input PHI as inputs of this phi.
00446       for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
00447         PN->addIncoming(InValPhi->getIncomingValue(i),
00448                         InValPhi->getIncomingBlock(i));
00449     } else {
00450       // Otherwise, add one instance of the dominating value for each edge that
00451       // we will be adding.
00452       if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
00453         for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
00454           PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
00455       } else {
00456         for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
00457           PN->addIncoming(InVal, *PI);
00458       }
00459     }
00460   }
00461 
00462   // The PHIs are now updated, change everything that refers to BB to use
00463   // DestBB and remove BB.
00464   BB->replaceAllUsesWith(DestBB);
00465   if (DT && !ModifiedDT) {
00466     BasicBlock *BBIDom  = DT->getNode(BB)->getIDom()->getBlock();
00467     BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock();
00468     BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom);
00469     DT->changeImmediateDominator(DestBB, NewIDom);
00470     DT->eraseNode(BB);
00471   }
00472   BB->eraseFromParent();
00473   ++NumBlocksElim;
00474 
00475   DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
00476 }
00477 
00478 /// SinkCast - Sink the specified cast instruction into its user blocks
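/// For illustration (hypothetical IR), a cast whose only use is in another
/// block:
/// @code
/// def:
///   %c = bitcast i8* %p to i32*
///   br label %use
/// use:
///   %v = load i32* %c
/// @endcode
/// gets a copy of the cast materialized in %use, so the cast result does not
/// have to be live across the edge.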
00479 static bool SinkCast(CastInst *CI) {
00480   BasicBlock *DefBB = CI->getParent();
00481 
00482   /// InsertedCasts - Only insert a cast in each block once.
00483   DenseMap<BasicBlock*, CastInst*> InsertedCasts;
00484 
00485   bool MadeChange = false;
00486   for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
00487        UI != E; ) {
00488     Use &TheUse = UI.getUse();
00489     Instruction *User = cast<Instruction>(*UI);
00490 
00491     // Figure out which BB this cast is used in.  For PHI's this is the
00492     // appropriate predecessor block.
00493     BasicBlock *UserBB = User->getParent();
00494     if (PHINode *PN = dyn_cast<PHINode>(User)) {
00495       UserBB = PN->getIncomingBlock(TheUse);
00496     }
00497 
00498     // Preincrement use iterator so we don't invalidate it.
00499     ++UI;
00500 
00501     // If this user is in the same block as the cast, don't change the cast.
00502     if (UserBB == DefBB) continue;
00503 
00504     // If we have already inserted a cast into this block, use it.
00505     CastInst *&InsertedCast = InsertedCasts[UserBB];
00506 
00507     if (!InsertedCast) {
00508       BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
00509       InsertedCast =
00510         CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
00511                          InsertPt);
00512       MadeChange = true;
00513     }
00514 
00515     // Replace a use of the cast with a use of the new cast.
00516     TheUse = InsertedCast;
00517     ++NumCastUses;
00518   }
00519 
00520   // If we removed all uses, nuke the cast.
00521   if (CI->use_empty()) {
00522     CI->eraseFromParent();
00523     MadeChange = true;
00524   }
00525 
00526   return MadeChange;
00527 }
00528 
00529 /// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
00530 /// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
00531 /// sink it into user blocks to reduce the number of virtual
00532 /// registers that must be created and coalesced.
00533 ///
00534 /// Return true if any changes are made.
00535 ///
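/// For example, on a hypothetical target where i8 is promoted to i32, the
/// truncate
/// @code
///   %t = trunc i32 %x to i8
/// @endcode
/// is a noop copy after promotion and is therefore sunk via SinkCast.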
00536 static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
00537   // If this is a noop copy,
00538   EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
00539   EVT DstVT = TLI.getValueType(CI->getType());
00540 
00541   // This is an fp<->int conversion?
00542   if (SrcVT.isInteger() != DstVT.isInteger())
00543     return false;
00544 
00545   // If this is an extension, it will be a zero or sign extension, which
00546   // isn't a noop.
00547   if (SrcVT.bitsLT(DstVT)) return false;
00548 
00549   // If these values will be promoted, find out what they will be promoted
00550   // to.  This helps us consider truncates on PPC as noop copies when they
00551   // are.
00552   if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
00553       TargetLowering::TypePromoteInteger)
00554     SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
00555   if (TLI.getTypeAction(CI->getContext(), DstVT) ==
00556       TargetLowering::TypePromoteInteger)
00557     DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
00558 
00559   // If, after promotion, these are the same types, this is a noop copy.
00560   if (SrcVT != DstVT)
00561     return false;
00562 
00563   return SinkCast(CI);
00564 }
00565 
00566 /// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce
00567 /// the number of virtual registers that must be created and coalesced.  This is
00568 /// a clear win except on targets with multiple condition code registers
00569 ///  (PowerPC), where it might lose; some adjustment may be wanted there.
00570 ///
00571 /// Return true if any changes are made.
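/// For illustration (hypothetical IR), a compare whose only users live in
/// other blocks:
/// @code
/// def:
///   %c = icmp eq i32 %a, %b
///   br label %use
/// use:
///   br i1 %c, label %t, label %f
/// @endcode
/// is re-created in %use so the i1 result does not have to be kept live
/// across the edge.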
00572 static bool OptimizeCmpExpression(CmpInst *CI) {
00573   BasicBlock *DefBB = CI->getParent();
00574 
00575   /// InsertedCmp - Only insert a cmp in each block once.
00576   DenseMap<BasicBlock*, CmpInst*> InsertedCmps;
00577 
00578   bool MadeChange = false;
00579   for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
00580        UI != E; ) {
00581     Use &TheUse = UI.getUse();
00582     Instruction *User = cast<Instruction>(*UI);
00583 
00584     // Preincrement use iterator so we don't invalidate it.
00585     ++UI;
00586 
00587     // Don't bother for PHI nodes.
00588     if (isa<PHINode>(User))
00589       continue;
00590 
00591     // Figure out which BB this cmp is used in.
00592     BasicBlock *UserBB = User->getParent();
00593 
00594     // If this user is in the same block as the cmp, don't change the cmp.
00595     if (UserBB == DefBB) continue;
00596 
00597     // If we have already inserted a cmp into this block, use it.
00598     CmpInst *&InsertedCmp = InsertedCmps[UserBB];
00599 
00600     if (!InsertedCmp) {
00601       BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
00602       InsertedCmp =
00603         CmpInst::Create(CI->getOpcode(),
00604                         CI->getPredicate(),  CI->getOperand(0),
00605                         CI->getOperand(1), "", InsertPt);
00606       MadeChange = true;
00607     }
00608 
00609     // Replace a use of the cmp with a use of the new cmp.
00610     TheUse = InsertedCmp;
00611     ++NumCmpUses;
00612   }
00613 
00614   // If we removed all uses, nuke the cmp.
00615   if (CI->use_empty())
00616     CI->eraseFromParent();
00617 
00618   return MadeChange;
00619 }
00620 
00621 /// isExtractBitsCandidateUse - Check if the candidates could
00622 /// be combined with shift instruction, which includes:
00623 /// 1. Truncate instruction
00624 /// 2. And instruction and the imm is a mask of the low bits:
00625 /// imm & (imm+1) == 0
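/// For instance, 0x0000ffff is such a mask (0xffff & 0x10000 == 0), whereas
/// 0x00ff00ff is not (0x00ff00ff & 0x00ff0100 != 0).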
00626 static bool isExtractBitsCandidateUse(Instruction *User) {
00627   if (!isa<TruncInst>(User)) {
00628     if (User->getOpcode() != Instruction::And ||
00629         !isa<ConstantInt>(User->getOperand(1)))
00630       return false;
00631 
00632     const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue();
00633 
00634     if ((Cimm & (Cimm + 1)).getBoolValue())
00635       return false;
00636   }
00637   return true;
00638 }
00639 
00640 /// SinkShiftAndTruncate - sink both shift and truncate instruction
00641 /// to the use of truncate's BB.
00642 static bool
00643 SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
00644                      DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
00645                      const TargetLowering &TLI) {
00646   BasicBlock *UserBB = User->getParent();
00647   DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
00648   TruncInst *TruncI = dyn_cast<TruncInst>(User);
00649   bool MadeChange = false;
00650 
00651   for (Value::user_iterator TruncUI = TruncI->user_begin(),
00652                             TruncE = TruncI->user_end();
00653        TruncUI != TruncE;) {
00654 
00655     Use &TruncTheUse = TruncUI.getUse();
00656     Instruction *TruncUser = cast<Instruction>(*TruncUI);
00657     // Preincrement use iterator so we don't invalidate it.
00658 
00659     ++TruncUI;
00660 
00661     int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
00662     if (!ISDOpcode)
00663       continue;
00664 
00665     // If the use is actually a legal node, there will not be an implicit
00666     // truncate.
00667     if (TLI.isOperationLegalOrCustom(ISDOpcode,
00668                                      EVT::getEVT(TruncUser->getType())))
00669       continue;
00670 
00671     // Don't bother for PHI nodes.
00672     if (isa<PHINode>(TruncUser))
00673       continue;
00674 
00675     BasicBlock *TruncUserBB = TruncUser->getParent();
00676 
00677     if (UserBB == TruncUserBB)
00678       continue;
00679 
00680     BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
00681     CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
00682 
00683     if (!InsertedShift && !InsertedTrunc) {
00684       BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
00685       // Sink the shift
00686       if (ShiftI->getOpcode() == Instruction::AShr)
00687         InsertedShift =
00688             BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt);
00689       else
00690         InsertedShift =
00691             BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt);
00692 
00693       // Sink the trunc
00694       BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
00695       TruncInsertPt++;
00696 
00697       InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
00698                                        TruncI->getType(), "", TruncInsertPt);
00699 
00700       MadeChange = true;
00701 
00702       TruncTheUse = InsertedTrunc;
00703     }
00704   }
00705   return MadeChange;
00706 }
00707 
00708 /// OptimizeExtractBits - sink the shift *right* instruction into user blocks if
00709 /// the uses could potentially be combined with this shift instruction and
00710 /// generate BitExtract instruction. It will only be applied if the architecture
00711 /// supports BitExtract instruction. Here is an example:
00712 /// BB1:
00713 ///   %x.extract.shift = lshr i64 %arg1, 32
00714 /// BB2:
00715 ///   %x.extract.trunc = trunc i64 %x.extract.shift to i16
00716 /// ==>
00717 ///
00718 /// BB2:
00719 ///   %x.extract.shift.1 = lshr i64 %arg1, 32
00720 ///   %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
00721 ///
00722 /// CodeGen will recognize the pattern in BB2 and generate BitExtract
00723 /// instruction.
00724 /// Return true if any changes are made.
00725 static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
00726                                 const TargetLowering &TLI) {
00727   BasicBlock *DefBB = ShiftI->getParent();
00728 
00729   /// Only insert instructions in each block once.
00730   DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;
00731 
00732   bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(ShiftI->getType()));
00733 
00734   bool MadeChange = false;
00735   for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
00736        UI != E;) {
00737     Use &TheUse = UI.getUse();
00738     Instruction *User = cast<Instruction>(*UI);
00739     // Preincrement use iterator so we don't invalidate it.
00740     ++UI;
00741 
00742     // Don't bother for PHI nodes.
00743     if (isa<PHINode>(User))
00744       continue;
00745 
00746     if (!isExtractBitsCandidateUse(User))
00747       continue;
00748 
00749     BasicBlock *UserBB = User->getParent();
00750 
00751     if (UserBB == DefBB) {
00752       // If the shift and truncate instructions are in the same BB, the use of
00753       // the truncate (TruncUse) may still introduce another truncate if it is
00754       // not legal. In this case, we would like to sink both the shift and the
00755       // truncate instruction to the BB of TruncUse.
00756       // for example:
00757       // BB1:
00758       // i64 shift.result = lshr i64 opnd, imm
00759       // trunc.result = trunc shift.result to i16
00760       //
00761       // BB2:
00762       //   ----> We will have an implicit truncate here if the architecture does
00763       //   not have i16 compare.
00764       // cmp i16 trunc.result, opnd2
00765       //
00766       if (isa<TruncInst>(User) && shiftIsLegal
00767           // If the type of the truncate is legal, no truncate will be
00768           // introduced in other basic blocks.
00769           && (!TLI.isTypeLegal(TLI.getValueType(User->getType()))))
00770         MadeChange =
00771             SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI);
00772 
00773       continue;
00774     }
00775     // If we have already inserted a shift into this block, use it.
00776     BinaryOperator *&InsertedShift = InsertedShifts[UserBB];
00777 
00778     if (!InsertedShift) {
00779       BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
00780 
00781       if (ShiftI->getOpcode() == Instruction::AShr)
00782         InsertedShift =
00783             BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt);
00784       else
00785         InsertedShift =
00786             BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt);
00787 
00788       MadeChange = true;
00789     }
00790 
00791     // Replace a use of the shift with a use of the new shift.
00792     TheUse = InsertedShift;
00793   }
00794 
00795   // If we removed all uses, nuke the shift.
00796   if (ShiftI->use_empty())
00797     ShiftI->eraseFromParent();
00798 
00799   return MadeChange;
00800 }
00801 
00802 namespace {
00803 class CodeGenPrepareFortifiedLibCalls : public SimplifyFortifiedLibCalls {
00804 protected:
00805   void replaceCall(Value *With) override {
00806     CI->replaceAllUsesWith(With);
00807     CI->eraseFromParent();
00808   }
00809   bool isFoldable(unsigned SizeCIOp, unsigned, bool) const override {
00810       if (ConstantInt *SizeCI =
00811                              dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp)))
00812         return SizeCI->isAllOnesValue();
00813     return false;
00814   }
00815 };
00816 } // end anonymous namespace
00817 
00818 bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
00819   BasicBlock *BB = CI->getParent();
00820 
00821   // Lower inline assembly if we can.
00822   // If we find an inline asm expression, and if the target knows how to
00823   // lower it to normal LLVM code, do so now.
00824   if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
00825     if (TLI->ExpandInlineAsm(CI)) {
00826       // Avoid invalidating the iterator.
00827       CurInstIterator = BB->begin();
00828       // Avoid processing instructions out of order, which could cause
00829       // reuse before a value is defined.
00830       SunkAddrs.clear();
00831       return true;
00832     }
00833     // Sink address computing for memory operands into the block.
00834     if (OptimizeInlineAsmInst(CI))
00835       return true;
00836   }
00837 
00838   // Lower all uses of llvm.objectsize.*
00839   IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
00840   if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
00841     bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
00842     Type *ReturnTy = CI->getType();
00843     Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
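    // For example (illustrative): a call to @llvm.objectsize.i64(i8* %p, i1 true)
    // whose size cannot be determined is folded to 0 (the conservative
    // minimum), while the i1 false form is folded to -1.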
00844 
00845     // Substituting this can cause recursive simplifications, which can
00846     // invalidate our iterator.  Use a WeakVH to hold onto it in case this
00847     // happens.
00848     WeakVH IterHandle(CurInstIterator);
00849 
00850     replaceAndRecursivelySimplify(CI, RetVal,
00851                                   TLI ? TLI->getDataLayout() : nullptr,
00852                                   TLInfo, ModifiedDT ? nullptr : DT);
00853 
00854     // If the iterator instruction was recursively deleted, start over at the
00855     // start of the block.
00856     if (IterHandle != CurInstIterator) {
00857       CurInstIterator = BB->begin();
00858       SunkAddrs.clear();
00859     }
00860     return true;
00861   }
00862 
00863   if (II && TLI) {
00864     SmallVector<Value*, 2> PtrOps;
00865     Type *AccessTy;
00866     if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy))
00867       while (!PtrOps.empty())
00868         if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy))
00869           return true;
00870   }
00871 
00872   // From here on out we're working with named functions.
00873   if (!CI->getCalledFunction()) return false;
00874 
00875   // We'll need DataLayout from here on out.
00876   const DataLayout *TD = TLI ? TLI->getDataLayout() : nullptr;
00877   if (!TD) return false;
00878 
00879   // Lower all default uses of _chk calls.  This is very similar
00880   // to what InstCombineCalls does, but here we are only lowering calls
00881   // that have the default "don't know" as the objectsize.  Anything else
00882   // should be left alone.
00883   CodeGenPrepareFortifiedLibCalls Simplifier;
00884   return Simplifier.fold(CI, TD, TLInfo);
00885 }
00886 
00887 /// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return
00888 /// instructions to the predecessor to enable tail call optimizations. The
00889 /// case it is currently looking for is:
00890 /// @code
00891 /// bb0:
00892 ///   %tmp0 = tail call i32 @f0()
00893 ///   br label %return
00894 /// bb1:
00895 ///   %tmp1 = tail call i32 @f1()
00896 ///   br label %return
00897 /// bb2:
00898 ///   %tmp2 = tail call i32 @f2()
00899 ///   br label %return
00900 /// return:
00901 ///   %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
00902 ///   ret i32 %retval
00903 /// @endcode
00904 ///
00905 /// =>
00906 ///
00907 /// @code
00908 /// bb0:
00909 ///   %tmp0 = tail call i32 @f0()
00910 ///   ret i32 %tmp0
00911 /// bb1:
00912 ///   %tmp1 = tail call i32 @f1()
00913 ///   ret i32 %tmp1
00914 /// bb2:
00915 ///   %tmp2 = tail call i32 @f2()
00916 ///   ret i32 %tmp2
00917 /// @endcode
00918 bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) {
00919   if (!TLI)
00920     return false;
00921 
00922   ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
00923   if (!RI)
00924     return false;
00925 
00926   PHINode *PN = nullptr;
00927   BitCastInst *BCI = nullptr;
00928   Value *V = RI->getReturnValue();
00929   if (V) {
00930     BCI = dyn_cast<BitCastInst>(V);
00931     if (BCI)
00932       V = BCI->getOperand(0);
00933 
00934     PN = dyn_cast<PHINode>(V);
00935     if (!PN)
00936       return false;
00937   }
00938 
00939   if (PN && PN->getParent() != BB)
00940     return false;
00941 
00942   // It's not safe to eliminate the sign / zero extension of the return value.
00943   // See llvm::isInTailCallPosition().
00944   const Function *F = BB->getParent();
00945   AttributeSet CallerAttrs = F->getAttributes();
00946   if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) ||
00947       CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
00948     return false;
00949 
00950   // Make sure there are no instructions between the PHI and return, or that the
00951   // return is the first instruction in the block.
00952   if (PN) {
00953     BasicBlock::iterator BI = BB->begin();
00954     do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
00955     if (&*BI == BCI)
00956       // Also skip over the bitcast.
00957       ++BI;
00958     if (&*BI != RI)
00959       return false;
00960   } else {
00961     BasicBlock::iterator BI = BB->begin();
00962     while (isa<DbgInfoIntrinsic>(BI)) ++BI;
00963     if (&*BI != RI)
00964       return false;
00965   }
00966 
00967   /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
00968   /// call.
00969   SmallVector<CallInst*, 4> TailCalls;
00970   if (PN) {
00971     for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
00972       CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I));
00973       // Make sure the phi value is indeed produced by the tail call.
00974       if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) &&
00975           TLI->mayBeEmittedAsTailCall(CI))
00976         TailCalls.push_back(CI);
00977     }
00978   } else {
00979     SmallPtrSet<BasicBlock*, 4> VisitedBBs;
00980     for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
00981       if (!VisitedBBs.insert(*PI))
00982         continue;
00983 
00984       BasicBlock::InstListType &InstList = (*PI)->getInstList();
00985       BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin();
00986       BasicBlock::InstListType::reverse_iterator RE = InstList.rend();
00987       do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI));
00988       if (RI == RE)
00989         continue;
00990 
00991       CallInst *CI = dyn_cast<CallInst>(&*RI);
00992       if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI))
00993         TailCalls.push_back(CI);
00994     }
00995   }
00996 
00997   bool Changed = false;
00998   for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) {
00999     CallInst *CI = TailCalls[i];
01000     CallSite CS(CI);
01001 
01002     // Conservatively require the attributes of the call to match those of the
01003     // return. Ignore noalias because it doesn't affect the call sequence.
01004     AttributeSet CalleeAttrs = CS.getAttributes();
01005     if (AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
01006           removeAttribute(Attribute::NoAlias) !=
01007         AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
01008           removeAttribute(Attribute::NoAlias))
01009       continue;
01010 
01011     // Make sure the call instruction is followed by an unconditional branch to
01012     // the return block.
01013     BasicBlock *CallBB = CI->getParent();
01014     BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator());
01015     if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
01016       continue;
01017 
01018     // Duplicate the return into CallBB.
01019     (void)FoldReturnIntoUncondBranch(RI, BB, CallBB);
01020     ModifiedDT = Changed = true;
01021     ++NumRetsDup;
01022   }
01023 
01024   // If we eliminated all predecessors of the block, delete the block now.
01025   if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
01026     BB->eraseFromParent();
01027 
01028   return Changed;
01029 }
01030 
01031 //===----------------------------------------------------------------------===//
01032 // Memory Optimization
01033 //===----------------------------------------------------------------------===//
01034 
01035 namespace {
01036 
01037 /// ExtAddrMode - This is an extended version of TargetLowering::AddrMode
01038 /// which holds actual Value*'s for register values.
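/// For example (illustrative), an address of the form [%base + 4*%idx + 16]
/// would be represented with BaseReg = %base, ScaledReg = %idx, Scale = 4 and
/// BaseOffs = 16.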
01039 struct ExtAddrMode : public TargetLowering::AddrMode {
01040   Value *BaseReg;
01041   Value *ScaledReg;
01042   ExtAddrMode() : BaseReg(nullptr), ScaledReg(nullptr) {}
01043   void print(raw_ostream &OS) const;
01044   void dump() const;
01045 
01046   bool operator==(const ExtAddrMode& O) const {
01047     return (BaseReg == O.BaseReg) && (ScaledReg == O.ScaledReg) &&
01048            (BaseGV == O.BaseGV) && (BaseOffs == O.BaseOffs) &&
01049            (HasBaseReg == O.HasBaseReg) && (Scale == O.Scale);
01050   }
01051 };
01052 
01053 #ifndef NDEBUG
01054 static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
01055   AM.print(OS);
01056   return OS;
01057 }
01058 #endif
01059 
01060 void ExtAddrMode::print(raw_ostream &OS) const {
01061   bool NeedPlus = false;
01062   OS << "[";
01063   if (BaseGV) {
01064     OS << (NeedPlus ? " + " : "")
01065        << "GV:";
01066     BaseGV->printAsOperand(OS, /*PrintType=*/false);
01067     NeedPlus = true;
01068   }
01069 
01070   if (BaseOffs) {
01071     OS << (NeedPlus ? " + " : "")
01072        << BaseOffs;
01073     NeedPlus = true;
01074   }
01075 
01076   if (BaseReg) {
01077     OS << (NeedPlus ? " + " : "")
01078        << "Base:";
01079     BaseReg->printAsOperand(OS, /*PrintType=*/false);
01080     NeedPlus = true;
01081   }
01082   if (Scale) {
01083     OS << (NeedPlus ? " + " : "")
01084        << Scale << "*";
01085     ScaledReg->printAsOperand(OS, /*PrintType=*/false);
01086   }
01087 
01088   OS << ']';
01089 }
01090 
01091 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
01092 void ExtAddrMode::dump() const {
01093   print(dbgs());
01094   dbgs() << '\n';
01095 }
01096 #endif
01097 
01098 /// \brief This class provides transaction based operation on the IR.
01099 /// Every change made through this class is recorded in the internal state and
01100 /// can be undone (rollback) until commit is called.
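/// A hypothetical usage sketch (Inst, NewTy and the profitability check are
/// placeholders):
/// \code
///   TypePromotionTransaction TPT;
///   TypePromotionTransaction::ConstRestorationPt Pt =
///       TPT.getRestorationPoint();
///   TPT.mutateType(Inst, NewTy); // recorded, reversible change
///   if (!Profitable)
///     TPT.rollback(Pt);          // undo everything made after Pt
///   else
///     TPT.commit();              // keep all recorded changes
/// \endcode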
01101 class TypePromotionTransaction {
01102 
01103   /// \brief This represents the common interface of the individual transaction.
01104   /// Each class implements the logic for doing one specific modification on
01105   /// the IR via the TypePromotionTransaction.
01106   class TypePromotionAction {
01107   protected:
01108     /// The Instruction modified.
01109     Instruction *Inst;
01110 
01111   public:
01112     /// \brief Constructor of the action.
01113     /// The constructor performs the related action on the IR.
01114     TypePromotionAction(Instruction *Inst) : Inst(Inst) {}
01115 
01116     virtual ~TypePromotionAction() {}
01117 
01118     /// \brief Undo the modification done by this action.
01119     /// When this method is called, the IR must be in the same state as it was
01120     /// before this action was applied.
01121     /// \pre Undoing the action works if and only if the IR is in the exact same
01122     /// state as it was directly after this action was applied.
01123     virtual void undo() = 0;
01124 
01125     /// \brief Advocate every change made by this action.
01126     /// When the results on the IR of the action are to be kept, it is important
01127     /// to call this function, otherwise hidden information may be kept forever.
01128     virtual void commit() {
01129       // Nothing to be done, this action is not doing anything.
01130     }
01131   };
01132 
01133   /// \brief Utility to remember the position of an instruction.
01134   class InsertionHandler {
01135     /// Position of an instruction.
01136     /// Either an instruction:
01137     /// - Is the first in a basic block: BB is used.
01138     /// - Has a previous instruction: PrevInst is used.
01139     union {
01140       Instruction *PrevInst;
01141       BasicBlock *BB;
01142     } Point;
01143     /// Remember whether or not the instruction had a previous instruction.
01144     bool HasPrevInstruction;
01145 
01146   public:
01147     /// \brief Record the position of \p Inst.
01148     InsertionHandler(Instruction *Inst) {
01149       BasicBlock::iterator It = Inst;
01150       HasPrevInstruction = (It != (Inst->getParent()->begin()));
01151       if (HasPrevInstruction)
01152         Point.PrevInst = --It;
01153       else
01154         Point.BB = Inst->getParent();
01155     }
01156 
01157     /// \brief Insert \p Inst at the recorded position.
01158     void insert(Instruction *Inst) {
01159       if (HasPrevInstruction) {
01160         if (Inst->getParent())
01161           Inst->removeFromParent();
01162         Inst->insertAfter(Point.PrevInst);
01163       } else {
01164         Instruction *Position = Point.BB->getFirstInsertionPt();
01165         if (Inst->getParent())
01166           Inst->moveBefore(Position);
01167         else
01168           Inst->insertBefore(Position);
01169       }
01170     }
01171   };
01172 
01173   /// \brief Move an instruction before another.
01174   class InstructionMoveBefore : public TypePromotionAction {
01175     /// Original position of the instruction.
01176     InsertionHandler Position;
01177 
01178   public:
01179     /// \brief Move \p Inst before \p Before.
01180     InstructionMoveBefore(Instruction *Inst, Instruction *Before)
01181         : TypePromotionAction(Inst), Position(Inst) {
01182       DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before << "\n");
01183       Inst->moveBefore(Before);
01184     }
01185 
01186     /// \brief Move the instruction back to its original position.
01187     void undo() override {
01188       DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n");
01189       Position.insert(Inst);
01190     }
01191   };
01192 
01193   /// \brief Set the operand of an instruction with a new value.
01194   class OperandSetter : public TypePromotionAction {
01195     /// Original operand of the instruction.
01196     Value *Origin;
01197     /// Index of the modified instruction.
01198     unsigned Idx;
01199 
01200   public:
01201     /// \brief Set \p Idx operand of \p Inst with \p NewVal.
01202     OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal)
01203         : TypePromotionAction(Inst), Idx(Idx) {
01204       DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n"
01205                    << "for:" << *Inst << "\n"
01206                    << "with:" << *NewVal << "\n");
01207       Origin = Inst->getOperand(Idx);
01208       Inst->setOperand(Idx, NewVal);
01209     }
01210 
01211     /// \brief Restore the original value of the instruction.
01212     void undo() override {
01213       DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n"
01214                    << "for: " << *Inst << "\n"
01215                    << "with: " << *Origin << "\n");
01216       Inst->setOperand(Idx, Origin);
01217     }
01218   };
01219 
01220   /// \brief Hide the operands of an instruction.
01221   /// Do as if this instruction was not using any of its operands.
01222   class OperandsHider : public TypePromotionAction {
01223     /// The list of original operands.
01224     SmallVector<Value *, 4> OriginalValues;
01225 
01226   public:
01227     /// \brief Remove \p Inst from the uses of the operands of \p Inst.
01228     OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) {
01229       DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n");
01230       unsigned NumOpnds = Inst->getNumOperands();
01231       OriginalValues.reserve(NumOpnds);
01232       for (unsigned It = 0; It < NumOpnds; ++It) {
01233         // Save the current operand.
01234         Value *Val = Inst->getOperand(It);
01235         OriginalValues.push_back(Val);
01236         // Set a dummy one.
01237         // We could use OperandSetter here, but that would imply an overhead
01238         // that we are not willing to pay.
01239         Inst->setOperand(It, UndefValue::get(Val->getType()));
01240       }
01241     }
01242 
01243     /// \brief Restore the original list of uses.
01244     void undo() override {
01245       DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n");
01246       for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It)
01247         Inst->setOperand(It, OriginalValues[It]);
01248     }
01249   };
01250 
01251   /// \brief Build a truncate instruction.
01252   class TruncBuilder : public TypePromotionAction {
01253   public:
01254     /// \brief Build a truncate instruction of \p Opnd producing a \p Ty
01255     /// result.
01256     /// trunc Opnd to Ty.
01257     TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
01258       IRBuilder<> Builder(Opnd);
01259       Inst = cast<Instruction>(Builder.CreateTrunc(Opnd, Ty, "promoted"));
01260       DEBUG(dbgs() << "Do: TruncBuilder: " << *Inst << "\n");
01261     }
01262 
01263     /// \brief Get the built instruction.
01264     Instruction *getBuiltInstruction() { return Inst; }
01265 
01266     /// \brief Remove the built instruction.
01267     void undo() override {
01268       DEBUG(dbgs() << "Undo: TruncBuilder: " << *Inst << "\n");
01269       Inst->eraseFromParent();
01270     }
01271   };
01272 
01273   /// \brief Build a sign extension instruction.
01274   class SExtBuilder : public TypePromotionAction {
01275   public:
01276     /// \brief Build a sign extension instruction of \p Opnd producing a \p Ty
01277     /// result.
01278     /// sext Opnd to Ty.
01279     SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
01280         : TypePromotionAction(InsertPt) {
01281       IRBuilder<> Builder(InsertPt);
01282       Inst = cast<Instruction>(Builder.CreateSExt(Opnd, Ty, "promoted"));
01283       DEBUG(dbgs() << "Do: SExtBuilder: " << *Inst << "\n");
01284     }
01285 
01286     /// \brief Get the built instruction.
01287     Instruction *getBuiltInstruction() { return Inst; }
01288 
01289     /// \brief Remove the built instruction.
01290     void undo() override {
01291       DEBUG(dbgs() << "Undo: SExtBuilder: " << *Inst << "\n");
01292       Inst->eraseFromParent();
01293     }
01294   };
01295 
01296   /// \brief Mutate an instruction to another type.
01297   class TypeMutator : public TypePromotionAction {
01298     /// Record the original type.
01299     Type *OrigTy;
01300 
01301   public:
01302     /// \brief Mutate the type of \p Inst into \p NewTy.
01303     TypeMutator(Instruction *Inst, Type *NewTy)
01304         : TypePromotionAction(Inst), OrigTy(Inst->getType()) {
01305       DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy
01306                    << "\n");
01307       Inst->mutateType(NewTy);
01308     }
01309 
01310     /// \brief Mutate the instruction back to its original type.
01311     void undo() override {
01312       DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy
01313                    << "\n");
01314       Inst->mutateType(OrigTy);
01315     }
01316   };
01317 
01318   /// \brief Replace the uses of an instruction by another instruction.
01319   class UsesReplacer : public TypePromotionAction {
01320     /// Helper structure to keep track of the replaced uses.
01321     struct InstructionAndIdx {
01322       /// The instruction using the instruction.
01323       Instruction *Inst;
01324       /// The index where this instruction is used for Inst.
01325       unsigned Idx;
01326       InstructionAndIdx(Instruction *Inst, unsigned Idx)
01327           : Inst(Inst), Idx(Idx) {}
01328     };
01329 
01330     /// Keep track of the original uses (pair Instruction, Index).
01331     SmallVector<InstructionAndIdx, 4> OriginalUses;
01332     typedef SmallVectorImpl<InstructionAndIdx>::iterator use_iterator;
01333 
01334   public:
01335     /// \brief Replace all the use of \p Inst by \p New.
01336     UsesReplacer(Instruction *Inst, Value *New) : TypePromotionAction(Inst) {
01337       DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New
01338                    << "\n");
01339       // Record the original uses.
01340       for (Use &U : Inst->uses()) {
01341         Instruction *UserI = cast<Instruction>(U.getUser());
01342         OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo()));
01343       }
01344       // Now, we can replace the uses.
01345       Inst->replaceAllUsesWith(New);
01346     }
01347 
01348     /// \brief Reassign the original uses of Inst to Inst.
01349     void undo() override {
01350       DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
01351       for (use_iterator UseIt = OriginalUses.begin(),
01352                         EndIt = OriginalUses.end();
01353            UseIt != EndIt; ++UseIt) {
01354         UseIt->Inst->setOperand(UseIt->Idx, Inst);
01355       }
01356     }
01357   };
01358 
01359   /// \brief Remove an instruction from the IR.
01360   class InstructionRemover : public TypePromotionAction {
01361     /// Original position of the instruction.
01362     InsertionHandler Inserter;
01363     /// Helper structure to hide all the link to the instruction. In other
01364     /// words, this helps to do as if the instruction was removed.
01365     OperandsHider Hider;
01366     /// Keep track of the uses replaced, if any.
01367     UsesReplacer *Replacer;
01368 
01369   public:
01370     /// \brief Remove all references of \p Inst and optionally replace all its
01371     /// uses with New.
01372     /// \pre If !Inst->use_empty(), then New != nullptr
01373     InstructionRemover(Instruction *Inst, Value *New = nullptr)
01374         : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
01375           Replacer(nullptr) {
01376       if (New)
01377         Replacer = new UsesReplacer(Inst, New);
01378       DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
01379       Inst->removeFromParent();
01380     }
01381 
01382     ~InstructionRemover() { delete Replacer; }
01383 
01384     /// \brief Really remove the instruction.
01385     void commit() override { delete Inst; }
01386 
01387     /// \brief Resurrect the instruction and reassign it to its original uses if
01388     /// a new value was provided when this action was built.
01389     void undo() override {
01390       DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
01391       Inserter.insert(Inst);
01392       if (Replacer)
01393         Replacer->undo();
01394       Hider.undo();
01395     }
01396   };
01397 
01398 public:
01399   /// Restoration point.
01400   /// The restoration point is a pointer to an action instead of an iterator
01401   /// because the iterator may be invalidated but not the pointer.
01402   typedef const TypePromotionAction *ConstRestorationPt;
01403   /// Commit all the changes made in this transaction.
01404   void commit();
01405   /// Undo all the changes made after the given point.
01406   void rollback(ConstRestorationPt Point);
01407   /// Get the current restoration point.
01408   ConstRestorationPt getRestorationPoint() const;
01409 
01410   /// \name API for IR modification with state keeping to support rollback.
01411   /// @{
01412   /// Same as Instruction::setOperand.
01413   void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
01414   /// Same as Instruction::eraseFromParent.
01415   void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
01416   /// Same as Value::replaceAllUsesWith.
01417   void replaceAllUsesWith(Instruction *Inst, Value *New);
01418   /// Same as Value::mutateType.
01419   void mutateType(Instruction *Inst, Type *NewTy);
01420   /// Same as IRBuilder::createTrunc.
01421   Instruction *createTrunc(Instruction *Opnd, Type *Ty);
01422   /// Same as IRBuilder::createSExt.
01423   Instruction *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
01424   /// Same as Instruction::moveBefore.
01425   void moveBefore(Instruction *Inst, Instruction *Before);
01426   /// @}
01427 
01428 private:
01429   /// The ordered list of actions made so far.
01430   SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
01431   typedef SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator CommitPt;
01432 };
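
The restoration-point API declared above follows a save/try/rollback pattern. The snippet below is an editorial sketch only (it is not part of CodeGenPrepare.cpp; the helper name tryWidenType and the Profitable flag are hypothetical) showing how a caller is expected to drive the transaction, using only members declared in this class:

// Editorial sketch only -- hypothetical caller of TypePromotionTransaction.
static void tryWidenType(TypePromotionTransaction &TPT, Instruction *Inst,
                         Type *WideTy, bool Profitable) {
  // Remember where we are before touching the IR.
  TypePromotionTransaction::ConstRestorationPt SavePt =
      TPT.getRestorationPoint();
  // Every mutation goes through the transaction so it can be undone.
  TPT.mutateType(Inst, WideTy);
  if (!Profitable)
    TPT.rollback(SavePt);   // undo every action recorded after SavePt
  // Otherwise leave the actions pending; the caller eventually calls
  // TPT.commit() once the whole addressing-mode match has succeeded.
}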
01433 
01434 void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
01435                                           Value *NewVal) {
01436   Actions.push_back(
01437       make_unique<TypePromotionTransaction::OperandSetter>(Inst, Idx, NewVal));
01438 }
01439 
01440 void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
01441                                                 Value *NewVal) {
01442   Actions.push_back(
01443       make_unique<TypePromotionTransaction::InstructionRemover>(Inst, NewVal));
01444 }
01445 
01446 void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
01447                                                   Value *New) {
01448   Actions.push_back(make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
01449 }
01450 
01451 void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
01452   Actions.push_back(make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
01453 }
01454 
01455 Instruction *TypePromotionTransaction::createTrunc(Instruction *Opnd,
01456                                                    Type *Ty) {
01457   std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
01458   Instruction *I = Ptr->getBuiltInstruction();
01459   Actions.push_back(std::move(Ptr));
01460   return I;
01461 }
01462 
01463 Instruction *TypePromotionTransaction::createSExt(Instruction *Inst,
01464                                                   Value *Opnd, Type *Ty) {
01465   std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
01466   Instruction *I = Ptr->getBuiltInstruction();
01467   Actions.push_back(std::move(Ptr));
01468   return I;
01469 }
01470 
01471 void TypePromotionTransaction::moveBefore(Instruction *Inst,
01472                                           Instruction *Before) {
01473   Actions.push_back(
01474       make_unique<TypePromotionTransaction::InstructionMoveBefore>(Inst, Before));
01475 }
01476 
01477 TypePromotionTransaction::ConstRestorationPt
01478 TypePromotionTransaction::getRestorationPoint() const {
01479   return !Actions.empty() ? Actions.back().get() : nullptr;
01480 }
01481 
01482 void TypePromotionTransaction::commit() {
01483   for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt;
01484        ++It)
01485     (*It)->commit();
01486   Actions.clear();
01487 }
01488 
01489 void TypePromotionTransaction::rollback(
01490     TypePromotionTransaction::ConstRestorationPt Point) {
01491   while (!Actions.empty() && Point != Actions.back().get()) {
01492     std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
01493     Curr->undo();
01494   }
01495 }
01496 
01497 /// \brief A helper class for matching addressing modes.
01498 ///
01499 /// This encapsulates the logic for matching the target-legal addressing modes.
01500 class AddressingModeMatcher {
01501   SmallVectorImpl<Instruction*> &AddrModeInsts;
01502   const TargetLowering &TLI;
01503 
01504   /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
01505   /// the memory instruction that we're computing this address for.
01506   Type *AccessTy;
01507   Instruction *MemoryInst;
01508 
01509   /// AddrMode - This is the addressing mode that we're building up.  This is
01510   /// part of the return value of this addressing mode matching stuff.
01511   ExtAddrMode &AddrMode;
01512 
01513   /// The truncate instructions inserted by other CodeGenPrepare optimizations.
01514   const SetOfInstrs &InsertedTruncs;
01515   /// A map from the instructions to their type before promotion.
01516   InstrToOrigTy &PromotedInsts;
01517   /// The ongoing transaction where every action should be registered.
01518   TypePromotionTransaction &TPT;
01519 
01520   /// IgnoreProfitability - This is set to true when we should not do
01521   /// profitability checks.  When true, IsProfitableToFoldIntoAddressingMode
01522   /// always returns true.
01523   bool IgnoreProfitability;
01524 
01525   AddressingModeMatcher(SmallVectorImpl<Instruction*> &AMI,
01526                         const TargetLowering &T, Type *AT,
01527                         Instruction *MI, ExtAddrMode &AM,
01528                         const SetOfInstrs &InsertedTruncs,
01529                         InstrToOrigTy &PromotedInsts,
01530                         TypePromotionTransaction &TPT)
01531       : AddrModeInsts(AMI), TLI(T), AccessTy(AT), MemoryInst(MI), AddrMode(AM),
01532         InsertedTruncs(InsertedTruncs), PromotedInsts(PromotedInsts), TPT(TPT) {
01533     IgnoreProfitability = false;
01534   }
01535 public:
01536 
01537   /// Match - Find the maximal addressing mode that a load/store of V can fold,
01538   /// given an access type of AccessTy.  This returns a list of the involved
01539   /// instructions in AddrModeInsts.
01540   /// \p InsertedTruncs The truncate instructions inserted by other
01541   /// CodeGenPrepare optimizations.
01542   /// \p PromotedInsts maps the instructions to their type before
01543   /// promotion.
01544   /// \p TPT The ongoing transaction where every action should be registered.
01545   static ExtAddrMode Match(Value *V, Type *AccessTy,
01546                            Instruction *MemoryInst,
01547                            SmallVectorImpl<Instruction*> &AddrModeInsts,
01548                            const TargetLowering &TLI,
01549                            const SetOfInstrs &InsertedTruncs,
01550                            InstrToOrigTy &PromotedInsts,
01551                            TypePromotionTransaction &TPT) {
01552     ExtAddrMode Result;
01553 
01554     bool Success = AddressingModeMatcher(AddrModeInsts, TLI, AccessTy,
01555                                          MemoryInst, Result, InsertedTruncs,
01556                                          PromotedInsts, TPT).MatchAddr(V, 0);
01557     (void)Success; assert(Success && "Couldn't select *anything*?");
01558     return Result;
01559   }
01560 private:
01561   bool MatchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
01562   bool MatchAddr(Value *V, unsigned Depth);
01563   bool MatchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth,
01564                           bool *MovedAway = nullptr);
01565   bool IsProfitableToFoldIntoAddressingMode(Instruction *I,
01566                                             ExtAddrMode &AMBefore,
01567                                             ExtAddrMode &AMAfter);
01568   bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
01569   bool IsPromotionProfitable(unsigned MatchedSize, unsigned SizeWithPromotion,
01570                              Value *PromotedOperand) const;
01571 };
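
For orientation, here is a hedged editorial sketch of how the static Match entry point above is driven; OptimizeMemoryInst later in this file makes the same call for real, and the variable names here mirror that call site:

// Editorial sketch only -- mirrors the real call in OptimizeMemoryInst below.
SmallVector<Instruction *, 16> NewAddrModeInsts;
ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
    V, AccessTy, MemoryInst, NewAddrModeInsts, *TLI, InsertedTruncsSet,
    PromotedInsts, TPT);
// On return, NewAddrMode describes the maximal legal addressing mode for V,
// and NewAddrModeInsts lists the instructions that would be folded into it.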
01572 
01573 /// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode.
01574 /// Return true and update AddrMode if this addr mode is legal for the target,
01575 /// false if not.
01576 bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
01577                                              unsigned Depth) {
01578   // If Scale is 1, then this is the same as adding ScaleReg to the addressing
01579   // mode.  Just process that directly.
01580   if (Scale == 1)
01581     return MatchAddr(ScaleReg, Depth);
01582 
01583   // If the scale is 0, it takes nothing to add this.
01584   if (Scale == 0)
01585     return true;
01586 
01587   // If we already have a scale of this value, we can add to it, otherwise, we
01588   // need an available scale field.
01589   if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
01590     return false;
01591 
01592   ExtAddrMode TestAddrMode = AddrMode;
01593 
01594   // Add scale to turn X*4+X*3 -> X*7.  This could also do things like
01595   // [A+B + A*7] -> [B+A*8].
01596   TestAddrMode.Scale += Scale;
01597   TestAddrMode.ScaledReg = ScaleReg;
01598 
01599   // If the new address isn't legal, bail out.
01600   if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy))
01601     return false;
01602 
01603   // It was legal, so commit it.
01604   AddrMode = TestAddrMode;
01605 
01606   // Okay, we decided that we can add ScaleReg+Scale to AddrMode.  Check now
01607   // to see if ScaleReg is actually X+C.  If so, we can turn this into adding
01608   // X*Scale + C*Scale to addr mode.
01609   ConstantInt *CI = nullptr; Value *AddLHS = nullptr;
01610   if (isa<Instruction>(ScaleReg) &&  // not a constant expr.
01611       match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
01612     TestAddrMode.ScaledReg = AddLHS;
01613     TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
01614 
01615     // If this addressing mode is legal, commit it and remember that we folded
01616     // this instruction.
01617     if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) {
01618       AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
01619       AddrMode = TestAddrMode;
01620       return true;
01621     }
01622   }
01623 
01624   // Otherwise, not (x+c)*scale, just return what we have.
01625   return true;
01626 }
01627 
01628 /// MightBeFoldableInst - This is a little filter, which returns true if an
01629 /// addressing computation involving I might be folded into a load/store
01630 /// accessing it.  This doesn't need to be perfect, but needs to accept at least
01631 /// the set of instructions that MatchOperationAddr can.
01632 static bool MightBeFoldableInst(Instruction *I) {
01633   switch (I->getOpcode()) {
01634   case Instruction::BitCast:
01635   case Instruction::AddrSpaceCast:
01636     // Don't touch identity bitcasts.
01637     if (I->getType() == I->getOperand(0)->getType())
01638       return false;
01639     return I->getType()->isPointerTy() || I->getType()->isIntegerTy();
01640   case Instruction::PtrToInt:
01641     // PtrToInt is always a noop, as we know that the int type is pointer sized.
01642     return true;
01643   case Instruction::IntToPtr:
01644     // We know the input is intptr_t, so this is foldable.
01645     return true;
01646   case Instruction::Add:
01647     return true;
01648   case Instruction::Mul:
01649   case Instruction::Shl:
01650     // Can only handle X*C and X << C.
01651     return isa<ConstantInt>(I->getOperand(1));
01652   case Instruction::GetElementPtr:
01653     return true;
01654   default:
01655     return false;
01656   }
01657 }
01658 
01659 /// \brief Helper class to perform type promotion.
01660 class TypePromotionHelper {
01661   /// \brief Utility function to check whether or not a sign extension of
01662   /// \p Inst with \p ConsideredSExtType can be moved through \p Inst by either
01663   /// using the operands of \p Inst or promoting \p Inst.
01664   /// In other words, check if:
01665   /// sext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredSExtType.
01666   /// #1 Promotion applies:
01667   /// ConsideredSExtType Inst (sext opnd1 to ConsideredSExtType, ...).
01668   /// #2 Operand reuses:
01669   /// sext opnd1 to ConsideredSExtType.
01670   /// \p PromotedInsts maps the instructions to their type before promotion.
01671   static bool canGetThrough(const Instruction *Inst, Type *ConsideredSExtType,
01672                             const InstrToOrigTy &PromotedInsts);
01673 
01674   /// \brief Utility function to determine if \p OpIdx should be promoted when
01675   /// promoting \p Inst.
01676   static bool shouldSExtOperand(const Instruction *Inst, int OpIdx) {
01677     if (isa<SelectInst>(Inst) && OpIdx == 0)
01678       return false;
01679     return true;
01680   }
01681 
01682   /// \brief Utility function to promote the operand of \p SExt when this
01683   /// operand is a promotable trunc or sext.
01684   /// \p PromotedInsts maps the instructions to their type before promotion.
01685   /// \p CreatedInsts[out] contains how many non-free instructions have been
01686   /// created to promote the operand of SExt.
01687   /// Should never be called directly.
01688   /// \return The promoted value which is used instead of SExt.
01689   static Value *promoteOperandForTruncAndSExt(Instruction *SExt,
01690                                               TypePromotionTransaction &TPT,
01691                                               InstrToOrigTy &PromotedInsts,
01692                                               unsigned &CreatedInsts);
01693 
01694   /// \brief Utility function to promote the operand of \p SExt when this
01695   /// operand is promotable and is not a supported trunc or sext.
01696   /// \p PromotedInsts maps the instructions to their type before promotion.
01697   /// \p CreatedInsts[out] contains how many non-free instructions have been
01698   /// created to promote the operand of SExt.
01699   /// Should never be called directly.
01700   /// \return The promoted value which is used instead of SExt.
01701   static Value *promoteOperandForOther(Instruction *SExt,
01702                                        TypePromotionTransaction &TPT,
01703                                        InstrToOrigTy &PromotedInsts,
01704                                        unsigned &CreatedInsts);
01705 
01706 public:
01707   /// Type for the utility function that promotes the operand of SExt.
01708   typedef Value *(*Action)(Instruction *SExt, TypePromotionTransaction &TPT,
01709                            InstrToOrigTy &PromotedInsts,
01710                            unsigned &CreatedInsts);
01711   /// \brief Given a sign extend instruction \p SExt, return the appropriate
01712   /// action to promote the operand of \p SExt instead of using SExt.
01713   /// \return NULL if no promotable action is possible with the current
01714   /// sign extension.
01715   /// \p InsertedTruncs keeps track of all the truncate instructions inserted by
01716   /// the other CodeGenPrepare optimizations. This information is important
01717   /// because we do not want to promote these instructions, as CodeGenPrepare
01718   /// would reinsert them later, creating an infinite create/remove loop.
01719   /// \p PromotedInsts maps the instructions to their type before promotion.
01720   static Action getAction(Instruction *SExt, const SetOfInstrs &InsertedTruncs,
01721                           const TargetLowering &TLI,
01722                           const InstrToOrigTy &PromotedInsts);
01723 };
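
The Action returned by getAction is a plain function pointer, so the intended use is a query-then-apply sequence. A minimal editorial sketch follows (not part of this file; MatchOperationAddr later in this file performs the same steps on a real sext):

// Editorial sketch only -- query for a promotion action, then apply it.
TypePromotionHelper::Action TPH =
    TypePromotionHelper::getAction(SExt, InsertedTruncs, TLI, PromotedInsts);
if (TPH) {
  unsigned CreatedInsts = 0;
  // The action rewrites the IR through the transaction and reports how many
  // non-free instructions it had to create.
  Value *PromotedOperand = TPH(SExt, TPT, PromotedInsts, CreatedInsts);
  // PromotedOperand now stands in for SExt in the addressing computation.
}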
01724 
01725 bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
01726                                         Type *ConsideredSExtType,
01727                                         const InstrToOrigTy &PromotedInsts) {
01728   // We can always get through sext.
01729   if (isa<SExtInst>(Inst))
01730     return true;
01731 
01732   // We can get through a binary operator if doing so is legal. In other words,
01733   // the binary operator must have a nuw or nsw flag.
01734   const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
01735   if (BinOp && isa<OverflowingBinaryOperator>(BinOp) &&
01736       (BinOp->hasNoUnsignedWrap() || BinOp->hasNoSignedWrap()))
01737     return true;
01738 
01739   // Check if we can do the following simplification.
01740   // sext(trunc(sext)) --> sext
01741   if (!isa<TruncInst>(Inst))
01742     return false;
01743 
01744   Value *OpndVal = Inst->getOperand(0);
01745   // Check if we can use this operand in the sext.
01746   // If the type is larger than the result type of the sign extension,
01747   // we cannot.
01748   if (OpndVal->getType()->getIntegerBitWidth() >
01749       ConsideredSExtType->getIntegerBitWidth())
01750     return false;
01751 
01752   // If the operand of the truncate is not an instruction, we will not have
01753   // any information on the dropped bits.
01754   // (Actually we could for constants, but it is not worth the extra logic).
01755   Instruction *Opnd = dyn_cast<Instruction>(OpndVal);
01756   if (!Opnd)
01757     return false;
01758 
01759   // Check if the source of the truncated value is narrow enough,
01760   // i.e., check that the trunc just drops sign extended bits.
01761   // #1 get the type of the operand.
01762   const Type *OpndType;
01763   InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
01764   if (It != PromotedInsts.end())
01765     OpndType = It->second;
01766   else if (isa<SExtInst>(Opnd))
01767     OpndType = cast<Instruction>(Opnd)->getOperand(0)->getType();
01768   else
01769     return false;
01770 
01771   // #2 check that the truncate just drops sign extended bits.
01772   if (Inst->getType()->getIntegerBitWidth() >= OpndType->getIntegerBitWidth())
01773     return true;
01774 
01775   return false;
01776 }
01777 
01778 TypePromotionHelper::Action TypePromotionHelper::getAction(
01779     Instruction *SExt, const SetOfInstrs &InsertedTruncs,
01780     const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
01781   Instruction *SExtOpnd = dyn_cast<Instruction>(SExt->getOperand(0));
01782   Type *SExtTy = SExt->getType();
01783   // If the operand of the sign extension is not an instruction, we cannot
01784   // get through.
01785   // If it is, check whether we can get through it.
01786   if (!SExtOpnd || !canGetThrough(SExtOpnd, SExtTy, PromotedInsts))
01787     return nullptr;
01788 
01789   // Do not promote if the operand has been added by codegenprepare.
01790   // Otherwise, it means we are undoing an optimization that is likely to be
01791   // redone, thus causing a potential infinite loop.
01792   if (isa<TruncInst>(SExtOpnd) && InsertedTruncs.count(SExtOpnd))
01793     return nullptr;
01794 
01795   // SExt or Trunc instructions.
01796   // Return the related handler.
01797   if (isa<SExtInst>(SExtOpnd) || isa<TruncInst>(SExtOpnd))
01798     return promoteOperandForTruncAndSExt;
01799 
01800   // Regular instruction.
01801   // Abort early if we will have to insert non-free instructions.
01802   if (!SExtOpnd->hasOneUse() &&
01803       !TLI.isTruncateFree(SExtTy, SExtOpnd->getType()))
01804     return nullptr;
01805   return promoteOperandForOther;
01806 }
01807 
01808 Value *TypePromotionHelper::promoteOperandForTruncAndSExt(
01809     llvm::Instruction *SExt, TypePromotionTransaction &TPT,
01810     InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts) {
01811   // By construction, the operand of SExt is an instruction. Otherwise we cannot
01812   // get through it and this method should not be called.
01813   Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
01814   // Replace sext(trunc(opnd)) or sext(sext(opnd))
01815   // => sext(opnd).
01816   TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
01817   CreatedInsts = 0;
01818 
01819   // Remove dead code.
01820   if (SExtOpnd->use_empty())
01821     TPT.eraseInstruction(SExtOpnd);
01822 
01823   // Check if the sext is still needed.
01824   if (SExt->getType() != SExt->getOperand(0)->getType())
01825     return SExt;
01826 
01827   // At this point we have: sext ty opnd to ty.
01828   // Reassign the uses of SExt to the opnd and remove SExt.
01829   Value *NextVal = SExt->getOperand(0);
01830   TPT.eraseInstruction(SExt, NextVal);
01831   return NextVal;
01832 }
01833 
01834 Value *
01835 TypePromotionHelper::promoteOperandForOther(Instruction *SExt,
01836                                             TypePromotionTransaction &TPT,
01837                                             InstrToOrigTy &PromotedInsts,
01838                                             unsigned &CreatedInsts) {
01839   // By construction, the operand of SExt is an instruction. Otherwise we cannot
01840   // get through it and this method should not be called.
01841   Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
01842   CreatedInsts = 0;
01843   if (!SExtOpnd->hasOneUse()) {
01844     // SExtOpnd will be promoted.
01845     // All its uses, except SExt, will need to use a truncated value of the
01846     // promoted version.
01847     // Create the truncate now.
01848     Instruction *Trunc = TPT.createTrunc(SExt, SExtOpnd->getType());
01849     Trunc->removeFromParent();
01850     // Insert it just after the definition.
01851     Trunc->insertAfter(SExtOpnd);
01852 
01853     TPT.replaceAllUsesWith(SExtOpnd, Trunc);
01854     // Restore the operand of SExt (which has been replaced by the previous call
01855     // to replaceAllUsesWith) to avoid creating a trunc <-> sext cycle.
01856     TPT.setOperand(SExt, 0, SExtOpnd);
01857   }
01858 
01859   // Get through the Instruction:
01860   // 1. Update its type.
01861   // 2. Replace the uses of SExt by Inst.
01862   // 3. Sign extend each operand that needs to be sign extended.
01863 
01864   // Remember the original type of the instruction before promotion.
01865   // This is useful to know that the high bits are sign extended bits.
01866   PromotedInsts.insert(
01867       std::pair<Instruction *, Type *>(SExtOpnd, SExtOpnd->getType()));
01868   // Step #1.
01869   TPT.mutateType(SExtOpnd, SExt->getType());
01870   // Step #2.
01871   TPT.replaceAllUsesWith(SExt, SExtOpnd);
01872   // Step #3.
01873   Instruction *SExtForOpnd = SExt;
01874 
01875   DEBUG(dbgs() << "Propagate SExt to operands\n");
01876   for (int OpIdx = 0, EndOpIdx = SExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
01877        ++OpIdx) {
01878     DEBUG(dbgs() << "Operand:\n" << *(SExtOpnd->getOperand(OpIdx)) << '\n');
01879     if (SExtOpnd->getOperand(OpIdx)->getType() == SExt->getType() ||
01880         !shouldSExtOperand(SExtOpnd, OpIdx)) {
01881       DEBUG(dbgs() << "No need to propagate\n");
01882       continue;
01883     }
01884     // Check if we can statically sign extend the operand.
01885     Value *Opnd = SExtOpnd->getOperand(OpIdx);
01886     if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
01887       DEBUG(dbgs() << "Statically sign extend\n");
01888       TPT.setOperand(
01889           SExtOpnd, OpIdx,
01890           ConstantInt::getSigned(SExt->getType(), Cst->getSExtValue()));
01891       continue;
01892     }
01893     // UndefValues are typed, so we have to statically sign extend them.
01894     if (isa<UndefValue>(Opnd)) {
01895       DEBUG(dbgs() << "Statically sign extend\n");
01896       TPT.setOperand(SExtOpnd, OpIdx, UndefValue::get(SExt->getType()));
01897       continue;
01898     }
01899 
01900     // Otherwise we have to explicitly sign extend the operand.
01901     // Check if SExt was reused to sign extend an operand.
01902     if (!SExtForOpnd) {
01903       // If yes, create a new one.
01904       DEBUG(dbgs() << "More operands to sext\n");
01905       SExtForOpnd = TPT.createSExt(SExt, Opnd, SExt->getType());
01906       ++CreatedInsts;
01907     }
01908 
01909     TPT.setOperand(SExtForOpnd, 0, Opnd);
01910 
01911     // Move the sign extension before the insertion point.
01912     TPT.moveBefore(SExtForOpnd, SExtOpnd);
01913     TPT.setOperand(SExtOpnd, OpIdx, SExtForOpnd);
01914     // If more sexts are required, new instructions will have to be created.
01915     SExtForOpnd = nullptr;
01916   }
01917   if (SExtForOpnd == SExt) {
01918     DEBUG(dbgs() << "Sign extension is useless now\n");
01919     TPT.eraseInstruction(SExt);
01920   }
01921   return SExtOpnd;
01922 }
01923 
01924 /// IsPromotionProfitable - Check whether or not promoting an instruction
01925 /// to a wider type was profitable.
01926 /// \p MatchedSize gives the number of instructions that have been matched
01927 /// in the addressing mode after the promotion was applied.
01928 /// \p SizeWithPromotion gives the number of created instructions for
01929 /// the promotion plus the number of instructions that have been
01930 /// matched in the addressing mode before the promotion.
01931 /// \p PromotedOperand is the value that has been promoted.
01932 /// \return True if the promotion is profitable, false otherwise.
01933 bool
01934 AddressingModeMatcher::IsPromotionProfitable(unsigned MatchedSize,
01935                                              unsigned SizeWithPromotion,
01936                                              Value *PromotedOperand) const {
01937   // We folded fewer instructions than we created to promote the operand.
01938   // This is not profitable.
01939   if (MatchedSize < SizeWithPromotion)
01940     return false;
01941   if (MatchedSize > SizeWithPromotion)
01942     return true;
01943   // The promotion is neutral but it may help fold the sign extension into
01944   // loads, for instance.
01945   // Check that we did not create an illegal instruction.
01946   Instruction *PromotedInst = dyn_cast<Instruction>(PromotedOperand);
01947   if (!PromotedInst)
01948     return false;
01949   int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
01950   // If the ISDOpcode is undefined, it was undefined before the promotion.
01951   if (!ISDOpcode)
01952     return true;
01953   // Otherwise, check if the promoted instruction is legal or not.
01954   return TLI.isOperationLegalOrCustom(ISDOpcode,
01955                                       EVT::getEVT(PromotedInst->getType()));
01956 }
01957 
01958 /// MatchOperationAddr - Given an instruction or constant expr, see if we can
01959 /// fold the operation into the addressing mode.  If so, update the addressing
01960 /// mode and return true, otherwise return false without modifying AddrMode.
01961 /// If \p MovedAway is not NULL, it reports whether \p AddrInst has been moved
01962 /// away and therefore must not be folded into the addressing mode on success.
01963 /// If \p MovedAway == true, \p AddrInst will not be part of the addressing mode
01964 /// because it has been moved away.
01965 /// Thus AddrInst must not be added to the matched instructions.
01966 /// This can happen when AddrInst is a sext, since it may be moved away.
01967 /// Therefore, AddrInst may not be valid when MovedAway is true and it must
01968 /// not be referenced anymore.
01969 bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
01970                                                unsigned Depth,
01971                                                bool *MovedAway) {
01972   // Avoid exponential behavior on extremely deep expression trees.
01973   if (Depth >= 5) return false;
01974 
01975   // By default, all matched instructions stay in place.
01976   if (MovedAway)
01977     *MovedAway = false;
01978 
01979   switch (Opcode) {
01980   case Instruction::PtrToInt:
01981     // PtrToInt is always a noop, as we know that the int type is pointer sized.
01982     return MatchAddr(AddrInst->getOperand(0), Depth);
01983   case Instruction::IntToPtr:
01984     // This inttoptr is a no-op if the integer type is pointer sized.
01985     if (TLI.getValueType(AddrInst->getOperand(0)->getType()) ==
01986         TLI.getPointerTy(AddrInst->getType()->getPointerAddressSpace()))
01987       return MatchAddr(AddrInst->getOperand(0), Depth);
01988     return false;
01989   case Instruction::BitCast:
01990   case Instruction::AddrSpaceCast:
01991     // BitCast is always a noop, and we can handle it as long as it is
01992     // int->int or pointer->pointer (we don't want int<->fp or something).
01993     if ((AddrInst->getOperand(0)->getType()->isPointerTy() ||
01994          AddrInst->getOperand(0)->getType()->isIntegerTy()) &&
01995         // Don't touch identity bitcasts.  These were probably put here by LSR,
01996         // and we don't want to mess around with them.  Assume it knows what it
01997         // is doing.
01998         AddrInst->getOperand(0)->getType() != AddrInst->getType())
01999       return MatchAddr(AddrInst->getOperand(0), Depth);
02000     return false;
02001   case Instruction::Add: {
02002     // Check to see if we can merge in the RHS then the LHS.  If so, we win.
02003     ExtAddrMode BackupAddrMode = AddrMode;
02004     unsigned OldSize = AddrModeInsts.size();
02005     // Start a transaction at this point.
02006     // The LHS may match but not the RHS.
02007     // Therefore, we need a higher level restoration point to undo partially
02008     // matched operations.
02009     TypePromotionTransaction::ConstRestorationPt LastKnownGood =
02010         TPT.getRestorationPoint();
02011 
02012     if (MatchAddr(AddrInst->getOperand(1), Depth+1) &&
02013         MatchAddr(AddrInst->getOperand(0), Depth+1))
02014       return true;
02015 
02016     // Restore the old addr mode info.
02017     AddrMode = BackupAddrMode;
02018     AddrModeInsts.resize(OldSize);
02019     TPT.rollback(LastKnownGood);
02020 
02021     // Otherwise this was over-aggressive.  Try merging in the LHS then the RHS.
02022     if (MatchAddr(AddrInst->getOperand(0), Depth+1) &&
02023         MatchAddr(AddrInst->getOperand(1), Depth+1))
02024       return true;
02025 
02026     // Otherwise we definitely can't merge the ADD in.
02027     AddrMode = BackupAddrMode;
02028     AddrModeInsts.resize(OldSize);
02029     TPT.rollback(LastKnownGood);
02030     break;
02031   }
02032   //case Instruction::Or:
02033   // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
02034   //break;
02035   case Instruction::Mul:
02036   case Instruction::Shl: {
02037     // Can only handle X*C and X << C.
02038     ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
02039     if (!RHS)
02040       return false;
02041     int64_t Scale = RHS->getSExtValue();
02042     if (Opcode == Instruction::Shl)
02043       Scale = 1LL << Scale;
02044 
02045     return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth);
02046   }
02047   case Instruction::GetElementPtr: {
02048     // Scan the GEP.  We check whether it contains constant offsets and at most
02049     // one variable offset.
02050     int VariableOperand = -1;
02051     unsigned VariableScale = 0;
02052 
02053     int64_t ConstantOffset = 0;
02054     const DataLayout *TD = TLI.getDataLayout();
02055     gep_type_iterator GTI = gep_type_begin(AddrInst);
02056     for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
02057       if (StructType *STy = dyn_cast<StructType>(*GTI)) {
02058         const StructLayout *SL = TD->getStructLayout(STy);
02059         unsigned Idx =
02060           cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
02061         ConstantOffset += SL->getElementOffset(Idx);
02062       } else {
02063         uint64_t TypeSize = TD->getTypeAllocSize(GTI.getIndexedType());
02064         if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
02065           ConstantOffset += CI->getSExtValue()*TypeSize;
02066         } else if (TypeSize) {  // Scales of zero don't do anything.
02067           // We only allow one variable index at the moment.
02068           if (VariableOperand != -1)
02069             return false;
02070 
02071           // Remember the variable index.
02072           VariableOperand = i;
02073           VariableScale = TypeSize;
02074         }
02075       }
02076     }
02077 
02078     // A common case is for the GEP to only do a constant offset.  In this case,
02079     // just add it to the disp field and check validity.
02080     if (VariableOperand == -1) {
02081       AddrMode.BaseOffs += ConstantOffset;
02082       if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){
02083         // Check to see if we can fold the base pointer in too.
02084         if (MatchAddr(AddrInst->getOperand(0), Depth+1))
02085           return true;
02086       }
02087       AddrMode.BaseOffs -= ConstantOffset;
02088       return false;
02089     }
02090 
02091     // Save the valid addressing mode in case we can't match.
02092     ExtAddrMode BackupAddrMode = AddrMode;
02093     unsigned OldSize = AddrModeInsts.size();
02094 
02095     // See if the scale and offset amount is valid for this target.
02096     AddrMode.BaseOffs += ConstantOffset;
02097 
02098     // Match the base operand of the GEP.
02099     if (!MatchAddr(AddrInst->getOperand(0), Depth+1)) {
02100       // If it couldn't be matched, just stuff the value in a register.
02101       if (AddrMode.HasBaseReg) {
02102         AddrMode = BackupAddrMode;
02103         AddrModeInsts.resize(OldSize);
02104         return false;
02105       }
02106       AddrMode.HasBaseReg = true;
02107       AddrMode.BaseReg = AddrInst->getOperand(0);
02108     }
02109 
02110     // Match the remaining variable portion of the GEP.
02111     if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
02112                           Depth)) {
02113       // If it couldn't be matched, try stuffing the base into a register
02114       // instead of matching it, and retrying the match of the scale.
02115       AddrMode = BackupAddrMode;
02116       AddrModeInsts.resize(OldSize);
02117       if (AddrMode.HasBaseReg)
02118         return false;
02119       AddrMode.HasBaseReg = true;
02120       AddrMode.BaseReg = AddrInst->getOperand(0);
02121       AddrMode.BaseOffs += ConstantOffset;
02122       if (!MatchScaledValue(AddrInst->getOperand(VariableOperand),
02123                             VariableScale, Depth)) {
02124         // If even that didn't work, bail.
02125         AddrMode = BackupAddrMode;
02126         AddrModeInsts.resize(OldSize);
02127         return false;
02128       }
02129     }
02130 
02131     return true;
02132   }
02133   case Instruction::SExt: {
02134     Instruction *SExt = dyn_cast<Instruction>(AddrInst);
02135     if (!SExt)
02136       return false;
02137 
02138     // Try to move this sext out of the way of the addressing mode.
02139     // Ask for a method for doing so.
02140     TypePromotionHelper::Action TPH = TypePromotionHelper::getAction(
02141         SExt, InsertedTruncs, TLI, PromotedInsts);
02142     if (!TPH)
02143       return false;
02144 
02145     TypePromotionTransaction::ConstRestorationPt LastKnownGood =
02146         TPT.getRestorationPoint();
02147     unsigned CreatedInsts = 0;
02148     Value *PromotedOperand = TPH(SExt, TPT, PromotedInsts, CreatedInsts);
02149     // SExt has been moved away.
02150     // Thus either it will be rematched later in the recursive calls or it is
02151     // gone. Anyway, we must not fold it into the addressing mode at this point.
02152     // E.g.,
02153     // op = add opnd, 1
02154     // idx = sext op
02155     // addr = gep base, idx
02156     // is now:
02157     // promotedOpnd = sext opnd           <- no match here
02158     // op = promoted_add promotedOpnd, 1  <- match (later in recursive calls)
02159     // addr = gep base, op                <- match
02160     if (MovedAway)
02161       *MovedAway = true;
02162 
02163     assert(PromotedOperand &&
02164            "TypePromotionHelper should have filtered out those cases");
02165 
02166     ExtAddrMode BackupAddrMode = AddrMode;
02167     unsigned OldSize = AddrModeInsts.size();
02168 
02169     if (!MatchAddr(PromotedOperand, Depth) ||
02170         !IsPromotionProfitable(AddrModeInsts.size(), OldSize + CreatedInsts,
02171                                PromotedOperand)) {
02172       AddrMode = BackupAddrMode;
02173       AddrModeInsts.resize(OldSize);
02174       DEBUG(dbgs() << "Sign extension does not pay off: rollback\n");
02175       TPT.rollback(LastKnownGood);
02176       return false;
02177     }
02178     return true;
02179   }
02180   }
02181   return false;
02182 }
02183 
02184 /// MatchAddr - If we can, try to add the value of 'Addr' into the current
02185 /// addressing mode.  If Addr can't be added to AddrMode this returns false and
02186 /// leaves AddrMode unmodified.  This assumes that Addr is either a pointer type
02187 /// or intptr_t for the target.
02188 ///
02189 bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
02190   // Start a transaction at this point that we will rollback if the matching
02191   // fails.
02192   TypePromotionTransaction::ConstRestorationPt LastKnownGood =
02193       TPT.getRestorationPoint();
02194   if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
02195     // Fold in immediates if legal for the target.
02196     AddrMode.BaseOffs += CI->getSExtValue();
02197     if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
02198       return true;
02199     AddrMode.BaseOffs -= CI->getSExtValue();
02200   } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
02201     // If this is a global variable, try to fold it into the addressing mode.
02202     if (!AddrMode.BaseGV) {
02203       AddrMode.BaseGV = GV;
02204       if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
02205         return true;
02206       AddrMode.BaseGV = nullptr;
02207     }
02208   } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
02209     ExtAddrMode BackupAddrMode = AddrMode;
02210     unsigned OldSize = AddrModeInsts.size();
02211 
02212     // Check to see if it is possible to fold this operation.
02213     bool MovedAway = false;
02214     if (MatchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
02215       // This instruction may have been moved away. If so, there is nothing
02216       // to check here.
02217       if (MovedAway)
02218         return true;
02219       // Okay, it's possible to fold this.  Check to see if it is actually
02220       // *profitable* to do so.  We use a simple cost model to avoid increasing
02221       // register pressure too much.
02222       if (I->hasOneUse() ||
02223           IsProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
02224         AddrModeInsts.push_back(I);
02225         return true;
02226       }
02227 
02228       // It isn't profitable to do this, roll back.
02229       //cerr << "NOT FOLDING: " << *I;
02230       AddrMode = BackupAddrMode;
02231       AddrModeInsts.resize(OldSize);
02232       TPT.rollback(LastKnownGood);
02233     }
02234   } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
02235     if (MatchOperationAddr(CE, CE->getOpcode(), Depth))
02236       return true;
02237     TPT.rollback(LastKnownGood);
02238   } else if (isa<ConstantPointerNull>(Addr)) {
02239     // Null pointer gets folded without affecting the addressing mode.
02240     return true;
02241   }
02242 
02243   // Worst case, the target should support [reg] addressing modes. :)
02244   if (!AddrMode.HasBaseReg) {
02245     AddrMode.HasBaseReg = true;
02246     AddrMode.BaseReg = Addr;
02247     // Still check for legality in case the target supports [imm] but not [i+r].
02248     if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
02249       return true;
02250     AddrMode.HasBaseReg = false;
02251     AddrMode.BaseReg = nullptr;
02252   }
02253 
02254   // If the base register is already taken, see if we can do [r+r].
02255   if (AddrMode.Scale == 0) {
02256     AddrMode.Scale = 1;
02257     AddrMode.ScaledReg = Addr;
02258     if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
02259       return true;
02260     AddrMode.Scale = 0;
02261     AddrMode.ScaledReg = nullptr;
02262   }
02263   // Couldn't match.
02264   TPT.rollback(LastKnownGood);
02265   return false;
02266 }
02267 
02268 /// IsOperandAMemoryOperand - Check to see if all uses of OpVal by the specified
02269 /// inline asm call are due to memory operands.  If so, return true, otherwise
02270 /// return false.
02271 static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
02272                                     const TargetLowering &TLI) {
02273   TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(ImmutableCallSite(CI));
02274   for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
02275     TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
02276 
02277     // Compute the constraint code and ConstraintType to use.
02278     TLI.ComputeConstraintToUse(OpInfo, SDValue());
02279 
02280     // If this asm operand is our Value*, and if it isn't an indirect memory
02281     // operand, we can't fold it!
02282     if (OpInfo.CallOperandVal == OpVal &&
02283         (OpInfo.ConstraintType != TargetLowering::C_Memory ||
02284          !OpInfo.isIndirect))
02285       return false;
02286   }
02287 
02288   return true;
02289 }
02290 
02291 /// FindAllMemoryUses - Recursively walk all the uses of I until we find a
02292 /// memory use.  If we find an obviously non-foldable instruction, return true.
02293 /// Add the ultimately found memory instructions to MemoryUses.
02294 static bool FindAllMemoryUses(Instruction *I,
02295                 SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses,
02296                               SmallPtrSet<Instruction*, 16> &ConsideredInsts,
02297                               const TargetLowering &TLI) {
02298   // If we already considered this instruction, we're done.
02299   if (!ConsideredInsts.insert(I))
02300     return false;
02301 
02302   // If this is an obviously unfoldable instruction, bail out.
02303   if (!MightBeFoldableInst(I))
02304     return true;
02305 
02306   // Loop over all the uses, recursively processing them.
02307   for (Use &U : I->uses()) {
02308     Instruction *UserI = cast<Instruction>(U.getUser());
02309 
02310     if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
02311       MemoryUses.push_back(std::make_pair(LI, U.getOperandNo()));
02312       continue;
02313     }
02314 
02315     if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
02316       unsigned opNo = U.getOperandNo();
02317       if (opNo == 0) return true; // Storing addr, not into addr.
02318       MemoryUses.push_back(std::make_pair(SI, opNo));
02319       continue;
02320     }
02321 
02322     if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
02323       InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
02324       if (!IA) return true;
02325 
02326       // If this is a memory operand, we're cool, otherwise bail out.
02327       if (!IsOperandAMemoryOperand(CI, IA, I, TLI))
02328         return true;
02329       continue;
02330     }
02331 
02332     if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI))
02333       return true;
02334   }
02335 
02336   return false;
02337 }
02338 
02339 /// ValueAlreadyLiveAtInst - Return true if Val is already known to be live at
02340 /// the use site that we're folding it into.  If so, there is no cost to
02341 /// include it in the addressing mode.  KnownLive1 and KnownLive2 are two values
02342 /// that we know are live at the instruction already.
02343 bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
02344                                                    Value *KnownLive2) {
02345   // If Val is either of the known-live values, we know it is live!
02346   if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2)
02347     return true;
02348 
02349   // All values other than instructions and arguments (e.g. constants) are live.
02350   if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;
02351 
02352   // If Val is a constant sized alloca in the entry block, it is live; this is
02353   // true because it is just a reference to the stack/frame pointer, which is
02354   // live for the whole function.
02355   if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
02356     if (AI->isStaticAlloca())
02357       return true;
02358 
02359   // Check to see if this value is already used in the memory instruction's
02360   // block.  If so, it's already live into the block at the very least, so we
02361   // can reasonably fold it.
02362   return Val->isUsedInBasicBlock(MemoryInst->getParent());
02363 }
02364 
02365 /// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing
02366 /// mode of the machine to fold the specified instruction into a load or store
02367 /// that ultimately uses it.  However, the specified instruction has multiple
02368 /// uses.  Given this, it may actually increase register pressure to fold it
02369 /// into the load.  For example, consider this code:
02370 ///
02371 ///     X = ...
02372 ///     Y = X+1
02373 ///     use(Y)   -> nonload/store
02374 ///     Z = Y+1
02375 ///     load Z
02376 ///
02377 /// In this case, Y has multiple uses, and can be folded into the load of Z
02378 /// (yielding load [X+2]).  However, doing this will cause both "X" and "X+1" to
02379 /// be live at the use(Y) line.  If we don't fold Y into load Z, we use one
02380 /// fewer register.  Since Y can't be folded into "use(Y)" we don't increase the
02381 /// number of computations either.
02382 ///
02383 /// Note that this (like most of CodeGenPrepare) is just a rough heuristic.  If
02384 /// X was live across 'load Z' for other reasons, we actually *would* want to
02385 /// fold the addressing mode in the Z case.  This would make Y die earlier.
02386 bool AddressingModeMatcher::
02387 IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
02388                                      ExtAddrMode &AMAfter) {
02389   if (IgnoreProfitability) return true;
02390 
02391   // AMBefore is the addressing mode before this instruction was folded into it,
02392   // and AMAfter is the addressing mode after the instruction was folded.  Get
02393   // the set of registers referenced by AMAfter and subtract out those
02394   // referenced by AMBefore: this is the set of values which folding in this
02395   // address extends the lifetime of.
02396   //
02397   // Note that there are only two potential values being referenced here,
02398   // BaseReg and ScaleReg (global addresses are always available, as are any
02399   // folded immediates).
02400   Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
02401 
02402   // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
02403   // lifetime wasn't extended by adding this instruction.
02404   if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
02405     BaseReg = nullptr;
02406   if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
02407     ScaledReg = nullptr;
02408 
02409   // If folding this instruction (and its subexprs) didn't extend any live
02410   // ranges, we're ok with it.
02411   if (!BaseReg && !ScaledReg)
02412     return true;
02413 
02414   // If all uses of this instruction are ultimately load/store/inlineasm's,
02415   // check to see if their addressing modes will include this instruction.  If
02416   // so, we can fold it into all uses, so it doesn't matter if it has multiple
02417   // uses.
02418   SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
02419   SmallPtrSet<Instruction*, 16> ConsideredInsts;
02420   if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI))
02421     return false;  // Has a non-memory, non-foldable use!
02422 
02423   // Now that we know that all uses of this instruction are part of a chain of
02424   // computation involving only operations that could theoretically be folded
02425   // into a memory use, loop over each of these uses and see if they could
02426   // *actually* fold the instruction.
02427   SmallVector<Instruction*, 32> MatchedAddrModeInsts;
02428   for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
02429     Instruction *User = MemoryUses[i].first;
02430     unsigned OpNo = MemoryUses[i].second;
02431 
02432     // Get the access type of this use.  If the use isn't a pointer, we don't
02433     // know what it accesses.
02434     Value *Address = User->getOperand(OpNo);
02435     if (!Address->getType()->isPointerTy())
02436       return false;
02437     Type *AddressAccessTy = Address->getType()->getPointerElementType();
02438 
02439     // Do a match against the root of this address, ignoring profitability. This
02440     // will tell us if the addressing mode for the memory operation will
02441     // *actually* cover the shared instruction.
02442     ExtAddrMode Result;
02443     TypePromotionTransaction::ConstRestorationPt LastKnownGood =
02444         TPT.getRestorationPoint();
02445     AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy,
02446                                   MemoryInst, Result, InsertedTruncs,
02447                                   PromotedInsts, TPT);
02448     Matcher.IgnoreProfitability = true;
02449     bool Success = Matcher.MatchAddr(Address, 0);
02450     (void)Success; assert(Success && "Couldn't select *anything*?");
02451 
02452     // The match was only done to check profitability; the changes made are not
02453     // part of the original matcher. Therefore, they should be dropped,
02454     // otherwise the original matcher will not reflect the right state.
02455     TPT.rollback(LastKnownGood);
02456 
02457     // If the match didn't cover I, then it won't be shared by it.
02458     if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(),
02459                   I) == MatchedAddrModeInsts.end())
02460       return false;
02461 
02462     MatchedAddrModeInsts.clear();
02463   }
02464 
02465   return true;
02466 }
02467 
02468 } // end anonymous namespace
02469 
02470 /// IsNonLocalValue - Return true if the specified value is defined in a
02471 /// different basic block than BB.
02472 static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
02473   if (Instruction *I = dyn_cast<Instruction>(V))
02474     return I->getParent() != BB;
02475   return false;
02476 }
02477 
02478 /// OptimizeMemoryInst - Load and Store Instructions often have
02479 /// addressing modes that can do significant amounts of computation.  As such,
02480 /// instruction selection will try to get the load or store to do as much
02481 /// computation as possible for the program.  The problem is that isel can only
02482 /// see within a single block.  As such, we sink as much legal addressing mode
02483 /// stuff into the block as possible.
02484 ///
02485 /// This method is used to optimize both load/store and inline asms with memory
02486 /// operands.
02487 bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
02488                                         Type *AccessTy) {
02489   Value *Repl = Addr;
02490 
02491   // Try to collapse single-value PHI nodes.  This is necessary to undo
02492   // unprofitable PRE transformations.
02493   SmallVector<Value*, 8> worklist;
02494   SmallPtrSet<Value*, 16> Visited;
02495   worklist.push_back(Addr);
02496 
02497   // Use a worklist to iteratively look through PHI nodes, and ensure that
02498   // the addressing modes obtained from the non-PHI roots of the graph
02499   // are equivalent.
02500   Value *Consensus = nullptr;
02501   unsigned NumUsesConsensus = 0;
02502   bool IsNumUsesConsensusValid = false;
02503   SmallVector<Instruction*, 16> AddrModeInsts;
02504   ExtAddrMode AddrMode;
02505   TypePromotionTransaction TPT;
02506   TypePromotionTransaction::ConstRestorationPt LastKnownGood =
02507       TPT.getRestorationPoint();
02508   while (!worklist.empty()) {
02509     Value *V = worklist.back();
02510     worklist.pop_back();
02511 
02512     // Break use-def graph loops.
02513     if (!Visited.insert(V)) {
02514       Consensus = nullptr;
02515       break;
02516     }
02517 
02518     // For a PHI node, push all of its incoming values.
02519     if (PHINode *P = dyn_cast<PHINode>(V)) {
02520       for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i)
02521         worklist.push_back(P->getIncomingValue(i));
02522       continue;
02523     }
02524 
02525     // For non-PHIs, determine the addressing mode being computed.
02526     SmallVector<Instruction*, 16> NewAddrModeInsts;
02527     ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
02528         V, AccessTy, MemoryInst, NewAddrModeInsts, *TLI, InsertedTruncsSet,
02529         PromotedInsts, TPT);
02530 
02531     // This check is broken into two cases with very similar code to avoid using
02532     // getNumUses() as much as possible. Some values have a lot of uses, so
02533     // calling getNumUses() unconditionally caused a significant compile-time
02534     // regression.
02535     if (!Consensus) {
02536       Consensus = V;
02537       AddrMode = NewAddrMode;
02538       AddrModeInsts = NewAddrModeInsts;
02539       continue;
02540     } else if (NewAddrMode == AddrMode) {
02541       if (!IsNumUsesConsensusValid) {
02542         NumUsesConsensus = Consensus->getNumUses();
02543         IsNumUsesConsensusValid = true;
02544       }
02545 
02546       // Ensure that the obtained addressing mode is equivalent to that obtained
02547       // for all other roots of the PHI traversal.  Also, when choosing one
02548       // such root as representative, select the one with the most uses in order
02549       // to keep the cost modeling heuristics in AddressingModeMatcher
02550       // applicable.
02551       unsigned NumUses = V->getNumUses();
02552       if (NumUses > NumUsesConsensus) {
02553         Consensus = V;
02554         NumUsesConsensus = NumUses;
02555         AddrModeInsts = NewAddrModeInsts;
02556       }
02557       continue;
02558     }
02559 
02560     Consensus = nullptr;
02561     break;
02562   }
02563 
02564   // If the addressing mode couldn't be determined, or if multiple different
02565   // ones were determined, bail out now.
02566   if (!Consensus) {
02567     TPT.rollback(LastKnownGood);
02568     return false;
02569   }
02570   TPT.commit();
02571 
02572   // Check to see if any of the instructions subsumed by this addr mode are
02573   // non-local to I's BB.
02574   bool AnyNonLocal = false;
02575   for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) {
02576     if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) {
02577       AnyNonLocal = true;
02578       break;
02579     }
02580   }
02581 
02582   // If all the instructions matched are already in this BB, don't do anything.
02583   if (!AnyNonLocal) {
02584     DEBUG(dbgs() << "CGP: Found      local addrmode: " << AddrMode << "\n");
02585     return false;
02586   }
02587 
02588   // Insert this computation right after this user.  Since our caller is
02589   // scanning from the top of the BB to the bottom, reuses of the expr are
02590   // guaranteed to happen later.
02591   IRBuilder<> Builder(MemoryInst);
02592 
02593   // Now that we have determined the addressing expression we want to use and
02594   // know that we have to sink it into this block, check to see if we have
02595   // already done this for some other load/store instr in this block.  If so,
02596   // reuse the computation.
02597   Value *&SunkAddr = SunkAddrs[Addr];
02598   if (SunkAddr) {
02599     DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
02600                  << *MemoryInst << "\n");
02601     if (SunkAddr->getType() != Addr->getType())
02602       SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
02603   } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() &&
02604                TM && TM->getSubtarget<TargetSubtargetInfo>().useAA())) {
02605     // By default, we use the GEP-based method when AA is used later. This
02606     // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
02607     DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
02608                  << *MemoryInst << "\n");
02609     Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType());
02610     Value *ResultPtr = nullptr, *ResultIndex = nullptr;
02611 
02612     // First, find the pointer.
02613     if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) {
02614       ResultPtr = AddrMode.BaseReg;
02615       AddrMode.BaseReg = nullptr;
02616     }
02617 
02618     if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) {
02619       // We can't add more than one pointer together, nor can we scale a
02620       // pointer (both of which seem meaningless).
02621       if (ResultPtr || AddrMode.Scale != 1)
02622         return false;
02623 
02624       ResultPtr = AddrMode.ScaledReg;
02625       AddrMode.Scale = 0;
02626     }
02627 
02628     if (AddrMode.BaseGV) {
02629       if (ResultPtr)
02630         return false;
02631 
02632       ResultPtr = AddrMode.BaseGV;
02633     }
02634 
02635     // If the real base value actually came from an inttoptr, then the matcher
02636     // will look through it and provide only the integer value. In that case,
02637     // use it here.
02638     if (!ResultPtr && AddrMode.BaseReg) {
02639       ResultPtr =
02640         Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(), "sunkaddr");
02641       AddrMode.BaseReg = nullptr;
02642     } else if (!ResultPtr && AddrMode.Scale == 1) {
02643       ResultPtr =
02644         Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(), "sunkaddr");
02645       AddrMode.Scale = 0;
02646     }
02647 
02648     if (!ResultPtr &&
02649         !AddrMode.BaseReg && !AddrMode.Scale && !AddrMode.BaseOffs) {
02650       SunkAddr = Constant::getNullValue(Addr->getType());
02651     } else if (!ResultPtr) {
02652       return false;
02653     } else {
02654       Type *I8PtrTy =
02655         Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace());
02656 
02657       // Start with the base register. Do this first so that subsequent address
02658       // matching finds it last, which will prevent it from trying to match it
02659       // as the scaled value in case it happens to be a mul. That would be
02660       // problematic if we've sunk a different mul for the scale, because then
02661       // we'd end up sinking both muls.
02662       if (AddrMode.BaseReg) {
02663         Value *V = AddrMode.BaseReg;
02664         if (V->getType() != IntPtrTy)
02665           V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
02666 
02667         ResultIndex = V;
02668       }
02669 
02670       // Add the scale value.
02671       if (AddrMode.Scale) {
02672         Value *V = AddrMode.ScaledReg;
02673         if (V->getType() == IntPtrTy) {
02674           // done.
02675         } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
02676                    cast<IntegerType>(V->getType())->getBitWidth()) {
02677           V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
02678         } else {
02679           // It is only safe to sign extend the BaseReg if we know that the math
02680           // required to create it did not overflow before we extend it. Since
02681           // the original IR value was tossed in favor of a constant back when
02682           // the AddrMode was created, we need to bail out gracefully if widths
02683           // do not match instead of extending it.
02684           Instruction *I = dyn_cast_or_null<Instruction>(ResultIndex);
02685           if (I && (ResultIndex != AddrMode.BaseReg))
02686             I->eraseFromParent();
02687           return false;
02688         }
02689 
02690         if (AddrMode.Scale != 1)
02691           V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
02692                                 "sunkaddr");
02693         if (ResultIndex)
02694           ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr");
02695         else
02696           ResultIndex = V;
02697       }
02698 
02699       // Add in the Base Offset if present.
02700       if (AddrMode.BaseOffs) {
02701         Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
02702         if (ResultIndex) {
02703           // We need to add this separately from the scale above to help with
02704           // SDAG consecutive load/store merging.
02705           if (ResultPtr->getType() != I8PtrTy)
02706             ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
02707           ResultPtr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr");
02708         }
02709 
02710         ResultIndex = V;
02711       }
02712 
02713       if (!ResultIndex) {
02714         SunkAddr = ResultPtr;
02715       } else {
02716         if (ResultPtr->getType() != I8PtrTy)
02717           ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
02718         SunkAddr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr");
02719       }
02720 
02721       if (SunkAddr->getType() != Addr->getType())
02722         SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
02723     }
02724   } else {
02725     DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
02726                  << *MemoryInst << "\n");
02727     Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType());
02728     Value *Result = nullptr;
02729 
02730     // Start with the base register. Do this first so that subsequent address
02731     // matching finds it last, which will prevent it from trying to match it
02732     // as the scaled value in case it happens to be a mul. That would be
02733     // problematic if we've sunk a different mul for the scale, because then
02734     // we'd end up sinking both muls.
02735     if (AddrMode.BaseReg) {
02736       Value *V = AddrMode.BaseReg;
02737       if (V->getType()->isPointerTy())
02738         V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
02739       if (V->getType() != IntPtrTy)
02740         V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
02741       Result = V;
02742     }
02743 
02744     // Add the scale value.
02745     if (AddrMode.Scale) {
02746       Value *V = AddrMode.ScaledReg;
02747       if (V->getType() == IntPtrTy) {
02748         // done.
02749       } else if (V->getType()->isPointerTy()) {
02750         V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
02751       } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
02752                  cast<IntegerType>(V->getType())->getBitWidth()) {
02753         V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
02754       } else {
02755         // It is only safe to sign extend the BaseReg if we know that the math
02756         // required to create it did not overflow before we extend it. Since
02757         // the original IR value was tossed in favor of a constant back when
02758         // the AddrMode was created, we need to bail out gracefully if widths
02759         // do not match instead of extending it.
02760         Instruction *I = dyn_cast_or_null<Instruction>(Result);
02761         if (I && (Result != AddrMode.BaseReg))
02762           I->eraseFromParent();
02763         return false;
02764       }
02765       if (AddrMode.Scale != 1)
02766         V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
02767                               "sunkaddr");
02768       if (Result)
02769         Result = Builder.CreateAdd(Result, V, "sunkaddr");
02770       else
02771         Result = V;
02772     }
02773 
02774     // Add in the BaseGV if present.
02775     if (AddrMode.BaseGV) {
02776       Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr");
02777       if (Result)
02778         Result = Builder.CreateAdd(Result, V, "sunkaddr");
02779       else
02780         Result = V;
02781     }
02782 
02783     // Add in the Base Offset if present.
02784     if (AddrMode.BaseOffs) {
02785       Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
02786       if (Result)
02787         Result = Builder.CreateAdd(Result, V, "sunkaddr");
02788       else
02789         Result = V;
02790     }
02791 
02792     if (!Result)
02793       SunkAddr = Constant::getNullValue(Addr->getType());
02794     else
02795       SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
02796   }
02797 
02798   MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
02799 
02800   // If we have no uses, recursively delete the value and all dead instructions
02801   // using it.
02802   if (Repl->use_empty()) {
02803     // This can cause recursive deletion, which can invalidate our iterator.
02804     // Use a WeakVH to hold onto it in case this happens.
02805     WeakVH IterHandle(CurInstIterator);
02806     BasicBlock *BB = CurInstIterator->getParent();
02807 
02808     RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo);
02809 
02810     if (IterHandle != CurInstIterator) {
02811       // If the iterator instruction was recursively deleted, start over at the
02812       // start of the block.
02813       CurInstIterator = BB->begin();
02814       SunkAddrs.clear();
02815     }
02816   }
02817   ++NumMemoryInsts;
02818   return true;
02819 }
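// An illustrative sketch (hypothetical IR; names invented) of what sinking an
// address computation next to its memory use looks like:
//
//   before:
//     entry:
//       %a = getelementptr i8* %base, i64 %off
//       br i1 %c, label %use, label %exit
//     use:
//       %v = load i8* %a
//
//   after (the non-GEP, inttoptr-based strategy shown):
//     use:
//       %sunkaddr = ptrtoint i8* %base to i64
//       %sunkaddr1 = add i64 %sunkaddr, %off
//       %sunkaddr2 = inttoptr i64 %sunkaddr1 to i8*
//       %v = load i8* %sunkaddr2
//
// The original %a is then deleted if it has become trivially dead.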
02820 
02821 /// OptimizeInlineAsmInst - If there are any memory operands, use
02822 /// OptimizeMemoryInst to sink their address computation into the block when
02823 /// possible / profitable.
02824 bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
02825   bool MadeChange = false;
02826 
02827   TargetLowering::AsmOperandInfoVector
02828     TargetConstraints = TLI->ParseConstraints(CS);
02829   unsigned ArgNo = 0;
02830   for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
02831     TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
02832 
02833     // Compute the constraint code and ConstraintType to use.
02834     TLI->ComputeConstraintToUse(OpInfo, SDValue());
02835 
02836     if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
02837         OpInfo.isIndirect) {
02838       Value *OpVal = CS->getArgOperand(ArgNo++);
02839       MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType());
02840     } else if (OpInfo.Type == InlineAsm::isInput)
02841       ArgNo++;
02842   }
02843 
02844   return MadeChange;
02845 }
02846 
02847 /// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same
02848 /// basic block as the load, unless conditions are unfavorable. This allows
02849 /// SelectionDAG to fold the extend into the load.
02850 ///
02851 bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) {
02852   // Look for a load being extended.
02853   LoadInst *LI = dyn_cast<LoadInst>(I->getOperand(0));
02854   if (!LI) return false;
02855 
02856   // If they're already in the same block, there's nothing to do.
02857   if (LI->getParent() == I->getParent())
02858     return false;
02859 
02860   // If the load has other users and the truncate is not free, this probably
02861   // isn't worthwhile.
02862   if (!LI->hasOneUse() &&
02863       TLI && (TLI->isTypeLegal(TLI->getValueType(LI->getType())) ||
02864               !TLI->isTypeLegal(TLI->getValueType(I->getType()))) &&
02865       !TLI->isTruncateFree(I->getType(), LI->getType()))
02866     return false;
02867 
02868   // Check whether the target supports casts folded into loads.
02869   unsigned LType;
02870   if (isa<ZExtInst>(I))
02871     LType = ISD::ZEXTLOAD;
02872   else {
02873     assert(isa<SExtInst>(I) && "Unexpected ext type!");
02874     LType = ISD::SEXTLOAD;
02875   }
02876   if (TLI && !TLI->isLoadExtLegal(LType, TLI->getValueType(LI->getType())))
02877     return false;
02878 
02879   // Move the extend into the same block as the load, so that SelectionDAG
02880   // can fold it.
02881   I->removeFromParent();
02882   I->insertAfter(LI);
02883   ++NumExtsMoved;
02884   return true;
02885 }
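// An illustrative sketch (hypothetical IR; names invented) of the transform:
//
//   before:
//     bb1:
//       %x = load i16* %p
//       br label %bb2
//     bb2:
//       %y = sext i16 %x to i32
//
//   after:
//     bb1:
//       %x = load i16* %p
//       %y = sext i16 %x to i32
//       br label %bb2
//
// With the ext next to the load, SelectionDAG can select a single
// sign-extending load when the target supports it.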
02886 
02887 bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
02888   BasicBlock *DefBB = I->getParent();
02889 
02890   // If the result of a {s|z}ext and its source are both live out, rewrite all
02891   // other uses of the source with the result of the extension.
02892   Value *Src = I->getOperand(0);
02893   if (Src->hasOneUse())
02894     return false;
02895 
02896   // Only do this xform if truncating is free.
02897   if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
02898     return false;
02899 
02900   // Only safe to perform the optimization if the source is also defined in
02901   // this block.
02902   if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
02903     return false;
02904 
02905   bool DefIsLiveOut = false;
02906   for (User *U : I->users()) {
02907     Instruction *UI = cast<Instruction>(U);
02908 
02909     // Figure out which BB this ext is used in.
02910     BasicBlock *UserBB = UI->getParent();
02911     if (UserBB == DefBB) continue;
02912     DefIsLiveOut = true;
02913     break;
02914   }
02915   if (!DefIsLiveOut)
02916     return false;
02917 
02918   // Make sure none of the uses are PHI nodes.
02919   for (User *U : Src->users()) {
02920     Instruction *UI = cast<Instruction>(U);
02921     BasicBlock *UserBB = UI->getParent();
02922     if (UserBB == DefBB) continue;
02923     // Be conservative. We don't want this xform to end up introducing
02924     // reloads just before load / store instructions.
02925     if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI))
02926       return false;
02927   }
02928 
02929   // InsertedTruncs - Only insert one trunc in each block once.
02930   DenseMap<BasicBlock*, Instruction*> InsertedTruncs;
02931 
02932   bool MadeChange = false;
02933   for (Use &U : Src->uses()) {
02934     Instruction *User = cast<Instruction>(U.getUser());
02935 
02936     // Figure out which BB this use of the source is in.
02937     BasicBlock *UserBB = User->getParent();
02938     if (UserBB == DefBB) continue;
02939 
02940     // Both src and def are live in this block. Rewrite the use.
02941     Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
02942 
02943     if (!InsertedTrunc) {
02944       BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
02945       InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
02946       InsertedTruncsSet.insert(InsertedTrunc);
02947     }
02948 
02949     // Replace a use of the {s|z}ext source with a use of the result.
02950     U = InsertedTrunc;
02951     ++NumExtUses;
02952     MadeChange = true;
02953   }
02954 
02955   return MadeChange;
02956 }
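// An illustrative sketch (hypothetical IR; names invented): when %x and its
// extension are both live out of the defining block, out-of-block uses of %x
// are rewritten through a trunc of the extension so only one value stays live.
//
//   before:
//     def:
//       %x = ...
//       %e = zext i16 %x to i32
//     other:
//       use(%e)
//       use(%x)
//
//   after:
//     other:
//       use(%e)
//       %t = trunc i32 %e to i16
//       use(%t)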
02957 
02958 /// isFormingBranchFromSelectProfitable - Returns true if a SelectInst should be
02959 /// turned into an explicit branch.
02960 static bool isFormingBranchFromSelectProfitable(SelectInst *SI) {
02961   // FIXME: This should use the same heuristics as IfConversion to determine
02962   // whether a select is better represented as a branch.  This requires that
02963   // branch probability metadata is preserved for the select, which is not the
02964   // case currently.
02965 
02966   CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
02967 
02968   // If the branch is predicted right, an out-of-order CPU can avoid blocking on
02969   // the compare.  Emit compares with a memory operand as branches rather than
02970   // cmovs, to avoid stalls on the load from memory.  If the compare has more
02971   // than one use, there's probably another cmov or setcc around, so it's not
02972   // worth emitting a branch.
02973   if (!Cmp)
02974     return false;
02975 
02976   Value *CmpOp0 = Cmp->getOperand(0);
02977   Value *CmpOp1 = Cmp->getOperand(1);
02978 
02979   // We check that the memory operand has one use to avoid uses of the loaded
02980   // value directly after the compare, making branches unprofitable.
02981   return Cmp->hasOneUse() &&
02982          ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) ||
02983           (isa<LoadInst>(CmpOp1) && CmpOp1->hasOneUse()));
02984 }
02985 
02986 
02987 /// If we have a SelectInst that will likely profit from branch prediction,
02988 /// turn it into a branch.
02989 bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) {
02990   bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
02991 
02992   // Can we convert the 'select' to CF?
02993   if (DisableSelectToBranch || OptSize || !TLI || VectorCond)
02994     return false;
02995 
02996   TargetLowering::SelectSupportKind SelectKind;
02997   if (VectorCond)
02998     SelectKind = TargetLowering::VectorMaskSelect;
02999   else if (SI->getType()->isVectorTy())
03000     SelectKind = TargetLowering::ScalarCondVectorVal;
03001   else
03002     SelectKind = TargetLowering::ScalarValSelect;
03003 
03004   // Do we have efficient codegen support for this kind of 'select'?
03005   if (TLI->isSelectSupported(SelectKind)) {
03006     // We have efficient codegen support for the select instruction.
03007     // Check if it is profitable to keep this 'select'.
03008     if (!TLI->isPredictableSelectExpensive() ||
03009         !isFormingBranchFromSelectProfitable(SI))
03010       return false;
03011   }
03012 
03013   ModifiedDT = true;
03014 
03015   // First, we split the block containing the select into 2 blocks.
03016   BasicBlock *StartBlock = SI->getParent();
03017   BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(SI));
03018   BasicBlock *NextBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
03019 
03020   // Create a new block serving as the landing pad for the branch.
03021   BasicBlock *SmallBlock = BasicBlock::Create(SI->getContext(), "select.mid",
03022                                              NextBlock->getParent(), NextBlock);
03023 
03024   // Move the unconditional branch from the block with the select in it into our
03025   // landing pad block.
03026   StartBlock->getTerminator()->eraseFromParent();
03027   BranchInst::Create(NextBlock, SmallBlock);
03028 
03029   // Insert the real conditional branch based on the original condition.
03030   BranchInst::Create(NextBlock, SmallBlock, SI->getCondition(), SI);
03031 
03032   // The select itself is replaced with a PHI Node.
03033   PHINode *PN = PHINode::Create(SI->getType(), 2, "", NextBlock->begin());
03034   PN->takeName(SI);
03035   PN->addIncoming(SI->getTrueValue(), StartBlock);
03036   PN->addIncoming(SI->getFalseValue(), SmallBlock);
03037   SI->replaceAllUsesWith(PN);
03038   SI->eraseFromParent();
03039 
03040   // Instruct OptimizeBlock to skip to the next block.
03041   CurInstIterator = StartBlock->end();
03042   ++NumSelectsExpanded;
03043   return true;
03044 }
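// An illustrative sketch (hypothetical IR; names invented) of the expansion:
//
//   before:
//     entry:
//       %c = icmp eq i32 %a, 0
//       %r = select i1 %c, i32 %x, i32 %y
//
//   after:
//     entry:
//       %c = icmp eq i32 %a, 0
//       br i1 %c, label %select.end, label %select.mid
//     select.mid:
//       br label %select.end
//     select.end:
//       %r = phi i32 [ %x, %entry ], [ %y, %select.mid ]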
03045 
03046 static bool isBroadcastShuffle(ShuffleVectorInst *SVI) {
03047   SmallVector<int, 16> Mask(SVI->getShuffleMask());
03048   int SplatElem = -1;
03049   for (unsigned i = 0; i < Mask.size(); ++i) {
03050     if (SplatElem != -1 && Mask[i] != -1 && Mask[i] != SplatElem)
03051       return false;
03052     SplatElem = Mask[i];
03053   }
03054 
03055   return true;
03056 }
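// For example (hypothetical IR), this splat of lane zero is a broadcast:
//   %s = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
// whereas a mask such as <0, 1, 0, 1> is not.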
03057 
03058 /// Some targets have expensive vector shifts if the lanes aren't all the same
03059 /// (e.g. x86 only introduced "vpsllvd" and friends with AVX2). In these cases
03060 /// it's often worth sinking a shufflevector splat down to its use so that
03061 /// codegen can spot that all lanes are identical.
03062 bool CodeGenPrepare::OptimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
03063   BasicBlock *DefBB = SVI->getParent();
03064 
03065   // Only do this xform if variable vector shifts are particularly expensive.
03066   if (!TLI || !TLI->isVectorShiftByScalarCheap(SVI->getType()))
03067     return false;
03068 
03069   // We only expect better codegen by sinking a shuffle if we can recognise a
03070   // constant splat.
03071   if (!isBroadcastShuffle(SVI))
03072     return false;
03073 
03074   // InsertedShuffles - Only insert a shuffle in each block once.
03075   DenseMap<BasicBlock*, Instruction*> InsertedShuffles;
03076 
03077   bool MadeChange = false;
03078   for (User *U : SVI->users()) {
03079     Instruction *UI = cast<Instruction>(U);
03080 
03081     // Figure out which BB this shuffle is used in.
03082     BasicBlock *UserBB = UI->getParent();
03083     if (UserBB == DefBB) continue;
03084 
03085     // For now only apply this when the splat is used by a shift instruction.
03086     if (!UI->isShift()) continue;
03087 
03088     // Everything checks out, sink the shuffle if the user's block doesn't
03089     // already have a copy.
03090     Instruction *&InsertedShuffle = InsertedShuffles[UserBB];
03091 
03092     if (!InsertedShuffle) {
03093       BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
03094       InsertedShuffle = new ShuffleVectorInst(SVI->getOperand(0),
03095                                               SVI->getOperand(1),
03096                                               SVI->getOperand(2), "", InsertPt);
03097     }
03098 
03099     UI->replaceUsesOfWith(SVI, InsertedShuffle);
03100     MadeChange = true;
03101   }
03102 
03103   // If we removed all uses, nuke the shuffle.
03104   if (SVI->use_empty()) {
03105     SVI->eraseFromParent();
03106     MadeChange = true;
03107   }
03108 
03109   return MadeChange;
03110 }
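// An illustrative sketch (hypothetical IR; names invented): the splat is
// re-materialized in the block of the shift that uses it.
//
//   before:
//     def:
//       %amt = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> zeroinitializer
//       br i1 %c, label %use, label %other
//     use:
//       %r = shl <4 x i32> %x, %amt
//
//   after:
//     use:
//       %amt1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> zeroinitializer
//       %r = shl <4 x i32> %x, %amt1
//
// If every out-of-block use is rewritten, the original shuffle is erased.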
03111 
03112 bool CodeGenPrepare::OptimizeInst(Instruction *I) {
03113   if (PHINode *P = dyn_cast<PHINode>(I)) {
03114     // It is possible for very late stage optimizations (such as SimplifyCFG)
03115     // to introduce PHI nodes too late to be cleaned up.  If we detect such a
03116     // trivial PHI, go ahead and zap it here.
03117     if (Value *V = SimplifyInstruction(P, TLI ? TLI->getDataLayout() : nullptr,
03118                                        TLInfo, DT)) {
03119       P->replaceAllUsesWith(V);
03120       P->eraseFromParent();
03121       ++NumPHIsElim;
03122       return true;
03123     }
03124     return false;
03125   }
03126 
03127   if (CastInst *CI = dyn_cast<CastInst>(I)) {
03128     // If the source of the cast is a constant, then this should have
03129     // already been constant folded.  The only reason NOT to constant fold
03130     // it is if something (e.g. LSR) was careful to place the constant
03131     // evaluation in a block other than the one that uses it (e.g. to hoist
03132     // the address of globals out of a loop).  If this is the case, we don't
03133     // want to forward-subst the cast.
03134     if (isa<Constant>(CI->getOperand(0)))
03135       return false;
03136 
03137     if (TLI && OptimizeNoopCopyExpression(CI, *TLI))
03138       return true;
03139 
03140     if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
03141       /// Sink a zext or sext into its user blocks if the target type doesn't
03142       /// fit in one register
03143       if (TLI && TLI->getTypeAction(CI->getContext(),
03144                                     TLI->getValueType(CI->getType())) ==
03145                      TargetLowering::TypeExpandInteger) {
03146         return SinkCast(CI);
03147       } else {
03148         bool MadeChange = MoveExtToFormExtLoad(I);
03149         return MadeChange | OptimizeExtUses(I);
03150       }
03151     }
03152     return false;
03153   }
03154 
03155   if (CmpInst *CI = dyn_cast<CmpInst>(I))
03156     if (!TLI || !TLI->hasMultipleConditionRegisters())
03157       return OptimizeCmpExpression(CI);
03158 
03159   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
03160     if (TLI)
03161       return OptimizeMemoryInst(I, I->getOperand(0), LI->getType());
03162     return false;
03163   }
03164 
03165   if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
03166     if (TLI)
03167       return OptimizeMemoryInst(I, SI->getOperand(1),
03168                                 SI->getOperand(0)->getType());
03169     return false;
03170   }
03171 
03172   BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
03173 
03174   if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
03175                 BinOp->getOpcode() == Instruction::LShr)) {
03176     ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
03177     if (TLI && CI && TLI->hasExtractBitsInsn())
03178       return OptimizeExtractBits(BinOp, CI, *TLI);
03179 
03180     return false;
03181   }
03182 
03183   if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
03184     if (GEPI->hasAllZeroIndices()) {
03185       /// The GEP operand must be a pointer, so must its result -> BitCast
03186       Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
03187                                         GEPI->getName(), GEPI);
03188       GEPI->replaceAllUsesWith(NC);
03189       GEPI->eraseFromParent();
03190       ++NumGEPsElim;
03191       OptimizeInst(NC);
03192       return true;
03193     }
03194     return false;
03195   }
03196 
03197   if (CallInst *CI = dyn_cast<CallInst>(I))
03198     return OptimizeCallInst(CI);
03199 
03200   if (SelectInst *SI = dyn_cast<SelectInst>(I))
03201     return OptimizeSelectInst(SI);
03202 
03203   if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
03204     return OptimizeShuffleVectorInst(SVI);
03205 
03206   return false;
03207 }
03208 
03209 // In this pass we look for GEP and cast instructions that are used
03210 // across basic blocks and rewrite them to improve basic-block-at-a-time
03211 // selection.
03212 bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
03213   SunkAddrs.clear();
03214   bool MadeChange = false;
03215 
03216   CurInstIterator = BB.begin();
03217   while (CurInstIterator != BB.end())
03218     MadeChange |= OptimizeInst(CurInstIterator++);
03219 
03220   MadeChange |= DupRetToEnableTailCallOpts(&BB);
03221 
03222   return MadeChange;
03223 }
03224 
03225 // If llvm.dbg.value is far away from the value, then ISel may not be able to
03226 // handle it properly. ISel will drop llvm.dbg.value if it cannot find a node
03227 // corresponding to the value.
03228 bool CodeGenPrepare::PlaceDbgValues(Function &F) {
03229   bool MadeChange = false;
03230   for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
03231     Instruction *PrevNonDbgInst = nullptr;
03232     for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) {
03233       Instruction *Insn = BI; ++BI;
03234       DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
03235       // Leave dbg.values that refer to an alloca alone. These
03236       // intrinsics describe the address of a variable (= the alloca)
03237       // being taken.  They should not be moved next to the alloca
03238       // (and to the beginning of the scope), but rather stay close to
03239       // where said address is used.
03240       if (!DVI || (DVI->getValue() && isa<AllocaInst>(DVI->getValue()))) {
03241         PrevNonDbgInst = Insn;
03242         continue;
03243       }
03244 
03245       Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue());
03246       if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) {
03247         DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI);
03248         DVI->removeFromParent();
03249         if (isa<PHINode>(VI))
03250           DVI->insertBefore(VI->getParent()->getFirstInsertionPt());
03251         else
03252           DVI->insertAfter(VI);
03253         MadeChange = true;
03254         ++NumDbgValueMoved;
03255       }
03256     }
03257   }
03258   return MadeChange;
03259 }
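// An illustrative sketch (hypothetical IR; operands elided): a dbg.value that
// has drifted far from the value it describes is moved next to the definition
// so ISel can still find a node for it.
//
//   before:
//     %x = add i32 %a, %b
//     ... many unrelated instructions ...
//     call void @llvm.dbg.value(metadata ... %x ...)
//
//   after:
//     %x = add i32 %a, %b
//     call void @llvm.dbg.value(metadata ... %x ...)
//     ... many unrelated instructions ...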
03260 
03261 // If there is a sequence that branches based on comparing a single bit
03262 // against zero, and the target supports folding the mask and compare into a
03263 // single instruction, sink the mask and compare into the branch uses. Do this
03264 // before OptimizeBlock -> OptimizeInst -> OptimizeCmpExpression, which
03265 // perturbs the pattern being searched for.
03267 bool CodeGenPrepare::sinkAndCmp(Function &F) {
03268   if (!EnableAndCmpSinking)
03269     return false;
03270   if (!TLI || !TLI->isMaskAndBranchFoldingLegal())
03271     return false;
03272   bool MadeChange = false;
03273   for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
03274     BasicBlock *BB = I++;
03275 
03276     // Does this BB end with the following?
03277     //   %andVal = and %val, #single-bit-set
03278     //   %icmpVal = icmp %andVal, 0
03279     //   br i1 %icmpVal, label %dest1, label %dest2
03280     BranchInst *Brcc = dyn_cast<BranchInst>(BB->getTerminator());
03281     if (!Brcc || !Brcc->isConditional())
03282       continue;
03283     ICmpInst *Cmp = dyn_cast<ICmpInst>(Brcc->getOperand(0));
03284     if (!Cmp || Cmp->getParent() != BB)
03285       continue;
03286     ConstantInt *Zero = dyn_cast<ConstantInt>(Cmp->getOperand(1));
03287     if (!Zero || !Zero->isZero())
03288       continue;
03289     Instruction *And = dyn_cast<Instruction>(Cmp->getOperand(0));
03290     if (!And || And->getOpcode() != Instruction::And || And->getParent() != BB)
03291       continue;
03292     ConstantInt* Mask = dyn_cast<ConstantInt>(And->getOperand(1));
03293     if (!Mask || !Mask->getUniqueInteger().isPowerOf2())
03294       continue;
03295     DEBUG(dbgs() << "found and; icmp ?,0; brcc\n"); DEBUG(BB->dump());
03296 
03297     // Push the "and; icmp" for any users that are conditional branches.
03298     // Since there can only be one branch use per BB, we don't need to keep
03299     // track of which BBs we insert into.
03300     for (Value::use_iterator UI = Cmp->use_begin(), E = Cmp->use_end();
03301          UI != E; ) {
03302       Use &TheUse = *UI;
03303       // Find brcc use.
03304       BranchInst *BrccUser = dyn_cast<BranchInst>(*UI);
03305       ++UI;
03306       if (!BrccUser || !BrccUser->isConditional())
03307         continue;
03308       BasicBlock *UserBB = BrccUser->getParent();
03309       if (UserBB == BB) continue;
03310       DEBUG(dbgs() << "found Brcc use\n");
03311 
03312       // Sink the "and; icmp" to use.
03313       MadeChange = true;
03314       BinaryOperator *NewAnd =
03315         BinaryOperator::CreateAnd(And->getOperand(0), And->getOperand(1), "",
03316                                   BrccUser);
03317       CmpInst *NewCmp =
03318         CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(), NewAnd, Zero,
03319                         "", BrccUser);
03320       TheUse = NewCmp;
03321       ++NumAndCmpsMoved;
03322       DEBUG(BrccUser->getParent()->dump());
03323     }
03324   }
03325   return MadeChange;
03326 }
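// An illustrative sketch (hypothetical IR; names invented): the and/icmp pair
// is duplicated next to each conditional branch in another block that uses it.
//
//   before:
//     bb:
//       %andVal = and i32 %val, 4
//       %icmpVal = icmp eq i32 %andVal, 0
//       br i1 %icmpVal, label %dest1, label %dest2
//     other:
//       br i1 %icmpVal, label %dest3, label %dest4
//
//   after (only %other changes):
//     other:
//       %andVal1 = and i32 %val, 4
//       %icmpVal1 = icmp eq i32 %andVal1, 0
//       br i1 %icmpVal1, label %dest3, label %dest4
//
// Each branch can then fold the single-bit mask and test locally.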