CodeGenPrepare.cpp
00001 //===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This pass munges the code in the input function to better prepare it for
00011 // SelectionDAG-based code generation. This works around limitations in its
00012 // basic-block-at-a-time approach. It should eventually be removed.
00013 //
00014 //===----------------------------------------------------------------------===//
00015 
00016 #include "llvm/CodeGen/Passes.h"
00017 #include "llvm/ADT/DenseMap.h"
00018 #include "llvm/ADT/SmallSet.h"
00019 #include "llvm/ADT/Statistic.h"
00020 #include "llvm/Analysis/InstructionSimplify.h"
00021 #include "llvm/Analysis/TargetLibraryInfo.h"
00022 #include "llvm/Analysis/TargetTransformInfo.h"
00023 #include "llvm/Analysis/ValueTracking.h"
00024 #include "llvm/IR/CallSite.h"
00025 #include "llvm/IR/Constants.h"
00026 #include "llvm/IR/DataLayout.h"
00027 #include "llvm/IR/DerivedTypes.h"
00028 #include "llvm/IR/Dominators.h"
00029 #include "llvm/IR/Function.h"
00030 #include "llvm/IR/GetElementPtrTypeIterator.h"
00031 #include "llvm/IR/IRBuilder.h"
00032 #include "llvm/IR/InlineAsm.h"
00033 #include "llvm/IR/Instructions.h"
00034 #include "llvm/IR/IntrinsicInst.h"
00035 #include "llvm/IR/MDBuilder.h"
00036 #include "llvm/IR/PatternMatch.h"
00037 #include "llvm/IR/Statepoint.h"
00038 #include "llvm/IR/ValueHandle.h"
00039 #include "llvm/IR/ValueMap.h"
00040 #include "llvm/Pass.h"
00041 #include "llvm/Support/CommandLine.h"
00042 #include "llvm/Support/Debug.h"
00043 #include "llvm/Support/raw_ostream.h"
00044 #include "llvm/Target/TargetLowering.h"
00045 #include "llvm/Target/TargetSubtargetInfo.h"
00046 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
00047 #include "llvm/Transforms/Utils/BuildLibCalls.h"
00048 #include "llvm/Transforms/Utils/BypassSlowDivision.h"
00049 #include "llvm/Transforms/Utils/Local.h"
00050 #include "llvm/Transforms/Utils/SimplifyLibCalls.h"
00051 using namespace llvm;
00052 using namespace llvm::PatternMatch;
00053 
00054 #define DEBUG_TYPE "codegenprepare"
00055 
00056 STATISTIC(NumBlocksElim, "Number of blocks eliminated");
00057 STATISTIC(NumPHIsElim,   "Number of trivial PHIs eliminated");
00058 STATISTIC(NumGEPsElim,   "Number of GEPs converted to casts");
00059 STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
00060                       "sunken Cmps");
00061 STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
00062                        "of sunken Casts");
00063 STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
00064                           "computations were sunk");
00065 STATISTIC(NumExtsMoved,  "Number of [s|z]ext instructions combined with loads");
00066 STATISTIC(NumExtUses,    "Number of uses of [s|z]ext instructions optimized");
00067 STATISTIC(NumAndsAdded,
00068           "Number of and mask instructions added to form ext loads");
00069 STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
00070 STATISTIC(NumRetsDup,    "Number of return instructions duplicated");
00071 STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
00072 STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
00073 STATISTIC(NumAndCmpsMoved, "Number of and/cmp's pushed into branches");
00074 STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
00075 
00076 static cl::opt<bool> DisableBranchOpts(
00077   "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
00078   cl::desc("Disable branch optimizations in CodeGenPrepare"));
00079 
00080 static cl::opt<bool>
00081     DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
00082                   cl::desc("Disable GC optimizations in CodeGenPrepare"));
00083 
00084 static cl::opt<bool> DisableSelectToBranch(
00085   "disable-cgp-select2branch", cl::Hidden, cl::init(false),
00086   cl::desc("Disable select to branch conversion."));
00087 
00088 static cl::opt<bool> AddrSinkUsingGEPs(
00089   "addr-sink-using-gep", cl::Hidden, cl::init(false),
00090   cl::desc("Address sinking in CGP using GEPs."));
00091 
00092 static cl::opt<bool> EnableAndCmpSinking(
00093    "enable-andcmp-sinking", cl::Hidden, cl::init(true),
00094    cl::desc("Enable sinking and/cmp into branches."));
00095 
00096 static cl::opt<bool> DisableStoreExtract(
00097     "disable-cgp-store-extract", cl::Hidden, cl::init(false),
00098     cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));
00099 
00100 static cl::opt<bool> StressStoreExtract(
00101     "stress-cgp-store-extract", cl::Hidden, cl::init(false),
00102     cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));
00103 
00104 static cl::opt<bool> DisableExtLdPromotion(
00105     "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
00106     cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
00107              "CodeGenPrepare"));
00108 
00109 static cl::opt<bool> StressExtLdPromotion(
00110     "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
00111     cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
00112              "optimization in CodeGenPrepare"));
00113 
00114 namespace {
00115 typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
00116 typedef PointerIntPair<Type *, 1, bool> TypeIsSExt;
00117 typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy;
00118 class TypePromotionTransaction;
00119 
00120   class CodeGenPrepare : public FunctionPass {
00121     const TargetMachine *TM;
00122     const TargetLowering *TLI;
00123     const TargetTransformInfo *TTI;
00124     const TargetLibraryInfo *TLInfo;
00125 
00126     /// As we scan instructions optimizing them, this is the next instruction
00127     /// to optimize. Transforms that can invalidate this should update it.
00128     BasicBlock::iterator CurInstIterator;
00129 
00130     /// Keeps track of non-local addresses that have been sunk into a block.
00131     /// This allows us to avoid inserting duplicate code for blocks with
00132     /// multiple load/stores of the same address.
00133     ValueMap<Value*, Value*> SunkAddrs;
00134 
00135     /// Keeps track of all instructions inserted for the current function.
00136     SetOfInstrs InsertedInsts;
00137     /// Keeps track of the original type of each promoted instruction
00138     /// for the current function.
00139     InstrToOrigTy PromotedInsts;
00140 
00141     /// True if CFG is modified in any way.
00142     bool ModifiedDT;
00143 
00144     /// True if optimizing for size.
00145     bool OptSize;
00146 
00147     /// DataLayout for the Function being processed.
00148     const DataLayout *DL;
00149 
00150   public:
00151     static char ID; // Pass identification, replacement for typeid
00152     explicit CodeGenPrepare(const TargetMachine *TM = nullptr)
00153         : FunctionPass(ID), TM(TM), TLI(nullptr), TTI(nullptr), DL(nullptr) {
00154         initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
00155       }
00156     bool runOnFunction(Function &F) override;
00157 
00158     const char *getPassName() const override { return "CodeGen Prepare"; }
00159 
00160     void getAnalysisUsage(AnalysisUsage &AU) const override {
00161       AU.addPreserved<DominatorTreeWrapperPass>();
00162       AU.addRequired<TargetLibraryInfoWrapperPass>();
00163       AU.addRequired<TargetTransformInfoWrapperPass>();
00164     }
00165 
00166   private:
00167     bool eliminateFallThrough(Function &F);
00168     bool eliminateMostlyEmptyBlocks(Function &F);
00169     bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
00170     void eliminateMostlyEmptyBlock(BasicBlock *BB);
00171     bool optimizeBlock(BasicBlock &BB, bool& ModifiedDT);
00172     bool optimizeInst(Instruction *I, bool& ModifiedDT);
00173     bool optimizeMemoryInst(Instruction *I, Value *Addr,
00174                             Type *AccessTy, unsigned AS);
00175     bool optimizeInlineAsmInst(CallInst *CS);
00176     bool optimizeCallInst(CallInst *CI, bool& ModifiedDT);
00177     bool moveExtToFormExtLoad(Instruction *&I);
00178     bool optimizeExtUses(Instruction *I);
00179     bool optimizeLoadExt(LoadInst *I);
00180     bool optimizeSelectInst(SelectInst *SI);
00181     bool optimizeShuffleVectorInst(ShuffleVectorInst *SI);
00182     bool optimizeSwitchInst(SwitchInst *CI);
00183     bool optimizeExtractElementInst(Instruction *Inst);
00184     bool dupRetToEnableTailCallOpts(BasicBlock *BB);
00185     bool placeDbgValues(Function &F);
00186     bool sinkAndCmp(Function &F);
00187     bool extLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI,
00188                         Instruction *&Inst,
00189                         const SmallVectorImpl<Instruction *> &Exts,
00190                         unsigned CreatedInstCost);
00191     bool splitBranchCondition(Function &F);
00192     bool simplifyOffsetableRelocate(Instruction &I);
00193     void stripInvariantGroupMetadata(Instruction &I);
00194   };
00195 }
00196 
00197 char CodeGenPrepare::ID = 0;
00198 INITIALIZE_TM_PASS(CodeGenPrepare, "codegenprepare",
00199                    "Optimize for code generation", false, false)
00200 
00201 FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) {
00202   return new CodeGenPrepare(TM);
00203 }
00204 
00205 bool CodeGenPrepare::runOnFunction(Function &F) {
00206   if (skipOptnoneFunction(F))
00207     return false;
00208 
00209   DL = &F.getParent()->getDataLayout();
00210 
00211   bool EverMadeChange = false;
00212   // Clear per function information.
00213   InsertedInsts.clear();
00214   PromotedInsts.clear();
00215 
00216   ModifiedDT = false;
00217   if (TM)
00218     TLI = TM->getSubtargetImpl(F)->getTargetLowering();
00219   TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
00220   TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
00221   OptSize = F.optForSize();
00222 
00223   /// This optimization identifies DIV instructions that can be
00224   /// profitably bypassed and carried out with a shorter, faster divide.
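  /// For example (illustrative): on a target that reports slow 64-bit division,
  /// a udiv of i64 operands that happen to fit in 32 bits can be guarded by a
  /// runtime check and carried out as a 32-bit divide; the concrete widths come
  /// from getBypassSlowDivWidths() below.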
00225   if (!OptSize && TLI && TLI->isSlowDivBypassed()) {
00226     const DenseMap<unsigned int, unsigned int> &BypassWidths =
00227        TLI->getBypassSlowDivWidths();
00228     BasicBlock* BB = &*F.begin();
00229     while (BB != nullptr) {
00230       // bypassSlowDivision may create new BBs, but we don't want to reapply the
00231       // optimization to those blocks.
00232       BasicBlock* Next = BB->getNextNode();
00233       EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
00234       BB = Next;
00235     }
00236   }
00237 
00238   // Eliminate blocks that contain only PHI nodes and an
00239   // unconditional branch.
00240   EverMadeChange |= eliminateMostlyEmptyBlocks(F);
00241 
00242   // If llvm.dbg.value is far away from the value, then isel may not be able
00243   // to handle it properly. isel will drop llvm.dbg.value if it cannot
00244   // find a node corresponding to the value.
00245   EverMadeChange |= placeDbgValues(F);
00246 
00247   // If there is a mask, compare against zero, and branch that can be combined
00248   // into a single target instruction, push the mask and compare into branch
00249   // users. Do this before OptimizeBlock -> OptimizeInst ->
00250   // OptimizeCmpExpression, which perturbs the pattern being searched for.
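  // The pattern in question looks roughly like this (illustrative IR):
  //   %and = and i64 %x, 8
  //   %cmp = icmp eq i64 %and, 0
  //   br i1 %cmp, label %true.bb, label %false.bb
  // which some targets can lower to a single test-bit-and-branch instruction.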
00251   if (!DisableBranchOpts) {
00252     EverMadeChange |= sinkAndCmp(F);
00253     EverMadeChange |= splitBranchCondition(F);
00254   }
00255 
00256   bool MadeChange = true;
00257   while (MadeChange) {
00258     MadeChange = false;
00259     for (Function::iterator I = F.begin(); I != F.end(); ) {
00260       BasicBlock *BB = &*I++;
00261       bool ModifiedDTOnIteration = false;
00262       MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration);
00263 
00264       // Restart BB iteration if the dominator tree of the Function was changed
00265       if (ModifiedDTOnIteration)
00266         break;
00267     }
00268     EverMadeChange |= MadeChange;
00269   }
00270 
00271   SunkAddrs.clear();
00272 
00273   if (!DisableBranchOpts) {
00274     MadeChange = false;
00275     SmallPtrSet<BasicBlock*, 8> WorkList;
00276     for (BasicBlock &BB : F) {
00277       SmallVector<BasicBlock *, 2> Successors(succ_begin(&BB), succ_end(&BB));
00278       MadeChange |= ConstantFoldTerminator(&BB, true);
00279       if (!MadeChange) continue;
00280 
00281       for (SmallVectorImpl<BasicBlock*>::iterator
00282              II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
00283         if (pred_begin(*II) == pred_end(*II))
00284           WorkList.insert(*II);
00285     }
00286 
00287     // Delete the dead blocks and any of their dead successors.
00288     MadeChange |= !WorkList.empty();
00289     while (!WorkList.empty()) {
00290       BasicBlock *BB = *WorkList.begin();
00291       WorkList.erase(BB);
00292       SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
00293 
00294       DeleteDeadBlock(BB);
00295 
00296       for (SmallVectorImpl<BasicBlock*>::iterator
00297              II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
00298         if (pred_begin(*II) == pred_end(*II))
00299           WorkList.insert(*II);
00300     }
00301 
00302     // Merge pairs of basic blocks with unconditional branches, connected by
00303     // a single edge.
00304     if (EverMadeChange || MadeChange)
00305       MadeChange |= eliminateFallThrough(F);
00306 
00307     EverMadeChange |= MadeChange;
00308   }
00309 
00310   if (!DisableGCOpts) {
00311     SmallVector<Instruction *, 2> Statepoints;
00312     for (BasicBlock &BB : F)
00313       for (Instruction &I : BB)
00314         if (isStatepoint(I))
00315           Statepoints.push_back(&I);
00316     for (auto &I : Statepoints)
00317       EverMadeChange |= simplifyOffsetableRelocate(*I);
00318   }
00319 
00320   return EverMadeChange;
00321 }
00322 
00323 /// Merge basic blocks which are connected by a single edge, where one of the
00324 /// basic blocks has a single successor pointing to the other basic block,
00325 /// which has a single predecessor.
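/// For example (illustrative), given
///   bb1:
///     ...
///     br label %bb2
///   bb2:                                        ; preds = %bb1
///     ...
/// the contents of %bb2 are merged into %bb1 and %bb2 is erased, provided %bb2
/// has no other predecessors and its address is not taken.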
00326 bool CodeGenPrepare::eliminateFallThrough(Function &F) {
00327   bool Changed = false;
00328   // Scan all of the blocks in the function, except for the entry block.
00329   for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) {
00330     BasicBlock *BB = &*I++;
00331     // If the destination block has a single pred, then this is a trivial
00332     // edge, just collapse it.
00333     BasicBlock *SinglePred = BB->getSinglePredecessor();
00334 
00335     // Don't merge if BB's address is taken.
00336     if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue;
00337 
00338     BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
00339     if (Term && !Term->isConditional()) {
00340       Changed = true;
00341       DEBUG(dbgs() << "To merge:\n"<< *SinglePred << "\n\n\n");
00342       // Remember if SinglePred was the entry block of the function.
00343       // If so, we will need to move BB back to the entry position.
00344       bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
00345       MergeBasicBlockIntoOnlyPred(BB, nullptr);
00346 
00347       if (isEntry && BB != &BB->getParent()->getEntryBlock())
00348         BB->moveBefore(&BB->getParent()->getEntryBlock());
00349 
00350       // We have erased a block. Update the iterator.
00351       I = BB->getIterator();
00352     }
00353   }
00354   return Changed;
00355 }
00356 
00357 /// Eliminate blocks that contain only PHI nodes, debug info directives, and an
00358 /// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
00359 /// edges in ways that are non-optimal for isel. Start by eliminating these
00360 /// blocks so we can split them the way we want them.
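/// A typical candidate looks like this (illustrative IR):
///   split.bb:                                   ; preds = %a, %b
///     %p = phi i32 [ %x, %a ], [ %y, %b ]
///     br label %dest
/// where %p is only used by phi nodes in %dest; the block is removed and the
/// phis in %dest are updated to take the incoming values directly.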
00361 bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
00362   bool MadeChange = false;
00363   // Note that this intentionally skips the entry block.
00364   for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) {
00365     BasicBlock *BB = &*I++;
00366 
00367     // If this block doesn't end with an uncond branch, ignore it.
00368     BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
00369     if (!BI || !BI->isUnconditional())
00370       continue;
00371 
00372     // If the instruction before the branch (skipping debug info) isn't a phi
00373     // node, then other stuff is happening here.
00374     BasicBlock::iterator BBI = BI->getIterator();
00375     if (BBI != BB->begin()) {
00376       --BBI;
00377       while (isa<DbgInfoIntrinsic>(BBI)) {
00378         if (BBI == BB->begin())
00379           break;
00380         --BBI;
00381       }
00382       if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
00383         continue;
00384     }
00385 
00386     // Do not break infinite loops.
00387     BasicBlock *DestBB = BI->getSuccessor(0);
00388     if (DestBB == BB)
00389       continue;
00390 
00391     if (!canMergeBlocks(BB, DestBB))
00392       continue;
00393 
00394     eliminateMostlyEmptyBlock(BB);
00395     MadeChange = true;
00396   }
00397   return MadeChange;
00398 }
00399 
00400 /// Return true if we can merge BB into DestBB if there is a single
00401 /// unconditional branch between them, and BB contains no other non-phi
00402 /// instructions.
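/// Note that merging is refused when it would create conflicting phi inputs:
/// if BB and DestBB share a predecessor and a phi in DestBB would end up with
/// two different incoming values for that predecessor, we bail out.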
00403 bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
00404                                     const BasicBlock *DestBB) const {
00405   // We only want to eliminate blocks whose phi nodes are used by phi nodes in
00406   // the successor.  If there are more complex conditions (e.g. preheaders),
00407   // don't mess around with them.
00408   BasicBlock::const_iterator BBI = BB->begin();
00409   while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
00410     for (const User *U : PN->users()) {
00411       const Instruction *UI = cast<Instruction>(U);
00412       if (UI->getParent() != DestBB || !isa<PHINode>(UI))
00413         return false;
00414       // If the user is inside DestBB and it is a PHINode, then check its
00415       // incoming value. If the incoming value is not from BB, then this is
00416       // a complex condition (e.g. preheaders) that we want to avoid here.
00417       if (UI->getParent() == DestBB) {
00418         if (const PHINode *UPN = dyn_cast<PHINode>(UI))
00419           for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
00420             Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
00421             if (Insn && Insn->getParent() == BB &&
00422                 Insn->getParent() != UPN->getIncomingBlock(I))
00423               return false;
00424           }
00425       }
00426     }
00427   }
00428 
00429   // If BB and DestBB contain any common predecessors, then the phi nodes in BB
00430   // and DestBB may have conflicting incoming values for the block.  If so, we
00431   // can't merge the block.
00432   const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
00433   if (!DestBBPN) return true;  // no conflict.
00434 
00435   // Collect the preds of BB.
00436   SmallPtrSet<const BasicBlock*, 16> BBPreds;
00437   if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
00438     // It is faster to get preds from a PHI than with pred_iterator.
00439     for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
00440       BBPreds.insert(BBPN->getIncomingBlock(i));
00441   } else {
00442     BBPreds.insert(pred_begin(BB), pred_end(BB));
00443   }
00444 
00445   // Walk the preds of DestBB.
00446   for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
00447     BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
00448     if (BBPreds.count(Pred)) {   // Common predecessor?
00449       BBI = DestBB->begin();
00450       while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
00451         const Value *V1 = PN->getIncomingValueForBlock(Pred);
00452         const Value *V2 = PN->getIncomingValueForBlock(BB);
00453 
00454         // If V2 is a phi node in BB, look up what the mapped value will be.
00455         if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
00456           if (V2PN->getParent() == BB)
00457             V2 = V2PN->getIncomingValueForBlock(Pred);
00458 
00459         // If there is a conflict, bail out.
00460         if (V1 != V2) return false;
00461       }
00462     }
00463   }
00464 
00465   return true;
00466 }
00467 
00468 
00469 /// Eliminate a basic block that has only phi's and an unconditional branch in
00470 /// it.
00471 void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
00472   BranchInst *BI = cast<BranchInst>(BB->getTerminator());
00473   BasicBlock *DestBB = BI->getSuccessor(0);
00474 
00475   DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB);
00476 
00477   // If the destination block has a single pred, then this is a trivial edge,
00478   // just collapse it.
00479   if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
00480     if (SinglePred != DestBB) {
00481       // Remember if SinglePred was the entry block of the function.  If so, we
00482       // will need to move BB back to the entry position.
00483       bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
00484       MergeBasicBlockIntoOnlyPred(DestBB, nullptr);
00485 
00486       if (isEntry && BB != &BB->getParent()->getEntryBlock())
00487         BB->moveBefore(&BB->getParent()->getEntryBlock());
00488 
00489       DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
00490       return;
00491     }
00492   }
00493 
00494   // Otherwise, we have multiple predecessors of BB.  Update the PHIs in DestBB
00495   // to handle the new incoming edges it is about to have.
00496   PHINode *PN;
00497   for (BasicBlock::iterator BBI = DestBB->begin();
00498        (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
00499     // Remove the incoming value for BB, and remember it.
00500     Value *InVal = PN->removeIncomingValue(BB, false);
00501 
00502     // Two options: either the InVal is a phi node defined in BB or it is some
00503     // value that dominates BB.
00504     PHINode *InValPhi = dyn_cast<PHINode>(InVal);
00505     if (InValPhi && InValPhi->getParent() == BB) {
00506       // Add all of the input values of the input PHI as inputs of this phi.
00507       for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
00508         PN->addIncoming(InValPhi->getIncomingValue(i),
00509                         InValPhi->getIncomingBlock(i));
00510     } else {
00511       // Otherwise, add one instance of the dominating value for each edge that
00512       // we will be adding.
00513       if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
00514         for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
00515           PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
00516       } else {
00517         for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
00518           PN->addIncoming(InVal, *PI);
00519       }
00520     }
00521   }
00522 
00523   // The PHIs are now updated, change everything that refers to BB to use
00524   // DestBB and remove BB.
00525   BB->replaceAllUsesWith(DestBB);
00526   BB->eraseFromParent();
00527   ++NumBlocksElim;
00528 
00529   DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
00530 }
00531 
00532 // Computes a map of base pointer relocation instructions to corresponding
00533 // derived pointer relocation instructions given a vector of all relocate calls
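// For example (illustrative): for a statepoint whose gc arguments are
// (..., %base, %derived), the base relocate carries the index pair (4, 4) and
// the derived relocate carries (4, 5); the derived call is then recorded in
// RelocateInstMap under its base relocate.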
00534 static void computeBaseDerivedRelocateMap(
00535     const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls,
00536     DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>>
00537         &RelocateInstMap) {
00538   // Collect information in two maps: one primarily for locating the base object
00539   // while filling the second map; the second map is the final structure holding
00540   // a mapping between Base and corresponding Derived relocate calls
00541   DenseMap<std::pair<unsigned, unsigned>, GCRelocateInst *> RelocateIdxMap;
00542   for (auto *ThisRelocate : AllRelocateCalls) {
00543     auto K = std::make_pair(ThisRelocate->getBasePtrIndex(),
00544                             ThisRelocate->getDerivedPtrIndex());
00545     RelocateIdxMap.insert(std::make_pair(K, ThisRelocate));
00546   }
00547   for (auto &Item : RelocateIdxMap) {
00548     std::pair<unsigned, unsigned> Key = Item.first;
00549     if (Key.first == Key.second)
00550       // Base relocation: nothing to insert
00551       continue;
00552 
00553     GCRelocateInst *I = Item.second;
00554     auto BaseKey = std::make_pair(Key.first, Key.first);
00555 
00556     // We're iterating over RelocateIdxMap so we cannot modify it.
00557     auto MaybeBase = RelocateIdxMap.find(BaseKey);
00558     if (MaybeBase == RelocateIdxMap.end())
00559       // TODO: We might want to insert a new base object relocate and gep off
00560       // that, if there are enough derived object relocates.
00561       continue;
00562 
00563     RelocateInstMap[MaybeBase->second].push_back(I);
00564   }
00565 }
00566 
00567 // Accepts a GEP and extracts the operands into a vector provided they're all
00568 // small integer constants
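// ("small" here means a constant index of at most 20, per the check below;
// e.g. "gep %base, i32 0, i32 15" qualifies, while a variable index or a
// constant such as 100 does not).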
00569 static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP,
00570                                           SmallVectorImpl<Value *> &OffsetV) {
00571   for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
00572     // Only accept small constant integer operands
00573     auto Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
00574     if (!Op || Op->getZExtValue() > 20)
00575       return false;
00576   }
00577 
00578   for (unsigned i = 1; i < GEP->getNumOperands(); i++)
00579     OffsetV.push_back(GEP->getOperand(i));
00580   return true;
00581 }
00582 
00583 // Takes a RelocatedBase (base pointer relocation instruction) and Targets to
00584 // replace, computes a replacement, and applies it.
00585 static bool
00586 simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase,
00587                           const SmallVectorImpl<GCRelocateInst *> &Targets) {
00588   bool MadeChange = false;
00589   for (GCRelocateInst *ToReplace : Targets) {
00590     assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() &&
00591            "Not relocating a derived object of the original base object");
00592     if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) {
00593       // A duplicate relocate call. TODO: coalesce duplicates.
00594       continue;
00595     }
00596 
00597     if (RelocatedBase->getParent() != ToReplace->getParent()) {
00598       // Base and derived relocates are in different basic blocks.
00599       // In this case transform is only valid when base dominates derived
00600       // relocate. However it would be too expensive to check dominance
00601       // for each such relocate, so we skip the whole transformation.
00602       continue;
00603     }
00604 
00605     Value *Base = ToReplace->getBasePtr();
00606     auto Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr());
00607     if (!Derived || Derived->getPointerOperand() != Base)
00608       continue;
00609 
00610     SmallVector<Value *, 2> OffsetV;
00611     if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
00612       continue;
00613 
00614     // Create a Builder and replace the target callsite with a gep
00615     assert(RelocatedBase->getNextNode() &&
00616            "Should always have one since it's not a terminator");
00617 
00618     // Insert after RelocatedBase
00619     IRBuilder<> Builder(RelocatedBase->getNextNode());
00620     Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
00621 
00622     // If gc_relocate does not match the actual type, cast it to the right type.
00623     // In theory, there must be a bitcast after gc_relocate if the type does not
00624     // match, and we should reuse it to get the derived pointer. But it could be
00625     // cases like this:
00626     // bb1:
00627     //  ...
00628     //  %g1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
00629     //  br label %merge
00630     //
00631     // bb2:
00632     //  ...
00633     //  %g2 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
00634     //  br label %merge
00635     //
00636     // merge:
00637     //  %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ]
00638     //  %cast = bitcast i8 addrspace(1)* %p1 to i32 addrspace(1)*
00639     //
00640     // In this case, we cannot find the bitcast any more. So we insert a new
00641     // bitcast whether or not one already exists. In this way, we can handle
00642     // all cases, and the extra bitcast should be optimized away in later passes.
00643     Value *ActualRelocatedBase = RelocatedBase;
00644     if (RelocatedBase->getType() != Base->getType()) {
00645       ActualRelocatedBase =
00646           Builder.CreateBitCast(RelocatedBase, Base->getType());
00647     }
00648     Value *Replacement = Builder.CreateGEP(
00649         Derived->getSourceElementType(), ActualRelocatedBase, makeArrayRef(OffsetV));
00650     Replacement->takeName(ToReplace);
00651     // If the newly generated derived pointer's type does not match the original derived
00652     // pointer's type, cast the new derived pointer to match it. Same reasoning as above.
00653     Value *ActualReplacement = Replacement;
00654     if (Replacement->getType() != ToReplace->getType()) {
00655       ActualReplacement =
00656           Builder.CreateBitCast(Replacement, ToReplace->getType());
00657     }
00658     ToReplace->replaceAllUsesWith(ActualReplacement);
00659     ToReplace->eraseFromParent();
00660 
00661     MadeChange = true;
00662   }
00663   return MadeChange;
00664 }
00665 
00666 // Turns this:
00667 //
00668 // %base = ...
00669 // %ptr = gep %base + 15
00670 // %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
00671 // %base' = relocate(%tok, i32 4, i32 4)
00672 // %ptr' = relocate(%tok, i32 4, i32 5)
00673 // %val = load %ptr'
00674 //
00675 // into this:
00676 //
00677 // %base = ...
00678 // %ptr = gep %base + 15
00679 // %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
00680 // %base' = gc.relocate(%tok, i32 4, i32 4)
00681 // %ptr' = gep %base' + 15
00682 // %val = load %ptr'
00683 bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) {
00684   bool MadeChange = false;
00685   SmallVector<GCRelocateInst *, 2> AllRelocateCalls;
00686 
00687   for (auto *U : I.users())
00688     if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U))
00689       // Collect all the relocate calls associated with a statepoint
00690       AllRelocateCalls.push_back(Relocate);
00691 
00692   // We need at least one base pointer relocation + one derived pointer
00693   // relocation to mangle.
00694   if (AllRelocateCalls.size() < 2)
00695     return false;
00696 
00697   // RelocateInstMap is a mapping from the base relocate instruction to the
00698   // corresponding derived relocate instructions
00699   DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>> RelocateInstMap;
00700   computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
00701   if (RelocateInstMap.empty())
00702     return false;
00703 
00704   for (auto &Item : RelocateInstMap)
00705     // Item.first is the RelocatedBase to offset against
00706     // Item.second is the vector of Targets to replace
00707     MadeChange = simplifyRelocatesOffABase(Item.first, Item.second);
00708   return MadeChange;
00709 }
00710 
00711 /// SinkCast - Sink the specified cast instruction into its user blocks
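/// For example (illustrative), a cast defined in %entry but used only in %bb2
/// and %bb3 gets one copy inserted into each of those blocks; the original cast
/// is erased once all of its uses have been rewritten, keeping the cast local
/// to the blocks that actually need its value.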
00712 static bool SinkCast(CastInst *CI) {
00713   BasicBlock *DefBB = CI->getParent();
00714 
00715   /// InsertedCasts - Only insert a cast in each block once.
00716   DenseMap<BasicBlock*, CastInst*> InsertedCasts;
00717 
00718   bool MadeChange = false;
00719   for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
00720        UI != E; ) {
00721     Use &TheUse = UI.getUse();
00722     Instruction *User = cast<Instruction>(*UI);
00723 
00724     // Figure out which BB this cast is used in.  For PHI's this is the
00725     // appropriate predecessor block.
00726     BasicBlock *UserBB = User->getParent();
00727     if (PHINode *PN = dyn_cast<PHINode>(User)) {
00728       UserBB = PN->getIncomingBlock(TheUse);
00729     }
00730 
00731     // Preincrement use iterator so we don't invalidate it.
00732     ++UI;
00733 
00734     // If the block selected to receive the cast is an EH pad that does not
00735     // allow non-PHI instructions before the terminator, we can't sink the
00736     // cast.
00737     if (UserBB->getTerminator()->isEHPad())
00738       continue;
00739 
00740     // If this user is in the same block as the cast, don't change the cast.
00741     if (UserBB == DefBB) continue;
00742 
00743     // If we have already inserted a cast into this block, use it.
00744     CastInst *&InsertedCast = InsertedCasts[UserBB];
00745 
00746     if (!InsertedCast) {
00747       BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
00748       assert(InsertPt != UserBB->end());
00749       InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0),
00750                                       CI->getType(), "", &*InsertPt);
00751     }
00752 
00753     // Replace a use of the cast with a use of the new cast.
00754     TheUse = InsertedCast;
00755     MadeChange = true;
00756     ++NumCastUses;
00757   }
00758 
00759   // If we removed all uses, nuke the cast.
00760   if (CI->use_empty()) {
00761     CI->eraseFromParent();
00762     MadeChange = true;
00763   }
00764 
00765   return MadeChange;
00766 }
00767 
00768 /// If the specified cast instruction is a noop copy (e.g. it's casting from
00769 /// one pointer type to another, i32->i8 on PPC), sink it into user blocks to
00770 /// reduce the number of virtual registers that must be created and coalesced.
00771 ///
00772 /// Return true if any changes are made.
00773 ///
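/// For example (illustrative), on a target that promotes i8 to i32,
///   %t = trunc i32 %x to i8
/// is a noop copy after type promotion, so it is handed to SinkCast above
/// rather than kept as a separate value in its defining block.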
00774 static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
00775                                        const DataLayout &DL) {
00776   // If this is a noop copy,
00777   EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType());
00778   EVT DstVT = TLI.getValueType(DL, CI->getType());
00779 
00780   // Is this an fp<->int conversion? If so, it is not a noop copy.
00781   if (SrcVT.isInteger() != DstVT.isInteger())
00782     return false;
00783 
00784   // If this is an extension, it will be a zero or sign extension, which
00785   // isn't a noop.
00786   if (SrcVT.bitsLT(DstVT)) return false;
00787 
00788   // If these values will be promoted, find out what they will be promoted
00789   // to.  This helps us consider truncates on PPC as noop copies when they
00790   // are.
00791   if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
00792       TargetLowering::TypePromoteInteger)
00793     SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
00794   if (TLI.getTypeAction(CI->getContext(), DstVT) ==
00795       TargetLowering::TypePromoteInteger)
00796     DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
00797 
00798   // If, after promotion, these are the same types, this is a noop copy.
00799   if (SrcVT != DstVT)
00800     return false;
00801 
00802   return SinkCast(CI);
00803 }
00804 
00805 /// Try to combine CI into a call to the llvm.uadd.with.overflow intrinsic if
00806 /// possible.
00807 ///
00808 /// Return true if any changes were made.
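/// For example (illustrative):
///   %add = add i32 %a, %b
///   %cmp = icmp ult i32 %add, %a          ; unsigned overflow check
/// becomes, roughly:
///   %u   = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
///   %add = extractvalue { i32, i1 } %u, 0
///   %cmp = extractvalue { i32, i1 } %u, 1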
00809 static bool CombineUAddWithOverflow(CmpInst *CI) {
00810   Value *A, *B;
00811   Instruction *AddI;
00812   if (!match(CI,
00813              m_UAddWithOverflow(m_Value(A), m_Value(B), m_Instruction(AddI))))
00814     return false;
00815 
00816   Type *Ty = AddI->getType();
00817   if (!isa<IntegerType>(Ty))
00818     return false;
00819 
00820   // We don't want to move around uses of condition values this late, so we
00821   // check if it is legal to create the call to the intrinsic in the basic
00822   // block containing the icmp:
00823 
00824   if (AddI->getParent() != CI->getParent() && !AddI->hasOneUse())
00825     return false;
00826 
00827 #ifndef NDEBUG
00828   // Someday m_UAddWithOverflow may get smarter, but this is a safe assumption
00829   // for now:
00830   if (AddI->hasOneUse())
00831     assert(*AddI->user_begin() == CI && "expected!");
00832 #endif
00833 
00834   Module *M = CI->getModule();
00835   Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
00836 
00837   auto *InsertPt = AddI->hasOneUse() ? CI : AddI;
00838 
00839   auto *UAddWithOverflow =
00840       CallInst::Create(F, {A, B}, "uadd.overflow", InsertPt);
00841   auto *UAdd = ExtractValueInst::Create(UAddWithOverflow, 0, "uadd", InsertPt);
00842   auto *Overflow =
00843       ExtractValueInst::Create(UAddWithOverflow, 1, "overflow", InsertPt);
00844 
00845   CI->replaceAllUsesWith(Overflow);
00846   AddI->replaceAllUsesWith(UAdd);
00847   CI->eraseFromParent();
00848   AddI->eraseFromParent();
00849   return true;
00850 }
00851 
00852 /// Sink the given CmpInst into user blocks to reduce the number of virtual
00853 /// registers that must be created and coalesced. This is a clear win except on
00854 /// targets with multiple condition code registers (PowerPC), where it might
00855 /// lose; some adjustment may be wanted there.
00856 ///
00857 /// Return true if any changes are made.
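/// For example (illustrative), a cmp computed in one block but consumed only by
/// branches in other blocks gets a per-block copy, which lets isel fold the
/// compare into each branch instead of keeping an i1 value live across blocks.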
00858 static bool SinkCmpExpression(CmpInst *CI) {
00859   BasicBlock *DefBB = CI->getParent();
00860 
00861   /// Only insert a cmp in each block once.
00862   DenseMap<BasicBlock*, CmpInst*> InsertedCmps;
00863 
00864   bool MadeChange = false;
00865   for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
00866        UI != E; ) {
00867     Use &TheUse = UI.getUse();
00868     Instruction *User = cast<Instruction>(*UI);
00869 
00870     // Preincrement use iterator so we don't invalidate it.
00871     ++UI;
00872 
00873     // Don't bother for PHI nodes.
00874     if (isa<PHINode>(User))
00875       continue;
00876 
00877     // Figure out which BB this cmp is used in.
00878     BasicBlock *UserBB = User->getParent();
00879 
00880     // If this user is in the same block as the cmp, don't change the cmp.
00881     if (UserBB == DefBB) continue;
00882 
00883     // If we have already inserted a cmp into this block, use it.
00884     CmpInst *&InsertedCmp = InsertedCmps[UserBB];
00885 
00886     if (!InsertedCmp) {
00887       BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
00888       assert(InsertPt != UserBB->end());
00889       InsertedCmp =
00890           CmpInst::Create(CI->getOpcode(), CI->getPredicate(),
00891                           CI->getOperand(0), CI->getOperand(1), "", &*InsertPt);
00892     }
00893 
00894     // Replace a use of the cmp with a use of the new cmp.
00895     TheUse = InsertedCmp;
00896     MadeChange = true;
00897     ++NumCmpUses;
00898   }
00899 
00900   // If we removed all uses, nuke the cmp.
00901   if (CI->use_empty()) {
00902     CI->eraseFromParent();
00903     MadeChange = true;
00904   }
00905 
00906   return MadeChange;
00907 }
00908 
00909 static bool OptimizeCmpExpression(CmpInst *CI) {
00910   if (SinkCmpExpression(CI))
00911     return true;
00912 
00913   if (CombineUAddWithOverflow(CI))
00914     return true;
00915 
00916   return false;
00917 }
00918 
00919 /// Check if the candidates could be combined with a shift instruction, which
00920 /// includes:
00921 /// 1. Truncate instruction
00922 /// 2. And instruction whose immediate is a mask of the low bits:
00923 /// imm & (imm+1) == 0
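/// For example (illustrative): "and i32 %shr, 255" qualifies because
/// 255 & 256 == 0, whereas "and i32 %shr, 240" does not (240 & 241 != 0).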
00924 static bool isExtractBitsCandidateUse(Instruction *User) {
00925   if (!isa<TruncInst>(User)) {
00926     if (User->getOpcode() != Instruction::And ||
00927         !isa<ConstantInt>(User->getOperand(1)))
00928       return false;
00929 
00930     const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue();
00931 
00932     if ((Cimm & (Cimm + 1)).getBoolValue())
00933       return false;
00934   }
00935   return true;
00936 }
00937 
00938 /// Sink both the shift and truncate instructions to the BB of the truncate's user.
00939 static bool
00940 SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
00941                      DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
00942                      const TargetLowering &TLI, const DataLayout &DL) {
00943   BasicBlock *UserBB = User->getParent();
00944   DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
00945   TruncInst *TruncI = dyn_cast<TruncInst>(User);
00946   bool MadeChange = false;
00947 
00948   for (Value::user_iterator TruncUI = TruncI->user_begin(),
00949                             TruncE = TruncI->user_end();
00950        TruncUI != TruncE;) {
00951 
00952     Use &TruncTheUse = TruncUI.getUse();
00953     Instruction *TruncUser = cast<Instruction>(*TruncUI);
00954     // Preincrement use iterator so we don't invalidate it.
00955 
00956     ++TruncUI;
00957 
00958     int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
00959     if (!ISDOpcode)
00960       continue;
00961 
00962     // If the use is actually a legal node, there will not be an
00963     // implicit truncate.
00964     // FIXME: always querying the result type is just an
00965     // approximation; some nodes' legality is determined by the
00966     // operand or other means. There's no good way to find out though.
00967     if (TLI.isOperationLegalOrCustom(
00968             ISDOpcode, TLI.getValueType(DL, TruncUser->getType(), true)))
00969       continue;
00970 
00971     // Don't bother for PHI nodes.
00972     if (isa<PHINode>(TruncUser))
00973       continue;
00974 
00975     BasicBlock *TruncUserBB = TruncUser->getParent();
00976 
00977     if (UserBB == TruncUserBB)
00978       continue;
00979 
00980     BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
00981     CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
00982 
00983     if (!InsertedShift && !InsertedTrunc) {
00984       BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
00985       assert(InsertPt != TruncUserBB->end());
00986       // Sink the shift
00987       if (ShiftI->getOpcode() == Instruction::AShr)
00988         InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI,
00989                                                    "", &*InsertPt);
00990       else
00991         InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,
00992                                                    "", &*InsertPt);
00993 
00994       // Sink the trunc
00995       BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
00996       TruncInsertPt++;
00997       assert(TruncInsertPt != TruncUserBB->end());
00998 
00999       InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
01000                                        TruncI->getType(), "", &*TruncInsertPt);
01001 
01002       MadeChange = true;
01003 
01004       TruncTheUse = InsertedTrunc;
01005     }
01006   }
01007   return MadeChange;
01008 }
01009 
01010 /// Sink the shift *right* instruction into user blocks if the uses could
01011 /// potentially be combined with this shift instruction to generate a
01012 /// BitExtract instruction. It is only applied if the architecture supports
01013 /// BitExtract instructions. Here is an example:
01014 /// BB1:
01015 ///   %x.extract.shift = lshr i64 %arg1, 32
01016 /// BB2:
01017 ///   %x.extract.trunc = trunc i64 %x.extract.shift to i16
01018 /// ==>
01019 ///
01020 /// BB2:
01021 ///   %x.extract.shift.1 = lshr i64 %arg1, 32
01022 ///   %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
01023 ///
01024 /// CodeGen will recognize the pattern in BB2 and generate a BitExtract
01025 /// instruction.
01026 /// Return true if any changes are made.
01027 static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
01028                                 const TargetLowering &TLI,
01029                                 const DataLayout &DL) {
01030   BasicBlock *DefBB = ShiftI->getParent();
01031 
01032   /// Only insert instructions in each block once.
01033   DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;
01034 
01035   bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(DL, ShiftI->getType()));
01036 
01037   bool MadeChange = false;
01038   for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
01039        UI != E;) {
01040     Use &TheUse = UI.getUse();
01041     Instruction *User = cast<Instruction>(*UI);
01042     // Preincrement use iterator so we don't invalidate it.
01043     ++UI;
01044 
01045     // Don't bother for PHI nodes.
01046     if (isa<PHINode>(User))
01047       continue;
01048 
01049     if (!isExtractBitsCandidateUse(User))
01050       continue;
01051 
01052     BasicBlock *UserBB = User->getParent();
01053 
01054     if (UserBB == DefBB) {
01055       // If the shift and truncate instructions are in the same BB, the use of
01056       // the truncate (TruncUse) may still introduce another truncate if it is
01057       // not legal. In this case, we would like to sink both the shift and the
01058       // truncate instruction to the BB of TruncUse.
01059       // for example:
01060       // BB1:
01061       // i64 shift.result = lshr i64 opnd, imm
01062       // trunc.result = trunc shift.result to i16
01063       //
01064       // BB2:
01065       //   ----> We will have an implicit truncate here if the architecture does
01066       //   not have an i16 compare.
01067       // cmp i16 trunc.result, opnd2
01068       //
01069       if (isa<TruncInst>(User) && shiftIsLegal
01070           // If the type of the truncate is legal, no truncate will be
01071           // introduced in other basic blocks.
01072           &&
01073           (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType()))))
01074         MadeChange =
01075             SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL);
01076 
01077       continue;
01078     }
01079     // If we have already inserted a shift into this block, use it.
01080     BinaryOperator *&InsertedShift = InsertedShifts[UserBB];
01081 
01082     if (!InsertedShift) {
01083       BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
01084       assert(InsertPt != UserBB->end());
01085 
01086       if (ShiftI->getOpcode() == Instruction::AShr)
01087         InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI,
01088                                                    "", &*InsertPt);
01089       else
01090         InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,
01091                                                    "", &*InsertPt);
01092 
01093       MadeChange = true;
01094     }
01095 
01096     // Replace a use of the shift with a use of the new shift.
01097     TheUse = InsertedShift;
01098   }
01099 
01100   // If we removed all uses, nuke the shift.
01101   if (ShiftI->use_empty())
01102     ShiftI->eraseFromParent();
01103 
01104   return MadeChange;
01105 }
01106 
01107 // Translate a masked load intrinsic like
01108 // <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
01109 //                               <16 x i1> %mask, <16 x i32> %passthru)
01110 // to a chain of basic blocks, loading the elements one-by-one if
01111 // the appropriate mask bit is set
01112 //
01113 //  %1 = bitcast i8* %addr to i32*
01114 //  %2 = extractelement <16 x i1> %mask, i32 0
01115 //  %3 = icmp eq i1 %2, true
01116 //  br i1 %3, label %cond.load, label %else
01117 //
01118 //cond.load:                                        ; preds = %0
01119 //  %4 = getelementptr i32* %1, i32 0
01120 //  %5 = load i32* %4
01121 //  %6 = insertelement <16 x i32> undef, i32 %5, i32 0
01122 //  br label %else
01123 //
01124 //else:                                             ; preds = %0, %cond.load
01125 //  %res.phi.else = phi <16 x i32> [ %6, %cond.load ], [ undef, %0 ]
01126 //  %7 = extractelement <16 x i1> %mask, i32 1
01127 //  %8 = icmp eq i1 %7, true
01128 //  br i1 %8, label %cond.load1, label %else2
01129 //
01130 //cond.load1:                                       ; preds = %else
01131 //  %9 = getelementptr i32* %1, i32 1
01132 //  %10 = load i32* %9
01133 //  %11 = insertelement <16 x i32> %res.phi.else, i32 %10, i32 1
01134 //  br label %else2
01135 //
01136 //else2:                                            ; preds = %else, %cond.load1
01137 //  %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
01138 //  %12 = extractelement <16 x i1> %mask, i32 2
01139 //  %13 = icmp eq i1 %12, true
01140 //  br i1 %13, label %cond.load4, label %else5
01141 //
01142 static void scalarizeMaskedLoad(CallInst *CI) {
01143   Value *Ptr  = CI->getArgOperand(0);
01144   Value *Alignment = CI->getArgOperand(1);
01145   Value *Mask = CI->getArgOperand(2);
01146   Value *Src0 = CI->getArgOperand(3);
01147 
01148   unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
01149   VectorType *VecType = dyn_cast<VectorType>(CI->getType());
01150   assert(VecType && "Unexpected return type of masked load intrinsic");
01151 
01152   Type *EltTy = CI->getType()->getVectorElementType();
01153 
01154   IRBuilder<> Builder(CI->getContext());
01155   Instruction *InsertPt = CI;
01156   BasicBlock *IfBlock = CI->getParent();
01157   BasicBlock *CondBlock = nullptr;
01158   BasicBlock *PrevIfBlock = CI->getParent();
01159 
01160   Builder.SetInsertPoint(InsertPt);
01161   Builder.SetCurrentDebugLocation(CI->getDebugLoc());
01162 
01163   // Short-cut if the mask is all-true.
01164   bool IsAllOnesMask = isa<Constant>(Mask) &&
01165     cast<Constant>(Mask)->isAllOnesValue();
01166 
01167   if (IsAllOnesMask) {
01168     Value *NewI = Builder.CreateAlignedLoad(Ptr, AlignVal);
01169     CI->replaceAllUsesWith(NewI);
01170     CI->eraseFromParent();
01171     return;
01172   }
01173 
01174   // Adjust alignment for the scalar instruction.
01175   AlignVal = std::min(AlignVal, VecType->getScalarSizeInBits()/8);
01176   // Bitcast %addr from i8* to EltTy*
01177   Type *NewPtrType =
01178     EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
01179   Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
01180   unsigned VectorWidth = VecType->getNumElements();
01181 
01182   Value *UndefVal = UndefValue::get(VecType);
01183 
01184   // The result vector
01185   Value *VResult = UndefVal;
01186 
01187   if (isa<ConstantVector>(Mask)) {
01188     for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
01189       if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
01190           continue;
01191       Value *Gep =
01192           Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
01193       LoadInst* Load = Builder.CreateAlignedLoad(Gep, AlignVal);
01194       VResult = Builder.CreateInsertElement(VResult, Load,
01195                                             Builder.getInt32(Idx));
01196     }
01197     Value *NewI = Builder.CreateSelect(Mask, VResult, Src0);
01198     CI->replaceAllUsesWith(NewI);
01199     CI->eraseFromParent();
01200     return;
01201   }
01202 
01203   PHINode *Phi = nullptr;
01204   Value *PrevPhi = UndefVal;
01205 
01206   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
01207 
01208     // Fill the "else" block, created in the previous iteration
01209     //
01210     //  %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
01211     //  %mask_1 = extractelement <16 x i1> %mask, i32 Idx
01212     //  %to_load = icmp eq i1 %mask_1, true
01213     //  br i1 %to_load, label %cond.load, label %else
01214     //
01215     if (Idx > 0) {
01216       Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
01217       Phi->addIncoming(VResult, CondBlock);
01218       Phi->addIncoming(PrevPhi, PrevIfBlock);
01219       PrevPhi = Phi;
01220       VResult = Phi;
01221     }
01222 
01223     Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
01224     Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
01225                                     ConstantInt::get(Predicate->getType(), 1));
01226 
01227     // Create "cond" block
01228     //
01229     //  %EltAddr = getelementptr i32* %1, i32 0
01230     //  %Elt = load i32* %EltAddr
01231     //  VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
01232     //
01233     CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.load");
01234     Builder.SetInsertPoint(InsertPt);
01235 
01236     Value *Gep =
01237         Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
01238     LoadInst *Load = Builder.CreateAlignedLoad(Gep, AlignVal);
01239     VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
01240 
01241     // Create "else" block, fill it in the next iteration
01242     BasicBlock *NewIfBlock =
01243         CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
01244     Builder.SetInsertPoint(InsertPt);
01245     Instruction *OldBr = IfBlock->getTerminator();
01246     BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
01247     OldBr->eraseFromParent();
01248     PrevIfBlock = IfBlock;
01249     IfBlock = NewIfBlock;
01250   }
01251 
01252   Phi = Builder.CreatePHI(VecType, 2, "res.phi.select");
01253   Phi->addIncoming(VResult, CondBlock);
01254   Phi->addIncoming(PrevPhi, PrevIfBlock);
01255   Value *NewI = Builder.CreateSelect(Mask, Phi, Src0);
01256   CI->replaceAllUsesWith(NewI);
01257   CI->eraseFromParent();
01258 }
01259 
01260 // Translate a masked store intrinsic, like
01261 // void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
01262 //                               <16 x i1> %mask)
01263 // to a chain of basic blocks, storing the elements one-by-one if
01264 // the appropriate mask bit is set
01265 //
01266 //   %1 = bitcast i8* %addr to i32*
01267 //   %2 = extractelement <16 x i1> %mask, i32 0
01268 //   %3 = icmp eq i1 %2, true
01269 //   br i1 %3, label %cond.store, label %else
01270 //
01271 // cond.store:                                       ; preds = %0
01272 //   %4 = extractelement <16 x i32> %val, i32 0
01273 //   %5 = getelementptr i32* %1, i32 0
01274 //   store i32 %4, i32* %5
01275 //   br label %else
01276 //
01277 // else:                                             ; preds = %0, %cond.store
01278 //   %6 = extractelement <16 x i1> %mask, i32 1
01279 //   %7 = icmp eq i1 %6, true
01280 //   br i1 %7, label %cond.store1, label %else2
01281 //
01282 // cond.store1:                                      ; preds = %else
01283 //   %8 = extractelement <16 x i32> %val, i32 1
01284 //   %9 = getelementptr i32* %1, i32 1
01285 //   store i32 %8, i32* %9
01286 //   br label %else2
01287 //   . . .
01288 static void scalarizeMaskedStore(CallInst *CI) {
01289   Value *Src = CI->getArgOperand(0);
01290   Value *Ptr  = CI->getArgOperand(1);
01291   Value *Alignment = CI->getArgOperand(2);
01292   Value *Mask = CI->getArgOperand(3);
01293 
01294   unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
01295   VectorType *VecType = dyn_cast<VectorType>(Src->getType());
01296   assert(VecType && "Unexpected data type in masked store intrinsic");
01297 
01298   Type *EltTy = VecType->getElementType();
01299 
01300   IRBuilder<> Builder(CI->getContext());
01301   Instruction *InsertPt = CI;
01302   BasicBlock *IfBlock = CI->getParent();
01303   Builder.SetInsertPoint(InsertPt);
01304   Builder.SetCurrentDebugLocation(CI->getDebugLoc());
01305 
01306   // Short-cut if the mask is all-true.
01307   bool IsAllOnesMask = isa<Constant>(Mask) &&
01308     cast<Constant>(Mask)->isAllOnesValue();
01309 
01310   if (IsAllOnesMask) {
01311     Builder.CreateAlignedStore(Src, Ptr, AlignVal);
01312     CI->eraseFromParent();
01313     return;
01314   }
01315 
01316   // Adjust alignment for the scalar instruction.
01317   AlignVal = std::max(AlignVal, VecType->getScalarSizeInBits()/8);
01318   // Bitcast %addr from i8* to EltTy*
01319   Type *NewPtrType =
01320     EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
01321   Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
01322   unsigned VectorWidth = VecType->getNumElements();
01323 
01324   if (isa<ConstantVector>(Mask)) {
01325     for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
01326       if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
01327           continue;
01328       Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
01329       Value *Gep =
01330           Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
01331       Builder.CreateAlignedStore(OneElt, Gep, AlignVal);
01332     }
01333     CI->eraseFromParent();
01334     return;
01335   }
01336 
01337   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
01338 
01339     // Fill the "else" block, created in the previous iteration
01340     //
01341     //  %mask_1 = extractelement <16 x i1> %mask, i32 Idx
01342     //  %to_store = icmp eq i1 %mask_1, true
01343     //  br i1 %to_store, label %cond.store, label %else
01344     //
01345     Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
01346     Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
01347                                     ConstantInt::get(Predicate->getType(), 1));
01348 
01349     // Create "cond" block
01350     //
01351     //  %OneElt = extractelement <16 x i32> %Src, i32 Idx
01352     //  %EltAddr = getelementptr i32* %1, i32 0
01353     //  store i32 %OneElt, i32* %EltAddr
01354     //
01355     BasicBlock *CondBlock =
01356         IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store");
01357     Builder.SetInsertPoint(InsertPt);
01358 
01359     Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
01360     Value *Gep =
01361         Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
01362     Builder.CreateAlignedStore(OneElt, Gep, AlignVal);
01363 
01364     // Create "else" block, fill it in the next iteration
01365     BasicBlock *NewIfBlock =
01366         CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
01367     Builder.SetInsertPoint(InsertPt);
01368     Instruction *OldBr = IfBlock->getTerminator();
01369     BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
01370     OldBr->eraseFromParent();
01371     IfBlock = NewIfBlock;
01372   }
01373   CI->eraseFromParent();
01374 }
01375 
01376 // Translate a masked gather intrinsic like
01377 // <16 x i32 > @llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4,
01378 //                               <16 x i1> %Mask, <16 x i32> %Src)
01379 // to a chain of basic blocks that loads the elements one-by-one if
01380 // the appropriate mask bit is set.
01381 //
01382 // %Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind
01383 // %Mask0 = extractelement <16 x i1> %Mask, i32 0
01384 // %ToLoad0 = icmp eq i1 %Mask0, true
01385 // br i1 %ToLoad0, label %cond.load, label %else
01386 //
01387 // cond.load:
01388 // %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
01389 // %Load0 = load i32, i32* %Ptr0, align 4
01390 // %Res0 = insertelement <16 x i32> undef, i32 %Load0, i32 0
01391 // br label %else
01392 //
01393 // else:
01394 // %res.phi.else = phi <16 x i32> [%Res0, %cond.load], [undef, %0]
01395 // %Mask1 = extractelement <16 x i1> %Mask, i32 1
01396 // %ToLoad1 = icmp eq i1 %Mask1, true
01397 // br i1 %ToLoad1, label %cond.load1, label %else2
01398 //
01399 // cond.load1:
01400 // %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
01401 // %Load1 = load i32, i32* %Ptr1, align 4
01402 // %Res1 = insertelement <16 x i32> %res.phi.else, i32 %Load1, i32 1
01403 // br label %else2
01404 // . . .
01405 // %Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src
01406 // ret <16 x i32> %Result
01407 static void scalarizeMaskedGather(CallInst *CI) {
01408   Value *Ptrs = CI->getArgOperand(0);
01409   Value *Alignment = CI->getArgOperand(1);
01410   Value *Mask = CI->getArgOperand(2);
01411   Value *Src0 = CI->getArgOperand(3);
01412 
01413   VectorType *VecType = dyn_cast<VectorType>(CI->getType());
01414 
01415   assert(VecType && "Unexpected return type of masked gather intrinsic");
01416 
01417   IRBuilder<> Builder(CI->getContext());
01418   Instruction *InsertPt = CI;
01419   BasicBlock *IfBlock = CI->getParent();
01420   BasicBlock *CondBlock = nullptr;
01421   BasicBlock *PrevIfBlock = CI->getParent();
01422   Builder.SetInsertPoint(InsertPt);
01423   unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
01424 
01425   Builder.SetCurrentDebugLocation(CI->getDebugLoc());
01426 
01427   Value *UndefVal = UndefValue::get(VecType);
01428 
01429   // The result vector
01430   Value *VResult = UndefVal;
01431   unsigned VectorWidth = VecType->getNumElements();
01432 
01433   // Short-cut if the mask is a vector of constants.
01434   bool IsConstMask = isa<ConstantVector>(Mask);
01435 
01436   if (IsConstMask) {
01437     for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
01438       if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
01439         continue;
01440       Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
01441                                                 "Ptr" + Twine(Idx));
01442       LoadInst *Load = Builder.CreateAlignedLoad(Ptr, AlignVal,
01443                                                  "Load" + Twine(Idx));
01444       VResult = Builder.CreateInsertElement(VResult, Load,
01445                                             Builder.getInt32(Idx),
01446                                             "Res" + Twine(Idx));
01447     }
01448     Value *NewI = Builder.CreateSelect(Mask, VResult, Src0);
01449     CI->replaceAllUsesWith(NewI);
01450     CI->eraseFromParent();
01451     return;
01452   }
01453 
01454   PHINode *Phi = nullptr;
01455   Value *PrevPhi = UndefVal;
01456 
01457   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
01458 
01459     // Fill the "else" block, created in the previous iteration
01460     //
01461     //  %Mask1 = extractelement <16 x i1> %Mask, i32 1
01462     //  %ToLoad1 = icmp eq i1 %Mask1, true
01463     //  br i1 %ToLoad1, label %cond.load, label %else
01464     //
01465     if (Idx > 0) {
01466       Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
01467       Phi->addIncoming(VResult, CondBlock);
01468       Phi->addIncoming(PrevPhi, PrevIfBlock);
01469       PrevPhi = Phi;
01470       VResult = Phi;
01471     }
01472 
01473     Value *Predicate = Builder.CreateExtractElement(Mask,
01474                                                     Builder.getInt32(Idx),
01475                                                     "Mask" + Twine(Idx));
01476     Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
01477                                     ConstantInt::get(Predicate->getType(), 1),
01478                                     "ToLoad" + Twine(Idx));
01479 
01480     // Create "cond" block
01481     //
01482     //  %EltAddr = getelementptr i32* %1, i32 0
01483     //  %Elt = load i32* %EltAddr
01484     //  VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
01485     //
01486     CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
01487     Builder.SetInsertPoint(InsertPt);
01488 
01489     Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
01490                                               "Ptr" + Twine(Idx));
01491     LoadInst *Load = Builder.CreateAlignedLoad(Ptr, AlignVal,
01492                                                "Load" + Twine(Idx));
01493     VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx),
01494                                           "Res" + Twine(Idx));
01495 
01496     // Create "else" block, fill it in the next iteration
01497     BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
01498     Builder.SetInsertPoint(InsertPt);
01499     Instruction *OldBr = IfBlock->getTerminator();
01500     BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
01501     OldBr->eraseFromParent();
01502     PrevIfBlock = IfBlock;
01503     IfBlock = NewIfBlock;
01504   }
01505 
01506   Phi = Builder.CreatePHI(VecType, 2, "res.phi.select");
01507   Phi->addIncoming(VResult, CondBlock);
01508   Phi->addIncoming(PrevPhi, PrevIfBlock);
01509   Value *NewI = Builder.CreateSelect(Mask, Phi, Src0);
01510   CI->replaceAllUsesWith(NewI);
01511   CI->eraseFromParent();
01512 }
01513 
01514 // Translate a masked scatter intrinsic, like
01515 // void @llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*>* %Ptrs, i32 4,
01516 //                                  <16 x i1> %Mask)
01517 // to a chain of basic blocks that stores the elements one-by-one if
01518 // the appropriate mask bit is set.
01519 //
01520 // %Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind
01521 // %Mask0 = extractelement <16 x i1> %Mask, i32 0
01522 // %ToStore0 = icmp eq i1 %Mask0, true
01523 // br i1 %ToStore0, label %cond.store, label %else
01524 //
01525 // cond.store:
01526 // %Elt0 = extractelement <16 x i32> %Src, i32 0
01527 // %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
01528 // store i32 %Elt0, i32* %Ptr0, align 4
01529 // br label %else
01530 //
01531 // else:
01532 // %Mask1 = extractelement <16 x i1> %Mask, i32 1
01533 // %ToStore1 = icmp eq i1 %Mask1, true
01534 // br i1 %ToStore1, label %cond.store1, label %else2
01535 //
01536 // cond.store1:
01537 // %Elt1 = extractelement <16 x i32> %Src, i32 1
01538 // %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
01539 // store i32 %Elt1, i32* %Ptr1, align 4
01540 // br label %else2
01541 //   . . .
01542 static void scalarizeMaskedScatter(CallInst *CI) {
01543   Value *Src = CI->getArgOperand(0);
01544   Value *Ptrs = CI->getArgOperand(1);
01545   Value *Alignment = CI->getArgOperand(2);
01546   Value *Mask = CI->getArgOperand(3);
01547 
01548   assert(isa<VectorType>(Src->getType()) &&
01549          "Unexpected data type in masked scatter intrinsic");
01550   assert(isa<VectorType>(Ptrs->getType()) &&
01551          isa<PointerType>(Ptrs->getType()->getVectorElementType()) &&
01552          "Vector of pointers is expected in masked scatter intrinsic");
01553 
01554   IRBuilder<> Builder(CI->getContext());
01555   Instruction *InsertPt = CI;
01556   BasicBlock *IfBlock = CI->getParent();
01557   Builder.SetInsertPoint(InsertPt);
01558   Builder.SetCurrentDebugLocation(CI->getDebugLoc());
01559 
01560   unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
01561   unsigned VectorWidth = Src->getType()->getVectorNumElements();
01562 
01563   // Short-cut if the mask is a vector of constants.
01564   bool IsConstMask = isa<ConstantVector>(Mask);
01565 
01566   if (IsConstMask) {
01567     for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
01568       if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
01569         continue;
01570       Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx),
01571                                                    "Elt" + Twine(Idx));
01572       Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
01573                                                 "Ptr" + Twine(Idx));
01574       Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
01575     }
01576     CI->eraseFromParent();
01577     return;
01578   }
01579   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
01580     // Fill the "else" block, created in the previous iteration
01581     //
01582     //  %Mask1 = extractelement <16 x i1> %Mask, i32 Idx
01583     //  %ToStore = icmp eq i1 %Mask1, true
01584     //  br i1 %ToStore, label %cond.store, label %else
01585     //
01586     Value *Predicate = Builder.CreateExtractElement(Mask,
01587                                                     Builder.getInt32(Idx),
01588                                                     "Mask" + Twine(Idx));
01589     Value *Cmp =
01590        Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
01591                           ConstantInt::get(Predicate->getType(), 1),
01592                           "ToStore" + Twine(Idx));
01593 
01594     // Create "cond" block
01595     //
01596     //  %Elt1 = extractelement <16 x i32> %Src, i32 1
01597     //  %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
01598     //  store i32 %Elt1, i32* %Ptr1
01599     //
01600     BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store");
01601     Builder.SetInsertPoint(InsertPt);
01602 
01603     Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx),
01604                                                  "Elt" + Twine(Idx));
01605     Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
01606                                               "Ptr" + Twine(Idx));
01607     Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
01608 
01609     // Create "else" block, fill it in the next iteration
01610     BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
01611     Builder.SetInsertPoint(InsertPt);
01612     Instruction *OldBr = IfBlock->getTerminator();
01613     BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
01614     OldBr->eraseFromParent();
01615     IfBlock = NewIfBlock;
01616   }
01617   CI->eraseFromParent();
01618 }
01619 
01620 /// If counting leading or trailing zeros is an expensive operation and a zero
01621 /// input is defined, add a check for zero to avoid calling the intrinsic.
01622 ///
01623 /// We want to transform:
01624 ///     %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
01625 ///
01626 /// into:
01627 ///   entry:
01628 ///     %cmpz = icmp eq i64 %A, 0
01629 ///     br i1 %cmpz, label %cond.end, label %cond.false
01630 ///   cond.false:
01631 ///     %z = call i64 @llvm.cttz.i64(i64 %A, i1 true)
01632 ///     br label %cond.end
01633 ///   cond.end:
01634 ///     %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
01635 ///
01636 /// If the transform is performed, return true and set ModifiedDT to true.
01637 static bool despeculateCountZeros(IntrinsicInst *CountZeros,
01638                                   const TargetLowering *TLI,
01639                                   const DataLayout *DL,
01640                                   bool &ModifiedDT) {
01641   if (!TLI || !DL)
01642     return false;
01643 
01644   // If a zero input is undefined, it doesn't make sense to despeculate that.
01645   if (match(CountZeros->getOperand(1), m_One()))
01646     return false;
01647 
01648   // If it's cheap to speculate, there's nothing to do.
01649   auto IntrinsicID = CountZeros->getIntrinsicID();
01650   if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz()) ||
01651       (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz()))
01652     return false;
01653 
01654   // Only handle legal scalar cases. Anything else requires too much work.
01655   Type *Ty = CountZeros->getType();
01656   unsigned SizeInBits = Ty->getPrimitiveSizeInBits();
01657   if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSize())
01658     return false;
01659 
01660   // The intrinsic will be sunk behind a compare against zero and branch.
01661   BasicBlock *StartBlock = CountZeros->getParent();
01662   BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false");
01663 
01664   // Create another block after the count zero intrinsic. A PHI will be added
01665   // in this block to select the result of the intrinsic or the bit-width
01666   // constant if the input to the intrinsic is zero.
01667   BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(CountZeros));
01668   BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end");
01669 
01670   // Set up a builder to create a compare, conditional branch, and PHI.
01671   IRBuilder<> Builder(CountZeros->getContext());
01672   Builder.SetInsertPoint(StartBlock->getTerminator());
01673   Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc());
01674 
01675   // Replace the unconditional branch that was created by the first split with
01676   // a compare against zero and a conditional branch.
01677   Value *Zero = Constant::getNullValue(Ty);
01678   Value *Cmp = Builder.CreateICmpEQ(CountZeros->getOperand(0), Zero, "cmpz");
01679   Builder.CreateCondBr(Cmp, EndBlock, CallBlock);
01680   StartBlock->getTerminator()->eraseFromParent();
01681 
01682   // Create a PHI in the end block to select either the output of the intrinsic
01683   // or the bit width of the operand.
01684   Builder.SetInsertPoint(&EndBlock->front());
01685   PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz");
01686   CountZeros->replaceAllUsesWith(PN);
01687   Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits));
01688   PN->addIncoming(BitWidth, StartBlock);
01689   PN->addIncoming(CountZeros, CallBlock);
01690 
01691   // We are explicitly handling the zero case, so we can set the intrinsic's
01692   // undefined zero argument to 'true'. This will also prevent reprocessing the
01693   // intrinsic; we only despeculate when a zero input is defined.
01694   CountZeros->setArgOperand(1, Builder.getTrue());
01695   ModifiedDT = true;
01696   return true;
01697 }
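
// Illustrative note (not part of the pass; variable names are hypothetical):
// at the source level the transform above roughly corresponds to rewriting
//   unsigned N = __builtin_ctzll(X);        // X may be zero
// into
//   unsigned N = X ? __builtin_ctzll(X) : 64;
// so the potentially expensive count-zeros operation only executes when its
// input is known to be non-zero.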
01698 
01699 bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) {
01700   BasicBlock *BB = CI->getParent();
01701 
01702   // Lower inline assembly if we can.
01703   // If we found an inline asm expression, and if the target knows how to
01704   // lower it to normal LLVM code, do so now.
01705   if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
01706     if (TLI->ExpandInlineAsm(CI)) {
01707       // Avoid invalidating the iterator.
01708       CurInstIterator = BB->begin();
01709       // Avoid processing instructions out of order, which could cause
01710       // reuse before a value is defined.
01711       SunkAddrs.clear();
01712       return true;
01713     }
01714     // Sink address computing for memory operands into the block.
01715     if (optimizeInlineAsmInst(CI))
01716       return true;
01717   }
01718 
01719   // Align the pointer arguments to this call if the target thinks it's a good
01720   // idea
01721   unsigned MinSize, PrefAlign;
01722   if (TLI && TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
01723     for (auto &Arg : CI->arg_operands()) {
01724       // We want to align both objects whose address is used directly and
01725       // objects whose address is used in casts and GEPs, though it only makes
01726       // sense for GEPs if the offset is a multiple of the desired alignment and
01727       // if size - offset meets the size threshold.
01728       if (!Arg->getType()->isPointerTy())
01729         continue;
01730       APInt Offset(DL->getPointerSizeInBits(
01731                        cast<PointerType>(Arg->getType())->getAddressSpace()),
01732                    0);
01733       Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
01734       uint64_t Offset2 = Offset.getLimitedValue();
01735       if ((Offset2 & (PrefAlign-1)) != 0)
01736         continue;
01737       AllocaInst *AI;
01738       if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlignment() < PrefAlign &&
01739           DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
01740         AI->setAlignment(PrefAlign);
01741       // Global variables can only be aligned if they are defined in this
01742       // object (i.e. they are uniquely initialized in this object), and
01743       // over-aligning global variables that have an explicit section is
01744       // forbidden.
01745       GlobalVariable *GV;
01746       if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() &&
01747           GV->getAlignment() < PrefAlign &&
01748           DL->getTypeAllocSize(GV->getValueType()) >=
01749               MinSize + Offset2)
01750         GV->setAlignment(PrefAlign);
01751     }
01752     // If this is a memcpy (or similar) then we may be able to improve the
01753     // alignment
01754     if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
01755       unsigned Align = getKnownAlignment(MI->getDest(), *DL);
01756       if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI))
01757         Align = std::min(Align, getKnownAlignment(MTI->getSource(), *DL));
01758       if (Align > MI->getAlignment())
01759         MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), Align));
01760     }
01761   }
01762 
01763   IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
01764   if (II) {
01765     switch (II->getIntrinsicID()) {
01766     default: break;
01767     case Intrinsic::objectsize: {
01768       // Lower all uses of llvm.objectsize.*
01769       bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
01770       Type *ReturnTy = CI->getType();
01771       Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
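      // Illustrative example (assumed IR shape, not taken from a test case):
      // with Min == false, a call such as
      //   %sz = call i64 @llvm.objectsize.i64.p0i8(i8* %p, i1 false)
      // is folded to the constant -1, i.e. "object size unknown".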
01772 
01773       // Substituting this can cause recursive simplifications, which can
01774       // invalidate our iterator.  Use a WeakVH to hold onto it in case this
01775       // happens.
01776       WeakVH IterHandle(&*CurInstIterator);
01777 
01778       replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
01779 
01780       // If the iterator instruction was recursively deleted, start over at the
01781       // start of the block.
01782       if (IterHandle != CurInstIterator.getNodePtrUnchecked()) {
01783         CurInstIterator = BB->begin();
01784         SunkAddrs.clear();
01785       }
01786       return true;
01787     }
01788     case Intrinsic::masked_load: {
01789       // Scalarize unsupported vector masked load
01790       if (!TTI->isLegalMaskedLoad(CI->getType())) {
01791         scalarizeMaskedLoad(CI);
01792         ModifiedDT = true;
01793         return true;
01794       }
01795       return false;
01796     }
01797     case Intrinsic::masked_store: {
01798       if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType())) {
01799         scalarizeMaskedStore(CI);
01800         ModifiedDT = true;
01801         return true;
01802       }
01803       return false;
01804     }
01805     case Intrinsic::masked_gather: {
01806       if (!TTI->isLegalMaskedGather(CI->getType())) {
01807         scalarizeMaskedGather(CI);
01808         ModifiedDT = true;
01809         return true;
01810       }
01811       return false;
01812     }
01813     case Intrinsic::masked_scatter: {
01814       if (!TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType())) {
01815         scalarizeMaskedScatter(CI);
01816         ModifiedDT = true;
01817         return true;
01818       }
01819       return false;
01820     }
01821     case Intrinsic::aarch64_stlxr:
01822     case Intrinsic::aarch64_stxr: {
01823       ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
01824       if (!ExtVal || !ExtVal->hasOneUse() ||
01825           ExtVal->getParent() == CI->getParent())
01826         return false;
01827       // Sink a zext feeding stlxr/stxr before it, so it can be folded into it.
01828       ExtVal->moveBefore(CI);
01829       // Mark this instruction as "inserted by CGP", so that other
01830       // optimizations don't touch it.
01831       InsertedInsts.insert(ExtVal);
01832       return true;
01833     }
01834     case Intrinsic::invariant_group_barrier:
01835       II->replaceAllUsesWith(II->getArgOperand(0));
01836       II->eraseFromParent();
01837       return true;
01838 
01839     case Intrinsic::cttz:
01840     case Intrinsic::ctlz:
01841       // If counting zeros is expensive, try to avoid it.
01842       return despeculateCountZeros(II, TLI, DL, ModifiedDT);
01843     }
01844 
01845     if (TLI) {
01846       // Unknown address space.
01847       // TODO: Target hook to pick which address space the intrinsic cares
01848       // about?
01849       unsigned AddrSpace = ~0u;
01850       SmallVector<Value*, 2> PtrOps;
01851       Type *AccessTy;
01852       if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy, AddrSpace))
01853         while (!PtrOps.empty())
01854           if (optimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy, AddrSpace))
01855             return true;
01856     }
01857   }
01858 
01859   // From here on out we're working with named functions.
01860   if (!CI->getCalledFunction()) return false;
01861 
01862   // Lower all default uses of _chk calls.  This is very similar
01863   // to what InstCombineCalls does, but here we are only lowering calls
01864   // to fortified library functions (e.g. __memcpy_chk) that have the default
01865   // "don't know" as the objectsize.  Anything else should be left alone.
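  // For illustration only (assumed IR, hypothetical values): a call like
  //   %r = call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 %len, i64 -1)
  // carries the "don't know" object size of -1, so the simplifier below may
  // turn it into an ordinary memcpy of %len bytes that returns %dst.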
01866   FortifiedLibCallSimplifier Simplifier(TLInfo, true);
01867   if (Value *V = Simplifier.optimizeCall(CI)) {
01868     CI->replaceAllUsesWith(V);
01869     CI->eraseFromParent();
01870     return true;
01871   }
01872   return false;
01873 }
01874 
01875 /// Look for opportunities to duplicate return instructions to the predecessor
01876 /// to enable tail call optimizations. The case it is currently looking for is:
01877 /// @code
01878 /// bb0:
01879 ///   %tmp0 = tail call i32 @f0()
01880 ///   br label %return
01881 /// bb1:
01882 ///   %tmp1 = tail call i32 @f1()
01883 ///   br label %return
01884 /// bb2:
01885 ///   %tmp2 = tail call i32 @f2()
01886 ///   br label %return
01887 /// return:
01888 ///   %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
01889 ///   ret i32 %retval
01890 /// @endcode
01891 ///
01892 /// =>
01893 ///
01894 /// @code
01895 /// bb0:
01896 ///   %tmp0 = tail call i32 @f0()
01897 ///   ret i32 %tmp0
01898 /// bb1:
01899 ///   %tmp1 = tail call i32 @f1()
01900 ///   ret i32 %tmp1
01901 /// bb2:
01902 ///   %tmp2 = tail call i32 @f2()
01903 ///   ret i32 %tmp2
01904 /// @endcode
01905 bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) {
01906   if (!TLI)
01907     return false;
01908 
01909   ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
01910   if (!RI)
01911     return false;
01912 
01913   PHINode *PN = nullptr;
01914   BitCastInst *BCI = nullptr;
01915   Value *V = RI->getReturnValue();
01916   if (V) {
01917     BCI = dyn_cast<BitCastInst>(V);
01918     if (BCI)
01919       V = BCI->getOperand(0);
01920 
01921     PN = dyn_cast<PHINode>(V);
01922     if (!PN)
01923       return false;
01924   }
01925 
01926   if (PN && PN->getParent() != BB)
01927     return false;
01928 
01929   // It's not safe to eliminate the sign / zero extension of the return value.
01930   // See llvm::isInTailCallPosition().
01931   const Function *F = BB->getParent();
01932   AttributeSet CallerAttrs = F->getAttributes();
01933   if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) ||
01934       CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
01935     return false;
01936 
01937   // Make sure there are no instructions between the PHI and return, or that the
01938   // return is the first instruction in the block.
01939   if (PN) {
01940     BasicBlock::iterator BI = BB->begin();
01941     do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
01942     if (&*BI == BCI)
01943       // Also skip over the bitcast.
01944       ++BI;
01945     if (&*BI != RI)
01946       return false;
01947   } else {
01948     BasicBlock::iterator BI = BB->begin();
01949     while (isa<DbgInfoIntrinsic>(BI)) ++BI;
01950     if (&*BI != RI)
01951       return false;
01952   }
01953 
01954   /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
01955   /// call.
01956   SmallVector<CallInst*, 4> TailCalls;
01957   if (PN) {
01958     for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
01959       CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I));
01960       // Make sure the phi value is indeed produced by the tail call.
01961       if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) &&
01962           TLI->mayBeEmittedAsTailCall(CI))
01963         TailCalls.push_back(CI);
01964     }
01965   } else {
01966     SmallPtrSet<BasicBlock*, 4> VisitedBBs;
01967     for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
01968       if (!VisitedBBs.insert(*PI).second)
01969         continue;
01970 
01971       BasicBlock::InstListType &InstList = (*PI)->getInstList();
01972       BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin();
01973       BasicBlock::InstListType::reverse_iterator RE = InstList.rend();
01974       do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI));
01975       if (RI == RE)
01976         continue;
01977 
01978       CallInst *CI = dyn_cast<CallInst>(&*RI);
01979       if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI))
01980         TailCalls.push_back(CI);
01981     }
01982   }
01983 
01984   bool Changed = false;
01985   for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) {
01986     CallInst *CI = TailCalls[i];
01987     CallSite CS(CI);
01988 
01989     // Conservatively require the attributes of the call to match those of the
01990     // return. Ignore noalias because it doesn't affect the call sequence.
01991     AttributeSet CalleeAttrs = CS.getAttributes();
01992     if (AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
01993           removeAttribute(Attribute::NoAlias) !=
01994         AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
01995           removeAttribute(Attribute::NoAlias))
01996       continue;
01997 
01998     // Make sure the call instruction is followed by an unconditional branch to
01999     // the return block.
02000     BasicBlock *CallBB = CI->getParent();
02001     BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator());
02002     if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
02003       continue;
02004 
02005     // Duplicate the return into CallBB.
02006     (void)FoldReturnIntoUncondBranch(RI, BB, CallBB);
02007     ModifiedDT = Changed = true;
02008     ++NumRetsDup;
02009   }
02010 
02011   // If we eliminated all predecessors of the block, delete the block now.
02012   if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
02013     BB->eraseFromParent();
02014 
02015   return Changed;
02016 }
02017 
02018 //===----------------------------------------------------------------------===//
02019 // Memory Optimization
02020 //===----------------------------------------------------------------------===//
02021 
02022 namespace {
02023 
02024 /// This is an extended version of TargetLowering::AddrMode
02025 /// which holds actual Value*'s for register values.
02026 struct ExtAddrMode : public TargetLowering::AddrMode {
02027   Value *BaseReg;
02028   Value *ScaledReg;
02029   ExtAddrMode() : BaseReg(nullptr), ScaledReg(nullptr) {}
02030   void print(raw_ostream &OS) const;
02031   void dump() const;
02032 
02033   bool operator==(const ExtAddrMode& O) const {
02034     return (BaseReg == O.BaseReg) && (ScaledReg == O.ScaledReg) &&
02035            (BaseGV == O.BaseGV) && (BaseOffs == O.BaseOffs) &&
02036            (HasBaseReg == O.HasBaseReg) && (Scale == O.Scale);
02037   }
02038 };
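
// Illustrative example (target and register names are hypothetical): an x86
// address such as [%rdi + 4*%rsi + 16] would be described by an ExtAddrMode
// with BaseReg = %rdi, ScaledReg = %rsi, Scale = 4, BaseOffs = 16 and a null
// BaseGV.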
02039 
02040 #ifndef NDEBUG
02041 static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
02042   AM.print(OS);
02043   return OS;
02044 }
02045 #endif
02046 
02047 void ExtAddrMode::print(raw_ostream &OS) const {
02048   bool NeedPlus = false;
02049   OS << "[";
02050   if (BaseGV) {
02051     OS << (NeedPlus ? " + " : "")
02052        << "GV:";
02053     BaseGV->printAsOperand(OS, /*PrintType=*/false);
02054     NeedPlus = true;
02055   }
02056 
02057   if (BaseOffs) {
02058     OS << (NeedPlus ? " + " : "")
02059        << BaseOffs;
02060     NeedPlus = true;
02061   }
02062 
02063   if (BaseReg) {
02064     OS << (NeedPlus ? " + " : "")
02065        << "Base:";
02066     BaseReg->printAsOperand(OS, /*PrintType=*/false);
02067     NeedPlus = true;
02068   }
02069   if (Scale) {
02070     OS << (NeedPlus ? " + " : "")
02071        << Scale << "*";
02072     ScaledReg->printAsOperand(OS, /*PrintType=*/false);
02073   }
02074 
02075   OS << ']';
02076 }
02077 
02078 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
02079 void ExtAddrMode::dump() const {
02080   print(dbgs());
02081   dbgs() << '\n';
02082 }
02083 #endif
02084 
02085 /// \brief This class provides transaction based operation on the IR.
02086 /// Every change made through this class is recorded in the internal state and
02087 /// can be undone (rollback) until commit is called.
02088 class TypePromotionTransaction {
02089 
02090   /// \brief This represents the common interface of the individual transaction actions.
02091   /// Each class implements the logic for doing one specific modification on
02092   /// the IR via the TypePromotionTransaction.
02093   class TypePromotionAction {
02094   protected:
02095     /// The Instruction modified.
02096     Instruction *Inst;
02097 
02098   public:
02099     /// \brief Constructor of the action.
02100     /// The constructor performs the related action on the IR.
02101     TypePromotionAction(Instruction *Inst) : Inst(Inst) {}
02102 
02103     virtual ~TypePromotionAction() {}
02104 
02105     /// \brief Undo the modification done by this action.
02106     /// When this method is called, the IR must be in the same state as it was
02107     /// before this action was applied.
02108     /// \pre Undoing the action works if and only if the IR is in the exact same
02109     /// state as it was directly after this action was applied.
02110     virtual void undo() = 0;
02111 
02112     /// \brief Commit every change made by this action.
02113     /// When the results on the IR of the action are to be kept, it is important
02114     /// to call this function, otherwise hidden information may be kept forever.
02115     virtual void commit() {
02116       // Nothing to be done, this action is not doing anything.
02117     }
02118   };
02119 
02120   /// \brief Utility to remember the position of an instruction.
02121   class InsertionHandler {
02122     /// Position of an instruction.
02123     /// Either an instruction:
02124     /// - Is the first in a basic block: BB is used.
02125     /// - Has a previous instruction: PrevInst is used.
02126     union {
02127       Instruction *PrevInst;
02128       BasicBlock *BB;
02129     } Point;
02130     /// Remember whether or not the instruction had a previous instruction.
02131     bool HasPrevInstruction;
02132 
02133   public:
02134     /// \brief Record the position of \p Inst.
02135     InsertionHandler(Instruction *Inst) {
02136       BasicBlock::iterator It = Inst->getIterator();
02137       HasPrevInstruction = (It != (Inst->getParent()->begin()));
02138       if (HasPrevInstruction)
02139         Point.PrevInst = &*--It;
02140       else
02141         Point.BB = Inst->getParent();
02142     }
02143 
02144     /// \brief Insert \p Inst at the recorded position.
02145     void insert(Instruction *Inst) {
02146       if (HasPrevInstruction) {
02147         if (Inst->getParent())
02148           Inst->removeFromParent();
02149         Inst->insertAfter(Point.PrevInst);
02150       } else {
02151         Instruction *Position = &*Point.BB->getFirstInsertionPt();
02152         if (Inst->getParent())
02153           Inst->moveBefore(Position);
02154         else
02155           Inst->insertBefore(Position);
02156       }
02157     }
02158   };
02159 
02160   /// \brief Move an instruction before another.
02161   class InstructionMoveBefore : public TypePromotionAction {
02162     /// Original position of the instruction.
02163     InsertionHandler Position;
02164 
02165   public:
02166     /// \brief Move \p Inst before \p Before.
02167     InstructionMoveBefore(Instruction *Inst, Instruction *Before)
02168         : TypePromotionAction(Inst), Position(Inst) {
02169       DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before << "\n");
02170       Inst->moveBefore(Before);
02171     }
02172 
02173     /// \brief Move the instruction back to its original position.
02174     void undo() override {
02175       DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n");
02176       Position.insert(Inst);
02177     }
02178   };
02179 
02180   /// \brief Set the operand of an instruction with a new value.
02181   class OperandSetter : public TypePromotionAction {
02182     /// Original operand of the instruction.
02183     Value *Origin;
02184     /// Index of the modified instruction.
02185     unsigned Idx;
02186 
02187   public:
02188     /// \brief Set \p Idx operand of \p Inst with \p NewVal.
02189     OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal)
02190         : TypePromotionAction(Inst), Idx(Idx) {
02191       DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n"
02192                    << "for:" << *Inst << "\n"
02193                    << "with:" << *NewVal << "\n");
02194       Origin = Inst->getOperand(Idx);
02195       Inst->setOperand(Idx, NewVal);
02196     }
02197 
02198     /// \brief Restore the original value of the instruction.
02199     void undo() override {
02200       DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n"
02201                    << "for: " << *Inst << "\n"
02202                    << "with: " << *Origin << "\n");
02203       Inst->setOperand(Idx, Origin);
02204     }
02205   };
02206 
02207   /// \brief Hide the operands of an instruction.
02208   /// Do as if this instruction was not using any of its operands.
02209   class OperandsHider : public TypePromotionAction {
02210     /// The list of original operands.
02211     SmallVector<Value *, 4> OriginalValues;
02212 
02213   public:
02214     /// \brief Remove \p Inst from the uses of the operands of \p Inst.
02215     OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) {
02216       DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n");
02217       unsigned NumOpnds = Inst->getNumOperands();
02218       OriginalValues.reserve(NumOpnds);
02219       for (unsigned It = 0; It < NumOpnds; ++It) {
02220         // Save the current operand.
02221         Value *Val = Inst->getOperand(It);
02222         OriginalValues.push_back(Val);
02223         // Set a dummy one.
02224         // We could use OperandSetter here, but that would imply an overhead
02225         // that we are not willing to pay.
02226         Inst->setOperand(It, UndefValue::get(Val->getType()));
02227       }
02228     }
02229 
02230     /// \brief Restore the original list of uses.
02231     void undo() override {
02232       DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n");
02233       for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It)
02234         Inst->setOperand(It, OriginalValues[It]);
02235     }
02236   };
02237 
02238   /// \brief Build a truncate instruction.
02239   class TruncBuilder : public TypePromotionAction {
02240     Value *Val;
02241   public:
02242     /// \brief Build a truncate instruction of \p Opnd producing a \p Ty
02243     /// result.
02244     /// trunc Opnd to Ty.
02245     TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
02246       IRBuilder<> Builder(Opnd);
02247       Val = Builder.CreateTrunc(Opnd, Ty, "promoted");
02248       DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n");
02249     }
02250 
02251     /// \brief Get the built value.
02252     Value *getBuiltValue() { return Val; }
02253 
02254     /// \brief Remove the built instruction.
02255     void undo() override {
02256       DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n");
02257       if (Instruction *IVal = dyn_cast<Instruction>(Val))
02258         IVal->eraseFromParent();
02259     }
02260   };
02261 
02262   /// \brief Build a sign extension instruction.
02263   class SExtBuilder : public TypePromotionAction {
02264     Value *Val;
02265   public:
02266     /// \brief Build a sign extension instruction of \p Opnd producing a \p Ty
02267     /// result.
02268     /// sext Opnd to Ty.
02269     SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
02270         : TypePromotionAction(InsertPt) {
02271       IRBuilder<> Builder(InsertPt);
02272       Val = Builder.CreateSExt(Opnd, Ty, "promoted");
02273       DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n");
02274     }
02275 
02276     /// \brief Get the built value.
02277     Value *getBuiltValue() { return Val; }
02278 
02279     /// \brief Remove the built instruction.
02280     void undo() override {
02281       DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n");
02282       if (Instruction *IVal = dyn_cast<Instruction>(Val))
02283         IVal->eraseFromParent();
02284     }
02285   };
02286 
02287   /// \brief Build a zero extension instruction.
02288   class ZExtBuilder : public TypePromotionAction {
02289     Value *Val;
02290   public:
02291     /// \brief Build a zero extension instruction of \p Opnd producing a \p Ty
02292     /// result.
02293     /// zext Opnd to Ty.
02294     ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
02295         : TypePromotionAction(InsertPt) {
02296       IRBuilder<> Builder(InsertPt);
02297       Val = Builder.CreateZExt(Opnd, Ty, "promoted");
02298       DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n");
02299     }
02300 
02301     /// \brief Get the built value.
02302     Value *getBuiltValue() { return Val; }
02303 
02304     /// \brief Remove the built instruction.
02305     void undo() override {
02306       DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n");
02307       if (Instruction *IVal = dyn_cast<Instruction>(Val))
02308         IVal->eraseFromParent();
02309     }
02310   };
02311 
02312   /// \brief Mutate an instruction to another type.
02313   class TypeMutator : public TypePromotionAction {
02314     /// Record the original type.
02315     Type *OrigTy;
02316 
02317   public:
02318     /// \brief Mutate the type of \p Inst into \p NewTy.
02319     TypeMutator(Instruction *Inst, Type *NewTy)
02320         : TypePromotionAction(Inst), OrigTy(Inst->getType()) {
02321       DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy
02322                    << "\n");
02323       Inst->mutateType(NewTy);
02324     }
02325 
02326     /// \brief Mutate the instruction back to its original type.
02327     void undo() override {
02328       DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy
02329                    << "\n");
02330       Inst->mutateType(OrigTy);
02331     }
02332   };
02333 
02334   /// \brief Replace the uses of an instruction by another instruction.
02335   class UsesReplacer : public TypePromotionAction {
02336     /// Helper structure to keep track of the replaced uses.
02337     struct InstructionAndIdx {
02338       /// The instruction that uses the replaced instruction.
02339       Instruction *Inst;
02340       /// The operand index at which Inst uses the replaced instruction.
02341       unsigned Idx;
02342       InstructionAndIdx(Instruction *Inst, unsigned Idx)
02343           : Inst(Inst), Idx(Idx) {}
02344     };
02345 
02346     /// Keep track of the original uses (pair Instruction, Index).
02347     SmallVector<InstructionAndIdx, 4> OriginalUses;
02348     typedef SmallVectorImpl<InstructionAndIdx>::iterator use_iterator;
02349 
02350   public:
02351     /// \brief Replace all the use of \p Inst by \p New.
02352     UsesReplacer(Instruction *Inst, Value *New) : TypePromotionAction(Inst) {
02353       DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New
02354                    << "\n");
02355       // Record the original uses.
02356       for (Use &U : Inst->uses()) {
02357         Instruction *UserI = cast<Instruction>(U.getUser());
02358         OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo()));
02359       }
02360       // Now, we can replace the uses.
02361       Inst->replaceAllUsesWith(New);
02362     }
02363 
02364     /// \brief Reassign the original uses of Inst to Inst.
02365     void undo() override {
02366       DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
02367       for (use_iterator UseIt = OriginalUses.begin(),
02368                         EndIt = OriginalUses.end();
02369            UseIt != EndIt; ++UseIt) {
02370         UseIt->Inst->setOperand(UseIt->Idx, Inst);
02371       }
02372     }
02373   };
02374 
02375   /// \brief Remove an instruction from the IR.
02376   class InstructionRemover : public TypePromotionAction {
02377     /// Original position of the instruction.
02378     InsertionHandler Inserter;
02379     /// Helper structure to hide all the links to the instruction. In other
02380     /// words, this helps to do as if the instruction was removed.
02381     OperandsHider Hider;
02382     /// Keep track of the uses replaced, if any.
02383     UsesReplacer *Replacer;
02384 
02385   public:
02386     /// \brief Remove all references to \p Inst and optionally replace all its
02387     /// uses with New.
02388     /// \pre If !Inst->use_empty(), then New != nullptr
02389     InstructionRemover(Instruction *Inst, Value *New = nullptr)
02390         : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
02391           Replacer(nullptr) {
02392       if (New)
02393         Replacer = new UsesReplacer(Inst, New);
02394       DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
02395       Inst->removeFromParent();
02396     }
02397 
02398     ~InstructionRemover() override { delete Replacer; }
02399 
02400     /// \brief Really remove the instruction.
02401     void commit() override { delete Inst; }
02402 
02403     /// \brief Resurrect the instruction and reassign it to the proper uses if
02404     /// a new value was provided when this action was built.
02405     void undo() override {
02406       DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
02407       Inserter.insert(Inst);
02408       if (Replacer)
02409         Replacer->undo();
02410       Hider.undo();
02411     }
02412   };
02413 
02414 public:
02415   /// Restoration point.
02416   /// The restoration point is a pointer to an action instead of an iterator
02417   /// because the iterator may be invalidated but not the pointer.
02418   typedef const TypePromotionAction *ConstRestorationPt;
02419   /// Commit all the changes made in this transaction.
02420   void commit();
02421   /// Undo all the changes made after the given point.
02422   void rollback(ConstRestorationPt Point);
02423   /// Get the current restoration point.
02424   ConstRestorationPt getRestorationPoint() const;
02425 
02426   /// \name API for IR modification with state keeping to support rollback.
02427   /// @{
02428   /// Same as Instruction::setOperand.
02429   void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
02430   /// Same as Instruction::eraseFromParent.
02431   void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
02432   /// Same as Value::replaceAllUsesWith.
02433   void replaceAllUsesWith(Instruction *Inst, Value *New);
02434   /// Same as Value::mutateType.
02435   void mutateType(Instruction *Inst, Type *NewTy);
02436   /// Same as IRBuilder::createTrunc.
02437   Value *createTrunc(Instruction *Opnd, Type *Ty);
02438   /// Same as IRBuilder::createSExt.
02439   Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
02440   /// Same as IRBuilder::createZExt.
02441   Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
02442   /// Same as Instruction::moveBefore.
02443   void moveBefore(Instruction *Inst, Instruction *Before);
02444   /// @}
02445 
02446 private:
02447   /// The ordered list of actions made so far.
02448   SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
02449   typedef SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator CommitPt;
02450 };
02451 
02452 void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
02453                                           Value *NewVal) {
02454   Actions.push_back(
02455       make_unique<TypePromotionTransaction::OperandSetter>(Inst, Idx, NewVal));
02456 }
02457 
02458 void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
02459                                                 Value *NewVal) {
02460   Actions.push_back(
02461       make_unique<TypePromotionTransaction::InstructionRemover>(Inst, NewVal));
02462 }
02463 
02464 void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
02465                                                   Value *New) {
02466   Actions.push_back(make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
02467 }
02468 
02469 void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
02470   Actions.push_back(make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
02471 }
02472 
02473 Value *TypePromotionTransaction::createTrunc(Instruction *Opnd,
02474                                              Type *Ty) {
02475   std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
02476   Value *Val = Ptr->getBuiltValue();
02477   Actions.push_back(std::move(Ptr));
02478   return Val;
02479 }
02480 
02481 Value *TypePromotionTransaction::createSExt(Instruction *Inst,
02482                                             Value *Opnd, Type *Ty) {
02483   std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
02484   Value *Val = Ptr->getBuiltValue();
02485   Actions.push_back(std::move(Ptr));
02486   return Val;
02487 }
02488 
02489 Value *TypePromotionTransaction::createZExt(Instruction *Inst,
02490                                             Value *Opnd, Type *Ty) {
02491   std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty));
02492   Value *Val = Ptr->getBuiltValue();
02493   Actions.push_back(std::move(Ptr));
02494   return Val;
02495 }
02496 
02497 void TypePromotionTransaction::moveBefore(Instruction *Inst,
02498                                           Instruction *Before) {
02499   Actions.push_back(
02500       make_unique<TypePromotionTransaction::InstructionMoveBefore>(Inst, Before));
02501 }
02502 
02503 TypePromotionTransaction::ConstRestorationPt
02504 TypePromotionTransaction::getRestorationPoint() const {
02505   return !Actions.empty() ? Actions.back().get() : nullptr;
02506 }
02507 
02508 void TypePromotionTransaction::commit() {
02509   for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt;
02510        ++It)
02511     (*It)->commit();
02512   Actions.clear();
02513 }
02514 
02515 void TypePromotionTransaction::rollback(
02516     TypePromotionTransaction::ConstRestorationPt Point) {
02517   while (!Actions.empty() && Point != Actions.back().get()) {
02518     std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
02519     Curr->undo();
02520   }
02521 }
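
// Illustrative usage sketch (identifiers are hypothetical; the real callers
// appear later in this pass):
//   TypePromotionTransaction TPT;
//   TypePromotionTransaction::ConstRestorationPt LastKnownGood =
//       TPT.getRestorationPoint();
//   TPT.mutateType(ExtOpnd, PromotedTy);   // speculative change, recorded
//   if (!StillProfitable)
//     TPT.rollback(LastKnownGood);         // undo everything after the point
//   else
//     TPT.commit();                        // make the recorded changes final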
02522 
02523 /// \brief A helper class for matching addressing modes.
02524 ///
02525 /// This encapsulates the logic for matching the target-legal addressing modes.
02526 class AddressingModeMatcher {
02527   SmallVectorImpl<Instruction*> &AddrModeInsts;
02528   const TargetMachine &TM;
02529   const TargetLowering &TLI;
02530   const DataLayout &DL;
02531 
02532   /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
02533   /// the memory instruction that we're computing this address for.
02534   Type *AccessTy;
02535   unsigned AddrSpace;
02536   Instruction *MemoryInst;
02537 
02538   /// This is the addressing mode that we're building up. This is
02539   /// part of the return value of this addressing mode matching stuff.
02540   ExtAddrMode &AddrMode;
02541 
02542   /// The instructions inserted by other CodeGenPrepare optimizations.
02543   const SetOfInstrs &InsertedInsts;
02544   /// A map from the instructions to their type before promotion.
02545   InstrToOrigTy &PromotedInsts;
02546   /// The ongoing transaction where every action should be registered.
02547   TypePromotionTransaction &TPT;
02548 
02549   /// This is set to true when we should not do profitability checks.
02550   /// When true, IsProfitableToFoldIntoAddressingMode always returns true.
02551   bool IgnoreProfitability;
02552 
02553   AddressingModeMatcher(SmallVectorImpl<Instruction *> &AMI,
02554                         const TargetMachine &TM, Type *AT, unsigned AS,
02555                         Instruction *MI, ExtAddrMode &AM,
02556                         const SetOfInstrs &InsertedInsts,
02557                         InstrToOrigTy &PromotedInsts,
02558                         TypePromotionTransaction &TPT)
02559       : AddrModeInsts(AMI), TM(TM),
02560         TLI(*TM.getSubtargetImpl(*MI->getParent()->getParent())
02561                  ->getTargetLowering()),
02562         DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS),
02563         MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts),
02564         PromotedInsts(PromotedInsts), TPT(TPT) {
02565     IgnoreProfitability = false;
02566   }
02567 public:
02568 
02569   /// Find the maximal addressing mode that a load/store of V can fold,
02570   /// give an access type of AccessTy.  This returns a list of involved
02571   /// given an access type of AccessTy.  This returns a list of involved
02572   /// \p InsertedInsts The instructions inserted by other CodeGenPrepare
02573   /// optimizations.
02574   /// \p PromotedInsts maps the instructions to their type before promotion.
02575   /// \p TPT The ongoing transaction where every action should be registered.
02576   static ExtAddrMode Match(Value *V, Type *AccessTy, unsigned AS,
02577                            Instruction *MemoryInst,
02578                            SmallVectorImpl<Instruction*> &AddrModeInsts,
02579                            const TargetMachine &TM,
02580                            const SetOfInstrs &InsertedInsts,
02581                            InstrToOrigTy &PromotedInsts,
02582                            TypePromotionTransaction &TPT) {
02583     ExtAddrMode Result;
02584 
02585     bool Success = AddressingModeMatcher(AddrModeInsts, TM, AccessTy, AS,
02586                                          MemoryInst, Result, InsertedInsts,
02587                                          PromotedInsts, TPT).matchAddr(V, 0);
02588     (void)Success; assert(Success && "Couldn't select *anything*?");
02589     return Result;
02590   }
02591 private:
02592   bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
02593   bool matchAddr(Value *V, unsigned Depth);
02594   bool matchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth,
02595                           bool *MovedAway = nullptr);
02596   bool isProfitableToFoldIntoAddressingMode(Instruction *I,
02597                                             ExtAddrMode &AMBefore,
02598                                             ExtAddrMode &AMAfter);
02599   bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
02600   bool isPromotionProfitable(unsigned NewCost, unsigned OldCost,
02601                              Value *PromotedOperand) const;
02602 };
02603 
02604 /// Try adding ScaleReg*Scale to the current addressing mode.
02605 /// Return true and update AddrMode if this addr mode is legal for the target,
02606 /// false if not.
02607 bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
02608                                              unsigned Depth) {
02609   // If Scale is 1, then this is the same as adding ScaleReg to the addressing
02610   // mode.  Just process that directly.
02611   if (Scale == 1)
02612     return matchAddr(ScaleReg, Depth);
02613 
02614   // If the scale is 0, it takes nothing to add this.
02615   if (Scale == 0)
02616     return true;
02617 
02618   // If we already have a scale of this value, we can add to it, otherwise, we
02619   // need an available scale field.
02620   if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
02621     return false;
02622 
02623   ExtAddrMode TestAddrMode = AddrMode;
02624 
02625   // Add scale to turn X*4+X*3 -> X*7.  This could also do things like
02626   // [A+B + A*7] -> [B+A*8].
02627   TestAddrMode.Scale += Scale;
02628   TestAddrMode.ScaledReg = ScaleReg;
02629 
02630   // If the new address isn't legal, bail out.
02631   if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace))
02632     return false;
02633 
02634   // It was legal, so commit it.
02635   AddrMode = TestAddrMode;
02636 
02637   // Okay, we decided that we can add ScaleReg+Scale to AddrMode.  Check now
02638   // to see if ScaleReg is actually X+C.  If so, we can turn this into adding
02639   // X*Scale + C*Scale to addr mode.
02640   ConstantInt *CI = nullptr; Value *AddLHS = nullptr;
02641   if (isa<Instruction>(ScaleReg) &&  // not a constant expr.
02642       match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
02643     TestAddrMode.ScaledReg = AddLHS;
02644     TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
02645 
02646     // If this addressing mode is legal, commit it and remember that we folded
02647     // this instruction.
02648     if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) {
02649       AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
02650       AddrMode = TestAddrMode;
02651       return true;
02652     }
02653   }
02654 
02655   // Otherwise, not (x+c)*scale, just return what we have.
02656   return true;
02657 }
02658 
02659 /// This is a little filter, which returns true if an addressing computation
02660 /// involving I might be folded into a load/store accessing it.
02661 /// This doesn't need to be perfect, but needs to accept at least
02662 /// the set of instructions that matchOperationAddr can.
02663 static bool MightBeFoldableInst(Instruction *I) {
02664   switch (I->getOpcode()) {
02665   case Instruction::BitCast:
02666   case Instruction::AddrSpaceCast:
02667     // Don't touch identity bitcasts.
02668     if (I->getType() == I->getOperand(0)->getType())
02669       return false;
02670     return I->getType()->isPointerTy() || I->getType()->isIntegerTy();
02671   case Instruction::PtrToInt:
02672     // PtrToInt is always a noop, as we know that the int type is pointer sized.
02673     return true;
02674   case Instruction::IntToPtr:
02675     // We know the input is intptr_t, so this is foldable.
02676     return true;
02677   case Instruction::Add:
02678     return true;
02679   case Instruction::Mul:
02680   case Instruction::Shl:
02681     // Can only handle X*C and X << C.
02682     return isa<ConstantInt>(I->getOperand(1));
02683   case Instruction::GetElementPtr:
02684     return true;
02685   default:
02686     return false;
02687   }
02688 }
02689 
02690 /// \brief Check whether or not \p Val is a legal instruction for \p TLI.
02691 /// \note \p Val is assumed to be the product of some type promotion.
02692 /// Therefore if \p Val has an undefined state in \p TLI, this is assumed
02693 /// to be legal, as the non-promoted value would have had the same state.
02694 static bool isPromotedInstructionLegal(const TargetLowering &TLI,
02695                                        const DataLayout &DL, Value *Val) {
02696   Instruction *PromotedInst = dyn_cast<Instruction>(Val);
02697   if (!PromotedInst)
02698     return false;
02699   int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
02700   // If the ISDOpcode is undefined, it was undefined before the promotion.
02701   if (!ISDOpcode)
02702     return true;
02703   // Otherwise, check if the promoted instruction is legal or not.
02704   return TLI.isOperationLegalOrCustom(
02705       ISDOpcode, TLI.getValueType(DL, PromotedInst->getType()));
02706 }
02707 
02708 /// \brief Helper class to perform type promotion.
02709 class TypePromotionHelper {
02710   /// \brief Utility function to check whether or not a sign or zero extension
02711   /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by
02712   /// either using the operands of \p Inst or promoting \p Inst.
02713   /// The type of the extension is defined by \p IsSExt.
02714   /// In other words, check if:
02715   /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType.
02716   /// #1 Promotion applies:
02717   /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...).
02718   /// #2 Operand reuses:
02719   /// ext opnd1 to ConsideredExtType.
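  /// For instance (illustrative IR, arbitrary names), with \p IsSExt == true:
  ///   %t   = trunc i64 %x to i32
  ///   %ext = sext i32 %t to i64
  /// can get through the trunc (#2) when %x is itself the result of a sign
  /// extension from a type no wider than i32, and
  ///   %add = add nsw i32 %a, %b
  ///   %ext = sext i32 %add to i64
  /// can get through the add (#1) by promoting the add to i64 and sign
  /// extending its operands instead.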
02720   /// \p PromotedInsts maps the instructions to their type before promotion.
02721   static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType,
02722                             const InstrToOrigTy &PromotedInsts, bool IsSExt);
02723 
02724   /// \brief Utility function to determine if \p OpIdx should be promoted when
02725   /// promoting \p Inst.
02726   static bool shouldExtOperand(const Instruction *Inst, int OpIdx) {
02727     return !(isa<SelectInst>(Inst) && OpIdx == 0);
02728   }
02729 
02730   /// \brief Utility function to promote the operand of \p Ext when this
02731   /// operand is a promotable trunc or sext or zext.
02732   /// \p PromotedInsts maps the instructions to their type before promotion.
02733   /// \p CreatedInstsCost[out] contains the cost of all instructions
02734   /// created to promote the operand of Ext.
02735   /// Newly added extensions are inserted in \p Exts.
02736   /// Newly added truncates are inserted in \p Truncs.
02737   /// Should never be called directly.
02738   /// \return The promoted value which is used instead of Ext.
02739   static Value *promoteOperandForTruncAndAnyExt(
02740       Instruction *Ext, TypePromotionTransaction &TPT,
02741       InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
02742       SmallVectorImpl<Instruction *> *Exts,
02743       SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI);
02744 
02745   /// \brief Utility function to promote the operand of \p Ext when this
02746   /// operand is promotable and is not a supported trunc or sext.
02747   /// \p PromotedInsts maps the instructions to their type before promotion.
02748   /// \p CreatedInstsCost[out] contains the cost of all the instructions
02749   /// created to promote the operand of Ext.
02750   /// Newly added extensions are inserted in \p Exts.
02751   /// Newly added truncates are inserted in \p Truncs.
02752   /// Should never be called directly.
02753   /// \return The promoted value which is used instead of Ext.
02754   static Value *promoteOperandForOther(Instruction *Ext,
02755                                        TypePromotionTransaction &TPT,
02756                                        InstrToOrigTy &PromotedInsts,
02757                                        unsigned &CreatedInstsCost,
02758                                        SmallVectorImpl<Instruction *> *Exts,
02759                                        SmallVectorImpl<Instruction *> *Truncs,
02760                                        const TargetLowering &TLI, bool IsSExt);
02761 
02762   /// \see promoteOperandForOther.
02763   static Value *signExtendOperandForOther(
02764       Instruction *Ext, TypePromotionTransaction &TPT,
02765       InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
02766       SmallVectorImpl<Instruction *> *Exts,
02767       SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
02768     return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
02769                                   Exts, Truncs, TLI, true);
02770   }
02771 
02772   /// \see promoteOperandForOther.
02773   static Value *zeroExtendOperandForOther(
02774       Instruction *Ext, TypePromotionTransaction &TPT,
02775       InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
02776       SmallVectorImpl<Instruction *> *Exts,
02777       SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
02778     return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
02779                                   Exts, Truncs, TLI, false);
02780   }
02781 
02782 public:
02783   /// Type for the utility function that promotes the operand of Ext.
02784   typedef Value *(*Action)(Instruction *Ext, TypePromotionTransaction &TPT,
02785                            InstrToOrigTy &PromotedInsts,
02786                            unsigned &CreatedInstsCost,
02787                            SmallVectorImpl<Instruction *> *Exts,
02788                            SmallVectorImpl<Instruction *> *Truncs,
02789                            const TargetLowering &TLI);
02790   /// \brief Given a sign/zero extend instruction \p Ext, return the appropriate
02791   /// action to promote the operand of \p Ext instead of using Ext.
02792   /// \return NULL if no promotable action is possible with the current
02793   /// sign extension.
02794   /// \p InsertedInsts keeps track of all the instructions inserted by the
02795   /// other CodeGenPrepare optimizations. This information is important
02796   /// because we do not want to promote these instructions as CodeGenPrepare
02797   /// will reinsert them later, thus creating an infinite loop: create/remove.
02798   /// \p PromotedInsts maps the instructions to their type before promotion.
02799   static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedInsts,
02800                           const TargetLowering &TLI,
02801                           const InstrToOrigTy &PromotedInsts);
02802 };
02803 
02804 bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
02805                                         Type *ConsideredExtType,
02806                                         const InstrToOrigTy &PromotedInsts,
02807                                         bool IsSExt) {
02808   // The promotion helper does not know how to deal with vector types yet.
02809   // To be able to fix that, we would need to fix the places where we
02810   // statically extend, e.g., constants and such.
02811   if (Inst->getType()->isVectorTy())
02812     return false;
02813 
02814   // We can always get through zext.
02815   if (isa<ZExtInst>(Inst))
02816     return true;
02817 
02818   // sext(sext) is ok too.
02819   if (IsSExt && isa<SExtInst>(Inst))
02820     return true;
02821 
02822   // We can get through a binary operator, if it is legal. In other words, the
02823   // binary operator must have a nuw or nsw flag.
02824   const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
02825   if (BinOp && isa<OverflowingBinaryOperator>(BinOp) &&
02826       ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
02827        (IsSExt && BinOp->hasNoSignedWrap())))
02828     return true;
02829 
02830   // Check if we can do the following simplification.
02831   // ext(trunc(opnd)) --> ext(opnd)
02832   if (!isa<TruncInst>(Inst))
02833     return false;
02834 
02835   Value *OpndVal = Inst->getOperand(0);
02836   // Check if we can use this operand in the extension.
02837   // If the type is larger than the result type of the extension, we cannot.
02838   if (!OpndVal->getType()->isIntegerTy() ||
02839       OpndVal->getType()->getIntegerBitWidth() >
02840           ConsideredExtType->getIntegerBitWidth())
02841     return false;
02842 
02843   // If the operand of the truncate is not an instruction, we will not have
02844   // any information on the dropped bits.
02845   // (Actually we could for constants, but it is not worth the extra logic).
02846   Instruction *Opnd = dyn_cast<Instruction>(OpndVal);
02847   if (!Opnd)
02848     return false;
02849 
02850   // Check if the source of the truncate is narrow enough.
02851   // I.e., check that the trunc just drops extended bits of the same kind as
02852   // the extension.
02853   // #1 get the type of the operand and check the kind of the extended bits.
02854   const Type *OpndType;
02855   InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
02856   if (It != PromotedInsts.end() && It->second.getInt() == IsSExt)
02857     OpndType = It->second.getPointer();
02858   else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
02859     OpndType = Opnd->getOperand(0)->getType();
02860   else
02861     return false;
02862 
02863   // #2 check that the truncate just drops extended bits.
02864   return Inst->getType()->getIntegerBitWidth() >=
02865          OpndType->getIntegerBitWidth();
02866 }
02867 
02868 TypePromotionHelper::Action TypePromotionHelper::getAction(
02869     Instruction *Ext, const SetOfInstrs &InsertedInsts,
02870     const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
02871   assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
02872          "Unexpected instruction type");
02873   Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0));
02874   Type *ExtTy = Ext->getType();
02875   bool IsSExt = isa<SExtInst>(Ext);
02876   // If the operand of the extension is not an instruction, we cannot
02877   // get through.
02878   // If it is, check whether we can get through.
02879   if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt))
02880     return nullptr;
02881 
02882   // Do not promote if the operand has been added by codegenprepare.
02883   // Otherwise, it means we are undoing an optimization that is likely to be
02884   // redone, thus causing a potential infinite loop.
02885   if (isa<TruncInst>(ExtOpnd) && InsertedInsts.count(ExtOpnd))
02886     return nullptr;
02887 
02888   // SExt, ZExt or Trunc instructions.
02889   // Return the related handler.
02890   if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) ||
02891       isa<ZExtInst>(ExtOpnd))
02892     return promoteOperandForTruncAndAnyExt;
02893 
02894   // Regular instruction.
02895   // Abort early if we will have to insert non-free instructions.
02896   if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType()))
02897     return nullptr;
02898   return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther;
02899 }
02900 
02901 Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
02902     llvm::Instruction *SExt, TypePromotionTransaction &TPT,
02903     InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
02904     SmallVectorImpl<Instruction *> *Exts,
02905     SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
02906   // By construction, the operand of SExt is an instruction. Otherwise we cannot
02907   // get through it and this method should not be called.
02908   Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
02909   Value *ExtVal = SExt;
02910   bool HasMergedNonFreeExt = false;
02911   if (isa<ZExtInst>(SExtOpnd)) {
02912     // Replace s|zext(zext(opnd))
02913     // => zext(opnd).
02914     HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd);
02915     Value *ZExt =
02916         TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
02917     TPT.replaceAllUsesWith(SExt, ZExt);
02918     TPT.eraseInstruction(SExt);
02919     ExtVal = ZExt;
02920   } else {
02921     // Replace z|sext(trunc(opnd)) or sext(sext(opnd))
02922     // => z|sext(opnd).
02923     TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
02924   }
02925   CreatedInstsCost = 0;
02926 
02927   // Remove dead code.
02928   if (SExtOpnd->use_empty())
02929     TPT.eraseInstruction(SExtOpnd);
02930 
02931   // Check if the extension is still needed.
02932   Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
02933   if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
02934     if (ExtInst) {
02935       if (Exts)
02936         Exts->push_back(ExtInst);
02937       CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt;
02938     }
02939     return ExtVal;
02940   }
02941 
02942   // At this point we have: ext ty opnd to ty.
02943   // Reassign the uses of ExtInst to the opnd and remove ExtInst.
02944   Value *NextVal = ExtInst->getOperand(0);
02945   TPT.eraseInstruction(ExtInst, NextVal);
02946   return NextVal;
02947 }
02948 
02949 Value *TypePromotionHelper::promoteOperandForOther(
02950     Instruction *Ext, TypePromotionTransaction &TPT,
02951     InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
02952     SmallVectorImpl<Instruction *> *Exts,
02953     SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI,
02954     bool IsSExt) {
02955   // By construction, the operand of Ext is an instruction. Otherwise we cannot
02956   // get through it and this method should not be called.
02957   Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
02958   CreatedInstsCost = 0;
02959   if (!ExtOpnd->hasOneUse()) {
02960     // ExtOpnd will be promoted.
02961     // All its uses, except Ext, will need to use a truncated value of the
02962     // promoted version.
02963     // Create the truncate now.
02964     Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType());
02965     if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
02966       ITrunc->removeFromParent();
02967       // Insert it just after the definition.
02968       ITrunc->insertAfter(ExtOpnd);
02969       if (Truncs)
02970         Truncs->push_back(ITrunc);
02971     }
02972 
02973     TPT.replaceAllUsesWith(ExtOpnd, Trunc);
02974     // Restore the operand of Ext (which has been replaced by the previous call
02975     // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
02976     TPT.setOperand(Ext, 0, ExtOpnd);
02977   }
02978 
02979   // Get through the Instruction:
02980   // 1. Update its type.
02981   // 2. Replace the uses of Ext by Inst.
02982   // 3. Extend each operand that needs to be extended.
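  // For illustration (arbitrary value names), promoting
  //   %add = add nsw i32 %a, %b
  //   %ext = sext i32 %add to i64
  // yields
  //   %pa  = sext i32 %a to i64
  //   %pb  = sext i32 %b to i64
  //   %add = add nsw i64 %pa, %pb
  // with all former uses of %ext now using %add directly.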
02983 
02984   // Remember the original type of the instruction before promotion.
02985   // This is useful to know that the high bits are sign extended bits.
02986   PromotedInsts.insert(std::pair<Instruction *, TypeIsSExt>(
02987       ExtOpnd, TypeIsSExt(ExtOpnd->getType(), IsSExt)));
02988   // Step #1.
02989   TPT.mutateType(ExtOpnd, Ext->getType());
02990   // Step #2.
02991   TPT.replaceAllUsesWith(Ext, ExtOpnd);
02992   // Step #3.
02993   Instruction *ExtForOpnd = Ext;
02994 
02995   DEBUG(dbgs() << "Propagate Ext to operands\n");
02996   for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
02997        ++OpIdx) {
02998     DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n');
02999     if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() ||
03000         !shouldExtOperand(ExtOpnd, OpIdx)) {
03001       DEBUG(dbgs() << "No need to propagate\n");
03002       continue;
03003     }
03004     // Check if we can statically extend the operand.
03005     Value *Opnd = ExtOpnd->getOperand(OpIdx);
03006     if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
03007       DEBUG(dbgs() << "Statically extend\n");
03008       unsigned BitWidth = Ext->getType()->getIntegerBitWidth();
03009       APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth)
03010                             : Cst->getValue().zext(BitWidth);
03011       TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal));
03012       continue;
03013     }
03014     // UndefValues are typed, so we have to statically extend them.
03015     if (isa<UndefValue>(Opnd)) {
03016       DEBUG(dbgs() << "Statically extend\n");
03017       TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType()));
03018       continue;
03019     }
03020 
03021     // Otherwise we have to explicitly extend the operand.
03022     // Check if Ext was reused to extend an operand.
03023     if (!ExtForOpnd) {
03024       // If yes, create a new one.
03025       DEBUG(dbgs() << "More operands to ext\n");
03026       Value *ValForExtOpnd = IsSExt ? TPT.createSExt(Ext, Opnd, Ext->getType())
03027         : TPT.createZExt(Ext, Opnd, Ext->getType());
03028       if (!isa<Instruction>(ValForExtOpnd)) {
03029         TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
03030         continue;
03031       }
03032       ExtForOpnd = cast<Instruction>(ValForExtOpnd);
03033     }
03034     if (Exts)
03035       Exts->push_back(ExtForOpnd);
03036     TPT.setOperand(ExtForOpnd, 0, Opnd);
03037 
03038     // Move the sign extension before the insertion point.
03039     TPT.moveBefore(ExtForOpnd, ExtOpnd);
03040     TPT.setOperand(ExtOpnd, OpIdx, ExtForOpnd);
03041     CreatedInstsCost += !TLI.isExtFree(ExtForOpnd);
03042     // If more extensions are required, new instructions will have to be created.
03043     ExtForOpnd = nullptr;
03044   }
03045   if (ExtForOpnd == Ext) {
03046     DEBUG(dbgs() << "Extension is useless now\n");
03047     TPT.eraseInstruction(Ext);
03048   }
03049   return ExtOpnd;
03050 }
03051 
03052 /// Check whether or not promoting an instruction to a wider type is profitable.
03053 /// \p NewCost gives the cost of extension instructions created by the
03054 /// promotion.
03055 /// \p OldCost gives the cost of extension instructions before the promotion
03056 /// plus the number of instructions that have been matched in the addressing
03057 /// mode thanks to the promotion.
03058 /// \p PromotedOperand is the value that has been promoted.
03059 /// \return True if the promotion is profitable, false otherwise.
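/// For example (the numbers are illustrative only): if the promotion creates
/// one non-free extension (NewCost == 1) while the original extension was not
/// free and one extra instruction was folded into the addressing mode
/// (OldCost == 1 + 1 == 2), the promotion is considered profitable.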
03060 bool AddressingModeMatcher::isPromotionProfitable(
03061     unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
03062   DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost << '\n');
03063   // The cost of the new extensions is greater than the cost of the
03064   // old extension plus what we folded.
03065   // This is not profitable.
03066   if (NewCost > OldCost)
03067     return false;
03068   if (NewCost < OldCost)
03069     return true;
03070   // The promotion is neutral but it may help folding the sign extension in
03071   // loads for instance.
03072   // Check that we did not create an illegal instruction.
03073   return isPromotedInstructionLegal(TLI, DL, PromotedOperand);
03074 }
03075 
03076 /// Given an instruction or constant expr, see if we can fold the operation
03077 /// into the addressing mode. If so, update the addressing mode and return
03078 /// true, otherwise return false without modifying AddrMode.
03079 /// If \p MovedAway is not NULL, it contains the information of whether or
03080 /// not AddrInst has to be folded into the addressing mode on success.
03081 /// If \p MovedAway == true, \p AddrInst will not be part of the addressing
03082 /// mode because it has been moved away.
03083 /// Thus AddrInst must not be added to the matched instructions.
03084 /// This state can happen when AddrInst is a sext, since it may be moved away.
03085 /// Therefore, AddrInst may not be valid when MovedAway is true and it must
03086 /// not be referenced anymore.
03087 bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
03088                                                unsigned Depth,
03089                                                bool *MovedAway) {
03090   // Avoid exponential behavior on extremely deep expression trees.
03091   if (Depth >= 5) return false;
03092 
03093   // By default, all matched instructions stay in place.
03094   if (MovedAway)
03095     *MovedAway = false;
03096 
03097   switch (Opcode) {
03098   case Instruction::PtrToInt:
03099     // PtrToInt is always a noop, as we know that the int type is pointer sized.
03100     return matchAddr(AddrInst->getOperand(0), Depth);
03101   case Instruction::IntToPtr: {
03102     auto AS = AddrInst->getType()->getPointerAddressSpace();
03103     auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
03104     // This inttoptr is a no-op if the integer type is pointer sized.
03105     if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy)
03106       return matchAddr(AddrInst->getOperand(0), Depth);
03107     return false;
03108   }
03109   case Instruction::BitCast:
03110     // BitCast is always a noop, and we can handle it as long as it is
03111     // int->int or pointer->pointer (we don't want int<->fp or something).
03112     if ((AddrInst->getOperand(0)->getType()->isPointerTy() ||
03113          AddrInst->getOperand(0)->getType()->isIntegerTy()) &&
03114         // Don't touch identity bitcasts.  These were probably put here by LSR,
03115         // and we don't want to mess around with them.  Assume it knows what it
03116         // is doing.
03117         AddrInst->getOperand(0)->getType() != AddrInst->getType())
03118       return matchAddr(AddrInst->getOperand(0), Depth);
03119     return false;
03120   case Instruction::AddrSpaceCast: {
03121     unsigned SrcAS
03122       = AddrInst->getOperand(0)->getType()->getPointerAddressSpace();
03123     unsigned DestAS = AddrInst->getType()->getPointerAddressSpace();
03124     if (TLI.isNoopAddrSpaceCast(SrcAS, DestAS))
03125       return matchAddr(AddrInst->getOperand(0), Depth);
03126     return false;
03127   }
03128   case Instruction::Add: {
03129     // Check to see if we can merge in the RHS then the LHS.  If so, we win.
03130     ExtAddrMode BackupAddrMode = AddrMode;
03131     unsigned OldSize = AddrModeInsts.size();
03132     // Start a transaction at this point.
03133     // The LHS may match but not the RHS.
03134     // Therefore, we need a higher level restoration point to undo a partially
03135     // matched operation.
03136     TypePromotionTransaction::ConstRestorationPt LastKnownGood =
03137         TPT.getRestorationPoint();
03138 
03139     if (matchAddr(AddrInst->getOperand(1), Depth+1) &&
03140         matchAddr(AddrInst->getOperand(0), Depth+1))
03141       return true;
03142 
03143     // Restore the old addr mode info.
03144     AddrMode = BackupAddrMode;
03145     AddrModeInsts.resize(OldSize);
03146     TPT.rollback(LastKnownGood);
03147 
03148     // Otherwise this was over-aggressive.  Try merging in the LHS then the RHS.
03149     if (matchAddr(AddrInst->getOperand(0), Depth+1) &&
03150         matchAddr(AddrInst->getOperand(1), Depth+1))
03151       return true;
03152 
03153     // Otherwise we definitely can't merge the ADD in.
03154     AddrMode = BackupAddrMode;
03155     AddrModeInsts.resize(OldSize);
03156     TPT.rollback(LastKnownGood);
03157     break;
03158   }
03159   //case Instruction::Or:
03160   // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
03161   //break;
03162   case Instruction::Mul:
03163   case Instruction::Shl: {
03164     // Can only handle X*C and X << C.
03165     ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
03166     if (!RHS)
03167       return false;
03168     int64_t Scale = RHS->getSExtValue();
03169     if (Opcode == Instruction::Shl)
03170       Scale = 1LL << Scale;
03171 
03172     return matchScaledValue(AddrInst->getOperand(0), Scale, Depth);
03173   }
03174   case Instruction::GetElementPtr: {
03175     // Scan the GEP.  We check if it contains constant offsets and at most
03176     // one variable offset.
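    // For illustration (arbitrary IR, assuming a typical data layout), a GEP
    // such as
    //   %addr = getelementptr { i32, [8 x i32] }, { i32, [8 x i32] }* %p, i64 0, i32 1, i64 %i
    // contributes a constant offset of 4 (the struct field offset) and one
    // variable index %i scaled by 4 (the i32 allocation size); the loop below
    // accumulates these into ConstantOffset, VariableOperand and VariableScale.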
03177     int VariableOperand = -1;
03178     unsigned VariableScale = 0;
03179 
03180     int64_t ConstantOffset = 0;
03181     gep_type_iterator GTI = gep_type_begin(AddrInst);
03182     for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
03183       if (StructType *STy = dyn_cast<StructType>(*GTI)) {
03184         const StructLayout *SL = DL.getStructLayout(STy);
03185         unsigned Idx =
03186           cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
03187         ConstantOffset += SL->getElementOffset(Idx);
03188       } else {
03189         uint64_t TypeSize = DL.getTypeAllocSize(GTI.getIndexedType());
03190         if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
03191           ConstantOffset += CI->getSExtValue()*TypeSize;
03192         } else if (TypeSize) {  // Scales of zero don't do anything.
03193           // We only allow one variable index at the moment.
03194           if (VariableOperand != -1)
03195             return false;
03196 
03197           // Remember the variable index.
03198           VariableOperand = i;
03199           VariableScale = TypeSize;
03200         }
03201       }
03202     }
03203 
03204     // A common case is for the GEP to only do a constant offset.  In this case,
03205     // just add it to the disp field and check validity.
03206     if (VariableOperand == -1) {
03207       AddrMode.BaseOffs += ConstantOffset;
03208       if (ConstantOffset == 0 ||
03209           TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) {
03210         // Check to see if we can fold the base pointer in too.
03211         if (matchAddr(AddrInst->getOperand(0), Depth+1))
03212           return true;
03213       }
03214       AddrMode.BaseOffs -= ConstantOffset;
03215       return false;
03216     }
03217 
03218     // Save the valid addressing mode in case we can't match.
03219     ExtAddrMode BackupAddrMode = AddrMode;
03220     unsigned OldSize = AddrModeInsts.size();
03221 
03222     // See if the scale and offset amount is valid for this target.
03223     AddrMode.BaseOffs += ConstantOffset;
03224 
03225     // Match the base operand of the GEP.
03226     if (!matchAddr(AddrInst->getOperand(0), Depth+1)) {
03227       // If it couldn't be matched, just stuff the value in a register.
03228       if (AddrMode.HasBaseReg) {
03229         AddrMode = BackupAddrMode;
03230         AddrModeInsts.resize(OldSize);
03231         return false;
03232       }
03233       AddrMode.HasBaseReg = true;
03234       AddrMode.BaseReg = AddrInst->getOperand(0);
03235     }
03236 
03237     // Match the remaining variable portion of the GEP.
03238     if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
03239                           Depth)) {
03240       // If it couldn't be matched, try stuffing the base into a register
03241       // instead of matching it, and retrying the match of the scale.
03242       AddrMode = BackupAddrMode;
03243       AddrModeInsts.resize(OldSize);
03244       if (AddrMode.HasBaseReg)
03245         return false;
03246       AddrMode.HasBaseReg = true;
03247       AddrMode.BaseReg = AddrInst->getOperand(0);
03248       AddrMode.BaseOffs += ConstantOffset;
03249       if (!matchScaledValue(AddrInst->getOperand(VariableOperand),
03250                             VariableScale, Depth)) {
03251         // If even that didn't work, bail.
03252         AddrMode = BackupAddrMode;
03253         AddrModeInsts.resize(OldSize);
03254         return false;
03255       }
03256     }
03257 
03258     return true;
03259   }
03260   case Instruction::SExt:
03261   case Instruction::ZExt: {
03262     Instruction *Ext = dyn_cast<Instruction>(AddrInst);
03263     if (!Ext)
03264       return false;
03265 
03266     // Try to move this ext out of the way of the addressing mode.
03267     // Ask for a method for doing so.
03268     TypePromotionHelper::Action TPH =
03269         TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts);
03270     if (!TPH)
03271       return false;
03272 
03273     TypePromotionTransaction::ConstRestorationPt LastKnownGood =
03274         TPT.getRestorationPoint();
03275     unsigned CreatedInstsCost = 0;
03276     unsigned ExtCost = !TLI.isExtFree(Ext);
03277     Value *PromotedOperand =
03278         TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI);
03279     // SExt has been moved away.
03280     // Thus either it will be rematched later in the recursive calls or it is
03281     // gone. Anyway, we must not fold it into the addressing mode at this point.
03282     // E.g.,
03283     // op = add opnd, 1
03284     // idx = ext op
03285     // addr = gep base, idx
03286     // is now:
03287     // promotedOpnd = ext opnd            <- no match here
03288     // op = promoted_add promotedOpnd, 1  <- match (later in recursive calls)
03289     // addr = gep base, op                <- match
03290     if (MovedAway)
03291       *MovedAway = true;
03292 
03293     assert(PromotedOperand &&
03294            "TypePromotionHelper should have filtered out those cases");
03295 
03296     ExtAddrMode BackupAddrMode = AddrMode;
03297     unsigned OldSize = AddrModeInsts.size();
03298 
03299     if (!matchAddr(PromotedOperand, Depth) ||
03300         // The total of the new cost is equal to the cost of the created
03301         // instructions.
03302         // The total of the old cost is equal to the cost of the extension plus
03303         // what we have saved in the addressing mode.
03304         !isPromotionProfitable(CreatedInstsCost,
03305                                ExtCost + (AddrModeInsts.size() - OldSize),
03306                                PromotedOperand)) {
03307       AddrMode = BackupAddrMode;
03308       AddrModeInsts.resize(OldSize);
03309       DEBUG(dbgs() << "Sign extension does not pay off: rollback\n");
03310       TPT.rollback(LastKnownGood);
03311       return false;
03312     }
03313     return true;
03314   }
03315   }
03316   return false;
03317 }
03318 
03319 /// If we can, try to add the value of 'Addr' into the current addressing mode.
03320 /// If Addr can't be added to AddrMode this returns false and leaves AddrMode
03321 /// unmodified. This assumes that Addr is either a pointer type or intptr_t
03322 /// for the target.
03323 ///
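/// For instance (illustrative IR), matching
///   %a = getelementptr i8, i8* @g, i64 40
/// would typically yield BaseGV = @g and BaseOffs = 40, while a value that
/// cannot be decomposed any further falls back to being used as the base
/// register [reg] or the scaled register [r+r], provided the target accepts
/// the resulting addressing mode.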
03324 bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) {
03325   // Start a transaction at this point that we will roll back if the matching
03326   // fails.
03327   TypePromotionTransaction::ConstRestorationPt LastKnownGood =
03328       TPT.getRestorationPoint();
03329   if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
03330     // Fold in immediates if legal for the target.
03331     AddrMode.BaseOffs += CI->getSExtValue();
03332     if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
03333       return true;
03334     AddrMode.BaseOffs -= CI->getSExtValue();
03335   } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
03336     // If this is a global variable, try to fold it into the addressing mode.
03337     if (!AddrMode.BaseGV) {
03338       AddrMode.BaseGV = GV;
03339       if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
03340         return true;
03341       AddrMode.BaseGV = nullptr;
03342     }
03343   } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
03344     ExtAddrMode BackupAddrMode = AddrMode;
03345     unsigned OldSize = AddrModeInsts.size();
03346 
03347     // Check to see if it is possible to fold this operation.
03348     bool MovedAway = false;
03349     if (matchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
03350       // This instruction may have been moved away. If so, there is nothing
03351       // to check here.
03352       if (MovedAway)
03353         return true;
03354       // Okay, it's possible to fold this.  Check to see if it is actually
03355       // *profitable* to do so.  We use a simple cost model to avoid increasing
03356       // register pressure too much.
03357       if (I->hasOneUse() ||
03358           isProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
03359         AddrModeInsts.push_back(I);
03360         return true;
03361       }
03362 
03363       // It isn't profitable to do this, roll back.
03364       //cerr << "NOT FOLDING: " << *I;
03365       AddrMode = BackupAddrMode;
03366       AddrModeInsts.resize(OldSize);
03367       TPT.rollback(LastKnownGood);
03368     }
03369   } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
03370     if (matchOperationAddr(CE, CE->getOpcode(), Depth))
03371       return true;
03372     TPT.rollback(LastKnownGood);
03373   } else if (isa<ConstantPointerNull>(Addr)) {
03374     // Null pointer gets folded without affecting the addressing mode.
03375     return true;
03376   }
03377 
03378   // Worst case, the target should support [reg] addressing modes. :)
03379   if (!AddrMode.HasBaseReg) {
03380     AddrMode.HasBaseReg = true;
03381     AddrMode.BaseReg = Addr;
03382     // Still check for legality in case the target supports [imm] but not [i+r].
03383     if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
03384       return true;
03385     AddrMode.HasBaseReg = false;
03386     AddrMode.BaseReg = nullptr;
03387   }
03388 
03389   // If the base register is already taken, see if we can do [r+r].
03390   if (AddrMode.Scale == 0) {
03391     AddrMode.Scale = 1;
03392     AddrMode.ScaledReg = Addr;
03393     if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
03394       return true;
03395     AddrMode.Scale = 0;
03396     AddrMode.ScaledReg = nullptr;
03397   }
03398   // Couldn't match.
03399   TPT.rollback(LastKnownGood);
03400   return false;
03401 }
03402 
03403 /// Check to see if all uses of OpVal by the specified inline asm call are due
03404 /// to memory operands. If so, return true, otherwise return false.
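/// For example (illustrative x86 asm), an operand used only as an indirect
/// memory constraint, as in
///   call void asm sideeffect "prefetcht0 $0", "*m"(i32* %addr)
/// still allows the address computation feeding %addr to be folded, whereas
/// any use of it as a register ("r") operand makes folding unsafe and this
/// returns false.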
03405 static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
03406                                     const TargetMachine &TM) {
03407   const Function *F = CI->getParent()->getParent();
03408   const TargetLowering *TLI = TM.getSubtargetImpl(*F)->getTargetLowering();
03409   const TargetRegisterInfo *TRI = TM.getSubtargetImpl(*F)->getRegisterInfo();
03410   TargetLowering::AsmOperandInfoVector TargetConstraints =
03411       TLI->ParseConstraints(F->getParent()->getDataLayout(), TRI,
03412                             ImmutableCallSite(CI));
03413   for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
03414     TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
03415 
03416     // Compute the constraint code and ConstraintType to use.
03417     TLI->ComputeConstraintToUse(OpInfo, SDValue());
03418 
03419     // If this asm operand is our Value*, and if it isn't an indirect memory
03420     // operand, we can't fold it!
03421     if (OpInfo.CallOperandVal == OpVal &&
03422         (OpInfo.ConstraintType != TargetLowering::C_Memory ||
03423          !OpInfo.isIndirect))
03424       return false;
03425   }
03426 
03427   return true;
03428 }
03429 
03430 /// Recursively walk all the uses of I until we find a memory use.
03431 /// If we find an obviously non-foldable instruction, return true.
03432 /// Add the ultimately found memory instructions to MemoryUses.
03433 static bool FindAllMemoryUses(
03434     Instruction *I,
03435     SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses,
03436     SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetMachine &TM) {
03437   // If we already considered this instruction, we're done.
03438   if (!ConsideredInsts.insert(I).second)
03439     return false;
03440 
03441   // If this is an obviously unfoldable instruction, bail out.
03442   if (!MightBeFoldableInst(I))
03443     return true;
03444 
03445   // Loop over all the uses, recursively processing them.
03446   for (Use &U : I->uses()) {
03447     Instruction *UserI = cast<Instruction>(U.getUser());
03448 
03449     if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
03450       MemoryUses.push_back(std::make_pair(LI, U.getOperandNo()));
03451       continue;
03452     }
03453 
03454     if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
03455       unsigned opNo = U.getOperandNo();
03456       if (opNo == 0) return true; // Storing addr, not into addr.
03457       MemoryUses.push_back(std::make_pair(SI, opNo));
03458       continue;
03459     }
03460 
03461     if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
03462       InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
03463       if (!IA) return true;
03464 
03465       // If this is a memory operand, we're cool, otherwise bail out.
03466       if (!IsOperandAMemoryOperand(CI, IA, I, TM))
03467         return true;
03468       continue;
03469     }
03470 
03471     if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TM))
03472       return true;
03473   }
03474 
03475   return false;
03476 }
03477 
03478 /// Return true if Val is already known to be live at the use site that we're
03479 /// folding it into. If so, there is no cost to include it in the addressing
03480 /// mode. KnownLive1 and KnownLive2 are two values that we know are live at the
03481 /// instruction already.
03482 bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
03483                                                    Value *KnownLive2) {
03484   // If Val is either of the known-live values, we know it is live!
03485   if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2)
03486     return true;
03487 
03488   // All values other than instructions and arguments (e.g. constants) are live.
03489   if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;
03490 
03491   // If Val is a constant-sized alloca in the entry block, it is live; this is
03492   // true because it is just a reference to the stack/frame pointer, which is
03493   // live for the whole function.
03494   if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
03495     if (AI->isStaticAlloca())
03496       return true;
03497 
03498   // Check to see if this value is already used in the memory instruction's
03499   // block.  If so, it's already live into the block at the very least, so we
03500   // can reasonably fold it.
03501   return Val->isUsedInBasicBlock(MemoryInst->getParent());
03502 }
03503 
03504 /// It is possible for the addressing mode of the machine to fold the specified
03505 /// instruction into a load or store that ultimately uses it.
03506 /// However, the specified instruction has multiple uses.
03507 /// Given this, it may actually increase register pressure to fold it
03508 /// into the load. For example, consider this code:
03509 ///
03510 ///     X = ...
03511 ///     Y = X+1
03512 ///     use(Y)   -> nonload/store
03513 ///     Z = Y+1
03514 ///     load Z
03515 ///
03516 /// In this case, Y has multiple uses, and can be folded into the load of Z
03517 /// (yielding load [X+2]).  However, doing this will cause both "X" and "X+1" to
03518 /// be live at the use(Y) line.  If we don't fold Y into load Z, we use one
03519 /// fewer register.  Since Y can't be folded into "use(Y)" we don't increase the
03520 /// number of computations either.
03521 ///
03522 /// Note that this (like most of CodeGenPrepare) is just a rough heuristic.  If
03523 /// X was live across 'load Z' for other reasons, we actually *would* want to
03524 /// fold the addressing mode in the Z case.  This would make Y die earlier.
03525 bool AddressingModeMatcher::
03526 isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
03527                                      ExtAddrMode &AMAfter) {
03528   if (IgnoreProfitability) return true;
03529 
03530   // AMBefore is the addressing mode before this instruction was folded into it,
03531   // and AMAfter is the addressing mode after the instruction was folded.  Get
03532   // the set of registers referenced by AMAfter and subtract out those
03533   // referenced by AMBefore: this is the set of values which folding in this
03534   // address extends the lifetime of.
03535   //
03536   // Note that there are only two potential values being referenced here,
03537   // BaseReg and ScaleReg (global addresses are always available, as are any
03538   // folded immediates).
03539   Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
03540 
03541   // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
03542   // lifetime wasn't extended by adding this instruction.
03543   if (valueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
03544     BaseReg = nullptr;
03545   if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
03546     ScaledReg = nullptr;
03547 
03548   // If folding this instruction (and its subexprs) didn't extend any live
03549   // ranges, we're ok with it.
03550   if (!BaseReg && !ScaledReg)
03551     return true;
03552 
03553   // If all uses of this instruction are ultimately load/store/inlineasm's,
03554   // check to see if their addressing modes will include this instruction.  If
03555   // so, we can fold it into all uses, so it doesn't matter if it has multiple
03556   // uses.
03557   SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
03558   SmallPtrSet<Instruction*, 16> ConsideredInsts;
03559   if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TM))
03560     return false;  // Has a non-memory, non-foldable use!
03561 
03562   // Now that we know that all uses of this instruction are part of a chain of
03563   // computation involving only operations that could theoretically be folded
03564   // into a memory use, loop over each of these uses and see if they could
03565   // *actually* fold the instruction.
03566   SmallVector<Instruction*, 32> MatchedAddrModeInsts;
03567   for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
03568     Instruction *User = MemoryUses[i].first;
03569     unsigned OpNo = MemoryUses[i].second;
03570 
03571     // Get the access type of this use.  If the use isn't a pointer, we don't
03572     // know what it accesses.
03573     Value *Address = User->getOperand(OpNo);
03574     PointerType *AddrTy = dyn_cast<PointerType>(Address->getType());
03575     if (!AddrTy)
03576       return false;
03577     Type *AddressAccessTy = AddrTy->getElementType();
03578     unsigned AS = AddrTy->getAddressSpace();
03579 
03580     // Do a match against the root of this address, ignoring profitability. This
03581     // will tell us if the addressing mode for the memory operation will
03582     // *actually* cover the shared instruction.
03583     ExtAddrMode Result;
03584     TypePromotionTransaction::ConstRestorationPt LastKnownGood =
03585         TPT.getRestorationPoint();
03586     AddressingModeMatcher Matcher(MatchedAddrModeInsts, TM, AddressAccessTy, AS,
03587                                   MemoryInst, Result, InsertedInsts,
03588                                   PromotedInsts, TPT);
03589     Matcher.IgnoreProfitability = true;
03590     bool Success = Matcher.matchAddr(Address, 0);
03591     (void)Success; assert(Success && "Couldn't select *anything*?");
03592 
03593     // The match was only done to check profitability; the changes made are not
03594     // part of the original matcher. Therefore, they should be dropped,
03595     // otherwise the original matcher will not reflect the right state.
03596     TPT.rollback(LastKnownGood);
03597 
03598     // If the match didn't cover I, then it won't be shared by it.
03599     if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(),
03600                   I) == MatchedAddrModeInsts.end())
03601       return false;
03602 
03603     MatchedAddrModeInsts.clear();
03604   }
03605 
03606   return true;
03607 }
03608 
03609 } // end anonymous namespace
03610 
03611 /// Return true if the specified values are defined in a
03612 /// different basic block than BB.
03613 static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
03614   if (Instruction *I = dyn_cast<Instruction>(V))
03615     return I->getParent() != BB;
03616   return false;
03617 }
03618 
03619 /// Load and Store Instructions often have addressing modes that can do
03620 /// significant amounts of computation. As such, instruction selection will try
03621 /// to get the load or store to do as much computation as possible for the
03622 /// program. The problem is that isel can only see within a single block. As
03623 /// such, we sink as much legal addressing mode work into the block as possible.
03624 ///
03625 /// This method is used to optimize both load/store and inline asms with memory
03626 /// operands.
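/// For illustration (arbitrary IR), given
///   bb1:
///     %idx.ext = sext i32 %idx to i64
///     %addr    = getelementptr i32, i32* %base, i64 %idx.ext
///     br label %bb2
///   bb2:
///     %val = load i32, i32* %addr
/// this sinks an equivalent address computation (a "sunkaddr" GEP or
/// ptrtoint/inttoptr sequence) directly in front of the load in bb2 so that
/// instruction selection can fold it into the load's addressing mode.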
03627 bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
03628                                         Type *AccessTy, unsigned AddrSpace) {
03629   Value *Repl = Addr;
03630 
03631   // Try to collapse single-value PHI nodes.  This is necessary to undo
03632   // unprofitable PRE transformations.
03633   SmallVector<Value*, 8> worklist;
03634   SmallPtrSet<Value*, 16> Visited;
03635   worklist.push_back(Addr);
03636 
03637   // Use a worklist to iteratively look through PHI nodes, and ensure that
03638   // the addressing modes obtained from the non-PHI roots of the graph
03639   // are equivalent.
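  // For illustration (arbitrary IR), when Addr is
  //   %addr = phi i32* [ %gep.a, %bb.a ], [ %gep.b, %bb.b ]
  // both %gep.a and %gep.b are matched independently; sinking only proceeds
  // if they produce the same ExtAddrMode, and the most-used root is kept as
  // the representative Consensus value.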
03640   Value *Consensus = nullptr;
03641   unsigned NumUsesConsensus = 0;
03642   bool IsNumUsesConsensusValid = false;
03643   SmallVector<Instruction*, 16> AddrModeInsts;
03644   ExtAddrMode AddrMode;
03645   TypePromotionTransaction TPT;
03646   TypePromotionTransaction::ConstRestorationPt LastKnownGood =
03647       TPT.getRestorationPoint();
03648   while (!worklist.empty()) {
03649     Value *V = worklist.back();
03650     worklist.pop_back();
03651 
03652     // Break use-def graph loops.
03653     if (!Visited.insert(V).second) {
03654       Consensus = nullptr;
03655       break;
03656     }
03657 
03658     // For a PHI node, push all of its incoming values.
03659     if (PHINode *P = dyn_cast<PHINode>(V)) {
03660       for (Value *IncValue : P->incoming_values())
03661         worklist.push_back(IncValue);
03662       continue;
03663     }
03664 
03665     // For non-PHIs, determine the addressing mode being computed.
03666     SmallVector<Instruction*, 16> NewAddrModeInsts;
03667     ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
03668       V, AccessTy, AddrSpace, MemoryInst, NewAddrModeInsts, *TM,
03669       InsertedInsts, PromotedInsts, TPT);
03670 
03671     // This check is broken into two cases with very similar code to avoid using
03672     // getNumUses() as much as possible. Some values have a lot of uses, so
03673     // calling getNumUses() unconditionally caused a significant compile-time
03674     // regression.
03675     if (!Consensus) {
03676       Consensus = V;
03677       AddrMode = NewAddrMode;
03678       AddrModeInsts = NewAddrModeInsts;
03679       continue;
03680     } else if (NewAddrMode == AddrMode) {
03681       if (!IsNumUsesConsensusValid) {
03682         NumUsesConsensus = Consensus->getNumUses();
03683         IsNumUsesConsensusValid = true;
03684       }
03685 
03686       // Ensure that the obtained addressing mode is equivalent to that obtained
03687       // for all other roots of the PHI traversal.  Also, when choosing one
03688       // such root as representative, select the one with the most uses in order
03689       // to keep the cost modeling heuristics in AddressingModeMatcher
03690       // applicable.
03691       unsigned NumUses = V->getNumUses();
03692       if (NumUses > NumUsesConsensus) {
03693         Consensus = V;
03694         NumUsesConsensus = NumUses;
03695         AddrModeInsts = NewAddrModeInsts;
03696       }
03697       continue;
03698     }
03699 
03700     Consensus = nullptr;
03701     break;
03702   }
03703 
03704   // If the addressing mode couldn't be determined, or if multiple different
03705   // ones were determined, bail out now.
03706   if (!Consensus) {
03707     TPT.rollback(LastKnownGood);
03708     return false;
03709   }
03710   TPT.commit();
03711 
03712   // Check to see if any of the instructions subsumed by this addr mode are
03713   // non-local to I's BB.
03714   bool AnyNonLocal = false;
03715   for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) {
03716     if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) {
03717       AnyNonLocal = true;
03718       break;
03719     }
03720   }
03721 
03722   // If all the instructions matched are already in this BB, don't do anything.
03723   if (!AnyNonLocal) {
03724     DEBUG(dbgs() << "CGP: Found      local addrmode: " << AddrMode << "\n");
03725     return false;
03726   }
03727 
03728   // Insert this computation right after this user.  Since our caller is
03729   // scanning from the top of the BB to the bottom, reuses of the expr are
03730   // guaranteed to happen later.
03731   IRBuilder<> Builder(MemoryInst);
03732 
03733   // Now that we have determined the addressing expression we want to use and
03734   // know that we have to sink it into this block, check to see if we have already
03735   // done this for some other load/store instr in this block.  If so, reuse the
03736   // computation.
03737   Value *&SunkAddr = SunkAddrs[Addr];
03738   if (SunkAddr) {
03739     DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
03740                  << *MemoryInst << "\n");
03741     if (SunkAddr->getType() != Addr->getType())
03742       SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
03743   } else if (AddrSinkUsingGEPs ||
03744              (!AddrSinkUsingGEPs.getNumOccurrences() && TM &&
03745               TM->getSubtargetImpl(*MemoryInst->getParent()->getParent())
03746                   ->useAA())) {
03747     // By default, we use the GEP-based method when AA is used later. This
03748     // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
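    // As a rough sketch (the value names are illustrative), an addressing mode
    // such as [%base + %idx*4 + 8] is materialized below as i8 GEPs off the
    // pointer,
    //   %sunk1    = getelementptr i8, i8* %base.i8, i64 %idx.scaled
    //   %sunkaddr = getelementptr i8, i8* %sunk1, i64 8
    // instead of the ptrtoint/add/inttoptr arithmetic used by the fallback
    // path further down.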
03749     DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
03750                  << *MemoryInst << "\n");
03751     Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
03752     Value *ResultPtr = nullptr, *ResultIndex = nullptr;
03753 
03754     // First, find the pointer.
03755     if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) {
03756       ResultPtr = AddrMode.BaseReg;
03757       AddrMode.BaseReg = nullptr;
03758     }
03759 
03760     if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) {
03761       // We can't add more than one pointer together, nor can we scale a
03762       // pointer (both of which seem meaningless).
03763       if (ResultPtr || AddrMode.Scale != 1)
03764         return false;
03765 
03766       ResultPtr = AddrMode.ScaledReg;
03767       AddrMode.Scale = 0;
03768     }
03769 
03770     if (AddrMode.BaseGV) {
03771       if (ResultPtr)
03772         return false;
03773 
03774       ResultPtr = AddrMode.BaseGV;
03775     }
03776 
03777     // If the real base value actually came from an inttoptr, then the matcher
03778     // will look through it and provide only the integer value. In that case,
03779     // use it here.
03780     if (!ResultPtr && AddrMode.BaseReg) {
03781       ResultPtr =
03782         Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(), "sunkaddr");
03783       AddrMode.BaseReg = nullptr;
03784     } else if (!ResultPtr && AddrMode.Scale == 1) {
03785       ResultPtr =
03786         Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(), "sunkaddr");
03787       AddrMode.Scale = 0;
03788     }
03789 
03790     if (!ResultPtr &&
03791         !AddrMode.BaseReg && !AddrMode.Scale && !AddrMode.BaseOffs) {
03792       SunkAddr = Constant::getNullValue(Addr->getType());
03793     } else if (!ResultPtr) {
03794       return false;
03795     } else {
03796       Type *I8PtrTy =
03797           Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace());
03798       Type *I8Ty = Builder.getInt8Ty();
03799 
03800       // Start with the base register. Do this first so that subsequent address
03801       // matching finds it last, which will prevent it from trying to match it
03802       // as the scaled value in case it happens to be a mul. That would be
03803       // problematic if we've sunk a different mul for the scale, because then
03804       // we'd end up sinking both muls.
03805       if (AddrMode.BaseReg) {
03806         Value *V = AddrMode.BaseReg;
03807         if (V->getType() != IntPtrTy)
03808           V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
03809 
03810         ResultIndex = V;
03811       }
03812 
03813       // Add the scale value.
03814       if (AddrMode.Scale) {
03815         Value *V = AddrMode.ScaledReg;
03816         if (V->getType() == IntPtrTy) {
03817           // done.
03818         } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
03819                    cast<IntegerType>(V->getType())->getBitWidth()) {
03820           V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
03821         } else {
03822           // It is only safe to sign extend the BaseReg if we know that the math
03823           // required to create it did not overflow before we extend it. Since
03824           // the original IR value was tossed in favor of a constant back when
03825           // the AddrMode was created we need to bail out gracefully if widths
03826           // do not match instead of extending it.
03827           Instruction *I = dyn_cast_or_null<Instruction>(ResultIndex);
03828           if (I && (ResultIndex != AddrMode.BaseReg))
03829             I->eraseFromParent();
03830           return false;
03831         }
03832 
03833         if (AddrMode.Scale != 1)
03834           V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
03835                                 "sunkaddr");
03836         if (ResultIndex)
03837           ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr");
03838         else
03839           ResultIndex = V;
03840       }
03841 
03842       // Add in the Base Offset if present.
03843       if (AddrMode.BaseOffs) {
03844         Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
03845         if (ResultIndex) {
03846           // We need to add this separately from the scale above to help with
03847           // SDAG consecutive load/store merging.
03848           if (ResultPtr->getType() != I8PtrTy)
03849             ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
03850           ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
03851         }
03852 
03853         ResultIndex = V;
03854       }
03855 
03856       if (!ResultIndex) {
03857         SunkAddr = ResultPtr;
03858       } else {
03859         if (ResultPtr->getType() != I8PtrTy)
03860           ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
03861         SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
03862       }
03863 
03864       if (SunkAddr->getType() != Addr->getType())
03865         SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
03866     }
03867   } else {
03868     DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
03869                  << *MemoryInst << "\n");
03870     Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
03871     Value *Result = nullptr;
03872 
03873     // Start with the base register. Do this first so that subsequent address
03874     // matching finds it last, which will prevent it from trying to match it
03875     // as the scaled value in case it happens to be a mul. That would be
03876     // problematic if we've sunk a different mul for the scale, because then
03877     // we'd end up sinking both muls.
03878     if (AddrMode.BaseReg) {
03879       Value *V = AddrMode.BaseReg;
03880       if (V->getType()->isPointerTy())
03881         V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
03882       if (V->getType() != IntPtrTy)
03883         V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
03884       Result = V;
03885     }
03886 
03887     // Add the scale value.
03888     if (AddrMode.Scale) {
03889       Value *V = AddrMode.ScaledReg;
03890       if (V->getType() == IntPtrTy) {
03891         // done.
03892       } else if (V->getType()->isPointerTy()) {
03893         V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
03894       } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
03895                  cast<IntegerType>(V->getType())->getBitWidth()) {
03896         V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
03897       } else {
03898         // It is only safe to sign extend the BaseReg if we know that the math
03899         // required to create it did not overflow before we extend it. Since
03900         // the original IR value was tossed in favor of a constant back when
03901         // the AddrMode was created we need to bail out gracefully if widths
03902         // do not match instead of extending it.
03903         Instruction *I = dyn_cast_or_null<Instruction>(Result);
03904         if (I && (Result != AddrMode.BaseReg))
03905           I->eraseFromParent();
03906         return false;
03907       }
03908       if (AddrMode.Scale != 1)
03909         V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
03910                               "sunkaddr");
03911       if (Result)
03912         Result = Builder.CreateAdd(Result, V, "sunkaddr");
03913       else
03914         Result = V;
03915     }
03916 
03917     // Add in the BaseGV if present.
03918     if (AddrMode.BaseGV) {
03919       Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr");
03920       if (Result)
03921         Result = Builder.CreateAdd(Result, V, "sunkaddr");
03922       else
03923         Result = V;
03924     }
03925 
03926     // Add in the Base Offset if present.
03927     if (AddrMode.BaseOffs) {
03928       Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
03929       if (Result)
03930         Result = Builder.CreateAdd(Result, V, "sunkaddr");
03931       else
03932         Result = V;
03933     }
03934 
03935     if (!Result)
03936       SunkAddr = Constant::getNullValue(Addr->getType());
03937     else
03938       SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
03939   }
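  // For illustration (a hedged sketch; value names and types are
  // hypothetical): for an addressing mode of the form BaseReg + 4*ScaledReg
  // + 16 on a 64-bit target, the integer fallback above emits roughly
  //   %sunkaddr  = mul i64 %scaledreg, 4
  //   %sunkaddr1 = add i64 %basereg, %sunkaddr
  //   %sunkaddr2 = add i64 %sunkaddr1, 16
  //   %sunkaddr3 = inttoptr i64 %sunkaddr2 to i32*
  // immediately before MemoryInst, so SelectionDAG sees the whole address
  // computation in one block.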
03940 
03941   MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
03942 
03943   // If we have no uses, recursively delete the value and all dead instructions
03944   // using it.
03945   if (Repl->use_empty()) {
03946     // This can cause recursive deletion, which can invalidate our iterator.
03947     // Use a WeakVH to hold onto it in case this happens.
03948     WeakVH IterHandle(&*CurInstIterator);
03949     BasicBlock *BB = CurInstIterator->getParent();
03950 
03951     RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo);
03952 
03953     if (IterHandle != CurInstIterator.getNodePtrUnchecked()) {
03954       // If the iterator instruction was recursively deleted, start over at the
03955       // start of the block.
03956       CurInstIterator = BB->begin();
03957       SunkAddrs.clear();
03958     }
03959   }
03960   ++NumMemoryInsts;
03961   return true;
03962 }
03963 
03964 /// If there are any memory operands, use optimizeMemoryInst to sink their
03965 /// address computation into the block when possible / profitable.
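/// A minimal sketch (operand names hypothetical):
/// \code
///   call void asm sideeffect "incl $0", "=*m"(i32* %addr)
/// \endcode
/// The "=*m" constraint is an indirect memory operand, so %addr is handed to
/// optimizeMemoryInst just like a load or store address.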
03966 bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
03967   bool MadeChange = false;
03968 
03969   const TargetRegisterInfo *TRI =
03970       TM->getSubtargetImpl(*CS->getParent()->getParent())->getRegisterInfo();
03971   TargetLowering::AsmOperandInfoVector TargetConstraints =
03972       TLI->ParseConstraints(*DL, TRI, CS);
03973   unsigned ArgNo = 0;
03974   for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
03975     TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
03976 
03977     // Compute the constraint code and ConstraintType to use.
03978     TLI->ComputeConstraintToUse(OpInfo, SDValue());
03979 
03980     if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
03981         OpInfo.isIndirect) {
03982       Value *OpVal = CS->getArgOperand(ArgNo++);
03983       MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u);
03984     } else if (OpInfo.Type == InlineAsm::isInput)
03985       ArgNo++;
03986   }
03987 
03988   return MadeChange;
03989 }
03990 
03991 /// \brief Check if all the uses of \p Inst are equivalent (or free) zero or
03992 /// sign extensions.
03993 static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) {
03994   assert(!Inst->use_empty() && "Input must have at least one use");
03995   const Instruction *FirstUser = cast<Instruction>(*Inst->user_begin());
03996   bool IsSExt = isa<SExtInst>(FirstUser);
03997   Type *ExtTy = FirstUser->getType();
03998   for (const User *U : Inst->users()) {
03999     const Instruction *UI = cast<Instruction>(U);
04000     if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
04001       return false;
04002     Type *CurTy = UI->getType();
04003     // Same input and output types: Same instruction after CSE.
04004     if (CurTy == ExtTy)
04005       continue;
04006 
04007     // If IsSExt is true, we are in this situation:
04008     // a = Inst
04009     // b = sext ty1 a to ty2
04010     // c = sext ty1 a to ty3
04011     // Assuming ty2 is shorter than ty3, this could be turned into:
04012     // a = Inst
04013     // b = sext ty1 a to ty2
04014     // c = sext ty2 b to ty3
04015     // However, the last sext is not free.
04016     if (IsSExt)
04017       return false;
04018 
04019     // This is a ZExt; maybe extending from one type to the other is free.
04020     // In that case, the two uses do not really count as different.
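    // If IsSExt is false, we are in the analogous zext situation:
    // a = Inst
    // b = zext ty1 a to ty2
    // c = zext ty1 a to ty3
    // Assuming the target reports zext from ty2 to ty3 as free, c can be
    // rewritten as "c = zext ty2 b to ty3" at no cost, so both uses are
    // equivalent for our purposes.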
04021     Type *NarrowTy;
04022     Type *LargeTy;
04023     if (ExtTy->getScalarType()->getIntegerBitWidth() >
04024         CurTy->getScalarType()->getIntegerBitWidth()) {
04025       NarrowTy = CurTy;
04026       LargeTy = ExtTy;
04027     } else {
04028       NarrowTy = ExtTy;
04029       LargeTy = CurTy;
04030     }
04031 
04032     if (!TLI.isZExtFree(NarrowTy, LargeTy))
04033       return false;
04034   }
04035   // All uses are the same or can be derived from one another for free.
04036   return true;
04037 }
04038 
04039 /// \brief Try to form ExtLd by promoting \p Exts until they reach a
04040 /// load instruction.
04041 /// If an ext(load) can be formed, it is returned via \p LI for the load
04042 /// and \p Inst for the extension.
04043 /// Otherwise LI == nullptr and Inst == nullptr.
04044 /// When some promotion happened, \p TPT contains the proper state to
04045 /// revert them.
04046 ///
04047 /// \return true when promoting was necessary to expose the ext(load)
04048 /// opportunity, false otherwise.
04049 ///
04050 /// Example:
04051 /// \code
04052 /// %ld = load i32* %addr
04053 /// %add = add nuw i32 %ld, 4
04054 /// %zext = zext i32 %add to i64
04055 /// \endcode
04056 /// =>
04057 /// \code
04058 /// %ld = load i32* %addr
04059 /// %zext = zext i32 %ld to i64
04060 /// %add = add nuw i64 %zext, 4
04061 /// \endcode
04062 /// Thanks to the promotion, we can match zext(load i32*) to i64.
04063 bool CodeGenPrepare::extLdPromotion(TypePromotionTransaction &TPT,
04064                                     LoadInst *&LI, Instruction *&Inst,
04065                                     const SmallVectorImpl<Instruction *> &Exts,
04066                                     unsigned CreatedInstsCost = 0) {
04067   // Iterate over all the extensions to see if one forms an ext(load).
04068   for (auto I : Exts) {
04069     // Check if we directly have ext(load).
04070     if ((LI = dyn_cast<LoadInst>(I->getOperand(0)))) {
04071       Inst = I;
04072       // No promotion happened here.
04073       return false;
04074     }
04075     // Check whether or not we want to do any promotion.
04076     if (!TLI || !TLI->enableExtLdPromotion() || DisableExtLdPromotion)
04077       continue;
04078     // Get the action to perform the promotion.
04079     TypePromotionHelper::Action TPH = TypePromotionHelper::getAction(
04080         I, InsertedInsts, *TLI, PromotedInsts);
04081     // Check if we can promote.
04082     if (!TPH)
04083       continue;
04084     // Save the current state.
04085     TypePromotionTransaction::ConstRestorationPt LastKnownGood =
04086         TPT.getRestorationPoint();
04087     SmallVector<Instruction *, 4> NewExts;
04088     unsigned NewCreatedInstsCost = 0;
04089     unsigned ExtCost = !TLI->isExtFree(I);
04090     // Promote.
04091     Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost,
04092                              &NewExts, nullptr, *TLI);
04093     assert(PromotedVal &&
04094            "TypePromotionHelper should have filtered out those cases");
04095 
04096     // Only one extension can be merged into a load. Therefore, if we have
04097     // created more than one new extension we heuristically cut this search
04098     // path, because it means we degrade the code quality.
04099     // With exactly 2, the transformation is neutral, because we will merge
04100     // one extension but leave one. However, we optimistically keep going,
04101     // because the new extension may be removed too.
04102     long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
04103     TotalCreatedInstsCost -= ExtCost;
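    // E.g. (a sketch): promoting "zext(add nuw (load, C))" to
    // "add nuw (zext(load), C')" creates one new extension
    // (NewCreatedInstsCost == 1) while the original ext is expected to fold
    // away (ExtCost == 1), so the running total stays at 0 and the search
    // continues.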
04104     if (!StressExtLdPromotion &&
04105         (TotalCreatedInstsCost > 1 ||
04106          !isPromotedInstructionLegal(*TLI, *DL, PromotedVal))) {
04107       // The promotion is not profitable, rollback to the previous state.
04108       TPT.rollback(LastKnownGood);
04109       continue;
04110     }
04111     // The promotion is profitable.
04112     // Check if it exposes an ext(load).
04113     (void)extLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInstsCost);
04114     if (LI && (StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
04115                // If we have created a new extension, i.e., now we have two
04116                // extensions, we must make sure one of them is merged with
04117                // the load, otherwise we may degrade the code quality.
04118                (LI->hasOneUse() || hasSameExtUse(LI, *TLI))))
04119       // Promotion happened.
04120       return true;
04121     // If this does not help to expose an ext(load) then, rollback.
04122     TPT.rollback(LastKnownGood);
04123   }
04124   // None of the extensions can form an ext(load).
04125   LI = nullptr;
04126   Inst = nullptr;
04127   return false;
04128 }
04129 
04130 /// Move a zext or sext fed by a load into the same basic block as the load,
04131 /// unless conditions are unfavorable. This allows SelectionDAG to fold the
04132 /// extend into the load.
04133 /// \p I[in/out] the extension may be modified during the process if some
04134 /// promotions apply.
04135 ///
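/// A minimal sketch (assuming the target supports an i8 -> i32 zextload):
/// \code
/// bb0:
///   %ld = load i8* %p
///   br label %bb1
/// bb1:
///   %ext = zext i8 %ld to i32
/// \endcode
/// becomes
/// \code
/// bb0:
///   %ld = load i8* %p
///   %ext = zext i8 %ld to i32
///   br label %bb1
/// \endcode
/// with the use of %ext remaining in bb1.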
04136 bool CodeGenPrepare::moveExtToFormExtLoad(Instruction *&I) {
04137   // Try to promote a chain of computation if doing so allows an
04138   // extended load to be formed.
04139   TypePromotionTransaction TPT;
04140   TypePromotionTransaction::ConstRestorationPt LastKnownGood =
04141     TPT.getRestorationPoint();
04142   SmallVector<Instruction *, 1> Exts;
04143   Exts.push_back(I);
04144   // Look for a load being extended.
04145   LoadInst *LI = nullptr;
04146   Instruction *OldExt = I;
04147   bool HasPromoted = extLdPromotion(TPT, LI, I, Exts);
04148   if (!LI || !I) {
04149     assert(!HasPromoted && !LI && "If we did not match any load instruction "
04150                                   "the code must remain the same");
04151     I = OldExt;
04152     return false;
04153   }
04154 
04155   // If they're already in the same block, there's nothing to do.
04156   // Make the cheap checks first if we did not promote.
04157   // If we promoted, we need to check if it is indeed profitable.
04158   if (!HasPromoted && LI->getParent() == I->getParent())
04159     return false;
04160 
04161   EVT VT = TLI->getValueType(*DL, I->getType());
04162   EVT LoadVT = TLI->getValueType(*DL, LI->getType());
04163 
04164   // If the load has other users and the truncate is not free, this probably
04165   // isn't worthwhile.
04166   if (!LI->hasOneUse() && TLI &&
04167       (TLI->isTypeLegal(LoadVT) || !TLI->isTypeLegal(VT)) &&
04168       !TLI->isTruncateFree(I->getType(), LI->getType())) {
04169     I = OldExt;
04170     TPT.rollback(LastKnownGood);
04171     return false;
04172   }
04173 
04174   // Check whether the target supports casts folded into loads.
04175   unsigned LType;
04176   if (isa<ZExtInst>(I))
04177     LType = ISD::ZEXTLOAD;
04178   else {
04179     assert(isa<SExtInst>(I) && "Unexpected ext type!");
04180     LType = ISD::SEXTLOAD;
04181   }
04182   if (TLI && !TLI->isLoadExtLegal(LType, VT, LoadVT)) {
04183     I = OldExt;
04184     TPT.rollback(LastKnownGood);
04185     return false;
04186   }
04187 
04188   // Move the extend into the same block as the load, so that SelectionDAG
04189   // can fold it.
04190   TPT.commit();
04191   I->removeFromParent();
04192   I->insertAfter(LI);
04193   ++NumExtsMoved;
04194   return true;
04195 }
04196 
04197 bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
04198   BasicBlock *DefBB = I->getParent();
04199 
04200   // If the result of a {s|z}ext and its source are both live out, rewrite all
04201   // other uses of the source with the result of the extension.
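  // A sketch of the rewrite (names hypothetical):
  //   DefBB:  %x = zext i16 %a to i32   ; %a is also defined in DefBB
  //   UseBB:  use of i16 %a
  // becomes
  //   UseBB:  %a.tr = trunc i32 %x to i16
  //           use of i16 %a.tr
  // so only the extended value needs to be live across the edge.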
04202   Value *Src = I->getOperand(0);
04203   if (Src->hasOneUse())
04204     return false;
04205 
04206   // Only do this xform if truncating is free.
04207   if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
04208     return false;
04209 
04210   // Only safe to perform the optimization if the source is also defined in
04211   // this block.
04212   if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
04213     return false;
04214 
04215   bool DefIsLiveOut = false;
04216   for (User *U : I->users()) {
04217     Instruction *UI = cast<Instruction>(U);
04218 
04219     // Figure out which BB this ext is used in.
04220     BasicBlock *UserBB = UI->getParent();
04221     if (UserBB == DefBB) continue;
04222     DefIsLiveOut = true;
04223     break;
04224   }
04225   if (!DefIsLiveOut)
04226     return false;
04227 
04228   // Make sure none of the uses are PHI nodes.
04229   for (User *U : Src->users()) {
04230     Instruction *UI = cast<Instruction>(U);
04231     BasicBlock *UserBB = UI->getParent();
04232     if (UserBB == DefBB) continue;
04233     // Be conservative. We don't want this xform to end up introducing
04234     // reloads just before load / store instructions.
04235     if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI))
04236       return false;
04237   }
04238 
04239   // InsertedTruncs - Only insert one trunc in each block.
04240   DenseMap<BasicBlock*, Instruction*> InsertedTruncs;
04241 
04242   bool MadeChange = false;
04243   for (Use &U : Src->uses()) {
04244     Instruction *User = cast<Instruction>(U.getUser());
04245 
04246     // Figure out which BB this ext is used in.
04247     BasicBlock *UserBB = User->getParent();
04248     if (UserBB == DefBB) continue;
04249 
04250     // Both src and def are live in this block. Rewrite the use.
04251     Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
04252 
04253     if (!InsertedTrunc) {
04254       BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
04255       assert(InsertPt != UserBB->end());
04256       InsertedTrunc = new TruncInst(I, Src->getType(), "", &*InsertPt);
04257       InsertedInsts.insert(InsertedTrunc);
04258     }
04259 
04260     // Replace a use of the {s|z}ext source with a use of the result.
04261     U = InsertedTrunc;
04262     ++NumExtUses;
04263     MadeChange = true;
04264   }
04265 
04266   return MadeChange;
04267 }
04268 
04269 // Find loads whose uses only use some of the loaded value's bits.  Add an "and"
04270 // just after the load if the target can fold this into one extload instruction,
04271 // with the hope of eliminating some of the other later "and" instructions using
04272 // the loaded value.  "and"s that are made trivially redundant by the insertion
04273 // of the new "and" are removed by this function, while others (e.g. those whose
04274 // path from the load goes through a phi) are left for isel to potentially
04275 // remove.
04276 //
04277 // For example:
04278 //
04279 // b0:
04280 //   x = load i32
04281 //   ...
04282 // b1:
04283 //   y = and x, 0xff
04284 //   z = use y
04285 //
04286 // becomes:
04287 //
04288 // b0:
04289 //   x = load i32
04290 //   x' = and x, 0xff
04291 //   ...
04292 // b1:
04293 //   z = use x'
04294 //
04295 // whereas:
04296 //
04297 // b0:
04298 //   x1 = load i32
04299 //   ...
04300 // b1:
04301 //   x2 = load i32
04302 //   ...
04303 // b2:
04304 //   x = phi x1, x2
04305 //   y = and x, 0xff
04306 //
04307 // becomes (after a call to optimizeLoadExt for each load):
04308 //
04309 // b0:
04310 //   x1 = load i32
04311 //   x1' = and x1, 0xff
04312 //   ...
04313 // b1:
04314 //   x2 = load i32
04315 //   x2' = and x2, 0xff
04316 //   ...
04317 // b2:
04318 //   x = phi x1', x2'
04319 //   y = and x, 0xff
04320 //
04321 
04322 bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
04323 
04324   if (!Load->isSimple() ||
04325       !(Load->getType()->isIntegerTy() || Load->getType()->isPointerTy()))
04326     return false;
04327 
04328   // Skip loads we've already transformed or have no reason to transform.
04329   if (Load->hasOneUse()) {
04330     User *LoadUser = *Load->user_begin();
04331     if (cast<Instruction>(LoadUser)->getParent() == Load->getParent() &&
04332         !isa<PHINode>(LoadUser))
04333       return false;
04334   }
04335 
04336   // Look at all uses of Load, looking through phis, to determine how many bits
04337   // of the loaded value are needed.
04338   SmallVector<Instruction *, 8> WorkList;
04339   SmallPtrSet<Instruction *, 16> Visited;
04340   SmallVector<Instruction *, 8> AndsToMaybeRemove;
04341   for (auto *U : Load->users())
04342     WorkList.push_back(cast<Instruction>(U));
04343 
04344   EVT LoadResultVT = TLI->getValueType(*DL, Load->getType());
04345   unsigned BitWidth = LoadResultVT.getSizeInBits();
04346   APInt DemandBits(BitWidth, 0);
04347   APInt WidestAndBits(BitWidth, 0);
04348 
04349   while (!WorkList.empty()) {
04350     Instruction *I = WorkList.back();
04351     WorkList.pop_back();
04352 
04353     // Break use-def graph loops.
04354     if (!Visited.insert(I).second)
04355       continue;
04356 
04357     // For a PHI node, push all of its users.
04358     if (auto *Phi = dyn_cast<PHINode>(I)) {
04359       for (auto *U : Phi->users())
04360         WorkList.push_back(cast<Instruction>(U));
04361       continue;
04362     }
04363 
04364     switch (I->getOpcode()) {
04365     case llvm::Instruction::And: {
04366       auto *AndC = dyn_cast<ConstantInt>(I->getOperand(1));
04367       if (!AndC)
04368         return false;
04369       APInt AndBits = AndC->getValue();
04370       DemandBits |= AndBits;
04371       // Keep track of the widest and mask we see.
04372       if (AndBits.ugt(WidestAndBits))
04373         WidestAndBits = AndBits;
04374       if (AndBits == WidestAndBits && I->getOperand(0) == Load)
04375         AndsToMaybeRemove.push_back(I);
04376       break;
04377     }
04378 
04379     case llvm::Instruction::Shl: {
04380       auto *ShlC = dyn_cast<ConstantInt>(I->getOperand(1));
04381       if (!ShlC)
04382         return false;
04383       uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1);
04384       auto ShlDemandBits = APInt::getAllOnesValue(BitWidth).lshr(ShiftAmt);
04385       DemandBits |= ShlDemandBits;
04386       break;
04387     }
04388 
04389     case llvm::Instruction::Trunc: {
04390       EVT TruncVT = TLI->getValueType(*DL, I->getType());
04391       unsigned TruncBitWidth = TruncVT.getSizeInBits();
04392       auto TruncBits = APInt::getAllOnesValue(TruncBitWidth).zext(BitWidth);
04393       DemandBits |= TruncBits;
04394       break;
04395     }
04396 
04397     default:
04398       return false;
04399     }
04400   }
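  // Worked example (a sketch): if the only users of the load are
  // "and %x, 255" and "trunc %x to i8", then DemandBits and WidestAndBits
  // are both 0xff and ActiveBits is 8, so below we form an i8 zextload by
  // inserting "and %x, 255" right after the load (assuming the target
  // reports that extload as legal).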
04401 
04402   uint32_t ActiveBits = DemandBits.getActiveBits();
04403   // Avoid hoisting (and (load x) 1) since it is unlikely to be folded by the
04404   // target even if isLoadExtLegal says an i1 EXTLOAD is valid.  For example,
04405   // for the AArch64 target isLoadExtLegal(ZEXTLOAD, i32, i1) returns true, but
04406   // (and (load x) 1) is not matched as a single instruction, rather as a LDR
04407   // followed by an AND.
04408   // TODO: Look into removing this restriction by fixing backends to either
04409   // return false for isLoadExtLegal for i1 or have them select this pattern to
04410   // a single instruction.
04411   //
04412   // Also avoid hoisting if we didn't see any ands with the exact DemandBits
04413   // mask, since these are the only ands that will be removed by isel.
04414   if (ActiveBits <= 1 || !APIntOps::isMask(ActiveBits, DemandBits) ||
04415       WidestAndBits != DemandBits)
04416     return false;
04417 
04418   LLVMContext &Ctx = Load->getType()->getContext();
04419   Type *TruncTy = Type::getIntNTy(Ctx, ActiveBits);
04420   EVT TruncVT = TLI->getValueType(*DL, TruncTy);
04421 
04422   // Reject cases that won't be matched as extloads.
04423   if (!LoadResultVT.bitsGT(TruncVT) || !TruncVT.isRound() ||
04424       !TLI->isLoadExtLegal(ISD::ZEXTLOAD, LoadResultVT, TruncVT))
04425     return false;
04426 
04427   IRBuilder<> Builder(Load->getNextNode());
04428   auto *NewAnd = dyn_cast<Instruction>(
04429       Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits)));
04430 
04431   // Replace all uses of load with new and (except for the use of load in the
04432   // new and itself).
04433   Load->replaceAllUsesWith(NewAnd);
04434   NewAnd->setOperand(0, Load);
04435 
04436   // Remove any and instructions that are now redundant.
04437   for (auto *And : AndsToMaybeRemove)
04438     // Check that the and mask is the same as the one we decided to put on the
04439     // new and.
04440     if (cast<ConstantInt>(And->getOperand(1))->getValue() == DemandBits) {
04441       And->replaceAllUsesWith(NewAnd);
04442       if (&*CurInstIterator == And)
04443         CurInstIterator = std::next(And->getIterator());
04444       And->eraseFromParent();
04445       ++NumAndUses;
04446     }
04447 
04448   ++NumAndsAdded;
04449   return true;
04450 }
04451 
04452 /// Check if V (an operand of a select instruction) is an expensive instruction
04453 /// that is only used once.
04454 static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) {
04455   auto *I = dyn_cast<Instruction>(V);
04456   // If it's safe to speculatively execute, then it should not have side
04457   // effects; therefore, it's safe to sink and possibly *not* execute.
04458   return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) &&
04459          TTI->getUserCost(I) >= TargetTransformInfo::TCC_Expensive;
04460 }
04461 
04462 /// Returns true if a SelectInst should be turned into an explicit branch.
04463 static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI,
04464                                                 SelectInst *SI) {
04465   // FIXME: This should use the same heuristics as IfConversion to determine
04466   // whether a select is better represented as a branch.  This requires that
04467   // branch probability metadata is preserved for the select, which is not the
04468   // case currently.
04469 
04470   CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
04471 
04472   // If a branch is predictable, an out-of-order CPU can avoid blocking on its
04473   // comparison condition. If the compare has more than one use, there's
04474   // probably another cmov or setcc around, so it's not worth emitting a branch.
04475   if (!Cmp || !Cmp->hasOneUse())
04476     return false;
04477 
04478   Value *CmpOp0 = Cmp->getOperand(0);
04479   Value *CmpOp1 = Cmp->getOperand(1);
04480 
04481   // Emit "cmov on compare with a memory operand" as a branch to avoid stalls
04482   // on a load from memory. But if the load is used more than once, do not
04483   // change the select to a branch because the load is probably needed
04484   // regardless of whether the branch is taken or not.
04485   if ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) ||
04486       (isa<LoadInst>(CmpOp1) && CmpOp1->hasOneUse()))
04487     return true;
04488 
04489   // If either operand of the select is expensive and only needed on one side
04490   // of the select, we should form a branch.
04491   if (sinkSelectOperand(TTI, SI->getTrueValue()) ||
04492       sinkSelectOperand(TTI, SI->getFalseValue()))
04493     return true;
04494 
04495   return false;
04496 }
04497 
04498 
04499 /// If we have a SelectInst that will likely profit from branch prediction,
04500 /// turn it into a branch.
04501 bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
04502   bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
04503 
04504   // Can we convert the 'select' into a branch (control flow)?
04505   if (DisableSelectToBranch || OptSize || !TLI || VectorCond)
04506     return false;
04507 
04508   TargetLowering::SelectSupportKind SelectKind;
04509   if (VectorCond)
04510     SelectKind = TargetLowering::VectorMaskSelect;
04511   else if (SI->getType()->isVectorTy())
04512     SelectKind = TargetLowering::ScalarCondVectorVal;
04513   else
04514     SelectKind = TargetLowering::ScalarValSelect;
04515 
04516   // Do we have efficient codegen support for this kind of 'select'?
04517   if (TLI->isSelectSupported(SelectKind)) {
04518     // We have efficient codegen support for the select instruction.
04519     // Check if it is profitable to keep this 'select'.
04520     if (!TLI->isPredictableSelectExpensive() ||
04521         !isFormingBranchFromSelectProfitable(TTI, SI))
04522       return false;
04523   }
04524 
04525   ModifiedDT = true;
04526 
04527   // Transform a sequence like this:
04528   //    start:
04529   //       %cmp = cmp uge i32 %a, %b
04530   //       %sel = select i1 %cmp, i32 %c, i32 %d
04531   //
04532   // Into:
04533   //    start:
04534   //       %cmp = cmp uge i32 %a, %b
04535   //       br i1 %cmp, label %select.true, label %select.false
04536   //    select.true:
04537   //       br label %select.end
04538   //    select.false:
04539   //       br label %select.end
04540   //    select.end:
04541   //       %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
04542   //
04543   // In addition, we may sink instructions that produce %c or %d from
04544   // the entry block into the destination(s) of the new branch.
04545   // If the true or false blocks do not contain a sunken instruction, that
04546   // block and its branch may be optimized away. In that case, one side of the
04547   // first branch will point directly to select.end, and the corresponding PHI
04548   // predecessor block will be the start block.
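  // For example (a sketch, assuming the target reports the fdiv as
  // expensive):
  //   %div = fdiv double %a, %b
  //   %sel = select i1 %cmp, double %div, double 0.0
  // %div is only needed when %cmp is true, so it is sunk into
  // select.true.sink and executed only on that path.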
04549 
04550   // First, we split the block containing the select into 2 blocks.
04551   BasicBlock *StartBlock = SI->getParent();
04552   BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(SI));
04553   BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
04554 
04555   // Delete the unconditional branch that was just created by the split.
04556   StartBlock->getTerminator()->eraseFromParent();
04557 
04558   // These are the new basic blocks for the conditional branch.
04559   // At least one will become an actual new basic block.
04560   BasicBlock *TrueBlock = nullptr;
04561   BasicBlock *FalseBlock = nullptr;
04562 
04563   // Sink expensive instructions into the conditional blocks to avoid executing
04564   // them speculatively.
04565   if (sinkSelectOperand(TTI, SI->getTrueValue())) {
04566     TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink",
04567                                    EndBlock->getParent(), EndBlock);
04568     auto *TrueBranch = BranchInst::Create(EndBlock, TrueBlock);
04569     auto *TrueInst = cast<Instruction>(SI->getTrueValue());
04570     TrueInst->moveBefore(TrueBranch);
04571   }
04572   if (sinkSelectOperand(TTI, SI->getFalseValue())) {
04573     FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink",
04574                                     EndBlock->getParent(), EndBlock);
04575     auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
04576     auto *FalseInst = cast<Instruction>(SI->getFalseValue());
04577     FalseInst->moveBefore(FalseBranch);
04578   }
04579 
04580   // If there was nothing to sink, then arbitrarily choose the 'false' side
04581   // for a new input value to the PHI.
04582   if (TrueBlock == FalseBlock) {
04583     assert(TrueBlock == nullptr &&
04584            "Unexpected basic block transform while optimizing select");
04585 
04586     FalseBlock = BasicBlock::Create(SI->getContext(), "select.false",
04587                                     EndBlock->getParent(), EndBlock);
04588     BranchInst::Create(EndBlock, FalseBlock);
04589   }
04590 
04591   // Insert the real conditional branch based on the original condition.
04592   // If we did not create a new block for one of the 'true' or 'false' paths
04593   // of the condition, it means that side of the branch goes to the end block
04594   // directly and the path originates from the start block from the point of
04595   // view of the new PHI.
04596   if (TrueBlock == nullptr) {
04597     BranchInst::Create(EndBlock, FalseBlock, SI->getCondition(), SI);
04598     TrueBlock = StartBlock;
04599   } else if (FalseBlock == nullptr) {
04600     BranchInst::Create(TrueBlock, EndBlock, SI->getCondition(), SI);
04601     FalseBlock = StartBlock;
04602   } else {
04603     BranchInst::Create(TrueBlock, FalseBlock, SI->getCondition(), SI);
04604   }
04605 
04606   // The select itself is replaced with a PHI Node.
04607   PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front());
04608   PN->takeName(SI);
04609   PN->addIncoming(SI->getTrueValue(), TrueBlock);
04610   PN->addIncoming(SI->getFalseValue(), FalseBlock);
04611 
04612   SI->replaceAllUsesWith(PN);
04613   SI->eraseFromParent();
04614 
04615   // Instruct optimizeBlock to skip to the next block.
04616   CurInstIterator = StartBlock->end();
04617   ++NumSelectsExpanded;
04618   return true;
04619 }
04620 
04621 static bool isBroadcastShuffle(ShuffleVectorInst *SVI) {
04622   SmallVector<int, 16> Mask(SVI->getShuffleMask());
04623   int SplatElem = -1;
04624   for (unsigned i = 0; i < Mask.size(); ++i) {
04625     if (SplatElem != -1 && Mask[i] != -1 && Mask[i] != SplatElem)
04626       return false;
04627     SplatElem = Mask[i];
04628   }
04629 
04630   return true;
04631 }
04632 
04633 /// Some targets have expensive vector shifts if the lanes aren't all the same
04634 /// (e.g. x86 only introduced "vpsllvd" and friends with AVX2). In these cases
04635 /// it's often worth sinking a shufflevector splat down to its use so that
04636 /// codegen can spot all lanes are identical.
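/// A sketch of the rewrite (names hypothetical):
/// \code
/// DefBB:
///   %amt = shufflevector <4 x i32> %v, <4 x i32> undef,
///                        <4 x i32> zeroinitializer
/// UseBB:
///   %sh = shl <4 x i32> %x, %amt
/// \endcode
/// A copy of the splat is inserted in UseBB and the shl is rewritten to use
/// it, so instruction selection in UseBB can see that every shift amount is
/// the same scalar.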
04637 bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
04638   BasicBlock *DefBB = SVI->getParent();
04639 
04640   // Only do this xform if variable vector shifts are particularly expensive.
04641   if (!TLI || !TLI->isVectorShiftByScalarCheap(SVI->getType()))
04642     return false;
04643 
04644   // We only expect better codegen by sinking a shuffle if we can recognise a
04645   // constant splat.
04646   if (!isBroadcastShuffle(SVI))
04647     return false;
04648 
04649   // InsertedShuffles - Only insert a shuffle in each block once.
04650   DenseMap<BasicBlock*, Instruction*> InsertedShuffles;
04651 
04652   bool MadeChange = false;
04653   for (User *U : SVI->users()) {
04654     Instruction *UI = cast<Instruction>(U);
04655 
04656     // Figure out which BB this shuffle is used in.
04657     BasicBlock *UserBB = UI->getParent();
04658     if (UserBB == DefBB) continue;
04659 
04660     // For now only apply this when the splat is used by a shift instruction.
04661     if (!UI->isShift()) continue;
04662 
04663     // Everything checks out, sink the shuffle if the user's block doesn't
04664     // already have a copy.
04665     Instruction *&InsertedShuffle = InsertedShuffles[UserBB];
04666 
04667     if (!InsertedShuffle) {
04668       BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
04669       assert(InsertPt != UserBB->end());
04670       InsertedShuffle =
04671           new ShuffleVectorInst(SVI->getOperand(0), SVI->getOperand(1),
04672                                 SVI->getOperand(2), "", &*InsertPt);
04673     }
04674 
04675     UI->replaceUsesOfWith(SVI, InsertedShuffle);
04676     MadeChange = true;
04677   }
04678 
04679   // If we removed all uses, nuke the shuffle.
04680   if (SVI->use_empty()) {
04681     SVI->eraseFromParent();
04682     MadeChange = true;
04683   }
04684 
04685   return MadeChange;
04686 }
04687 
04688 bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
04689   if (!TLI || !DL)
04690     return false;
04691 
04692   Value *Cond = SI->getCondition();
04693   Type *OldType = Cond->getType();
04694   LLVMContext &Context = Cond->getContext();
04695   MVT RegType = TLI->getRegisterType(Context, TLI->getValueType(*DL, OldType));
04696   unsigned RegWidth = RegType.getSizeInBits();
04697 
04698   if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth())
04699     return false;
04700 
04701   // If the register width is greater than the type width, expand the condition
04702   // of the switch instruction and each case constant to the width of the
04703   // register. By widening the type of the switch condition, subsequent
04704   // comparisons (for case comparisons) will not need to be extended to the
04705   // preferred register width, so we will potentially eliminate N-1 extends,
04706   // where N is the number of cases in the switch.
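  // For example (a sketch, assuming 32-bit registers): "switch i8 %c" with
  // cases 1 and 2 becomes
  //   %wide = zext i8 %c to i32
  //   switch i32 %wide, label %default [ i32 1, label %bb1
  //                                      i32 2, label %bb2 ]
  // so each case comparison already happens at register width.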
04707   auto *NewType = Type::getIntNTy(Context, RegWidth);
04708 
04709   // Zero-extend the switch condition and case constants unless the switch
04710   // condition is a function argument that is already being sign-extended.
04711   // In that case, we can avoid an unnecessary mask/extension by sign-extending
04712   // everything instead.
04713   Instruction::CastOps ExtType = Instruction::ZExt;
04714   if (auto *Arg = dyn_cast<Argument>(Cond))
04715     if (Arg->hasSExtAttr())
04716       ExtType = Instruction::SExt;
04717 
04718   auto *ExtInst = CastInst::Create(ExtType, Cond, NewType);
04719   ExtInst->insertBefore(SI);
04720   SI->setCondition(ExtInst);
04721   for (SwitchInst::CaseIt Case : SI->cases()) {
04722     APInt NarrowConst = Case.getCaseValue()->getValue();
04723     APInt WideConst = (ExtType == Instruction::ZExt) ?
04724                       NarrowConst.zext(RegWidth) : NarrowConst.sext(RegWidth);
04725     Case.setValue(ConstantInt::get(Context, WideConst));
04726   }
04727 
04728   return true;
04729 }
04730 
04731 namespace {
04732 /// \brief Helper class to promote a scalar operation to a vector one.
04733 /// This class is used to move an extractelement transition downward.
04734 /// E.g.,
04735 /// a = vector_op <2 x i32>
04736 /// b = extractelement <2 x i32> a, i32 0
04737 /// c = scalar_op b
04738 /// store c
04739 ///
04740 /// =>
04741 /// a = vector_op <2 x i32>
04742 /// c = vector_op a (equivalent to scalar_op on the related lane)
04743 /// * d = extractelement <2 x i32> c, i32 0
04744 /// * store d
04745 /// Assuming both extractelement and store can be combined, we get rid of the
04746 /// transition.
04747 class VectorPromoteHelper {
04748   /// DataLayout associated with the current module.
04749   const DataLayout &DL;
04750 
04751   /// Used to perform some checks on the legality of vector operations.
04752   const TargetLowering &TLI;
04753 
04754   /// Used to estimate the cost of the promoted chain.
04755   const TargetTransformInfo &TTI;
04756 
04757   /// The transition being moved downwards.
04758   Instruction *Transition;
04759   /// The sequence of instructions to be promoted.
04760   SmallVector<Instruction *, 4> InstsToBePromoted;
04761   /// Cost of combining a store and an extract.
04762   unsigned StoreExtractCombineCost;
04763   /// Instruction that will be combined with the transition.
04764   Instruction *CombineInst;
04765 
04766   /// \brief The instruction that represents the current end of the transition.
04767   /// Since we are faking the promotion until we reach the end of the chain
04768   /// of computation, we need a way to get the current end of the transition.
04769   Instruction *getEndOfTransition() const {
04770     if (InstsToBePromoted.empty())
04771       return Transition;
04772     return InstsToBePromoted.back();
04773   }
04774 
04775   /// \brief Return the index of the original value in the transition.
04776   /// E.g., for "extractelement <2 x i32> c, i32 1" the original value,
04777   /// c, is at index 0.
04778   unsigned getTransitionOriginalValueIdx() const {
04779     assert(isa<ExtractElementInst>(Transition) &&
04780            "Other kind of transitions are not supported yet");
04781     return 0;
04782   }
04783 
04784   /// \brief Return the index of the index in the transition.
04785   /// E.g., for "extractelement <2 x i32> c, i32 0" the index
04786   /// is at index 1.
04787   unsigned getTransitionIdx() const {
04788     assert(isa<ExtractElementInst>(Transition) &&
04789            "Other kind of transitions are not supported yet");
04790     return 1;
04791   }
04792 
04793   /// \brief Get the type of the transition.
04794   /// This is the type of the original value.
04795   /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the
04796   /// transition is <2 x i32>.
04797   Type *getTransitionType() const {
04798     return Transition->getOperand(getTransitionOriginalValueIdx())->getType();
04799   }
04800 
04801   /// \brief Promote \p ToBePromoted by moving \p Def downward through it.
04802   /// I.e., we have the following sequence:
04803   /// Def = Transition <ty1> a to <ty2>
04804   /// b = ToBePromoted <ty2> Def, ...
04805   /// =>
04806   /// b = ToBePromoted <ty1> a, ...
04807   /// Def = Transition <ty1> ToBePromoted to <ty2>
04808   void promoteImpl(Instruction *ToBePromoted);
04809 
04810   /// \brief Check whether or not it is profitable to promote all the
04811   /// instructions enqueued to be promoted.
04812   bool isProfitableToPromote() {
04813     Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx());
04814     unsigned Index = isa<ConstantInt>(ValIdx)
04815                          ? cast<ConstantInt>(ValIdx)->getZExtValue()
04816                          : -1;
04817     Type *PromotedType = getTransitionType();
04818 
04819     StoreInst *ST = cast<StoreInst>(CombineInst);
04820     unsigned AS = ST->getPointerAddressSpace();
04821     unsigned Align = ST->getAlignment();
04822     // Check if this store is supported.
04823     if (!TLI.allowsMisalignedMemoryAccesses(
04824             TLI.getValueType(DL, ST->getValueOperand()->getType()), AS,
04825             Align)) {
04826       // If this is not supported, there is no way we can combine
04827       // the extract with the store.
04828       return false;
04829     }
04830 
04831     // The scalar chain of computation has to pay for the transition
04832     // scalar to vector.
04833     // The vector chain has to account for the combining cost.
04834     uint64_t ScalarCost =
04835         TTI.getVectorInstrCost(Transition->getOpcode(), PromotedType, Index);
04836     uint64_t VectorCost = StoreExtractCombineCost;
04837     for (const auto &Inst : InstsToBePromoted) {
04838       // Compute the cost.
04839       // By construction, all instructions being promoted are arithmetic ones.
04840       // Moreover, one argument is a constant that can be viewed as a splat
04841       // constant.
04842       Value *Arg0 = Inst->getOperand(0);
04843       bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) ||
04844                             isa<ConstantFP>(Arg0);
04845       TargetTransformInfo::OperandValueKind Arg0OVK =
04846           IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue
04847                          : TargetTransformInfo::OK_AnyValue;
04848       TargetTransformInfo::OperandValueKind Arg1OVK =
04849           !IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue
04850                           : TargetTransformInfo::OK_AnyValue;
04851       ScalarCost += TTI.getArithmeticInstrCost(
04852           Inst->getOpcode(), Inst->getType(), Arg0OVK, Arg1OVK);
04853       VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType,
04854                                                Arg0OVK, Arg1OVK);
04855     }
04856     DEBUG(dbgs() << "Estimated cost of computation to be promoted:\nScalar: "
04857                  << ScalarCost << "\nVector: " << VectorCost << '\n');
04858     return ScalarCost > VectorCost;
04859   }
04860 
04861   /// \brief Generate a constant vector with \p Val with the same
04862   /// number of elements as the transition.
04863   /// \p UseSplat defines whether or not \p Val should be replicated
04864   /// across the whole vector.
04865   /// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>,
04866   /// otherwise we generate a vector with as many undef as possible:
04867   /// <undef, ..., undef, Val, undef, ..., undef> where \p Val is only
04868   /// used at the index of the extract.
04869   Value *getConstantVector(Constant *Val, bool UseSplat) const {
04870     unsigned ExtractIdx = UINT_MAX;
04871     if (!UseSplat) {
04872       // If we cannot determine where the constant must be, we have to
04873       // use a splat constant.
04874       Value *ValExtractIdx = Transition->getOperand(getTransitionIdx());
04875       if (ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx))
04876         ExtractIdx = CstVal->getSExtValue();
04877       else
04878         UseSplat = true;
04879     }
04880 
04881     unsigned End = getTransitionType()->getVectorNumElements();
04882     if (UseSplat)
04883       return ConstantVector::getSplat(End, Val);
04884 
04885     SmallVector<Constant *, 4> ConstVec;
04886     UndefValue *UndefVal = UndefValue::get(Val->getType());
04887     for (unsigned Idx = 0; Idx != End; ++Idx) {
04888       if (Idx == ExtractIdx)
04889         ConstVec.push_back(Val);
04890       else
04891         ConstVec.push_back(UndefVal);
04892     }
04893     return ConstantVector::get(ConstVec);
04894   }
04895 
04896   /// \brief Check if promoting to a vector type an operand at \p OperandIdx
04897   /// in \p Use can trigger undefined behavior.
04898   static bool canCauseUndefinedBehavior(const Instruction *Use,
04899                                         unsigned OperandIdx) {
04900     // It is not safe to introduce undef when the operand is on
04901     // the right-hand side of a division-like instruction.
04902     if (OperandIdx != 1)
04903       return false;
04904     switch (Use->getOpcode()) {
04905     default:
04906       return false;
04907     case Instruction::SDiv:
04908     case Instruction::UDiv:
04909     case Instruction::SRem:
04910     case Instruction::URem:
04911       return true;
04912     case Instruction::FDiv:
04913     case Instruction::FRem:
04914       return !Use->hasNoNaNs();
04915     }
04916     llvm_unreachable(nullptr);
04917   }
04918 
04919 public:
04920   VectorPromoteHelper(const DataLayout &DL, const TargetLowering &TLI,
04921                       const TargetTransformInfo &TTI, Instruction *Transition,
04922                       unsigned CombineCost)
04923       : DL(DL), TLI(TLI), TTI(TTI), Transition(Transition),
04924         StoreExtractCombineCost(CombineCost), CombineInst(nullptr) {
04925     assert(Transition && "Do not know how to promote null");
04926   }
04927 
04928   /// \brief Check if we can promote \p ToBePromoted to \p Type.
04929   bool canPromote(const Instruction *ToBePromoted) const {
04930     // We could support CastInst too.
04931     return isa<BinaryOperator>(ToBePromoted);
04932   }
04933 
04934   /// \brief Check if it is profitable to promote \p ToBePromoted
04935   /// by moving the transition downward through it.
04936   bool shouldPromote(const Instruction *ToBePromoted) const {
04937     // Promote only if all the operands can be statically expanded.
04938     // Indeed, we do not want to introduce any new kind of transitions.
04939     for (const Use &U : ToBePromoted->operands()) {
04940       const Value *Val = U.get();
04941       if (Val == getEndOfTransition()) {
04942         // If the use is a division and the transition is on the rhs,
04943         // we cannot promote the operation, otherwise we may create a
04944         // division by zero.
04945         if (canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()))
04946           return false;
04947         continue;
04948       }
04949       if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) &&
04950           !isa<ConstantFP>(Val))
04951         return false;
04952     }
04953     // Check that the resulting operation is legal.
04954     int ISDOpcode = TLI.InstructionOpcodeToISD(ToBePromoted->getOpcode());
04955     if (!ISDOpcode)
04956       return false;
04957     return StressStoreExtract ||
04958            TLI.isOperationLegalOrCustom(
04959                ISDOpcode, TLI.getValueType(DL, getTransitionType(), true));
04960   }
04961 
04962   /// \brief Check whether or not \p Use can be combined
04963   /// with the transition.
04964   /// I.e., is it possible to do Use(Transition) => AnotherUse?
04965   bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); }
04966 
04967   /// \brief Record \p ToBePromoted as part of the chain to be promoted.
04968   void enqueueForPromotion(Instruction *ToBePromoted) {
04969     InstsToBePromoted.push_back(ToBePromoted);
04970   }
04971 
04972   /// \brief Set the instruction that will be combined with the transition.
04973   void recordCombineInstruction(Instruction *ToBeCombined) {
04974     assert(canCombine(ToBeCombined) && "Unsupported instruction to combine");
04975     CombineInst = ToBeCombined;
04976   }
04977 
04978   /// \brief Promote all the instructions enqueued for promotion if it
04979   /// is profitable.
04980   /// \return True if the promotion happened, false otherwise.
04981   bool promote() {
04982     // Check if there is something to promote.
04983     // Right now, if we do not have anything to combine with,
04984     // we assume the promotion is not profitable.
04985     if (InstsToBePromoted.empty() || !CombineInst)
04986       return false;
04987 
04988     // Check cost.
04989     if (!StressStoreExtract && !isProfitableToPromote())
04990       return false;
04991 
04992     // Promote.
04993     for (auto &ToBePromoted : InstsToBePromoted)
04994       promoteImpl(ToBePromoted);
04995     InstsToBePromoted.clear();
04996     return true;
04997   }
04998 };
04999 } // End of anonymous namespace.
05000 
05001 void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
05002   // At this point, we know that all the operands of ToBePromoted but Def
05003   // can be statically promoted.
05004   // For Def, we need to use its parameter in ToBePromoted:
05005   // b = ToBePromoted ty1 a
05006   // Def = Transition ty1 b to ty2
05007   // Move the transition down.
05008   // 1. Replace all uses of the promoted operation by the transition.
05009   // = ... b => = ... Def.
05010   assert(ToBePromoted->getType() == Transition->getType() &&
05011          "The type of the result of the transition does not match "
05012          "the final type");
05013   ToBePromoted->replaceAllUsesWith(Transition);
05014   // 2. Update the type of the uses.
05015   // b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def.
05016   Type *TransitionTy = getTransitionType();
05017   ToBePromoted->mutateType(TransitionTy);
05018   // 3. Update all the operands of the promoted operation with promoted
05019   // operands.
05020   // b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a.
05021   for (Use &U : ToBePromoted->operands()) {
05022     Value *Val = U.get();
05023     Value *NewVal = nullptr;
05024     if (Val == Transition)
05025       NewVal = Transition->getOperand(getTransitionOriginalValueIdx());
05026     else if (isa<UndefValue>(Val) || isa<ConstantInt>(Val) ||
05027              isa<ConstantFP>(Val)) {
05028       // Use a splat constant if it is not safe to use undef.
05029       NewVal = getConstantVector(
05030           cast<Constant>(Val),
05031           isa<UndefValue>(Val) ||
05032               canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()));
05033     } else
05034       llvm_unreachable("Did you modified shouldPromote and forgot to update "
05035                        "this?");
05036     ToBePromoted->setOperand(U.getOperandNo(), NewVal);
05037   }
05038   Transition->removeFromParent();
05039   Transition->insertAfter(ToBePromoted);
05040   Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted);
05041 }
05042 
05043 /// Some targets can do store(extractelement) with one instruction.
05044 /// Try to push the extractelement towards the stores when the target
05045 /// has this feature and this is profitable.
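/// A sketch (assuming the target can combine the store with the extract):
/// \code
/// %e = extractelement <2 x i32> %v, i32 1
/// %a = add i32 %e, 1
/// store i32 %a, i32* %p
/// \endcode
/// becomes
/// \code
/// %a.v = add <2 x i32> %v, <i32 undef, i32 1>
/// %e = extractelement <2 x i32> %a.v, i32 1
/// store i32 %e, i32* %p
/// \endcode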
05046 bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) {
05047   unsigned CombineCost = UINT_MAX;
05048   if (DisableStoreExtract || !TLI ||
05049       (!StressStoreExtract &&
05050        !TLI->canCombineStoreAndExtract(Inst->getOperand(0)->getType(),
05051                                        Inst->getOperand(1), CombineCost)))
05052     return false;
05053 
05054   // At this point we know that Inst is a vector to scalar transition.
05055   // Try to move it down the def-use chain, until:
05056   // - We can combine the transition with its single use
05057   //   => we got rid of the transition.
05058   // - We escape the current basic block
05059   //   => we would need to check that we are moving it at a cheaper place and
05060   //      we do not do that for now.
05061   BasicBlock *Parent = Inst->getParent();
05062   DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n');
05063   VectorPromoteHelper VPH(*DL, *TLI, *TTI, Inst, CombineCost);
05064   // If the transition has more than one use, assume this is not going to be
05065   // beneficial.
05066   while (Inst->hasOneUse()) {
05067     Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin());
05068     DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n');
05069 
05070     if (ToBePromoted->getParent() != Parent) {
05071       DEBUG(dbgs() << "Instruction to promote is in a different block ("
05072                    << ToBePromoted->getParent()->getName()
05073                    << ") than the transition (" << Parent->getName() << ").\n");
05074       return false;
05075     }
05076 
05077     if (VPH.canCombine(ToBePromoted)) {
05078       DEBUG(dbgs() << "Assume " << *Inst << '\n'
05079                    << "will be combined with: " << *ToBePromoted << '\n');
05080       VPH.recordCombineInstruction(ToBePromoted);
05081       bool Changed = VPH.promote();
05082       NumStoreExtractExposed += Changed;
05083       return Changed;
05084     }
05085 
05086     DEBUG(dbgs() << "Try promoting.\n");
05087     if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted))
05088       return false;
05089 
05090     DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n");
05091 
05092     VPH.enqueueForPromotion(ToBePromoted);
05093     Inst = ToBePromoted;
05094   }
05095   return false;
05096 }
05097 
05098 bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) {
05099   // Bail out if we inserted the instruction to prevent optimizations from
05100   // stepping on each other's toes.
05101   if (InsertedInsts.count(I))
05102     return false;
05103 
05104   if (PHINode *P = dyn_cast<PHINode>(I)) {
05105     // It is possible for very late stage optimizations (such as SimplifyCFG)
05106     // to introduce PHI nodes too late to be cleaned up.  If we detect such a
05107     // trivial PHI, go ahead and zap it here.
05108     if (Value *V = SimplifyInstruction(P, *DL, TLInfo, nullptr)) {
05109       P->replaceAllUsesWith(V);
05110       P->eraseFromParent();
05111       ++NumPHIsElim;
05112       return true;
05113     }
05114     return false;
05115   }
05116 
05117   if (CastInst *CI = dyn_cast<CastInst>(I)) {
05118     // If the source of the cast is a constant, then this should have
05119     // already been constant folded.  The only reason NOT to constant fold
05120     // it is if something (e.g. LSR) was careful to place the constant
05121     // evaluation in a block other than the one that uses it (e.g. to hoist
05122     // the address of globals out of a loop).  If this is the case, we don't
05123     // want to forward-subst the cast.
05124     if (isa<Constant>(CI->getOperand(0)))
05125       return false;
05126 
05127     if (TLI && OptimizeNoopCopyExpression(CI, *TLI, *DL))
05128       return true;
05129 
05130     if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
05131       /// Sink a zext or sext into its user blocks if the target type doesn't
05132       /// fit in one register
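      /// (e.g., on a typical 64-bit target a zext to i128 is
      /// TypeExpandInteger, so we sink the cast rather than try to pair it
      /// with a load).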
05133       if (TLI &&
05134           TLI->getTypeAction(CI->getContext(),
05135                              TLI->getValueType(*DL, CI->getType())) ==
05136               TargetLowering::TypeExpandInteger) {
05137         return SinkCast(CI);
05138       } else {
05139         bool MadeChange = moveExtToFormExtLoad(I);
05140         return MadeChange | optimizeExtUses(I);
05141       }
05142     }
05143     return false;
05144   }
05145 
05146   if (CmpInst *CI = dyn_cast<CmpInst>(I))
05147     if (!TLI || !TLI->hasMultipleConditionRegisters())
05148       return OptimizeCmpExpression(CI);
05149 
05150   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
05151     stripInvariantGroupMetadata(*LI);
05152     if (TLI) {
05153       bool Modified = optimizeLoadExt(LI);
05154       unsigned AS = LI->getPointerAddressSpace();
05155       Modified |= optimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS);
05156       return Modified;
05157     }
05158     return false;
05159   }
05160 
05161   if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
05162     stripInvariantGroupMetadata(*SI);
05163     if (TLI) {
05164       unsigned AS = SI->getPointerAddressSpace();
05165       return optimizeMemoryInst(I, SI->getOperand(1),
05166                                 SI->getOperand(0)->getType(), AS);
05167     }
05168     return false;
05169   }
05170 
05171   BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
05172 
05173   if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
05174                 BinOp->getOpcode() == Instruction::LShr)) {
05175     ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
05176     if (TLI && CI && TLI->hasExtractBitsInsn())
05177       return OptimizeExtractBits(BinOp, CI, *TLI, *DL);
05178 
05179     return false;
05180   }
05181 
05182   if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
05183     if (GEPI->hasAllZeroIndices()) {
05184       /// The GEP operand must be a pointer, so must its result -> BitCast
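      // E.g. (a sketch): "%g = getelementptr [4 x i32]* %p, i64 0, i64 0"
      // becomes "%g = bitcast [4 x i32]* %p to i32*".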
05185       Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
05186                                         GEPI->getName(), GEPI);
05187       GEPI->replaceAllUsesWith(NC);
05188       GEPI->eraseFromParent();
05189       ++NumGEPsElim;
05190       optimizeInst(NC, ModifiedDT);
05191       return true;
05192     }
05193     return false;
05194   }
05195 
05196   if (CallInst *CI = dyn_cast<CallInst>(I))
05197     return optimizeCallInst(CI, ModifiedDT);
05198 
05199   if (SelectInst *SI = dyn_cast<SelectInst>(I))
05200     return optimizeSelectInst(SI);
05201 
05202   if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
05203     return optimizeShuffleVectorInst(SVI);
05204 
05205   if (auto *Switch = dyn_cast<SwitchInst>(I))
05206     return optimizeSwitchInst(Switch);
05207 
05208   if (isa<ExtractElementInst>(I))
05209     return optimizeExtractElementInst(I);
05210 
05211   return false;
05212 }
05213 
05214 /// Given an OR instruction, check to see if this is a bitreverse
05215 /// idiom. If so, insert the new intrinsic and return true.
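/// As a rough illustration (names, widths and the exact instruction mix are
/// hypothetical; recognizeBitReverseOrBSwapIdiom accepts many variants), a
/// bit-reversal chain over an i8 such as
///   %hi  = shl i8 %x, 4
///   %lo  = lshr i8 %x, 4
///   %r0  = or i8 %hi, %lo        ; swap nibbles
///   ...further shift/mask/or steps swapping bit pairs and single bits...
///   %rev = or i8 %a, %b          ; final OR producing the reversed value
/// would be collapsed into a single call to llvm.bitreverse.i8, with the now
/// dead shift/mask/or instructions cleaned up afterwards.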
05216 static bool makeBitReverse(Instruction &I, const DataLayout &DL,
05217                            const TargetLowering &TLI) {
05218   if (!I.getType()->isIntegerTy() ||
05219       !TLI.isOperationLegalOrCustom(ISD::BITREVERSE,
05220                                     TLI.getValueType(DL, I.getType(), true)))
05221     return false;
05222 
05223   SmallVector<Instruction*, 4> Insts;
05224   if (!recognizeBitReverseOrBSwapIdiom(&I, false, true, Insts))
05225     return false;
05226   Instruction *LastInst = Insts.back();
05227   I.replaceAllUsesWith(LastInst);
05228   RecursivelyDeleteTriviallyDeadInstructions(&I);
05229   return true;
05230 }
05231 
05232 // In this pass we look for GEP and cast instructions that are used
05233 // across basic blocks and rewrite them to improve basic-block-at-a-time
05234 // selection.
05235 bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool& ModifiedDT) {
05236   SunkAddrs.clear();
05237   bool MadeChange = false;
05238 
05239   CurInstIterator = BB.begin();
05240   while (CurInstIterator != BB.end()) {
05241     MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT);
05242     if (ModifiedDT)
05243       return true;
05244   }
05245 
05246   bool MadeBitReverse = true;
05247   while (TLI && MadeBitReverse) {
05248     MadeBitReverse = false;
05249     for (auto &I : reverse(BB)) {
05250       if (makeBitReverse(I, *DL, *TLI)) {
05251         MadeBitReverse = MadeChange = true;
05252         break;
05253       }
05254     }
05255   }
05256   MadeChange |= dupRetToEnableTailCallOpts(&BB);
05257   
05258   return MadeChange;
05259 }
05260 
05261 // If llvm.dbg.value is far away from the value it describes, ISel may not
05262 // be able to handle it properly. ISel will drop llvm.dbg.value if it cannot
05263 // find a node corresponding to the value.
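// A minimal sketch of the effect (IR names are hypothetical): given
//   %add = add i32 %a, %b
//   ...many intervening instructions...
//   call void @llvm.dbg.value(metadata i32 %add, ...)
// the dbg.value call is moved to sit immediately after %add, so ISel still
// sees it next to the node it describes.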
05264 bool CodeGenPrepare::placeDbgValues(Function &F) {
05265   bool MadeChange = false;
05266   for (BasicBlock &BB : F) {
05267     Instruction *PrevNonDbgInst = nullptr;
05268     for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
05269       Instruction *Insn = &*BI++;
05270       DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
05271       // Leave dbg.values that refer to an alloca alone. These
05272       // intrinsics describe the address of a variable (= the alloca)
05273       // being taken.  They should not be moved next to the alloca
05274       // (and to the beginning of the scope), but rather stay close to
05275       // where said address is used.
05276       if (!DVI || (DVI->getValue() && isa<AllocaInst>(DVI->getValue()))) {
05277         PrevNonDbgInst = Insn;
05278         continue;
05279       }
05280 
05281       Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue());
05282       if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) {
05283         // If VI is a phi in a block with an EHPad terminator, we can't insert
05284         // after it.
05285         if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())
05286           continue;
05287         DEBUG(dbgs() << "Moving Debug Value before:\n" << *DVI << ' ' << *VI);
05288         DVI->removeFromParent();
05289         if (isa<PHINode>(VI))
05290           DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt());
05291         else
05292           DVI->insertAfter(VI);
05293         MadeChange = true;
05294         ++NumDbgValueMoved;
05295       }
05296     }
05297   }
05298   return MadeChange;
05299 }
05300 
05301 // If there is a sequence that branches based on comparing a single bit
05302 // against zero that can be combined into a single instruction, and the
05303 // target supports folding these into a single instruction, sink the
05304 // mask and compare into the branch uses. Do this before OptimizeBlock ->
05305 // OptimizeInst -> OptimizeCmpExpression, which perturbs the pattern being
05306 // searched for.
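// A rough before/after sketch of the rewrite below (all IR names and the mask
// value are hypothetical). Before:
//
//   bb1:
//     %and = and i32 %val, 4
//     %cmp = icmp eq i32 %and, 0
//     br i1 %cmp, label %t1, label %bb2
//   bb2:
//     br i1 %cmp, label %t2, label %f2
//
// After, the branch in bb2 gets its own copy of the mask and compare:
//
//   bb2:
//     %and.copy = and i32 %val, 4
//     %cmp.copy = icmp eq i32 %and.copy, 0
//     br i1 %cmp.copy, label %t2, label %f2
//
// so each conditional branch sees the "and; icmp" in its own block, where the
// target can fold the three instructions into a single test-and-branch.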
05307 bool CodeGenPrepare::sinkAndCmp(Function &F) {
05308   if (!EnableAndCmpSinking)
05309     return false;
05310   if (!TLI || !TLI->isMaskAndBranchFoldingLegal())
05311     return false;
05312   bool MadeChange = false;
05313   for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
05314     BasicBlock *BB = &*I++;
05315 
05316     // Does this BB end with the following?
05317     //   %andVal = and %val, #single-bit-set
05318     //   %icmpVal = icmp %andVal, 0
05319     //   br i1 %icmpVal, label %dest1, label %dest2
05320     BranchInst *Brcc = dyn_cast<BranchInst>(BB->getTerminator());
05321     if (!Brcc || !Brcc->isConditional())
05322       continue;
05323     ICmpInst *Cmp = dyn_cast<ICmpInst>(Brcc->getOperand(0));
05324     if (!Cmp || Cmp->getParent() != BB)
05325       continue;
05326     ConstantInt *Zero = dyn_cast<ConstantInt>(Cmp->getOperand(1));
05327     if (!Zero || !Zero->isZero())
05328       continue;
05329     Instruction *And = dyn_cast<Instruction>(Cmp->getOperand(0));
05330     if (!And || And->getOpcode() != Instruction::And || And->getParent() != BB)
05331       continue;
05332     ConstantInt* Mask = dyn_cast<ConstantInt>(And->getOperand(1));
05333     if (!Mask || !Mask->getUniqueInteger().isPowerOf2())
05334       continue;
05335     DEBUG(dbgs() << "found and; icmp ?,0; brcc\n"); DEBUG(BB->dump());
05336 
05337     // Push the "and; icmp" for any users that are conditional branches.
05338     // Since there can only be one branch use per BB, we don't need to keep
05339     // track of which BBs we insert into.
05340     for (Value::use_iterator UI = Cmp->use_begin(), E = Cmp->use_end();
05341          UI != E; ) {
05342       Use &TheUse = *UI;
05343       // Find brcc use.
05344       BranchInst *BrccUser = dyn_cast<BranchInst>(*UI);
05345       ++UI;
05346       if (!BrccUser || !BrccUser->isConditional())
05347         continue;
05348       BasicBlock *UserBB = BrccUser->getParent();
05349       if (UserBB == BB) continue;
05350       DEBUG(dbgs() << "found Brcc use\n");
05351 
05352       // Sink the "and; icmp" to use.
05353       MadeChange = true;
05354       BinaryOperator *NewAnd =
05355         BinaryOperator::CreateAnd(And->getOperand(0), And->getOperand(1), "",
05356                                   BrccUser);
05357       CmpInst *NewCmp =
05358         CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(), NewAnd, Zero,
05359                         "", BrccUser);
05360       TheUse = NewCmp;
05361       ++NumAndCmpsMoved;
05362       DEBUG(BrccUser->getParent()->dump());
05363     }
05364   }
05365   return MadeChange;
05366 }
05367 
05368 /// \brief Retrieve the probabilities of a conditional branch. Returns true on
05369 /// success, or false if the branch-weight metadata is missing or invalid.
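/// For illustration, a conditional branch annotated with
///   !prof !{!"branch_weights", i32 20, i32 5}
/// yields ProbTrue = 20 and ProbFalse = 5; metadata without exactly three
/// operands, or whose weight operands are not constant integers, is rejected.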
05370 static bool extractBranchMetadata(BranchInst *BI,
05371                                   uint64_t &ProbTrue, uint64_t &ProbFalse) {
05372   assert(BI->isConditional() &&
05373          "Looking for probabilities on unconditional branch?");
05374   auto *ProfileData = BI->getMetadata(LLVMContext::MD_prof);
05375   if (!ProfileData || ProfileData->getNumOperands() != 3)
05376     return false;
05377 
05378   const auto *CITrue =
05379       mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(1));
05380   const auto *CIFalse =
05381       mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(2));
05382   if (!CITrue || !CIFalse)
05383     return false;
05384 
05385   ProbTrue = CITrue->getValue().getZExtValue();
05386   ProbFalse = CIFalse->getValue().getZExtValue();
05387 
05388   return true;
05389 }
05390 
05391 /// \brief Scale down both weights to fit into uint32_t.
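/// For example (illustrative numbers): with NewTrue = 6,000,000,000 and
/// NewFalse = 3,000,000,000, Scale is 6,000,000,000 / UINT32_MAX + 1 = 2, so
/// the weights become 3,000,000,000 and 1,500,000,000, both of which fit in
/// uint32_t while preserving their 2:1 ratio.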
05392 static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
05393   uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
05394   uint32_t Scale = (NewMax / UINT32_MAX) + 1;
05395   NewTrue = NewTrue / Scale;
05396   NewFalse = NewFalse / Scale;
05397 }
05398 
05399 /// \brief Some targets prefer to split a conditional branch like:
05400 /// \code
05401 ///   %0 = icmp ne i32 %a, 0
05402 ///   %1 = icmp ne i32 %b, 0
05403 ///   %or.cond = or i1 %0, %1
05404 ///   br i1 %or.cond, label %TrueBB, label %FalseBB
05405 /// \endcode
05406 /// into multiple branch instructions like:
05407 /// \code
05408 ///   bb1:
05409 ///     %0 = icmp ne i32 %a, 0
05410 ///     br i1 %0, label %TrueBB, label %bb2
05411 ///   bb2:
05412 ///     %1 = icmp ne i32 %b, 0
05413 ///     br i1 %1, label %TrueBB, label %FalseBB
05414 /// \endcode
05415 /// This usually allows instruction selection to do even further optimizations
05416 /// and combine the compare with the branch instruction. Currently this is
05417 /// applied for targets which have "cheap" jump instructions.
05418 ///
05419 /// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
05420 ///
05421 bool CodeGenPrepare::splitBranchCondition(Function &F) {
05422   if (!TM || !TM->Options.EnableFastISel || !TLI || TLI->isJumpExpensive())
05423     return false;
05424 
05425   bool MadeChange = false;
05426   for (auto &BB : F) {
05427     // Does this BB end with the following?
05428     //   %cond1 = icmp|fcmp|binary instruction ...
05429     //   %cond2 = icmp|fcmp|binary instruction ...
05430     //   %cond.or = or|and i1 %cond1, %cond2
05431     //   br i1 %cond.or, label %dest1, label %dest2
05432     BinaryOperator *LogicOp;
05433     BasicBlock *TBB, *FBB;
05434     if (!match(BB.getTerminator(), m_Br(m_OneUse(m_BinOp(LogicOp)), TBB, FBB)))
05435       continue;
05436 
05437     auto *Br1 = cast<BranchInst>(BB.getTerminator());
05438     if (Br1->getMetadata(LLVMContext::MD_unpredictable))
05439       continue;
05440 
05441     unsigned Opc;
05442     Value *Cond1, *Cond2;
05443     if (match(LogicOp, m_And(m_OneUse(m_Value(Cond1)),
05444                              m_OneUse(m_Value(Cond2)))))
05445       Opc = Instruction::And;
05446     else if (match(LogicOp, m_Or(m_OneUse(m_Value(Cond1)),
05447                                  m_OneUse(m_Value(Cond2)))))
05448       Opc = Instruction::Or;
05449     else
05450       continue;
05451 
05452     if (!match(Cond1, m_CombineOr(m_Cmp(), m_BinOp())) ||
05453         !match(Cond2, m_CombineOr(m_Cmp(), m_BinOp()))   )
05454       continue;
05455 
05456     DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());
05457 
05458     // Create a new BB.
05459     auto *InsertBefore = std::next(Function::iterator(BB))
05460         .getNodePtrUnchecked();
05461     auto TmpBB = BasicBlock::Create(BB.getContext(),
05462                                     BB.getName() + ".cond.split",
05463                                     BB.getParent(), InsertBefore);
05464 
05465     // Update the original basic block: make the branch use the first condition
05466     // directly and remove the no-longer-needed and/or instruction.
05467     Br1->setCondition(Cond1);
05468     LogicOp->eraseFromParent();
05469 
05470     // Depending on the condition we have to replace either the true or the false
05471     // successor of the original branch instruction.
05472     if (Opc == Instruction::And)
05473       Br1->setSuccessor(0, TmpBB);
05474     else
05475       Br1->setSuccessor(1, TmpBB);
05476 
05477     // Fill in the new basic block.
05478     auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond2, TBB, FBB);
05479     if (auto *I = dyn_cast<Instruction>(Cond2)) {
05480       I->removeFromParent();
05481       I->insertBefore(Br2);
05482     }
05483 
05484     // Update PHI nodes in both successors. The original BB needs to be
05485     // replaced in one successor's PHI nodes, because the branch now comes from
05486     // the newly generated BB (TmpBB). In the other successor we need to add one
05487     // incoming edge to the PHI nodes, because both branch instructions now
05488     // target the same successor. Depending on the original branch condition
05489     // (and/or) we have to swap the successors (TrueDest, FalseDest), so that
05490     // we perform the correct update for the PHI nodes.
05491     // This doesn't change the successor order of the just-created branch
05492     // instruction (or any other instruction).
05493     if (Opc == Instruction::Or)
05494       std::swap(TBB, FBB);
05495 
05496     // Replace the old BB with the new BB.
05497     for (auto &I : *TBB) {
05498       PHINode *PN = dyn_cast<PHINode>(&I);
05499       if (!PN)
05500         break;
05501       int i;
05502       while ((i = PN->getBasicBlockIndex(&BB)) >= 0)
05503         PN->setIncomingBlock(i, TmpBB);
05504     }
05505 
05506     // Add another incoming edge from the new BB (TmpBB).
05507     for (auto &I : *FBB) {
05508       PHINode *PN = dyn_cast<PHINode>(&I);
05509       if (!PN)
05510         break;
05511       auto *Val = PN->getIncomingValueForBlock(&BB);
05512       PN->addIncoming(Val, TmpBB);
05513     }
05514 
05515     // Update the branch weights (from SelectionDAGBuilder::
05516     // FindMergedConditions).
05517     if (Opc == Instruction::Or) {
05518       // Codegen X | Y as:
05519       // BB1:
05520       //   jmp_if_X TBB
05521       //   jmp TmpBB
05522       // TmpBB:
05523       //   jmp_if_Y TBB
05524       //   jmp FBB
05525       //
05526 
05527       // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
05528       // The requirement is that
05529       //   TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
05530       //     = TrueProb for original BB.
05531       // Assuming the original weights are A and B, one choice is to set BB1's
05532       // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
05533       // assumes that
05534       //   TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
05535       // Another choice is to assume TrueProb for BB1 equals TrueProb for
05536       // TmpBB, but the math is more complicated.
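      // A worked example with hypothetical weights A = 20 (true) and B = 5
      // (false): BB1 gets weights 20 and 20 + 2*5 = 30, and TmpBB gets 20 and
      // 2*5 = 10, so TrueProb(BB1) + FalseProb(BB1) * TrueProb(TmpBB)
      //   = 20/50 + (30/50) * (20/30) = 0.8, matching the original TrueProb
      // of 20/25 = 0.8.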
05537       uint64_t TrueWeight, FalseWeight;
05538       if (extractBranchMetadata(Br1, TrueWeight, FalseWeight)) {
05539         uint64_t NewTrueWeight = TrueWeight;
05540         uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight;
05541         scaleWeights(NewTrueWeight, NewFalseWeight);
05542         Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext())
05543                          .createBranchWeights(NewTrueWeight, NewFalseWeight));
05544 
05545         NewTrueWeight = TrueWeight;
05546         NewFalseWeight = 2 * FalseWeight;
05547         scaleWeights(NewTrueWeight, NewFalseWeight);
05548         Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext())
05549                          .createBranchWeights(NewTrueWeight, NewFalseWeight));
05550       }
05551     } else {
05552       // Codegen X & Y as:
05553       // BB1:
05554       //   jmp_if_X TmpBB
05555       //   jmp FBB
05556       // TmpBB:
05557       //   jmp_if_Y TBB
05558       //   jmp FBB
05559       //
05560       // This requires creation of TmpBB after the original BB.
05561 
05562       // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
05563       // The requirement is that
05564       //   FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
05565       //     = FalseProb for original BB.
05566       // Assuming the original weights are A and B, one choice is to set BB1's
05567       // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
05568       // assumes that
05569       //   FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
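      // A worked example with hypothetical weights A = 20 (true) and B = 5
      // (false): BB1 gets weights 2*20 + 5 = 45 and 5, and TmpBB gets 2*20 = 40
      // and 5, so FalseProb(BB1) + TrueProb(BB1) * FalseProb(TmpBB)
      //   = 5/50 + (45/50) * (5/45) = 0.2, matching the original FalseProb
      // of 5/25 = 0.2.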
05570       uint64_t TrueWeight, FalseWeight;
05571       if (extractBranchMetadata(Br1, TrueWeight, FalseWeight)) {
05572         uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight;
05573         uint64_t NewFalseWeight = FalseWeight;
05574         scaleWeights(NewTrueWeight, NewFalseWeight);
05575         Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext())
05576                          .createBranchWeights(NewTrueWeight, NewFalseWeight));
05577 
05578         NewTrueWeight = 2 * TrueWeight;
05579         NewFalseWeight = FalseWeight;
05580         scaleWeights(NewTrueWeight, NewFalseWeight);
05581         Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext())
05582                          .createBranchWeights(NewTrueWeight, NewFalseWeight));
05583       }
05584     }
05585 
05586     // Note: No point in getting fancy here, since the DT info is never
05587     // available to CodeGenPrepare.
05588     ModifiedDT = true;
05589 
05590     MadeChange = true;
05591 
05592     DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();
05593           TmpBB->dump());
05594   }
05595   return MadeChange;
05596 }
05597 
05598 void CodeGenPrepare::stripInvariantGroupMetadata(Instruction &I) {
05599   if (auto *InvariantMD = I.getMetadata(LLVMContext::MD_invariant_group))
05600     I.dropUnknownNonDebugMetadata(InvariantMD->getMetadataID());
05601 }