LLVM  3.7.0
CodeGenPrepare.cpp
1 //===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass munges the code in the input function to better prepare it for
11 // SelectionDAG-based code generation. This works around limitations in its
12 // basic-block-at-a-time approach. It should eventually be removed.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/CodeGen/Passes.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/SmallSet.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/Analysis/InstructionSimplify.h"
21 #include "llvm/Analysis/TargetLibraryInfo.h"
22 #include "llvm/Analysis/TargetTransformInfo.h"
23 #include "llvm/IR/CallSite.h"
24 #include "llvm/IR/Constants.h"
25 #include "llvm/IR/DataLayout.h"
26 #include "llvm/IR/DerivedTypes.h"
27 #include "llvm/IR/Dominators.h"
28 #include "llvm/IR/Function.h"
29 #include "llvm/IR/GetElementPtrTypeIterator.h"
30 #include "llvm/IR/IRBuilder.h"
31 #include "llvm/IR/InlineAsm.h"
32 #include "llvm/IR/Instructions.h"
33 #include "llvm/IR/IntrinsicInst.h"
34 #include "llvm/IR/MDBuilder.h"
35 #include "llvm/IR/PatternMatch.h"
36 #include "llvm/IR/Statepoint.h"
37 #include "llvm/IR/ValueHandle.h"
38 #include "llvm/IR/ValueMap.h"
39 #include "llvm/Pass.h"
40 #include "llvm/Support/CommandLine.h"
41 #include "llvm/Support/Debug.h"
42 #include "llvm/Support/raw_ostream.h"
43 #include "llvm/Target/TargetLowering.h"
44 #include "llvm/Target/TargetSubtargetInfo.h"
45 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
46 #include "llvm/Transforms/Utils/BuildLibCalls.h"
47 #include "llvm/Transforms/Utils/BypassSlowDivision.h"
48 #include "llvm/Transforms/Utils/Local.h"
49 #include "llvm/Transforms/Utils/SimplifyLibCalls.h"
50 using namespace llvm;
51 using namespace llvm::PatternMatch;
52 
53 #define DEBUG_TYPE "codegenprepare"
54 
55 STATISTIC(NumBlocksElim, "Number of blocks eliminated");
56 STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
57 STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
58 STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
59  "sunken Cmps");
60 STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
61  "of sunken Casts");
62 STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
63  "computations were sunk");
64 STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
65 STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
66 STATISTIC(NumRetsDup, "Number of return instructions duplicated");
67 STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
68 STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
69 STATISTIC(NumAndCmpsMoved, "Number of and/cmp's pushed into branches");
70 STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
71 
72 static cl::opt<bool> DisableBranchOpts(
73  "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
74  cl::desc("Disable branch optimizations in CodeGenPrepare"));
75 
76 static cl::opt<bool>
77  DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
78  cl::desc("Disable GC optimizations in CodeGenPrepare"));
79 
80 static cl::opt<bool> DisableSelectToBranch(
81  "disable-cgp-select2branch", cl::Hidden, cl::init(false),
82  cl::desc("Disable select to branch conversion."));
83 
84 static cl::opt<bool> AddrSinkUsingGEPs(
85  "addr-sink-using-gep", cl::Hidden, cl::init(false),
86  cl::desc("Address sinking in CGP using GEPs."));
87 
88 static cl::opt<bool> EnableAndCmpSinking(
89  "enable-andcmp-sinking", cl::Hidden, cl::init(true),
90  cl::desc("Enable sinking and/cmp into branches."));
91 
92 static cl::opt<bool> DisableStoreExtract(
93  "disable-cgp-store-extract", cl::Hidden, cl::init(false),
94  cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));
95 
96 static cl::opt<bool> StressStoreExtract(
97  "stress-cgp-store-extract", cl::Hidden, cl::init(false),
98  cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));
99 
100 static cl::opt<bool> DisableExtLdPromotion(
101  "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
102  cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
103  "CodeGenPrepare"));
104 
105 static cl::opt<bool> StressExtLdPromotion(
106  "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
107  cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
108  "optimization in CodeGenPrepare"));
109 
110 namespace {
111 typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
112 struct TypeIsSExt {
113  Type *Ty;
114  bool IsSExt;
115  TypeIsSExt(Type *Ty, bool IsSExt) : Ty(Ty), IsSExt(IsSExt) {}
116 };
117 typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy;
118 class TypePromotionTransaction;
119 
120  class CodeGenPrepare : public FunctionPass {
121  /// TLI - Keep a pointer to a TargetLowering to consult for determining
122  /// transformation profitability.
123  const TargetMachine *TM;
124  const TargetLowering *TLI;
125  const TargetTransformInfo *TTI;
126  const TargetLibraryInfo *TLInfo;
127 
128  /// CurInstIterator - As we scan instructions optimizing them, this is the
129  /// next instruction to optimize. Xforms that can invalidate this should
130  /// update it.
131  BasicBlock::iterator CurInstIterator;
132 
133  /// Keeps track of non-local addresses that have been sunk into a block.
134  /// This allows us to avoid inserting duplicate code for blocks with
135  /// multiple load/stores of the same address.
136  ValueMap<Value*, Value*> SunkAddrs;
137 
138  /// Keeps track of all instructions inserted for the current function.
139  SetOfInstrs InsertedInsts;
140  /// Keeps track of the types of the related instructions before their
141  /// promotion for the current function.
142  InstrToOrigTy PromotedInsts;
143 
144  /// ModifiedDT - If CFG is modified in any way.
145  bool ModifiedDT;
146 
147  /// OptSize - True if optimizing for size.
148  bool OptSize;
149 
150  /// DataLayout for the Function being processed.
151  const DataLayout *DL;
152 
153  public:
154  static char ID; // Pass identification, replacement for typeid
155  explicit CodeGenPrepare(const TargetMachine *TM = nullptr)
156  : FunctionPass(ID), TM(TM), TLI(nullptr), TTI(nullptr), DL(nullptr) {
157    initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
158  }
159  bool runOnFunction(Function &F) override;
160 
161  const char *getPassName() const override { return "CodeGen Prepare"; }
162 
163  void getAnalysisUsage(AnalysisUsage &AU) const override {
164    AU.addPreserved<DominatorTreeWrapperPass>();
165    AU.addRequired<TargetLibraryInfoWrapperPass>();
166    AU.addRequired<TargetTransformInfoWrapperPass>();
167  }
168 
169  private:
170  bool EliminateFallThrough(Function &F);
171  bool EliminateMostlyEmptyBlocks(Function &F);
172  bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
173  void EliminateMostlyEmptyBlock(BasicBlock *BB);
174  bool OptimizeBlock(BasicBlock &BB, bool& ModifiedDT);
175  bool OptimizeInst(Instruction *I, bool& ModifiedDT);
176  bool OptimizeMemoryInst(Instruction *I, Value *Addr,
177  Type *AccessTy, unsigned AS);
178  bool OptimizeInlineAsmInst(CallInst *CS);
179  bool OptimizeCallInst(CallInst *CI, bool& ModifiedDT);
180  bool MoveExtToFormExtLoad(Instruction *&I);
181  bool OptimizeExtUses(Instruction *I);
182  bool OptimizeSelectInst(SelectInst *SI);
183  bool OptimizeShuffleVectorInst(ShuffleVectorInst *SI);
184  bool OptimizeExtractElementInst(Instruction *Inst);
185  bool DupRetToEnableTailCallOpts(BasicBlock *BB);
186  bool PlaceDbgValues(Function &F);
187  bool sinkAndCmp(Function &F);
188  bool ExtLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI,
189  Instruction *&Inst,
190  const SmallVectorImpl<Instruction *> &Exts,
191  unsigned CreatedInstCost);
192  bool splitBranchCondition(Function &F);
193  bool simplifyOffsetableRelocate(Instruction &I);
194  };
195 }
196 
197 char CodeGenPrepare::ID = 0;
198 INITIALIZE_TM_PASS(CodeGenPrepare, "codegenprepare",
199  "Optimize for code generation", false, false)
200 
201 FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) {
202  return new CodeGenPrepare(TM);
203 }
204 
205 bool CodeGenPrepare::runOnFunction(Function &F) {
206  if (skipOptnoneFunction(F))
207  return false;
208 
209  DL = &F.getParent()->getDataLayout();
210 
211  bool EverMadeChange = false;
212  // Clear per function information.
213  InsertedInsts.clear();
214  PromotedInsts.clear();
215 
216  ModifiedDT = false;
217  if (TM)
218  TLI = TM->getSubtargetImpl(F)->getTargetLowering();
219  TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
220  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
221  OptSize = F.hasFnAttribute(Attribute::OptimizeForSize);
222 
223  /// This optimization identifies DIV instructions that can be
224  /// profitably bypassed and carried out with a shorter, faster divide.
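 // Illustrative example (editor's note, not part of the original source): on a
 // target where 64-bit division is slow, bypassSlowDivision guards
 //   %res = udiv i64 %a, %b
 // with a runtime check that both operands fit in 32 bits and, on that path,
 // performs a faster 32-bit divide instead.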
225  if (!OptSize && TLI && TLI->isSlowDivBypassed()) {
226  const DenseMap<unsigned int, unsigned int> &BypassWidths =
227  TLI->getBypassSlowDivWidths();
228  for (Function::iterator I = F.begin(); I != F.end(); I++)
229  EverMadeChange |= bypassSlowDivision(F, I, BypassWidths);
230  }
231 
232  // Eliminate blocks that contain only PHI nodes and an
233  // unconditional branch.
234  EverMadeChange |= EliminateMostlyEmptyBlocks(F);
235 
236  // If llvm.dbg.value is far away from the value then iSel may not be able
237  // to handle it properly. iSel will drop llvm.dbg.value if it can not
238  // find a node corresponding to the value.
239  EverMadeChange |= PlaceDbgValues(F);
240 
241  // If there is a mask, compare against zero, and branch that can be combined
242  // into a single target instruction, push the mask and compare into branch
243  // users. Do this before OptimizeBlock -> OptimizeInst ->
244  // OptimizeCmpExpression, which perturbs the pattern being searched for.
245  if (!DisableBranchOpts) {
246  EverMadeChange |= sinkAndCmp(F);
247  EverMadeChange |= splitBranchCondition(F);
248  }
249 
250  bool MadeChange = true;
251  while (MadeChange) {
252  MadeChange = false;
253  for (Function::iterator I = F.begin(); I != F.end(); ) {
254  BasicBlock *BB = I++;
255  bool ModifiedDTOnIteration = false;
256  MadeChange |= OptimizeBlock(*BB, ModifiedDTOnIteration);
257 
258  // Restart BB iteration if the dominator tree of the Function was changed
259  if (ModifiedDTOnIteration)
260  break;
261  }
262  EverMadeChange |= MadeChange;
263  }
264 
265  SunkAddrs.clear();
266 
267  if (!DisableBranchOpts) {
268  MadeChange = false;
269  SmallPtrSet<BasicBlock*, 8> WorkList;
270  for (BasicBlock &BB : F) {
271  SmallVector<BasicBlock *, 2> Successors(succ_begin(&BB), succ_end(&BB));
272  MadeChange |= ConstantFoldTerminator(&BB, true);
273  if (!MadeChange) continue;
274 
275  for (SmallVectorImpl<BasicBlock*>::iterator
276  II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
277  if (pred_begin(*II) == pred_end(*II))
278  WorkList.insert(*II);
279  }
280 
281  // Delete the dead blocks and any of their dead successors.
282  MadeChange |= !WorkList.empty();
283  while (!WorkList.empty()) {
284  BasicBlock *BB = *WorkList.begin();
285  WorkList.erase(BB);
286  SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
287 
288  DeleteDeadBlock(BB);
289 
290  for (SmallVectorImpl<BasicBlock*>::iterator
291  II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
292  if (pred_begin(*II) == pred_end(*II))
293  WorkList.insert(*II);
294  }
295 
296  // Merge pairs of basic blocks with unconditional branches, connected by
297  // a single edge.
298  if (EverMadeChange || MadeChange)
299  MadeChange |= EliminateFallThrough(F);
300 
301  EverMadeChange |= MadeChange;
302  }
303 
304  if (!DisableGCOpts) {
305  SmallVector<Instruction *, 2> Statepoints;
306  for (BasicBlock &BB : F)
307  for (Instruction &I : BB)
308  if (isStatepoint(I))
309  Statepoints.push_back(&I);
310  for (auto &I : Statepoints)
311  EverMadeChange |= simplifyOffsetableRelocate(*I);
312  }
313 
314  return EverMadeChange;
315 }
316 
317 /// EliminateFallThrough - Merge basic blocks which are connected
318 /// by a single edge, where one of the basic blocks has a single successor
319 /// pointing to the other basic block, which has a single predecessor.
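 // Illustrative example (editor's note, not part of the original source):
 //   bb1:
 //     %x = ...
 //     br label %bb2
 //   bb2:                  ; bb1 is bb2's only predecessor
 //     %y = add i32 %x, 1
 // is merged into a single block containing %x and %y, and the unconditional
 // branch disappears.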
320 bool CodeGenPrepare::EliminateFallThrough(Function &F) {
321  bool Changed = false;
322  // Scan all of the blocks in the function, except for the entry block.
323  for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) {
324  BasicBlock *BB = I++;
325  // If the destination block has a single pred, then this is a trivial
326  // edge, just collapse it.
327  BasicBlock *SinglePred = BB->getSinglePredecessor();
328 
329  // Don't merge if BB's address is taken.
330  if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue;
331 
332  BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
333  if (Term && !Term->isConditional()) {
334  Changed = true;
335  DEBUG(dbgs() << "To merge:\n"<< *SinglePred << "\n\n\n");
336  // Remember if SinglePred was the entry block of the function.
337  // If so, we will need to move BB back to the entry position.
338  bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
339  MergeBasicBlockIntoOnlyPred(BB, nullptr);
340 
341  if (isEntry && BB != &BB->getParent()->getEntryBlock())
342  BB->moveBefore(&BB->getParent()->getEntryBlock());
343 
344  // We have erased a block. Update the iterator.
345  I = BB;
346  }
347  }
348  return Changed;
349 }
350 
351 /// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes,
352 /// debug info directives, and an unconditional branch. Passes before isel
353 /// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for
354 /// isel. Start by eliminating these blocks so we can split them the way we
355 /// want them.
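 // Illustrative example (editor's note, not part of the original source): a
 // "mostly empty" block is of the form
 //   bb:
 //     %p = phi i32 [ %a, %pred1 ], [ %b, %pred2 ]
 //     br label %dest
 // and is removed by pointing %pred1/%pred2 straight at %dest and folding %p
 // into %dest's PHI nodes.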
356 bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
357  bool MadeChange = false;
358  // Note that this intentionally skips the entry block.
359  for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) {
360  BasicBlock *BB = I++;
361 
362  // If this block doesn't end with an uncond branch, ignore it.
363  BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
364  if (!BI || !BI->isUnconditional())
365  continue;
366 
367  // If the instruction before the branch (skipping debug info) isn't a phi
368  // node, then other stuff is happening here.
369  BasicBlock::iterator BBI = BI;
370  if (BBI != BB->begin()) {
371  --BBI;
372  while (isa<DbgInfoIntrinsic>(BBI)) {
373  if (BBI == BB->begin())
374  break;
375  --BBI;
376  }
377  if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
378  continue;
379  }
380 
381  // Do not break infinite loops.
382  BasicBlock *DestBB = BI->getSuccessor(0);
383  if (DestBB == BB)
384  continue;
385 
386  if (!CanMergeBlocks(BB, DestBB))
387  continue;
388 
389  EliminateMostlyEmptyBlock(BB);
390  MadeChange = true;
391  }
392  return MadeChange;
393 }
394 
395 /// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a
396 /// single uncond branch between them, and BB contains no other non-phi
397 /// instructions.
398 bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB,
399  const BasicBlock *DestBB) const {
400  // We only want to eliminate blocks whose phi nodes are used by phi nodes in
401  // the successor. If there are more complex condition (e.g. preheaders),
402  // don't mess around with them.
403  BasicBlock::const_iterator BBI = BB->begin();
404  while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
405  for (const User *U : PN->users()) {
406  const Instruction *UI = cast<Instruction>(U);
407  if (UI->getParent() != DestBB || !isa<PHINode>(UI))
408  return false;
409  // If User is inside DestBB block and it is a PHINode then check
410  // incoming value. If incoming value is not from BB then this is
411  // a complex condition (e.g. preheaders) we want to avoid here.
412  if (UI->getParent() == DestBB) {
413  if (const PHINode *UPN = dyn_cast<PHINode>(UI))
414  for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
415  Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
416  if (Insn && Insn->getParent() == BB &&
417  Insn->getParent() != UPN->getIncomingBlock(I))
418  return false;
419  }
420  }
421  }
422  }
423 
424  // If BB and DestBB contain any common predecessors, then the phi nodes in BB
425  // and DestBB may have conflicting incoming values for the block. If so, we
426  // can't merge the block.
427  const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
428  if (!DestBBPN) return true; // no conflict.
429 
430  // Collect the preds of BB.
431  SmallPtrSet<const BasicBlock*, 16> BBPreds;
432  if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
433  // It is faster to get preds from a PHI than with pred_iterator.
434  for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
435  BBPreds.insert(BBPN->getIncomingBlock(i));
436  } else {
437  BBPreds.insert(pred_begin(BB), pred_end(BB));
438  }
439 
440  // Walk the preds of DestBB.
441  for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
442  BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
443  if (BBPreds.count(Pred)) { // Common predecessor?
444  BBI = DestBB->begin();
445  while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
446  const Value *V1 = PN->getIncomingValueForBlock(Pred);
447  const Value *V2 = PN->getIncomingValueForBlock(BB);
448 
449  // If V2 is a phi node in BB, look up what the mapped value will be.
450  if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
451  if (V2PN->getParent() == BB)
452  V2 = V2PN->getIncomingValueForBlock(Pred);
453 
454  // If there is a conflict, bail out.
455  if (V1 != V2) return false;
456  }
457  }
458  }
459 
460  return true;
461 }
462 
463 
464 /// EliminateMostlyEmptyBlock - Eliminate a basic block that has only phi's and
465 /// an unconditional branch in it.
466 void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
467  BranchInst *BI = cast<BranchInst>(BB->getTerminator());
468  BasicBlock *DestBB = BI->getSuccessor(0);
469 
470  DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB);
471 
472  // If the destination block has a single pred, then this is a trivial edge,
473  // just collapse it.
474  if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
475  if (SinglePred != DestBB) {
476  // Remember if SinglePred was the entry block of the function. If so, we
477  // will need to move BB back to the entry position.
478  bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
479  MergeBasicBlockIntoOnlyPred(DestBB, nullptr);
480 
481  if (isEntry && BB != &BB->getParent()->getEntryBlock())
482  BB->moveBefore(&BB->getParent()->getEntryBlock());
483 
484  DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
485  return;
486  }
487  }
488 
489  // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
490  // to handle the new incoming edges it is about to have.
491  PHINode *PN;
492  for (BasicBlock::iterator BBI = DestBB->begin();
493  (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
494  // Remove the incoming value for BB, and remember it.
495  Value *InVal = PN->removeIncomingValue(BB, false);
496 
497  // Two options: either the InVal is a phi node defined in BB or it is some
498  // value that dominates BB.
499  PHINode *InValPhi = dyn_cast<PHINode>(InVal);
500  if (InValPhi && InValPhi->getParent() == BB) {
501  // Add all of the input values of the input PHI as inputs of this phi.
502  for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
503  PN->addIncoming(InValPhi->getIncomingValue(i),
504  InValPhi->getIncomingBlock(i));
505  } else {
506  // Otherwise, add one instance of the dominating value for each edge that
507  // we will be adding.
508  if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
509  for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
510  PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
511  } else {
512  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
513  PN->addIncoming(InVal, *PI);
514  }
515  }
516  }
517 
518  // The PHIs are now updated, change everything that refers to BB to use
519  // DestBB and remove BB.
520  BB->replaceAllUsesWith(DestBB);
521  BB->eraseFromParent();
522  ++NumBlocksElim;
523 
524  DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
525 }
526 
527 // Computes a map of base pointer relocation instructions to corresponding
528 // derived pointer relocation instructions given a vector of all relocate calls
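 // Illustrative example (editor's note, not part of the original source): for
 // relocates with (base index, derived index) pairs (4,4), (4,5) and (4,6), the
 // base relocate (4,4) maps to the list containing the relocates for (4,5) and
 // (4,6).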
529 static void computeBaseDerivedRelocateMap(
530  const SmallVectorImpl<User *> &AllRelocateCalls,
531  DenseMap<IntrinsicInst *, SmallVector<IntrinsicInst *, 2>> &
532  RelocateInstMap) {
533  // Collect information in two maps: one primarily for locating the base object
534  // while filling the second map; the second map is the final structure holding
535  // a mapping between Base and corresponding Derived relocate calls
536  DenseMap<std::pair<unsigned, unsigned>, IntrinsicInst *> RelocateIdxMap;
537  for (auto &U : AllRelocateCalls) {
538  GCRelocateOperands ThisRelocate(U);
539  IntrinsicInst *I = cast<IntrinsicInst>(U);
540  auto K = std::make_pair(ThisRelocate.getBasePtrIndex(),
541  ThisRelocate.getDerivedPtrIndex());
542  RelocateIdxMap.insert(std::make_pair(K, I));
543  }
544  for (auto &Item : RelocateIdxMap) {
545  std::pair<unsigned, unsigned> Key = Item.first;
546  if (Key.first == Key.second)
547  // Base relocation: nothing to insert
548  continue;
549 
550  IntrinsicInst *I = Item.second;
551  auto BaseKey = std::make_pair(Key.first, Key.first);
552 
553  // We're iterating over RelocateIdxMap so we cannot modify it.
554  auto MaybeBase = RelocateIdxMap.find(BaseKey);
555  if (MaybeBase == RelocateIdxMap.end())
556  // TODO: We might want to insert a new base object relocate and gep off
557  // that, if there are enough derived object relocates.
558  continue;
559 
560  RelocateInstMap[MaybeBase->second].push_back(I);
561  }
562 }
563 
564 // Accepts a GEP and extracts the operands into a vector provided they're all
565 // small integer constants
566 static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP,
567  SmallVectorImpl<Value *> &OffsetV) {
568  for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
569  // Only accept small constant integer operands
570  auto Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
571  if (!Op || Op->getZExtValue() > 20)
572  return false;
573  }
574 
575  for (unsigned i = 1; i < GEP->getNumOperands(); i++)
576  OffsetV.push_back(GEP->getOperand(i));
577  return true;
578 }
579 
580 // Takes a RelocatedBase (base pointer relocation instruction) and Targets to
581 // replace, computes a replacement, and applies it.
582 static bool
583 simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase,
584  const SmallVectorImpl<IntrinsicInst *> &Targets) {
585  bool MadeChange = false;
586  for (auto &ToReplace : Targets) {
587  GCRelocateOperands MasterRelocate(RelocatedBase);
588  GCRelocateOperands ThisRelocate(ToReplace);
589 
590  assert(ThisRelocate.getBasePtrIndex() == MasterRelocate.getBasePtrIndex() &&
591  "Not relocating a derived object of the original base object");
592  if (ThisRelocate.getBasePtrIndex() == ThisRelocate.getDerivedPtrIndex()) {
593  // A duplicate relocate call. TODO: coalesce duplicates.
594  continue;
595  }
596 
597  Value *Base = ThisRelocate.getBasePtr();
598  auto Derived = dyn_cast<GetElementPtrInst>(ThisRelocate.getDerivedPtr());
599  if (!Derived || Derived->getPointerOperand() != Base)
600  continue;
601 
602  SmallVector<Value *, 2> OffsetV;
603  if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
604  continue;
605 
606  // Create a Builder and replace the target callsite with a gep
607  assert(RelocatedBase->getNextNode() && "Should always have one since it's not a terminator");
608 
609  // Insert after RelocatedBase
610  IRBuilder<> Builder(RelocatedBase->getNextNode());
611  Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
612 
613  // If gc_relocate does not match the actual type, cast it to the right type.
614  // In theory, there must be a bitcast after gc_relocate if the type does not
615  // match, and we should reuse it to get the derived pointer. But it could be
616  // cases like this:
617  // bb1:
618  // ...
619  // %g1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
620  // br label %merge
621  //
622  // bb2:
623  // ...
624  // %g2 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
625  // br label %merge
626  //
627  // merge:
628  // %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ]
629  // %cast = bitcast i8 addrspace(1)* %p1 to i32 addrspace(1)*
630  //
631  // In this case, we can not find the bitcast any more. So we insert a new bitcast
632  // no matter whether there is already one or not. In this way, we can handle all
633  // cases, and the extra bitcast should be optimized away in later passes.
634  Instruction *ActualRelocatedBase = RelocatedBase;
635  if (RelocatedBase->getType() != Base->getType()) {
636  ActualRelocatedBase =
637  cast<Instruction>(Builder.CreateBitCast(RelocatedBase, Base->getType()));
638  }
639  Value *Replacement = Builder.CreateGEP(
640  Derived->getSourceElementType(), ActualRelocatedBase, makeArrayRef(OffsetV));
641  Instruction *ReplacementInst = cast<Instruction>(Replacement);
642  Replacement->takeName(ToReplace);
643  // If the newly generated derived pointer's type does not match the original derived
644  // pointer's type, cast the new derived pointer to match it. Same reasoning as above.
645  Instruction *ActualReplacement = ReplacementInst;
646  if (ReplacementInst->getType() != ToReplace->getType()) {
647  ActualReplacement =
648  cast<Instruction>(Builder.CreateBitCast(ReplacementInst, ToReplace->getType()));
649  }
650  ToReplace->replaceAllUsesWith(ActualReplacement);
651  ToReplace->eraseFromParent();
652 
653  MadeChange = true;
654  }
655  return MadeChange;
656 }
657 
658 // Turns this:
659 //
660 // %base = ...
661 // %ptr = gep %base + 15
662 // %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
663 // %base' = relocate(%tok, i32 4, i32 4)
664 // %ptr' = relocate(%tok, i32 4, i32 5)
665 // %val = load %ptr'
666 //
667 // into this:
668 //
669 // %base = ...
670 // %ptr = gep %base + 15
671 // %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
672 // %base' = gc.relocate(%tok, i32 4, i32 4)
673 // %ptr' = gep %base' + 15
674 // %val = load %ptr'
675 bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) {
676  bool MadeChange = false;
677  SmallVector<User *, 2> AllRelocateCalls;
678 
679  for (auto *U : I.users())
680  if (isGCRelocate(dyn_cast<Instruction>(U)))
681  // Collect all the relocate calls associated with a statepoint
682  AllRelocateCalls.push_back(U);
683 
684  // We need at least one base pointer relocation + one derived pointer
685  // relocation to mangle
686  if (AllRelocateCalls.size() < 2)
687  return false;
688 
689  // RelocateInstMap is a mapping from the base relocate instruction to the
690  // corresponding derived relocate instructions
691  DenseMap<IntrinsicInst *, SmallVector<IntrinsicInst *, 2>> RelocateInstMap;
692  computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
693  if (RelocateInstMap.empty())
694  return false;
695 
696  for (auto &Item : RelocateInstMap)
697  // Item.first is the RelocatedBase to offset against
698  // Item.second is the vector of Targets to replace
699  MadeChange = simplifyRelocatesOffABase(Item.first, Item.second);
700  return MadeChange;
701 }
702 
703 /// SinkCast - Sink the specified cast instruction into its user blocks
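 // Illustrative example (editor's note, not part of the original source):
 //   bb1: %c = trunc i64 %x to i32
 //   bb2: use of %c
 // becomes
 //   bb2: %c1 = trunc i64 %x to i32
 //        use of %c1
 // so the casted value no longer has to live in a virtual register across the
 // block boundary; the original cast is erased once all uses are rewritten.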
704 static bool SinkCast(CastInst *CI) {
705  BasicBlock *DefBB = CI->getParent();
706 
707  /// InsertedCasts - Only insert a cast in each block once.
708  DenseMap<BasicBlock*, CastInst*> InsertedCasts;
709 
710  bool MadeChange = false;
711  for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
712  UI != E; ) {
713  Use &TheUse = UI.getUse();
714  Instruction *User = cast<Instruction>(*UI);
715 
716  // Figure out which BB this cast is used in. For PHI's this is the
717  // appropriate predecessor block.
718  BasicBlock *UserBB = User->getParent();
719  if (PHINode *PN = dyn_cast<PHINode>(User)) {
720  UserBB = PN->getIncomingBlock(TheUse);
721  }
722 
723  // Preincrement use iterator so we don't invalidate it.
724  ++UI;
725 
726  // If this user is in the same block as the cast, don't change the cast.
727  if (UserBB == DefBB) continue;
728 
729  // If we have already inserted a cast into this block, use it.
730  CastInst *&InsertedCast = InsertedCasts[UserBB];
731 
732  if (!InsertedCast) {
733  BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
734  InsertedCast =
735  CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
736  InsertPt);
737  }
738 
739  // Replace a use of the cast with a use of the new cast.
740  TheUse = InsertedCast;
741  MadeChange = true;
742  ++NumCastUses;
743  }
744 
745  // If we removed all uses, nuke the cast.
746  if (CI->use_empty()) {
747  CI->eraseFromParent();
748  MadeChange = true;
749  }
750 
751  return MadeChange;
752 }
753 
754 /// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
755 /// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
756 /// sink it into user blocks to reduce the number of virtual
757 /// registers that must be created and coalesced.
758 ///
759 /// Return true if any changes are made.
760 ///
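 // Illustrative example (editor's note, not part of the original source): on a
 // target that promotes i32 to i64, an i64 -> i32 trunc is a noop copy after
 // promotion and is sunk into its user blocks via SinkCast above.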
761 static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
762  const DataLayout &DL) {
763  // If this is a noop copy,
764  EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType());
765  EVT DstVT = TLI.getValueType(DL, CI->getType());
766 
767  // This is an fp<->int conversion?
768  if (SrcVT.isInteger() != DstVT.isInteger())
769  return false;
770 
771  // If this is an extension, it will be a zero or sign extension, which
772  // isn't a noop.
773  if (SrcVT.bitsLT(DstVT)) return false;
774 
775  // If these values will be promoted, find out what they will be promoted
776  // to. This helps us consider truncates on PPC as noop copies when they
777  // are.
778  if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
779  TargetLowering::TypePromoteInteger)
780  SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
781  if (TLI.getTypeAction(CI->getContext(), DstVT) ==
782  TargetLowering::TypePromoteInteger)
783  DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
784 
785  // If, after promotion, these are the same types, this is a noop copy.
786  if (SrcVT != DstVT)
787  return false;
788 
789  return SinkCast(CI);
790 }
791 
792 /// CombineUAddWithOverflow - try to combine CI into a call to the
793 /// llvm.uadd.with.overflow intrinsic if possible.
794 ///
795 /// Return true if any changes were made.
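 // Illustrative example (editor's note, not part of the original source):
 //   %add = add i32 %a, %b
 //   %cmp = icmp ult i32 %add, %a      ; unsigned overflow check
 // becomes
 //   %res = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
 //   %add = extractvalue { i32, i1 } %res, 0
 //   %cmp = extractvalue { i32, i1 } %res, 1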
796 static bool CombineUAddWithOverflow(CmpInst *CI) {
797  Value *A, *B;
798  Instruction *AddI;
799  if (!match(CI,
800  m_UAddWithOverflow(m_Value(A), m_Value(B), m_Instruction(AddI))))
801  return false;
802 
803  Type *Ty = AddI->getType();
804  if (!isa<IntegerType>(Ty))
805  return false;
806 
807  // We don't want to move around uses of condition values this late, so we
808  // check if it is legal to create the call to the intrinsic in the basic
809  // block containing the icmp:
810 
811  if (AddI->getParent() != CI->getParent() && !AddI->hasOneUse())
812  return false;
813 
814 #ifndef NDEBUG
815  // Someday m_UAddWithOverflow may get smarter, but this is a safe assumption
816  // for now:
817  if (AddI->hasOneUse())
818  assert(*AddI->user_begin() == CI && "expected!");
819 #endif
820 
821  Module *M = CI->getParent()->getParent()->getParent();
822  Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
823 
824  auto *InsertPt = AddI->hasOneUse() ? CI : AddI;
825 
826  auto *UAddWithOverflow =
827  CallInst::Create(F, {A, B}, "uadd.overflow", InsertPt);
828  auto *UAdd = ExtractValueInst::Create(UAddWithOverflow, 0, "uadd", InsertPt);
829  auto *Overflow =
830  ExtractValueInst::Create(UAddWithOverflow, 1, "overflow", InsertPt);
831 
832  CI->replaceAllUsesWith(Overflow);
833  AddI->replaceAllUsesWith(UAdd);
834  CI->eraseFromParent();
835  AddI->eraseFromParent();
836  return true;
837 }
838 
839 /// SinkCmpExpression - Sink the given CmpInst into user blocks to reduce
840 /// the number of virtual registers that must be created and coalesced. This is
841 /// a clear win except on targets with multiple condition code registers
842 /// (PowerPC), where it might lose; some adjustment may be wanted there.
843 ///
844 /// Return true if any changes are made.
845 static bool SinkCmpExpression(CmpInst *CI) {
846  BasicBlock *DefBB = CI->getParent();
847 
848  /// InsertedCmp - Only insert a cmp in each block once.
849  DenseMap<BasicBlock*, CmpInst*> InsertedCmps;
850 
851  bool MadeChange = false;
852  for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
853  UI != E; ) {
854  Use &TheUse = UI.getUse();
855  Instruction *User = cast<Instruction>(*UI);
856 
857  // Preincrement use iterator so we don't invalidate it.
858  ++UI;
859 
860  // Don't bother for PHI nodes.
861  if (isa<PHINode>(User))
862  continue;
863 
864  // Figure out which BB this cmp is used in.
865  BasicBlock *UserBB = User->getParent();
866 
867  // If this user is in the same block as the cmp, don't change the cmp.
868  if (UserBB == DefBB) continue;
869 
870  // If we have already inserted a cmp into this block, use it.
871  CmpInst *&InsertedCmp = InsertedCmps[UserBB];
872 
873  if (!InsertedCmp) {
874  BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
875  InsertedCmp =
876  CmpInst::Create(CI->getOpcode(),
877  CI->getPredicate(), CI->getOperand(0),
878  CI->getOperand(1), "", InsertPt);
879  }
880 
881  // Replace a use of the cmp with a use of the new cmp.
882  TheUse = InsertedCmp;
883  MadeChange = true;
884  ++NumCmpUses;
885  }
886 
887  // If we removed all uses, nuke the cmp.
888  if (CI->use_empty()) {
889  CI->eraseFromParent();
890  MadeChange = true;
891  }
892 
893  return MadeChange;
894 }
895 
896 static bool OptimizeCmpExpression(CmpInst *CI) {
897  if (SinkCmpExpression(CI))
898  return true;
899 
900  if (CombineUAddWithOverflow(CI))
901  return true;
902 
903  return false;
904 }
905 
906 /// isExtractBitsCandidateUse - Check if the candidates could
907 /// be combined with shift instruction, which includes:
908 /// 1. Truncate instruction
909 /// 2. And instruction and the imm is a mask of the low bits:
910 /// imm & (imm+1) == 0
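 // Illustrative example (editor's note, not part of the original source): the
 // constant 0xFF is such a mask because 0xFF & 0x100 == 0, whereas 0xFA is not
 // because 0xFA & 0xFB != 0.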
911 static bool isExtractBitsCandidateUse(Instruction *User) {
912  if (!isa<TruncInst>(User)) {
913  if (User->getOpcode() != Instruction::And ||
914  !isa<ConstantInt>(User->getOperand(1)))
915  return false;
916 
917  const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue();
918 
919  if ((Cimm & (Cimm + 1)).getBoolValue())
920  return false;
921  }
922  return true;
923 }
924 
925 /// SinkShiftAndTruncate - sink both shift and truncate instruction
926 /// to the use of truncate's BB.
927 static bool
928 SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
929  DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
930  const TargetLowering &TLI, const DataLayout &DL) {
931  BasicBlock *UserBB = User->getParent();
932  DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
933  TruncInst *TruncI = dyn_cast<TruncInst>(User);
934  bool MadeChange = false;
935 
936  for (Value::user_iterator TruncUI = TruncI->user_begin(),
937  TruncE = TruncI->user_end();
938  TruncUI != TruncE;) {
939 
940  Use &TruncTheUse = TruncUI.getUse();
941  Instruction *TruncUser = cast<Instruction>(*TruncUI);
942  // Preincrement use iterator so we don't invalidate it.
943 
944  ++TruncUI;
945 
946  int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
947  if (!ISDOpcode)
948  continue;
949 
950  // If the use is actually a legal node, there will not be an
951  // implicit truncate.
952  // FIXME: always querying the result type is just an
953  // approximation; some nodes' legality is determined by the
954  // operand or other means. There's no good way to find out though.
955  if (TLI.isOperationLegalOrCustom(
956  ISDOpcode, TLI.getValueType(DL, TruncUser->getType(), true)))
957  continue;
958 
959  // Don't bother for PHI nodes.
960  if (isa<PHINode>(TruncUser))
961  continue;
962 
963  BasicBlock *TruncUserBB = TruncUser->getParent();
964 
965  if (UserBB == TruncUserBB)
966  continue;
967 
968  BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
969  CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
970 
971  if (!InsertedShift && !InsertedTrunc) {
972  BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
973  // Sink the shift
974  if (ShiftI->getOpcode() == Instruction::AShr)
975  InsertedShift =
976  BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt);
977  else
978  InsertedShift =
979  BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt);
980 
981  // Sink the trunc
982  BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
983  TruncInsertPt++;
984 
985  InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
986  TruncI->getType(), "", TruncInsertPt);
987 
988  MadeChange = true;
989 
990  TruncTheUse = InsertedTrunc;
991  }
992  }
993  return MadeChange;
994 }
995 
996 /// OptimizeExtractBits - sink the shift *right* instruction into user blocks if
997 /// the uses could potentially be combined with this shift instruction and
998 /// generate BitExtract instruction. It will only be applied if the architecture
999 /// supports BitExtract instruction. Here is an example:
1000 /// BB1:
1001 /// %x.extract.shift = lshr i64 %arg1, 32
1002 /// BB2:
1003 /// %x.extract.trunc = trunc i64 %x.extract.shift to i16
1004 /// ==>
1005 ///
1006 /// BB2:
1007 /// %x.extract.shift.1 = lshr i64 %arg1, 32
1008 /// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
1009 ///
1010 /// CodeGen will recognize the pattern in BB2 and generate BitExtract
1011 /// instruction.
1012 /// Return true if any changes are made.
1013 static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
1014  const TargetLowering &TLI,
1015  const DataLayout &DL) {
1016  BasicBlock *DefBB = ShiftI->getParent();
1017 
1018  /// Only insert instructions in each block once.
1019  DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;
1020 
1021  bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(DL, ShiftI->getType()));
1022 
1023  bool MadeChange = false;
1024  for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
1025  UI != E;) {
1026  Use &TheUse = UI.getUse();
1027  Instruction *User = cast<Instruction>(*UI);
1028  // Preincrement use iterator so we don't invalidate it.
1029  ++UI;
1030 
1031  // Don't bother for PHI nodes.
1032  if (isa<PHINode>(User))
1033  continue;
1034 
1035  if (!isExtractBitsCandidateUse(User))
1036  continue;
1037 
1038  BasicBlock *UserBB = User->getParent();
1039 
1040  if (UserBB == DefBB) {
1041  // If the shift and truncate instruction are in the same BB. The use of
1042  // the truncate(TruncUse) may still introduce another truncate if not
1043  // legal. In this case, we would like to sink both shift and truncate
1044  // instruction to the BB of TruncUse.
1045  // for example:
1046  // BB1:
1047  // i64 shift.result = lshr i64 opnd, imm
1048  // trunc.result = trunc shift.result to i16
1049  //
1050  // BB2:
1051  // ----> We will have an implicit truncate here if the architecture does
1052  // not have i16 compare.
1053  // cmp i16 trunc.result, opnd2
1054  //
1055  if (isa<TruncInst>(User) && shiftIsLegal
1056  // If the type of the truncate is legal, no truncate will be
1057  // introduced in other basic blocks.
1058  &&
1059  (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType()))))
1060  MadeChange =
1061  SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL);
1062 
1063  continue;
1064  }
1065  // If we have already inserted a shift into this block, use it.
1066  BinaryOperator *&InsertedShift = InsertedShifts[UserBB];
1067 
1068  if (!InsertedShift) {
1069  BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1070 
1071  if (ShiftI->getOpcode() == Instruction::AShr)
1072  InsertedShift =
1073  BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt);
1074  else
1075  InsertedShift =
1076  BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt);
1077 
1078  MadeChange = true;
1079  }
1080 
1081  // Replace a use of the shift with a use of the new shift.
1082  TheUse = InsertedShift;
1083  }
1084 
1085  // If we removed all uses, nuke the shift.
1086  if (ShiftI->use_empty())
1087  ShiftI->eraseFromParent();
1088 
1089  return MadeChange;
1090 }
1091 
1092 // ScalarizeMaskedLoad() translates masked load intrinsic, like
1093 // <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
1094 // <16 x i1> %mask, <16 x i32> %passthru)
1095 // to a chain of basic blocks, which load the elements one-by-one if
1096 // the appropriate mask bit is set
1097 //
1098 // %1 = bitcast i8* %addr to i32*
1099 // %2 = extractelement <16 x i1> %mask, i32 0
1100 // %3 = icmp eq i1 %2, true
1101 // br i1 %3, label %cond.load, label %else
1102 //
1103 //cond.load: ; preds = %0
1104 // %4 = getelementptr i32* %1, i32 0
1105 // %5 = load i32* %4
1106 // %6 = insertelement <16 x i32> undef, i32 %5, i32 0
1107 // br label %else
1108 //
1109 //else: ; preds = %0, %cond.load
1110 // %res.phi.else = phi <16 x i32> [ %6, %cond.load ], [ undef, %0 ]
1111 // %7 = extractelement <16 x i1> %mask, i32 1
1112 // %8 = icmp eq i1 %7, true
1113 // br i1 %8, label %cond.load1, label %else2
1114 //
1115 //cond.load1: ; preds = %else
1116 // %9 = getelementptr i32* %1, i32 1
1117 // %10 = load i32* %9
1118 // %11 = insertelement <16 x i32> %res.phi.else, i32 %10, i32 1
1119 // br label %else2
1120 //
1121 //else2: ; preds = %else, %cond.load1
1122 // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
1123 // %12 = extractelement <16 x i1> %mask, i32 2
1124 // %13 = icmp eq i1 %12, true
1125 // br i1 %13, label %cond.load4, label %else5
1126 //
1127 static void ScalarizeMaskedLoad(CallInst *CI) {
1128  Value *Ptr = CI->getArgOperand(0);
1129  Value *Src0 = CI->getArgOperand(3);
1130  Value *Mask = CI->getArgOperand(2);
1131  VectorType *VecType = dyn_cast<VectorType>(CI->getType());
1132  Type *EltTy = VecType->getElementType();
1133 
1134  assert(VecType && "Unexpected return type of masked load intrinsic");
1135 
1136  IRBuilder<> Builder(CI->getContext());
1137  Instruction *InsertPt = CI;
1138  BasicBlock *IfBlock = CI->getParent();
1139  BasicBlock *CondBlock = nullptr;
1140  BasicBlock *PrevIfBlock = CI->getParent();
1141  Builder.SetInsertPoint(InsertPt);
1142 
1143  Builder.SetCurrentDebugLocation(CI->getDebugLoc());
1144 
1145  // Bitcast %addr from i8* to EltTy*
1146  Type *NewPtrType =
1147  EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
1148  Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
1149  Value *UndefVal = UndefValue::get(VecType);
1150 
1151  // The result vector
1152  Value *VResult = UndefVal;
1153 
1154  PHINode *Phi = nullptr;
1155  Value *PrevPhi = UndefVal;
1156 
1157  unsigned VectorWidth = VecType->getNumElements();
1158  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
1159 
1160  // Fill the "else" block, created in the previous iteration
1161  //
1162  // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
1163  // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
1164  // %to_load = icmp eq i1 %mask_1, true
1165  // br i1 %to_load, label %cond.load, label %else
1166  //
1167  if (Idx > 0) {
1168  Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
1169  Phi->addIncoming(VResult, CondBlock);
1170  Phi->addIncoming(PrevPhi, PrevIfBlock);
1171  PrevPhi = Phi;
1172  VResult = Phi;
1173  }
1174 
1175  Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
1176  Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
1177  ConstantInt::get(Predicate->getType(), 1));
1178 
1179  // Create "cond" block
1180  //
1181  // %EltAddr = getelementptr i32* %1, i32 0
1182  // %Elt = load i32* %EltAddr
1183  // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
1184  //
1185  CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
1186  Builder.SetInsertPoint(InsertPt);
1187 
1188  Value *Gep =
1189  Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
1190  LoadInst* Load = Builder.CreateLoad(Gep, false);
1191  VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
1192 
1193  // Create "else" block, fill it in the next iteration
1194  BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
1195  Builder.SetInsertPoint(InsertPt);
1196  Instruction *OldBr = IfBlock->getTerminator();
1197  BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
1198  OldBr->eraseFromParent();
1199  PrevIfBlock = IfBlock;
1200  IfBlock = NewIfBlock;
1201  }
1202 
1203  Phi = Builder.CreatePHI(VecType, 2, "res.phi.select");
1204  Phi->addIncoming(VResult, CondBlock);
1205  Phi->addIncoming(PrevPhi, PrevIfBlock);
1206  Value *NewI = Builder.CreateSelect(Mask, Phi, Src0);
1207  CI->replaceAllUsesWith(NewI);
1208  CI->eraseFromParent();
1209 }
1210 
1211 // ScalarizeMaskedStore() translates masked store intrinsic, like
1212 // void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
1213 // <16 x i1> %mask)
1214 // to a chain of basic blocks, which store the elements one-by-one if
1215 // the appropriate mask bit is set
1216 //
1217 // %1 = bitcast i8* %addr to i32*
1218 // %2 = extractelement <16 x i1> %mask, i32 0
1219 // %3 = icmp eq i1 %2, true
1220 // br i1 %3, label %cond.store, label %else
1221 //
1222 // cond.store: ; preds = %0
1223 // %4 = extractelement <16 x i32> %val, i32 0
1224 // %5 = getelementptr i32* %1, i32 0
1225 // store i32 %4, i32* %5
1226 // br label %else
1227 //
1228 // else: ; preds = %0, %cond.store
1229 // %6 = extractelement <16 x i1> %mask, i32 1
1230 // %7 = icmp eq i1 %6, true
1231 // br i1 %7, label %cond.store1, label %else2
1232 //
1233 // cond.store1: ; preds = %else
1234 // %8 = extractelement <16 x i32> %val, i32 1
1235 // %9 = getelementptr i32* %1, i32 1
1236 // store i32 %8, i32* %9
1237 // br label %else2
1238 // . . .
1239 static void ScalarizeMaskedStore(CallInst *CI) {
1240  Value *Ptr = CI->getArgOperand(1);
1241  Value *Src = CI->getArgOperand(0);
1242  Value *Mask = CI->getArgOperand(3);
1243 
1244  VectorType *VecType = dyn_cast<VectorType>(Src->getType());
1245  Type *EltTy = VecType->getElementType();
1246 
1247  assert(VecType && "Unexpected data type in masked store intrinsic");
1248 
1249  IRBuilder<> Builder(CI->getContext());
1250  Instruction *InsertPt = CI;
1251  BasicBlock *IfBlock = CI->getParent();
1252  Builder.SetInsertPoint(InsertPt);
1253  Builder.SetCurrentDebugLocation(CI->getDebugLoc());
1254 
1255  // Bitcast %addr from i8* to EltTy*
1256  Type *NewPtrType =
1257  EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
1258  Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
1259 
1260  unsigned VectorWidth = VecType->getNumElements();
1261  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
1262 
1263  // Fill the "else" block, created in the previous iteration
1264  //
1265  // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
1266  // %to_store = icmp eq i1 %mask_1, true
1267 // br i1 %to_store, label %cond.store, label %else
1268  //
1269  Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
1270  Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
1271  ConstantInt::get(Predicate->getType(), 1));
1272 
1273  // Create "cond" block
1274  //
1275  // %OneElt = extractelement <16 x i32> %Src, i32 Idx
1276  // %EltAddr = getelementptr i32* %1, i32 0
1277  // %store i32 %OneElt, i32* %EltAddr
1278  //
1279  BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store");
1280  Builder.SetInsertPoint(InsertPt);
1281 
1282  Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
1283  Value *Gep =
1284  Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
1285  Builder.CreateStore(OneElt, Gep);
1286 
1287  // Create "else" block, fill it in the next iteration
1288  BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
1289  Builder.SetInsertPoint(InsertPt);
1290  Instruction *OldBr = IfBlock->getTerminator();
1291  BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
1292  OldBr->eraseFromParent();
1293  IfBlock = NewIfBlock;
1294  }
1295  CI->eraseFromParent();
1296 }
1297 
1298 bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
1299  BasicBlock *BB = CI->getParent();
1300 
1301  // Lower inline assembly if we can.
1302  // If we found an inline asm expression, and if the target knows how to
1303  // lower it to normal LLVM code, do so now.
1304  if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
1305  if (TLI->ExpandInlineAsm(CI)) {
1306  // Avoid invalidating the iterator.
1307  CurInstIterator = BB->begin();
1308  // Avoid processing instructions out of order, which could cause
1309  // reuse before a value is defined.
1310  SunkAddrs.clear();
1311  return true;
1312  }
1313  // Sink address computing for memory operands into the block.
1314  if (OptimizeInlineAsmInst(CI))
1315  return true;
1316  }
1317 
1318  // Align the pointer arguments to this call if the target thinks it's a good
1319  // idea
1320  unsigned MinSize, PrefAlign;
1321  if (TLI && TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
1322  for (auto &Arg : CI->arg_operands()) {
1323  // We want to align both objects whose address is used directly and
1324  // objects whose address is used in casts and GEPs, though it only makes
1325  // sense for GEPs if the offset is a multiple of the desired alignment and
1326  // if size - offset meets the size threshold.
1327  if (!Arg->getType()->isPointerTy())
1328  continue;
1329  APInt Offset(DL->getPointerSizeInBits(
1330  cast<PointerType>(Arg->getType())->getAddressSpace()),
1331  0);
1332  Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
1333  uint64_t Offset2 = Offset.getLimitedValue();
1334  if ((Offset2 & (PrefAlign-1)) != 0)
1335  continue;
1336  AllocaInst *AI;
1337  if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlignment() < PrefAlign &&
1338  DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
1339  AI->setAlignment(PrefAlign);
1340  // Global variables can only be aligned if they are defined in this
1341  // object (i.e. they are uniquely initialized in this object), and
1342  // over-aligning global variables that have an explicit section is
1343  // forbidden.
1344  GlobalVariable *GV;
1345  if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->hasUniqueInitializer() &&
1346  !GV->hasSection() && GV->getAlignment() < PrefAlign &&
1347  DL->getTypeAllocSize(GV->getType()->getElementType()) >=
1348  MinSize + Offset2)
1349  GV->setAlignment(PrefAlign);
1350  }
1351  // If this is a memcpy (or similar) then we may be able to improve the
1352  // alignment
1353  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
1354  unsigned Align = getKnownAlignment(MI->getDest(), *DL);
1355  if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI))
1356  Align = std::min(Align, getKnownAlignment(MTI->getSource(), *DL));
1357  if (Align > MI->getAlignment())
1358  MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), Align));
1359  }
1360  }
1361 
1362  IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
1363  if (II) {
1364  switch (II->getIntrinsicID()) {
1365  default: break;
1366  case Intrinsic::objectsize: {
1367  // Lower all uses of llvm.objectsize.*
1368  bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
1369  Type *ReturnTy = CI->getType();
1370  Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
1371 
1372  // Substituting this can cause recursive simplifications, which can
1373  // invalidate our iterator. Use a WeakVH to hold onto it in case this
1374  // happens.
1375  WeakVH IterHandle(CurInstIterator);
1376 
1377  replaceAndRecursivelySimplify(CI, RetVal,
1378  TLInfo, nullptr);
1379 
1380  // If the iterator instruction was recursively deleted, start over at the
1381  // start of the block.
1382  if (IterHandle != CurInstIterator) {
1383  CurInstIterator = BB->begin();
1384  SunkAddrs.clear();
1385  }
1386  return true;
1387  }
1388  case Intrinsic::masked_load: {
1389  // Scalarize unsupported vector masked load
1390  if (!TTI->isLegalMaskedLoad(CI->getType(), 1)) {
1391  ScalarizeMaskedLoad(CI);
1392  ModifiedDT = true;
1393  return true;
1394  }
1395  return false;
1396  }
1397  case Intrinsic::masked_store: {
1398  if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType(), 1)) {
1399  ScalarizeMaskedStore(CI);
1400  ModifiedDT = true;
1401  return true;
1402  }
1403  return false;
1404  }
1405  case Intrinsic::aarch64_stlxr:
1406  case Intrinsic::aarch64_stxr: {
1407  ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
1408  if (!ExtVal || !ExtVal->hasOneUse() ||
1409  ExtVal->getParent() == CI->getParent())
1410  return false;
1411  // Sink a zext feeding stlxr/stxr before it, so it can be folded into it.
1412  ExtVal->moveBefore(CI);
1413  // Mark this instruction as "inserted by CGP", so that other
1414  // optimizations don't touch it.
1415  InsertedInsts.insert(ExtVal);
1416  return true;
1417  }
1418  }
1419 
1420  if (TLI) {
1421  // Unknown address space.
1422  // TODO: Target hook to pick which address space the intrinsic cares
1423  // about?
1424  unsigned AddrSpace = ~0u;
1425  SmallVector<Value*, 2> PtrOps;
1426  Type *AccessTy;
1427  if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy, AddrSpace))
1428  while (!PtrOps.empty())
1429  if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy, AddrSpace))
1430  return true;
1431  }
1432  }
1433 
1434  // From here on out we're working with named functions.
1435  if (!CI->getCalledFunction()) return false;
1436 
1437  // Lower all default uses of _chk calls. This is very similar
1438  // to what InstCombineCalls does, but here we are only lowering calls
1439  // to fortified library functions (e.g. __memcpy_chk) that have the default
1440  // "don't know" as the objectsize. Anything else should be left alone.
1441  FortifiedLibCallSimplifier Simplifier(TLInfo, true);
1442  if (Value *V = Simplifier.optimizeCall(CI)) {
1443  CI->replaceAllUsesWith(V);
1444  CI->eraseFromParent();
1445  return true;
1446  }
1447  return false;
1448 }
1449 
1450 /// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return
1451 /// instructions to the predecessor to enable tail call optimizations. The
1452 /// case it is currently looking for is:
1453 /// @code
1454 /// bb0:
1455 /// %tmp0 = tail call i32 @f0()
1456 /// br label %return
1457 /// bb1:
1458 /// %tmp1 = tail call i32 @f1()
1459 /// br label %return
1460 /// bb2:
1461 /// %tmp2 = tail call i32 @f2()
1462 /// br label %return
1463 /// return:
1464 /// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
1465 /// ret i32 %retval
1466 /// @endcode
1467 ///
1468 /// =>
1469 ///
1470 /// @code
1471 /// bb0:
1472 /// %tmp0 = tail call i32 @f0()
1473 /// ret i32 %tmp0
1474 /// bb1:
1475 /// %tmp1 = tail call i32 @f1()
1476 /// ret i32 %tmp1
1477 /// bb2:
1478 /// %tmp2 = tail call i32 @f2()
1479 /// ret i32 %tmp2
1480 /// @endcode
1481 bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) {
1482  if (!TLI)
1483  return false;
1484 
1485  ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
1486  if (!RI)
1487  return false;
1488 
1489  PHINode *PN = nullptr;
1490  BitCastInst *BCI = nullptr;
1491  Value *V = RI->getReturnValue();
1492  if (V) {
1493  BCI = dyn_cast<BitCastInst>(V);
1494  if (BCI)
1495  V = BCI->getOperand(0);
1496 
1497  PN = dyn_cast<PHINode>(V);
1498  if (!PN)
1499  return false;
1500  }
1501 
1502  if (PN && PN->getParent() != BB)
1503  return false;
1504 
1505  // It's not safe to eliminate the sign / zero extension of the return value.
1506  // See llvm::isInTailCallPosition().
1507  const Function *F = BB->getParent();
1508  AttributeSet CallerAttrs = F->getAttributes();
1509  if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) ||
1510  CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
1511  return false;
1512 
1513  // Make sure there are no instructions between the PHI and return, or that the
1514  // return is the first instruction in the block.
1515  if (PN) {
1516  BasicBlock::iterator BI = BB->begin();
1517  do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
1518  if (&*BI == BCI)
1519  // Also skip over the bitcast.
1520  ++BI;
1521  if (&*BI != RI)
1522  return false;
1523  } else {
1524  BasicBlock::iterator BI = BB->begin();
1525  while (isa<DbgInfoIntrinsic>(BI)) ++BI;
1526  if (&*BI != RI)
1527  return false;
1528  }
1529 
1530  /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
1531  /// call.
1532  SmallVector<CallInst*, 4> TailCalls;
1533  if (PN) {
1534  for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
1535  CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I));
1536  // Make sure the phi value is indeed produced by the tail call.
1537  if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) &&
1538  TLI->mayBeEmittedAsTailCall(CI))
1539  TailCalls.push_back(CI);
1540  }
1541  } else {
1542  SmallPtrSet<BasicBlock*, 4> VisitedBBs;
1543  for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
1544  if (!VisitedBBs.insert(*PI).second)
1545  continue;
1546 
1547  BasicBlock::InstListType &InstList = (*PI)->getInstList();
1548  BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin();
1549  BasicBlock::InstListType::reverse_iterator RE = InstList.rend();
1550  do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI));
1551  if (RI == RE)
1552  continue;
1553 
1554  CallInst *CI = dyn_cast<CallInst>(&*RI);
1555  if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI))
1556  TailCalls.push_back(CI);
1557  }
1558  }
1559 
1560  bool Changed = false;
1561  for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) {
1562  CallInst *CI = TailCalls[i];
1563  CallSite CS(CI);
1564 
1565  // Conservatively require the attributes of the call to match those of the
1566  // return. Ignore noalias because it doesn't affect the call sequence.
1567  AttributeSet CalleeAttrs = CS.getAttributes();
1568  if (AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
1569  removeAttribute(Attribute::NoAlias) !=
1570  AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
1571  removeAttribute(Attribute::NoAlias))
1572  continue;
1573 
1574  // Make sure the call instruction is followed by an unconditional branch to
1575  // the return block.
1576  BasicBlock *CallBB = CI->getParent();
1577  BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator());
1578  if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
1579  continue;
1580 
1581  // Duplicate the return into CallBB.
1582  (void)FoldReturnIntoUncondBranch(RI, BB, CallBB);
1583  ModifiedDT = Changed = true;
1584  ++NumRetsDup;
1585  }
1586 
1587  // If we eliminated all predecessors of the block, delete the block now.
1588  if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
1589  BB->eraseFromParent();
1590 
1591  return Changed;
1592 }
1593 
1594 //===----------------------------------------------------------------------===//
1595 // Memory Optimization
1596 //===----------------------------------------------------------------------===//
1597 
1598 namespace {
1599 
1600 /// ExtAddrMode - This is an extended version of TargetLowering::AddrMode
1601 /// which holds actual Value*'s for register values.
1602 struct ExtAddrMode : public TargetLowering::AddrMode {
1603  Value *BaseReg;
1604  Value *ScaledReg;
1605  ExtAddrMode() : BaseReg(nullptr), ScaledReg(nullptr) {}
1606  void print(raw_ostream &OS) const;
1607  void dump() const;
1608 
1609  bool operator==(const ExtAddrMode& O) const {
1610  return (BaseReg == O.BaseReg) && (ScaledReg == O.ScaledReg) &&
1611  (BaseGV == O.BaseGV) && (BaseOffs == O.BaseOffs) &&
1612  (HasBaseReg == O.HasBaseReg) && (Scale == O.Scale);
1613  }
1614 };
1615 
1616 #ifndef NDEBUG
1617 static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
1618  AM.print(OS);
1619  return OS;
1620 }
1621 #endif
1622 
1623 void ExtAddrMode::print(raw_ostream &OS) const {
1624  bool NeedPlus = false;
1625  OS << "[";
1626  if (BaseGV) {
1627  OS << (NeedPlus ? " + " : "")
1628  << "GV:";
1629  BaseGV->printAsOperand(OS, /*PrintType=*/false);
1630  NeedPlus = true;
1631  }
1632 
1633  if (BaseOffs) {
1634  OS << (NeedPlus ? " + " : "")
1635  << BaseOffs;
1636  NeedPlus = true;
1637  }
1638 
1639  if (BaseReg) {
1640  OS << (NeedPlus ? " + " : "")
1641  << "Base:";
1642  BaseReg->printAsOperand(OS, /*PrintType=*/false);
1643  NeedPlus = true;
1644  }
1645  if (Scale) {
1646  OS << (NeedPlus ? " + " : "")
1647  << Scale << "*";
1648  ScaledReg->printAsOperand(OS, /*PrintType=*/false);
1649  }
1650 
1651  OS << ']';
1652 }
1653 
1654 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1655 void ExtAddrMode::dump() const {
1656  print(dbgs());
1657  dbgs() << '\n';
1658 }
1659 #endif
1660 
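// Illustrative sketch (hypothetical values): for an address of the form
// @GV + 8 + 4*%i, a fully matched ExtAddrMode could hold BaseGV = @GV,
// BaseOffs = 8, Scale = 4 and ScaledReg = %i, which print() would render
// roughly as:
//   [GV:@GV + 8 + 4*%i]
// Which fields actually get populated depends on what isLegalAddressingMode
// accepts for the target.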
1661 /// \brief This class provides transaction based operation on the IR.
1662 /// Every change made through this class is recorded in the internal state and
1663 /// can be undone (rollback) until commit is called.
1664 class TypePromotionTransaction {
1665 
1666  /// \brief This represents the common interface of the individual transaction.
1667  /// Each class implements the logic for doing one specific modification on
1668  /// the IR via the TypePromotionTransaction.
1669  class TypePromotionAction {
1670  protected:
1671  /// The Instruction modified.
1672  Instruction *Inst;
1673 
1674  public:
1675  /// \brief Constructor of the action.
1676  /// The constructor performs the related action on the IR.
1677  TypePromotionAction(Instruction *Inst) : Inst(Inst) {}
1678 
1679  virtual ~TypePromotionAction() {}
1680 
1681  /// \brief Undo the modification done by this action.
1682  /// When this method is called, the IR must be in the same state as it was
1683  /// before this action was applied.
1684  /// \pre Undoing the action works if and only if the IR is in the exact same
1685  /// state as it was directly after this action was applied.
1686  virtual void undo() = 0;
1687 
1688  /// \brief Commit every change made by this action.
1689  /// When the results of the action on the IR are to be kept, it is important
1690  /// to call this function; otherwise hidden information may be kept forever.
1691  virtual void commit() {
1692  // Nothing to be done, this action is not doing anything.
1693  }
1694  };
1695 
1696  /// \brief Utility to remember the position of an instruction.
1697  class InsertionHandler {
1698  /// Position of an instruction.
1699  /// Either an instruction:
1700  /// - Is the first in a basic block: BB is used.
1701  /// - Has a previous instructon: PrevInst is used.
1702  union {
1703  Instruction *PrevInst;
1704  BasicBlock *BB;
1705  } Point;
1706  /// Remember whether or not the instruction had a previous instruction.
1707  bool HasPrevInstruction;
1708 
1709  public:
1710  /// \brief Record the position of \p Inst.
1711  InsertionHandler(Instruction *Inst) {
1712  BasicBlock::iterator It = Inst;
1713  HasPrevInstruction = (It != (Inst->getParent()->begin()));
1714  if (HasPrevInstruction)
1715  Point.PrevInst = --It;
1716  else
1717  Point.BB = Inst->getParent();
1718  }
1719 
1720  /// \brief Insert \p Inst at the recorded position.
1721  void insert(Instruction *Inst) {
1722  if (HasPrevInstruction) {
1723  if (Inst->getParent())
1724  Inst->removeFromParent();
1725  Inst->insertAfter(Point.PrevInst);
1726  } else {
1727  Instruction *Position = Point.BB->getFirstInsertionPt();
1728  if (Inst->getParent())
1729  Inst->moveBefore(Position);
1730  else
1731  Inst->insertBefore(Position);
1732  }
1733  }
1734  };
1735 
1736  /// \brief Move an instruction before another.
1737  class InstructionMoveBefore : public TypePromotionAction {
1738  /// Original position of the instruction.
1739  InsertionHandler Position;
1740 
1741  public:
1742  /// \brief Move \p Inst before \p Before.
1743  InstructionMoveBefore(Instruction *Inst, Instruction *Before)
1744  : TypePromotionAction(Inst), Position(Inst) {
1745  DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before << "\n");
1746  Inst->moveBefore(Before);
1747  }
1748 
1749  /// \brief Move the instruction back to its original position.
1750  void undo() override {
1751  DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n");
1752  Position.insert(Inst);
1753  }
1754  };
1755 
1756  /// \brief Set the operand of an instruction with a new value.
1757  class OperandSetter : public TypePromotionAction {
1758  /// Original operand of the instruction.
1759  Value *Origin;
1760  /// Index of the modified instruction.
1761  unsigned Idx;
1762 
1763  public:
1764  /// \brief Set \p Idx operand of \p Inst with \p NewVal.
1765  OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal)
1766  : TypePromotionAction(Inst), Idx(Idx) {
1767  DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n"
1768  << "for:" << *Inst << "\n"
1769  << "with:" << *NewVal << "\n");
1770  Origin = Inst->getOperand(Idx);
1771  Inst->setOperand(Idx, NewVal);
1772  }
1773 
1774  /// \brief Restore the original value of the instruction.
1775  void undo() override {
1776  DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n"
1777  << "for: " << *Inst << "\n"
1778  << "with: " << *Origin << "\n");
1779  Inst->setOperand(Idx, Origin);
1780  }
1781  };
1782 
1783  /// \brief Hide the operands of an instruction.
1784  /// Act as if this instruction were not using any of its operands.
1785  class OperandsHider : public TypePromotionAction {
1786  /// The list of original operands.
1787  SmallVector<Value *, 4> OriginalValues;
1788 
1789  public:
1790  /// \brief Remove \p Inst from the uses of the operands of \p Inst.
1791  OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) {
1792  DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n");
1793  unsigned NumOpnds = Inst->getNumOperands();
1794  OriginalValues.reserve(NumOpnds);
1795  for (unsigned It = 0; It < NumOpnds; ++It) {
1796  // Save the current operand.
1797  Value *Val = Inst->getOperand(It);
1798  OriginalValues.push_back(Val);
1799  // Set a dummy one.
1800  // We could use OperandSetter here, but that would imply an overhead
1801  // that we are not willing to pay.
1802  Inst->setOperand(It, UndefValue::get(Val->getType()));
1803  }
1804  }
1805 
1806  /// \brief Restore the original list of uses.
1807  void undo() override {
1808  DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n");
1809  for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It)
1810  Inst->setOperand(It, OriginalValues[It]);
1811  }
1812  };
1813 
1814  /// \brief Build a truncate instruction.
1815  class TruncBuilder : public TypePromotionAction {
1816  Value *Val;
1817  public:
1818  /// \brief Build a truncate instruction of \p Opnd producing a \p Ty
1819  /// result.
1820  /// trunc Opnd to Ty.
1821  TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
1822  IRBuilder<> Builder(Opnd);
1823  Val = Builder.CreateTrunc(Opnd, Ty, "promoted");
1824  DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n");
1825  }
1826 
1827  /// \brief Get the built value.
1828  Value *getBuiltValue() { return Val; }
1829 
1830  /// \brief Remove the built instruction.
1831  void undo() override {
1832  DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n");
1833  if (Instruction *IVal = dyn_cast<Instruction>(Val))
1834  IVal->eraseFromParent();
1835  }
1836  };
1837 
1838  /// \brief Build a sign extension instruction.
1839  class SExtBuilder : public TypePromotionAction {
1840  Value *Val;
1841  public:
1842  /// \brief Build a sign extension instruction of \p Opnd producing a \p Ty
1843  /// result.
1844  /// sext Opnd to Ty.
1845  SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
1846  : TypePromotionAction(InsertPt) {
1847  IRBuilder<> Builder(InsertPt);
1848  Val = Builder.CreateSExt(Opnd, Ty, "promoted");
1849  DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n");
1850  }
1851 
1852  /// \brief Get the built value.
1853  Value *getBuiltValue() { return Val; }
1854 
1855  /// \brief Remove the built instruction.
1856  void undo() override {
1857  DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n");
1858  if (Instruction *IVal = dyn_cast<Instruction>(Val))
1859  IVal->eraseFromParent();
1860  }
1861  };
1862 
1863  /// \brief Build a zero extension instruction.
1864  class ZExtBuilder : public TypePromotionAction {
1865  Value *Val;
1866  public:
1867  /// \brief Build a zero extension instruction of \p Opnd producing a \p Ty
1868  /// result.
1869  /// zext Opnd to Ty.
1870  ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
1871  : TypePromotionAction(InsertPt) {
1872  IRBuilder<> Builder(InsertPt);
1873  Val = Builder.CreateZExt(Opnd, Ty, "promoted");
1874  DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n");
1875  }
1876 
1877  /// \brief Get the built value.
1878  Value *getBuiltValue() { return Val; }
1879 
1880  /// \brief Remove the built instruction.
1881  void undo() override {
1882  DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n");
1883  if (Instruction *IVal = dyn_cast<Instruction>(Val))
1884  IVal->eraseFromParent();
1885  }
1886  };
1887 
1888  /// \brief Mutate an instruction to another type.
1889  class TypeMutator : public TypePromotionAction {
1890  /// Record the original type.
1891  Type *OrigTy;
1892 
1893  public:
1894  /// \brief Mutate the type of \p Inst into \p NewTy.
1895  TypeMutator(Instruction *Inst, Type *NewTy)
1896  : TypePromotionAction(Inst), OrigTy(Inst->getType()) {
1897  DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy
1898  << "\n");
1899  Inst->mutateType(NewTy);
1900  }
1901 
1902  /// \brief Mutate the instruction back to its original type.
1903  void undo() override {
1904  DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy
1905  << "\n");
1906  Inst->mutateType(OrigTy);
1907  }
1908  };
1909 
1910  /// \brief Replace the uses of an instruction by another instruction.
1911  class UsesReplacer : public TypePromotionAction {
1912  /// Helper structure to keep track of the replaced uses.
1913  struct InstructionAndIdx {
1914  /// The instruction using the instruction.
1915  /// The instruction that uses the replaced instruction.
1916  Instruction *Inst;
1917  /// The operand index at which the replaced instruction is used by Inst.
1918  InstructionAndIdx(Instruction *Inst, unsigned Idx)
1919  : Inst(Inst), Idx(Idx) {}
1920  };
1921 
1922  /// Keep track of the original uses (pair Instruction, Index).
1923  SmallVector<InstructionAndIdx, 4> OriginalUses;
1924  typedef SmallVectorImpl<InstructionAndIdx>::iterator use_iterator;
1925 
1926  public:
1927  /// \brief Replace all the use of \p Inst by \p New.
1928  UsesReplacer(Instruction *Inst, Value *New) : TypePromotionAction(Inst) {
1929  DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New
1930  << "\n");
1931  // Record the original uses.
1932  for (Use &U : Inst->uses()) {
1933  Instruction *UserI = cast<Instruction>(U.getUser());
1934  OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo()));
1935  }
1936  // Now, we can replace the uses.
1937  Inst->replaceAllUsesWith(New);
1938  }
1939 
1940  /// \brief Reassign the original uses of Inst to Inst.
1941  void undo() override {
1942  DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
1943  for (use_iterator UseIt = OriginalUses.begin(),
1944  EndIt = OriginalUses.end();
1945  UseIt != EndIt; ++UseIt) {
1946  UseIt->Inst->setOperand(UseIt->Idx, Inst);
1947  }
1948  }
1949  };
1950 
1951  /// \brief Remove an instruction from the IR.
1952  class InstructionRemover : public TypePromotionAction {
1953  /// Original position of the instruction.
1954  InsertionHandler Inserter;
1955  /// Helper structure to hide all the links to the instruction. In other
1956  /// words, this helps to act as if the instruction had been removed.
1957  OperandsHider Hider;
1958  /// Keep track of the uses replaced, if any.
1959  UsesReplacer *Replacer;
1960 
1961  public:
1962  /// \brief Remove all references to \p Inst and optionally replace all its
1963  /// uses with New.
1964  /// \pre If !Inst->use_empty(), then New != nullptr
1965  InstructionRemover(Instruction *Inst, Value *New = nullptr)
1966  : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
1967  Replacer(nullptr) {
1968  if (New)
1969  Replacer = new UsesReplacer(Inst, New);
1970  DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
1971  Inst->removeFromParent();
1972  }
1973 
1974  ~InstructionRemover() override { delete Replacer; }
1975 
1976  /// \brief Really remove the instruction.
1977  void commit() override { delete Inst; }
1978 
1979  /// \brief Resurrect the instruction and reassign it to the proper uses if a
1980  /// new value was provided when this action was built.
1981  void undo() override {
1982  DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
1983  Inserter.insert(Inst);
1984  if (Replacer)
1985  Replacer->undo();
1986  Hider.undo();
1987  }
1988  };
1989 
1990 public:
1991  /// Restoration point.
1992  /// The restoration point is a pointer to an action instead of an iterator
1993  /// because the iterator may be invalidated but not the pointer.
1994  typedef const TypePromotionAction *ConstRestorationPt;
1995  /// Commit all the changes made in this transaction.
1996  void commit();
1997  /// Undo all the changes made after the given point.
1998  void rollback(ConstRestorationPt Point);
1999  /// Get the current restoration point.
2000  ConstRestorationPt getRestorationPoint() const;
2001 
2002  /// \name API for IR modification with state keeping to support rollback.
2003  /// @{
2004  /// Same as Instruction::setOperand.
2005  void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
2006  /// Same as Instruction::eraseFromParent.
2007  void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
2008  /// Same as Value::replaceAllUsesWith.
2009  void replaceAllUsesWith(Instruction *Inst, Value *New);
2010  /// Same as Value::mutateType.
2011  void mutateType(Instruction *Inst, Type *NewTy);
2012  /// Same as IRBuilder::createTrunc.
2013  Value *createTrunc(Instruction *Opnd, Type *Ty);
2014  /// Same as IRBuilder::createSExt.
2015  Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
2016  /// Same as IRBuilder::createZExt.
2017  Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
2018  /// Same as Instruction::moveBefore.
2019  void moveBefore(Instruction *Inst, Instruction *Before);
2020  /// @}
2021 
2022 private:
2023  /// The ordered list of actions made so far.
2024  SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
2025  typedef SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator CommitPt;
2026 };
2027 
2028 void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
2029  Value *NewVal) {
2030  Actions.push_back(
2031  make_unique<TypePromotionTransaction::OperandSetter>(Inst, Idx, NewVal));
2032 }
2033 
2034 void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
2035  Value *NewVal) {
2036  Actions.push_back(
2037  make_unique<TypePromotionTransaction::InstructionRemover>(Inst, NewVal));
2038 }
2039 
2040 void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
2041  Value *New) {
2042  Actions.push_back(make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
2043 }
2044 
2045 void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
2046  Actions.push_back(make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
2047 }
2048 
2049 Value *TypePromotionTransaction::createTrunc(Instruction *Opnd,
2050  Type *Ty) {
2051  std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
2052  Value *Val = Ptr->getBuiltValue();
2053  Actions.push_back(std::move(Ptr));
2054  return Val;
2055 }
2056 
2057 Value *TypePromotionTransaction::createSExt(Instruction *Inst,
2058  Value *Opnd, Type *Ty) {
2059  std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
2060  Value *Val = Ptr->getBuiltValue();
2061  Actions.push_back(std::move(Ptr));
2062  return Val;
2063 }
2064 
2065 Value *TypePromotionTransaction::createZExt(Instruction *Inst,
2066  Value *Opnd, Type *Ty) {
2067  std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty));
2068  Value *Val = Ptr->getBuiltValue();
2069  Actions.push_back(std::move(Ptr));
2070  return Val;
2071 }
2072 
2073 void TypePromotionTransaction::moveBefore(Instruction *Inst,
2074  Instruction *Before) {
2075  Actions.push_back(
2076  make_unique<TypePromotionTransaction::InstructionMoveBefore>(Inst, Before));
2077 }
2078 
2079 TypePromotionTransaction::ConstRestorationPt
2080 TypePromotionTransaction::getRestorationPoint() const {
2081  return !Actions.empty() ? Actions.back().get() : nullptr;
2082 }
2083 
2084 void TypePromotionTransaction::commit() {
2085  for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt;
2086  ++It)
2087  (*It)->commit();
2088  Actions.clear();
2089 }
2090 
2091 void TypePromotionTransaction::rollback(
2092  TypePromotionTransaction::ConstRestorationPt Point) {
2093  while (!Actions.empty() && Point != Actions.back().get()) {
2094  std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
2095  Curr->undo();
2096  }
2097 }
2098 
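// A minimal usage sketch (hypothetical caller code): a client takes a
// restoration point, applies speculative rewrites through the transaction,
// and then either rolls them back or commits them.
//
//   TypePromotionTransaction TPT;
//   TypePromotionTransaction::ConstRestorationPt Pt = TPT.getRestorationPoint();
//   TPT.mutateType(Inst, NewTy);        // recorded, undoable change
//   TPT.replaceAllUsesWith(Ext, Inst);  // recorded, undoable change
//   if (!Profitable)
//     TPT.rollback(Pt);                 // undo everything back to Pt
//   else
//     TPT.commit();                     // make all recorded changes final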
2099 /// \brief A helper class for matching addressing modes.
2100 ///
2101 /// This encapsulates the logic for matching the target-legal addressing modes.
2102 class AddressingModeMatcher {
2103  SmallVectorImpl<Instruction*> &AddrModeInsts;
2104  const TargetMachine &TM;
2105  const TargetLowering &TLI;
2106  const DataLayout &DL;
2107 
2108  /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
2109  /// the memory instruction that we're computing this address for.
2110  Type *AccessTy;
2111  unsigned AddrSpace;
2112  Instruction *MemoryInst;
2113 
2114  /// AddrMode - This is the addressing mode that we're building up. This is
2115  /// part of the return value of this addressing mode matching stuff.
2116  ExtAddrMode &AddrMode;
2117 
2118  /// The instructions inserted by other CodeGenPrepare optimizations.
2119  const SetOfInstrs &InsertedInsts;
2120  /// A map from the instructions to their type before promotion.
2121  InstrToOrigTy &PromotedInsts;
2122  /// The ongoing transaction where every action should be registered.
2123  TypePromotionTransaction &TPT;
2124 
2125  /// IgnoreProfitability - This is set to true when we should not do
2126  /// profitability checks. When true, IsProfitableToFoldIntoAddressingMode
2127  /// always returns true.
2128  bool IgnoreProfitability;
2129 
2130  AddressingModeMatcher(SmallVectorImpl<Instruction *> &AMI,
2131  const TargetMachine &TM, Type *AT, unsigned AS,
2132  Instruction *MI, ExtAddrMode &AM,
2133  const SetOfInstrs &InsertedInsts,
2134  InstrToOrigTy &PromotedInsts,
2135  TypePromotionTransaction &TPT)
2136  : AddrModeInsts(AMI), TM(TM),
2137  TLI(*TM.getSubtargetImpl(*MI->getParent()->getParent())
2138  ->getTargetLowering()),
2139  DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS),
2140  MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts),
2141  PromotedInsts(PromotedInsts), TPT(TPT) {
2142  IgnoreProfitability = false;
2143  }
2144 public:
2145 
2146  /// Match - Find the maximal addressing mode that a load/store of V can fold,
2147  /// given an access type of AccessTy. This returns a list of involved
2148  /// instructions in AddrModeInsts.
2149  /// \p InsertedInsts The instructions inserted by other CodeGenPrepare
2150  /// optimizations.
2151  /// \p PromotedInsts maps the instructions to their type before promotion.
2152  /// \p TPT The ongoing transaction where every action should be registered.
2153  static ExtAddrMode Match(Value *V, Type *AccessTy, unsigned AS,
2154  Instruction *MemoryInst,
2155  SmallVectorImpl<Instruction*> &AddrModeInsts,
2156  const TargetMachine &TM,
2157  const SetOfInstrs &InsertedInsts,
2158  InstrToOrigTy &PromotedInsts,
2159  TypePromotionTransaction &TPT) {
2160  ExtAddrMode Result;
2161 
2162  bool Success = AddressingModeMatcher(AddrModeInsts, TM, AccessTy, AS,
2163  MemoryInst, Result, InsertedInsts,
2164  PromotedInsts, TPT).MatchAddr(V, 0);
2165  (void)Success; assert(Success && "Couldn't select *anything*?");
2166  return Result;
2167  }
2168 private:
2169  bool MatchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
2170  bool MatchAddr(Value *V, unsigned Depth);
2171  bool MatchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth,
2172  bool *MovedAway = nullptr);
2173  bool IsProfitableToFoldIntoAddressingMode(Instruction *I,
2174  ExtAddrMode &AMBefore,
2175  ExtAddrMode &AMAfter);
2176  bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
2177  bool IsPromotionProfitable(unsigned NewCost, unsigned OldCost,
2178  Value *PromotedOperand) const;
2179 };
2180 
2181 /// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode.
2182 /// Return true and update AddrMode if this addr mode is legal for the target,
2183 /// false if not.
2184 bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
2185  unsigned Depth) {
2186  // If Scale is 1, then this is the same as adding ScaleReg to the addressing
2187  // mode. Just process that directly.
2188  if (Scale == 1)
2189  return MatchAddr(ScaleReg, Depth);
2190 
2191  // If the scale is 0, it takes nothing to add this.
2192  if (Scale == 0)
2193  return true;
2194 
2195  // If we already have a scale of this value, we can add to it, otherwise, we
2196  // need an available scale field.
2197  if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
2198  return false;
2199 
2200  ExtAddrMode TestAddrMode = AddrMode;
2201 
2202  // Add scale to turn X*4+X*3 -> X*7. This could also do things like
2203  // [A+B + A*7] -> [B+A*8].
2204  TestAddrMode.Scale += Scale;
2205  TestAddrMode.ScaledReg = ScaleReg;
2206 
2207  // If the new address isn't legal, bail out.
2208  if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace))
2209  return false;
2210 
2211  // It was legal, so commit it.
2212  AddrMode = TestAddrMode;
2213 
2214  // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
2215  // to see if ScaleReg is actually X+C. If so, we can turn this into adding
2216  // X*Scale + C*Scale to addr mode.
2217  ConstantInt *CI = nullptr; Value *AddLHS = nullptr;
2218  if (isa<Instruction>(ScaleReg) && // not a constant expr.
2219  match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
2220  TestAddrMode.ScaledReg = AddLHS;
2221  TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
2222 
2223  // If this addressing mode is legal, commit it and remember that we folded
2224  // this instruction.
2225  if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) {
2226  AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
2227  AddrMode = TestAddrMode;
2228  return true;
2229  }
2230  }
2231 
2232  // Otherwise, not (x+c)*scale, just return what we have.
2233  return true;
2234 }
2235 
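// Illustrative example (hypothetical IR): when matching the address of a
// load through
//   %idx  = add i64 %x, 3
//   %addr = getelementptr i32, i32* %base, i64 %idx
// MatchScaledValue can fold the add so that the resulting mode is roughly
// [Base:%base + 12 + 4*%x], i.e. ScaledReg = %x, Scale = 4 and BaseOffs = 12,
// provided isLegalAddressingMode accepts that combination for the target.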
2236 /// MightBeFoldableInst - This is a little filter, which returns true if an
2237 /// addressing computation involving I might be folded into a load/store
2238 /// accessing it. This doesn't need to be perfect, but needs to accept at least
2239 /// the set of instructions that MatchOperationAddr can.
2240 static bool MightBeFoldableInst(Instruction *I) {
2241  switch (I->getOpcode()) {
2242  case Instruction::BitCast:
2243  case Instruction::AddrSpaceCast:
2244  // Don't touch identity bitcasts.
2245  if (I->getType() == I->getOperand(0)->getType())
2246  return false;
2247  return I->getType()->isPointerTy() || I->getType()->isIntegerTy();
2248  case Instruction::PtrToInt:
2249  // PtrToInt is always a noop, as we know that the int type is pointer sized.
2250  return true;
2251  case Instruction::IntToPtr:
2252  // We know the input is intptr_t, so this is foldable.
2253  return true;
2254  case Instruction::Add:
2255  return true;
2256  case Instruction::Mul:
2257  case Instruction::Shl:
2258  // Can only handle X*C and X << C.
2259  return isa<ConstantInt>(I->getOperand(1));
2260  case Instruction::GetElementPtr:
2261  return true;
2262  default:
2263  return false;
2264  }
2265 }
2266 
2267 /// \brief Check whether or not \p Val is a legal instruction for \p TLI.
2268 /// \note \p Val is assumed to be the product of some type promotion.
2269 /// Therefore if \p Val has an undefined state in \p TLI, this is assumed
2270 /// to be legal, as the non-promoted value would have had the same state.
2271 static bool isPromotedInstructionLegal(const TargetLowering &TLI,
2272  const DataLayout &DL, Value *Val) {
2273  Instruction *PromotedInst = dyn_cast<Instruction>(Val);
2274  if (!PromotedInst)
2275  return false;
2276  int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
2277  // If the ISDOpcode is undefined, it was undefined before the promotion.
2278  if (!ISDOpcode)
2279  return true;
2280  // Otherwise, check if the promoted instruction is legal or not.
2281  return TLI.isOperationLegalOrCustom(
2282  ISDOpcode, TLI.getValueType(DL, PromotedInst->getType()));
2283 }
2284 
2285 /// \brief Helper class to perform type promotion.
2286 class TypePromotionHelper {
2287  /// \brief Utility function to check whether or not a sign or zero extension
2288  /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by
2289  /// either using the operands of \p Inst or promoting \p Inst.
2290  /// The type of the extension is defined by \p IsSExt.
2291  /// In other words, check if:
2292  /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType.
2293  /// #1 Promotion applies:
2294  /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...).
2295  /// #2 Operand reuses:
2296  /// ext opnd1 to ConsideredExtType.
2297  /// \p PromotedInsts maps the instructions to their type before promotion.
2298  static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType,
2299  const InstrToOrigTy &PromotedInsts, bool IsSExt);
2300 
2301  /// \brief Utility function to determine if \p OpIdx should be promoted when
2302  /// promoting \p Inst.
2303  static bool shouldExtOperand(const Instruction *Inst, int OpIdx) {
2304  if (isa<SelectInst>(Inst) && OpIdx == 0)
2305  return false;
2306  return true;
2307  }
2308 
2309  /// \brief Utility function to promote the operand of \p Ext when this
2310  /// operand is a promotable trunc or sext or zext.
2311  /// \p PromotedInsts maps the instructions to their type before promotion.
2312  /// \p CreatedInstsCost[out] contains the cost of all instructions
2313  /// created to promote the operand of Ext.
2314  /// Newly added extensions are inserted in \p Exts.
2315  /// Newly added truncates are inserted in \p Truncs.
2316  /// Should never be called directly.
2317  /// \return The promoted value which is used instead of Ext.
2318  static Value *promoteOperandForTruncAndAnyExt(
2319  Instruction *Ext, TypePromotionTransaction &TPT,
2320  InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
2321  SmallVectorImpl<Instruction *> *Exts,
2322  SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI);
2323 
2324  /// \brief Utility function to promote the operand of \p Ext when this
2325  /// operand is promotable and is not a supported trunc or sext.
2326  /// \p PromotedInsts maps the instructions to their type before promotion.
2327  /// \p CreatedInstsCost[out] contains the cost of all the instructions
2328  /// created to promote the operand of Ext.
2329  /// Newly added extensions are inserted in \p Exts.
2330  /// Newly added truncates are inserted in \p Truncs.
2331  /// Should never be called directly.
2332  /// \return The promoted value which is used instead of Ext.
2333  static Value *promoteOperandForOther(Instruction *Ext,
2334  TypePromotionTransaction &TPT,
2335  InstrToOrigTy &PromotedInsts,
2336  unsigned &CreatedInstsCost,
2337  SmallVectorImpl<Instruction *> *Exts,
2338  SmallVectorImpl<Instruction *> *Truncs,
2339  const TargetLowering &TLI, bool IsSExt);
2340 
2341  /// \see promoteOperandForOther.
2342  static Value *signExtendOperandForOther(
2343  Instruction *Ext, TypePromotionTransaction &TPT,
2344  InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
2345  SmallVectorImpl<Instruction *> *Exts,
2346  SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
2347  return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
2348  Exts, Truncs, TLI, true);
2349  }
2350 
2351  /// \see promoteOperandForOther.
2352  static Value *zeroExtendOperandForOther(
2353  Instruction *Ext, TypePromotionTransaction &TPT,
2354  InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
2355  SmallVectorImpl<Instruction *> *Exts,
2356  SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
2357  return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
2358  Exts, Truncs, TLI, false);
2359  }
2360 
2361 public:
2362  /// Type for the utility function that promotes the operand of Ext.
2363  typedef Value *(*Action)(Instruction *Ext, TypePromotionTransaction &TPT,
2364  InstrToOrigTy &PromotedInsts,
2365  unsigned &CreatedInstsCost,
2366  SmallVectorImpl<Instruction *> *Exts,
2367  SmallVectorImpl<Instruction *> *Truncs,
2368  const TargetLowering &TLI);
2369  /// \brief Given a sign/zero extend instruction \p Ext, return the appropriate
2370  /// action to promote the operand of \p Ext instead of using Ext.
2371  /// \return NULL if no promotable action is possible with the current
2372  /// sign extension.
2373  /// \p InsertedInsts keeps track of all the instructions inserted by the
2374  /// other CodeGenPrepare optimizations. This information is important
2375  /// because we do not want to promote these instructions as CodeGenPrepare
2376  /// will reinsert them later, thus creating an infinite loop: create/remove.
2377  /// \p PromotedInsts maps the instructions to their type before promotion.
2378  static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedInsts,
2379  const TargetLowering &TLI,
2380  const InstrToOrigTy &PromotedInsts);
2381 };
2382 
2383 bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
2384  Type *ConsideredExtType,
2385  const InstrToOrigTy &PromotedInsts,
2386  bool IsSExt) {
2387  // The promotion helper does not know how to deal with vector types yet.
2388  // To be able to fix that, we would need to fix the places where we
2389  // statically extend, e.g., constants and such.
2390  if (Inst->getType()->isVectorTy())
2391  return false;
2392 
2393  // We can always get through zext.
2394  if (isa<ZExtInst>(Inst))
2395  return true;
2396 
2397  // sext(sext) is ok too.
2398  if (IsSExt && isa<SExtInst>(Inst))
2399  return true;
2400 
2401  // We can get through a binary operator if it is legal. In other words, the
2402  // binary operator must have a nuw or nsw flag.
2403  const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
2404  if (BinOp && isa<OverflowingBinaryOperator>(BinOp) &&
2405  ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
2406  (IsSExt && BinOp->hasNoSignedWrap())))
2407  return true;
2408 
2409  // Check if we can do the following simplification.
2410  // ext(trunc(opnd)) --> ext(opnd)
2411  if (!isa<TruncInst>(Inst))
2412  return false;
2413 
2414  Value *OpndVal = Inst->getOperand(0);
2415  // Check if we can use this operand in the extension.
2416  // If the type is larger than the result type of the extension,
2417  // we cannot.
2418  if (!OpndVal->getType()->isIntegerTy() ||
2419  OpndVal->getType()->getIntegerBitWidth() >
2420  ConsideredExtType->getIntegerBitWidth())
2421  return false;
2422 
2423  // If the operand of the truncate is not an instruction, we will not have
2424  // any information on the dropped bits.
2425  // (Actually we could for constants, but it is not worth the extra logic).
2426  Instruction *Opnd = dyn_cast<Instruction>(OpndVal);
2427  if (!Opnd)
2428  return false;
2429 
2430  // Check if the source of the type is narrow enough.
2431  // I.e., check that trunc just drops extended bits of the same kind as
2432  // the extension.
2433  // #1 get the type of the operand and check the kind of the extended bits.
2434  const Type *OpndType;
2435  InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
2436  if (It != PromotedInsts.end() && It->second.IsSExt == IsSExt)
2437  OpndType = It->second.Ty;
2438  else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
2439  OpndType = Opnd->getOperand(0)->getType();
2440  else
2441  return false;
2442 
2443  // #2 check that the truncate just drops extended bits.
2444  if (Inst->getType()->getIntegerBitWidth() >= OpndType->getIntegerBitWidth())
2445  return true;
2446 
2447  return false;
2448 }
2449 
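// Illustrative example (hypothetical IR): canGetThrough accepts a trunc such
// as %t below because it only drops bits that were produced by an extension
// of the same kind:
//   %w = sext i32 %v to i64
//   %t = trunc i64 %w to i32
//   %e = sext i32 %t to i64
// Promoting through %t allows %e to be computed as an extension of %v
// directly, i.e. sext i32 %v to i64.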
2450 TypePromotionHelper::Action TypePromotionHelper::getAction(
2451  Instruction *Ext, const SetOfInstrs &InsertedInsts,
2452  const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
2453  assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
2454  "Unexpected instruction type");
2455  Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0));
2456  Type *ExtTy = Ext->getType();
2457  bool IsSExt = isa<SExtInst>(Ext);
2458  // If the operand of the extension is not an instruction, we cannot
2459  // get through.
2460  // If it is, check that we can get through it.
2461  if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt))
2462  return nullptr;
2463 
2464  // Do not promote if the operand has been added by codegenprepare.
2465  // Otherwise, it means we are undoing an optimization that is likely to be
2466  // redone, thus causing potential infinite loop.
2467  if (isa<TruncInst>(ExtOpnd) && InsertedInsts.count(ExtOpnd))
2468  return nullptr;
2469 
2470  // SExt, ZExt or Trunc instructions.
2471  // Return the related handler.
2472  if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) ||
2473  isa<ZExtInst>(ExtOpnd))
2474  return promoteOperandForTruncAndAnyExt;
2475 
2476  // Regular instruction.
2477  // Abort early if we will have to insert non-free instructions.
2478  if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType()))
2479  return nullptr;
2480  return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther;
2481 }
2482 
2483 Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
2484  llvm::Instruction *SExt, TypePromotionTransaction &TPT,
2485  InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
2486  SmallVectorImpl<Instruction *> *Exts,
2487  SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
2488  // By construction, the operand of SExt is an instruction. Otherwise we cannot
2489  // get through it and this method should not be called.
2490  Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
2491  Value *ExtVal = SExt;
2492  bool HasMergedNonFreeExt = false;
2493  if (isa<ZExtInst>(SExtOpnd)) {
2494  // Replace s|zext(zext(opnd))
2495  // => zext(opnd).
2496  HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd);
2497  Value *ZExt =
2498  TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
2499  TPT.replaceAllUsesWith(SExt, ZExt);
2500  TPT.eraseInstruction(SExt);
2501  ExtVal = ZExt;
2502  } else {
2503  // Replace z|sext(trunc(opnd)) or sext(sext(opnd))
2504  // => z|sext(opnd).
2505  TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
2506  }
2507  CreatedInstsCost = 0;
2508 
2509  // Remove dead code.
2510  if (SExtOpnd->use_empty())
2511  TPT.eraseInstruction(SExtOpnd);
2512 
2513  // Check if the extension is still needed.
2514  Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
2515  if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
2516  if (ExtInst) {
2517  if (Exts)
2518  Exts->push_back(ExtInst);
2519  CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt;
2520  }
2521  return ExtVal;
2522  }
2523 
2524  // At this point we have: ext ty opnd to ty.
2525  // Reassign the uses of ExtInst to the opnd and remove ExtInst.
2526  Value *NextVal = ExtInst->getOperand(0);
2527  TPT.eraseInstruction(ExtInst, NextVal);
2528  return NextVal;
2529 }
2530 
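// Illustrative example (hypothetical IR): for
//   %z = zext i8 %v to i16
//   %s = sext i16 %z to i32
// promoteOperandForTruncAndAnyExt rewrites the pair into a single
//   %promoted = zext i8 %v to i32
// and reports a CreatedInstsCost of at most 1 for the remaining extension.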
2531 Value *TypePromotionHelper::promoteOperandForOther(
2532  Instruction *Ext, TypePromotionTransaction &TPT,
2533  InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
2534  SmallVectorImpl<Instruction *> *Exts,
2535  SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI,
2536  bool IsSExt) {
2537  // By construction, the operand of Ext is an instruction. Otherwise we cannot
2538  // get through it and this method should not be called.
2539  Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
2540  CreatedInstsCost = 0;
2541  if (!ExtOpnd->hasOneUse()) {
2542  // ExtOpnd will be promoted.
2543  // All its uses, but Ext, will need to use a truncated value of the
2544  // promoted version.
2545  // Create the truncate now.
2546  Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType());
2547  if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
2548  ITrunc->removeFromParent();
2549  // Insert it just after the definition.
2550  ITrunc->insertAfter(ExtOpnd);
2551  if (Truncs)
2552  Truncs->push_back(ITrunc);
2553  }
2554 
2555  TPT.replaceAllUsesWith(ExtOpnd, Trunc);
2556  // Restore the operand of Ext (which has been replaced by the previous call
2557  // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
2558  TPT.setOperand(Ext, 0, ExtOpnd);
2559  }
2560 
2561  // Get through the Instruction:
2562  // 1. Update its type.
2563  // 2. Replace the uses of Ext by ExtOpnd.
2564  // 3. Extend each operand that needs to be extended.
2565 
2566  // Remember the original type of the instruction before promotion.
2567  // This is useful to know that the high bits are sign extended bits.
2568  PromotedInsts.insert(std::pair<Instruction *, TypeIsSExt>(
2569  ExtOpnd, TypeIsSExt(ExtOpnd->getType(), IsSExt)));
2570  // Step #1.
2571  TPT.mutateType(ExtOpnd, Ext->getType());
2572  // Step #2.
2573  TPT.replaceAllUsesWith(Ext, ExtOpnd);
2574  // Step #3.
2575  Instruction *ExtForOpnd = Ext;
2576 
2577  DEBUG(dbgs() << "Propagate Ext to operands\n");
2578  for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
2579  ++OpIdx) {
2580  DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n');
2581  if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() ||
2582  !shouldExtOperand(ExtOpnd, OpIdx)) {
2583  DEBUG(dbgs() << "No need to propagate\n");
2584  continue;
2585  }
2586  // Check if we can statically extend the operand.
2587  Value *Opnd = ExtOpnd->getOperand(OpIdx);
2588  if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
2589  DEBUG(dbgs() << "Statically extend\n");
2590  unsigned BitWidth = Ext->getType()->getIntegerBitWidth();
2591  APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth)
2592  : Cst->getValue().zext(BitWidth);
2593  TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal));
2594  continue;
2595  }
2596  // UndefValues are typed, so we have to statically extend them.
2597  if (isa<UndefValue>(Opnd)) {
2598  DEBUG(dbgs() << "Statically extend\n");
2599  TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType()));
2600  continue;
2601  }
2602 
2603  // Otherwise we have to explicitly extend the operand.
2604  // Check if Ext has already been reused to extend another operand.
2605  if (!ExtForOpnd) {
2606  // If so, create a new extension.
2607  DEBUG(dbgs() << "More operands to ext\n");
2608  Value *ValForExtOpnd = IsSExt ? TPT.createSExt(Ext, Opnd, Ext->getType())
2609  : TPT.createZExt(Ext, Opnd, Ext->getType());
2610  if (!isa<Instruction>(ValForExtOpnd)) {
2611  TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
2612  continue;
2613  }
2614  ExtForOpnd = cast<Instruction>(ValForExtOpnd);
2615  }
2616  if (Exts)
2617  Exts->push_back(ExtForOpnd);
2618  TPT.setOperand(ExtForOpnd, 0, Opnd);
2619 
2620  // Move the sign extension before the insertion point.
2621  TPT.moveBefore(ExtForOpnd, ExtOpnd);
2622  TPT.setOperand(ExtOpnd, OpIdx, ExtForOpnd);
2623  CreatedInstsCost += !TLI.isExtFree(ExtForOpnd);
2624  // If more sext are required, new instructions will have to be created.
2625  ExtForOpnd = nullptr;
2626  }
2627  if (ExtForOpnd == Ext) {
2628  DEBUG(dbgs() << "Extension is useless now\n");
2629  TPT.eraseInstruction(Ext);
2630  }
2631  return ExtOpnd;
2632 }
2633 
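// Illustrative example (hypothetical IR): for
//   %add = add nuw i32 %opnd, 1
//   %ext = zext i32 %add to i64
// promoteOperandForOther mutates %add to i64, extends its operands and
// replaces the uses of %ext, conceptually producing
//   %promoted = zext i32 %opnd to i64
//   %add      = add nuw i64 %promoted, 1
// which is exactly the shape the SExt/ZExt case of MatchOperationAddr below
// expects to re-match.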
2634 /// IsPromotionProfitable - Check whether or not promoting an instruction
2635 /// to a wider type was profitable.
2636 /// \p NewCost gives the cost of extension instructions created by the
2637 /// promotion.
2638 /// \p OldCost gives the cost of extension instructions before the promotion
2639 /// plus the number of instructions that have been
2640 /// matched in the addressing mode thanks to the promotion.
2641 /// \p PromotedOperand is the value that has been promoted.
2642 /// \return True if the promotion is profitable, false otherwise.
2643 bool AddressingModeMatcher::IsPromotionProfitable(
2644  unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
2645  DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost << '\n');
2646  // The cost of the new extensions is greater than the cost of the
2647  // old extension plus what we folded.
2648  // This is not profitable.
2649  if (NewCost > OldCost)
2650  return false;
2651  if (NewCost < OldCost)
2652  return true;
2653  // The promotion is neutral but it may help folding the sign extension in
2654  // loads for instance.
2655  // Check that we did not create an illegal instruction.
2656  return isPromotedInstructionLegal(TLI, DL, PromotedOperand);
2657 }
2658 
2659 /// MatchOperationAddr - Given an instruction or constant expr, see if we can
2660 /// fold the operation into the addressing mode. If so, update the addressing
2661 /// mode and return true, otherwise return false without modifying AddrMode.
2662 /// If \p MovedAway is not NULL, it contains the information of whether or
2663 /// not AddrInst has to be folded into the addressing mode on success.
2664 /// If \p MovedAway == true, \p AddrInst will not be part of the addressing
2665 /// mode because it has been moved away.
2666 /// Thus AddrInst must not be added in the matched instructions.
2667 /// This state can happen when AddrInst is a sext, since it may be moved away.
2668 /// Therefore, AddrInst may not be valid when MovedAway is true and it must
2669 /// not be referenced anymore.
2670 bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
2671  unsigned Depth,
2672  bool *MovedAway) {
2673  // Avoid exponential behavior on extremely deep expression trees.
2674  if (Depth >= 5) return false;
2675 
2676  // By default, all matched instructions stay in place.
2677  if (MovedAway)
2678  *MovedAway = false;
2679 
2680  switch (Opcode) {
2681  case Instruction::PtrToInt:
2682  // PtrToInt is always a noop, as we know that the int type is pointer sized.
2683  return MatchAddr(AddrInst->getOperand(0), Depth);
2684  case Instruction::IntToPtr: {
2685  auto AS = AddrInst->getType()->getPointerAddressSpace();
2686  auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
2687  // This inttoptr is a no-op if the integer type is pointer sized.
2688  if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy)
2689  return MatchAddr(AddrInst->getOperand(0), Depth);
2690  return false;
2691  }
2692  case Instruction::BitCast:
2693  // BitCast is always a noop, and we can handle it as long as it is
2694  // int->int or pointer->pointer (we don't want int<->fp or something).
2695  if ((AddrInst->getOperand(0)->getType()->isPointerTy() ||
2696  AddrInst->getOperand(0)->getType()->isIntegerTy()) &&
2697  // Don't touch identity bitcasts. These were probably put here by LSR,
2698  // and we don't want to mess around with them. Assume it knows what it
2699  // is doing.
2700  AddrInst->getOperand(0)->getType() != AddrInst->getType())
2701  return MatchAddr(AddrInst->getOperand(0), Depth);
2702  return false;
2703  case Instruction::AddrSpaceCast: {
2704  unsigned SrcAS
2705  = AddrInst->getOperand(0)->getType()->getPointerAddressSpace();
2706  unsigned DestAS = AddrInst->getType()->getPointerAddressSpace();
2707  if (TLI.isNoopAddrSpaceCast(SrcAS, DestAS))
2708  return MatchAddr(AddrInst->getOperand(0), Depth);
2709  return false;
2710  }
2711  case Instruction::Add: {
2712  // Check to see if we can merge in the RHS then the LHS. If so, we win.
2713  ExtAddrMode BackupAddrMode = AddrMode;
2714  unsigned OldSize = AddrModeInsts.size();
2715  // Start a transaction at this point.
2716  // The LHS may match but not the RHS.
2717  // Therefore, we need a higher level restoration point to undo partially
2718  // matched operation.
2719  TypePromotionTransaction::ConstRestorationPt LastKnownGood =
2720  TPT.getRestorationPoint();
2721 
2722  if (MatchAddr(AddrInst->getOperand(1), Depth+1) &&
2723  MatchAddr(AddrInst->getOperand(0), Depth+1))
2724  return true;
2725 
2726  // Restore the old addr mode info.
2727  AddrMode = BackupAddrMode;
2728  AddrModeInsts.resize(OldSize);
2729  TPT.rollback(LastKnownGood);
2730 
2731  // Otherwise this was over-aggressive. Try merging in the LHS then the RHS.
2732  if (MatchAddr(AddrInst->getOperand(0), Depth+1) &&
2733  MatchAddr(AddrInst->getOperand(1), Depth+1))
2734  return true;
2735 
2736  // Otherwise we definitely can't merge the ADD in.
2737  AddrMode = BackupAddrMode;
2738  AddrModeInsts.resize(OldSize);
2739  TPT.rollback(LastKnownGood);
2740  break;
2741  }
2742  //case Instruction::Or:
2743  // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
2744  //break;
2745  case Instruction::Mul:
2746  case Instruction::Shl: {
2747  // Can only handle X*C and X << C.
2748  ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
2749  if (!RHS)
2750  return false;
2751  int64_t Scale = RHS->getSExtValue();
2752  if (Opcode == Instruction::Shl)
2753  Scale = 1LL << Scale;
2754 
2755  return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth);
2756  }
2757  case Instruction::GetElementPtr: {
2758  // Scan the GEP. We check whether it contains constant offsets and at most
2759  // one variable offset.
2760  int VariableOperand = -1;
2761  unsigned VariableScale = 0;
2762 
2763  int64_t ConstantOffset = 0;
2764  gep_type_iterator GTI = gep_type_begin(AddrInst);
2765  for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
2766  if (StructType *STy = dyn_cast<StructType>(*GTI)) {
2767  const StructLayout *SL = DL.getStructLayout(STy);
2768  unsigned Idx =
2769  cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
2770  ConstantOffset += SL->getElementOffset(Idx);
2771  } else {
2772  uint64_t TypeSize = DL.getTypeAllocSize(GTI.getIndexedType());
2773  if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
2774  ConstantOffset += CI->getSExtValue()*TypeSize;
2775  } else if (TypeSize) { // Scales of zero don't do anything.
2776  // We only allow one variable index at the moment.
2777  if (VariableOperand != -1)
2778  return false;
2779 
2780  // Remember the variable index.
2781  VariableOperand = i;
2782  VariableScale = TypeSize;
2783  }
2784  }
2785  }
2786 
2787  // A common case is for the GEP to only do a constant offset. In this case,
2788  // just add it to the disp field and check validity.
2789  if (VariableOperand == -1) {
2790  AddrMode.BaseOffs += ConstantOffset;
2791  if (ConstantOffset == 0 ||
2792  TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) {
2793  // Check to see if we can fold the base pointer in too.
2794  if (MatchAddr(AddrInst->getOperand(0), Depth+1))
2795  return true;
2796  }
2797  AddrMode.BaseOffs -= ConstantOffset;
2798  return false;
2799  }
2800 
2801  // Save the valid addressing mode in case we can't match.
2802  ExtAddrMode BackupAddrMode = AddrMode;
2803  unsigned OldSize = AddrModeInsts.size();
2804 
2805  // See if the scale and offset amount is valid for this target.
2806  AddrMode.BaseOffs += ConstantOffset;
2807 
2808  // Match the base operand of the GEP.
2809  if (!MatchAddr(AddrInst->getOperand(0), Depth+1)) {
2810  // If it couldn't be matched, just stuff the value in a register.
2811  if (AddrMode.HasBaseReg) {
2812  AddrMode = BackupAddrMode;
2813  AddrModeInsts.resize(OldSize);
2814  return false;
2815  }
2816  AddrMode.HasBaseReg = true;
2817  AddrMode.BaseReg = AddrInst->getOperand(0);
2818  }
2819 
2820  // Match the remaining variable portion of the GEP.
2821  if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
2822  Depth)) {
2823  // If it couldn't be matched, try stuffing the base into a register
2824  // instead of matching it, and retrying the match of the scale.
2825  AddrMode = BackupAddrMode;
2826  AddrModeInsts.resize(OldSize);
2827  if (AddrMode.HasBaseReg)
2828  return false;
2829  AddrMode.HasBaseReg = true;
2830  AddrMode.BaseReg = AddrInst->getOperand(0);
2831  AddrMode.BaseOffs += ConstantOffset;
2832  if (!MatchScaledValue(AddrInst->getOperand(VariableOperand),
2833  VariableScale, Depth)) {
2834  // If even that didn't work, bail.
2835  AddrMode = BackupAddrMode;
2836  AddrModeInsts.resize(OldSize);
2837  return false;
2838  }
2839  }
2840 
2841  return true;
2842  }
2843  case Instruction::SExt:
2844  case Instruction::ZExt: {
2845  Instruction *Ext = dyn_cast<Instruction>(AddrInst);
2846  if (!Ext)
2847  return false;
2848 
2849  // Try to move this ext out of the way of the addressing mode.
2850  // Ask for a method for doing so.
2851  TypePromotionHelper::Action TPH =
2852  TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts);
2853  if (!TPH)
2854  return false;
2855 
2856  TypePromotionTransaction::ConstRestorationPt LastKnownGood =
2857  TPT.getRestorationPoint();
2858  unsigned CreatedInstsCost = 0;
2859  unsigned ExtCost = !TLI.isExtFree(Ext);
2860  Value *PromotedOperand =
2861  TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI);
2862  // SExt has been moved away.
2863  // Thus either it will be rematched later in the recursive calls or it is
2864  // gone. Anyway, we must not fold it into the addressing mode at this point.
2865  // E.g.,
2866  // op = add opnd, 1
2867  // idx = ext op
2868  // addr = gep base, idx
2869  // is now:
2870  // promotedOpnd = ext opnd <- no match here
2871  // op = promoted_add promotedOpnd, 1 <- match (later in recursive calls)
2872  // addr = gep base, op <- match
2873  if (MovedAway)
2874  *MovedAway = true;
2875 
2876  assert(PromotedOperand &&
2877  "TypePromotionHelper should have filtered out those cases");
2878 
2879  ExtAddrMode BackupAddrMode = AddrMode;
2880  unsigned OldSize = AddrModeInsts.size();
2881 
2882  if (!MatchAddr(PromotedOperand, Depth) ||
2883  // The total of the new cost is equal to the cost of the created
2884  // instructions.
2885  // The total of the old cost is equal to the cost of the extension plus
2886  // what we have saved in the addressing mode.
2887  !IsPromotionProfitable(CreatedInstsCost,
2888  ExtCost + (AddrModeInsts.size() - OldSize),
2889  PromotedOperand)) {
2890  AddrMode = BackupAddrMode;
2891  AddrModeInsts.resize(OldSize);
2892  DEBUG(dbgs() << "Sign extension does not pay off: rollback\n");
2893  TPT.rollback(LastKnownGood);
2894  return false;
2895  }
2896  return true;
2897  }
2898  }
2899  return false;
2900 }
2901 
2902 /// MatchAddr - If we can, try to add the value of 'Addr' into the current
2903 /// addressing mode. If Addr can't be added to AddrMode this returns false and
2904 /// leaves AddrMode unmodified. This assumes that Addr is either a pointer type
2905 /// or intptr_t for the target.
2906 ///
2907 bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
2908  // Start a transaction at this point that we will rollback if the matching
2909  // fails.
2910  TypePromotionTransaction::ConstRestorationPt LastKnownGood =
2911  TPT.getRestorationPoint();
2912  if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
2913  // Fold in immediates if legal for the target.
2914  AddrMode.BaseOffs += CI->getSExtValue();
2915  if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
2916  return true;
2917  AddrMode.BaseOffs -= CI->getSExtValue();
2918  } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
2919  // If this is a global variable, try to fold it into the addressing mode.
2920  if (!AddrMode.BaseGV) {
2921  AddrMode.BaseGV = GV;
2922  if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
2923  return true;
2924  AddrMode.BaseGV = nullptr;
2925  }
2926  } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
2927  ExtAddrMode BackupAddrMode = AddrMode;
2928  unsigned OldSize = AddrModeInsts.size();
2929 
2930  // Check to see if it is possible to fold this operation.
2931  bool MovedAway = false;
2932  if (MatchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
2933  // This instruction may have been moved away. If so, there is nothing
2934  // to check here.
2935  if (MovedAway)
2936  return true;
2937  // Okay, it's possible to fold this. Check to see if it is actually
2938  // *profitable* to do so. We use a simple cost model to avoid increasing
2939  // register pressure too much.
2940  if (I->hasOneUse() ||
2941  IsProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
2942  AddrModeInsts.push_back(I);
2943  return true;
2944  }
2945 
2946  // It isn't profitable to do this, roll back.
2947  //cerr << "NOT FOLDING: " << *I;
2948  AddrMode = BackupAddrMode;
2949  AddrModeInsts.resize(OldSize);
2950  TPT.rollback(LastKnownGood);
2951  }
2952  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
2953  if (MatchOperationAddr(CE, CE->getOpcode(), Depth))
2954  return true;
2955  TPT.rollback(LastKnownGood);
2956  } else if (isa<ConstantPointerNull>(Addr)) {
2957  // Null pointer gets folded without affecting the addressing mode.
2958  return true;
2959  }
2960 
2961  // Worst case, the target should support [reg] addressing modes. :)
2962  if (!AddrMode.HasBaseReg) {
2963  AddrMode.HasBaseReg = true;
2964  AddrMode.BaseReg = Addr;
2965  // Still check for legality in case the target supports [imm] but not [i+r].
2966  if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
2967  return true;
2968  AddrMode.HasBaseReg = false;
2969  AddrMode.BaseReg = nullptr;
2970  }
2971 
2972  // If the base register is already taken, see if we can do [r+r].
2973  if (AddrMode.Scale == 0) {
2974  AddrMode.Scale = 1;
2975  AddrMode.ScaledReg = Addr;
2976  if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
2977  return true;
2978  AddrMode.Scale = 0;
2979  AddrMode.ScaledReg = nullptr;
2980  }
2981  // Couldn't match.
2982  TPT.rollback(LastKnownGood);
2983  return false;
2984 }
2985 
2986 /// IsOperandAMemoryOperand - Check to see if all uses of OpVal by the specified
2987 /// inline asm call are due to memory operands. If so, return true, otherwise
2988 /// return false.
2989 static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
2990  const TargetMachine &TM) {
2991  const Function *F = CI->getParent()->getParent();
2992  const TargetLowering *TLI = TM.getSubtargetImpl(*F)->getTargetLowering();
2993  const TargetRegisterInfo *TRI = TM.getSubtargetImpl(*F)->getRegisterInfo();
2994  TargetLowering::AsmOperandInfoVector TargetConstraints =
2995  TLI->ParseConstraints(F->getParent()->getDataLayout(), TRI,
2996  ImmutableCallSite(CI));
2997  for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
2998  TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
2999 
3000  // Compute the constraint code and ConstraintType to use.
3001  TLI->ComputeConstraintToUse(OpInfo, SDValue());
3002 
3003  // If this asm operand is our Value*, and if it isn't an indirect memory
3004  // operand, we can't fold it!
3005  if (OpInfo.CallOperandVal == OpVal &&
3006  (OpInfo.ConstraintType != TargetLowering::C_Memory ||
3007  !OpInfo.isIndirect))
3008  return false;
3009  }
3010 
3011  return true;
3012 }
3013 
3014 /// FindAllMemoryUses - Recursively walk all the uses of I until we find a
3015 /// memory use. If we find an obviously non-foldable instruction, return true.
3016 /// Add the ultimately found memory instructions to MemoryUses.
3017 static bool FindAllMemoryUses(
3018  Instruction *I,
3019  SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses,
3020  SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetMachine &TM) {
3021  // If we already considered this instruction, we're done.
3022  if (!ConsideredInsts.insert(I).second)
3023  return false;
3024 
3025  // If this is an obviously unfoldable instruction, bail out.
3026  if (!MightBeFoldableInst(I))
3027  return true;
3028 
3029  // Loop over all the uses, recursively processing them.
3030  for (Use &U : I->uses()) {
3031  Instruction *UserI = cast<Instruction>(U.getUser());
3032 
3033  if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
3034  MemoryUses.push_back(std::make_pair(LI, U.getOperandNo()));
3035  continue;
3036  }
3037 
3038  if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
3039  unsigned opNo = U.getOperandNo();
3040  if (opNo == 0) return true; // Storing addr, not into addr.
3041  MemoryUses.push_back(std::make_pair(SI, opNo));
3042  continue;
3043  }
3044 
3045  if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
3046  InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
3047  if (!IA) return true;
3048 
3049  // If this is a memory operand, we're cool, otherwise bail out.
3050  if (!IsOperandAMemoryOperand(CI, IA, I, TM))
3051  return true;
3052  continue;
3053  }
3054 
3055  if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TM))
3056  return true;
3057  }
3058 
3059  return false;
3060 }
3061 
3062 /// ValueAlreadyLiveAtInst - Return true if Val is already known to be live at
3063 /// the use site that we're folding it into. If so, there is no cost to
3064 /// include it in the addressing mode. KnownLive1 and KnownLive2 are two values
3065 /// that we know are live at the instruction already.
3066 bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
3067  Value *KnownLive2) {
3068  // If Val is either of the known-live values, we know it is live!
3069  if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2)
3070  return true;
3071 
3072  // All values other than instructions and arguments (e.g. constants) are live.
3073  if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;
3074 
3075  // If Val is a constant-sized alloca in the entry block, it is live; this is
3076  // true because it is just a reference to the stack/frame pointer, which is
3077  // live for the whole function.
3078  if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
3079  if (AI->isStaticAlloca())
3080  return true;
3081 
3082  // Check to see if this value is already used in the memory instruction's
3083  // block. If so, it's already live into the block at the very least, so we
3084  // can reasonably fold it.
3085  return Val->isUsedInBasicBlock(MemoryInst->getParent());
3086 }
3087 
3088 /// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing
3089 /// mode of the machine to fold the specified instruction into a load or store
3090 /// that ultimately uses it. However, the specified instruction has multiple
3091 /// uses. Given this, it may actually increase register pressure to fold it
3092 /// into the load. For example, consider this code:
3093 ///
3094 /// X = ...
3095 /// Y = X+1
3096 /// use(Y) -> nonload/store
3097 /// Z = Y+1
3098 /// load Z
3099 ///
3100 /// In this case, Y has multiple uses, and can be folded into the load of Z
3101 /// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
3102 /// be live at the use(Y) line. If we don't fold Y into load Z, we use one
3103 /// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
3104 /// number of computations either.
3105 ///
3106 /// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
3107 /// X was live across 'load Z' for other reasons, we actually *would* want to
3108 /// fold the addressing mode in the Z case. This would make Y die earlier.
3109 bool AddressingModeMatcher::
3110 IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
3111  ExtAddrMode &AMAfter) {
3112  if (IgnoreProfitability) return true;
3113 
3114  // AMBefore is the addressing mode before this instruction was folded into it,
3115  // and AMAfter is the addressing mode after the instruction was folded. Get
3116  // the set of registers referenced by AMAfter and subtract out those
3117  // referenced by AMBefore: this is the set of values which folding in this
3118  // address extends the lifetime of.
3119  //
3120  // Note that there are only two potential values being referenced here,
3121  // BaseReg and ScaleReg (global addresses are always available, as are any
3122  // folded immediates).
3123  Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
3124 
3125  // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
3126  // lifetime wasn't extended by adding this instruction.
3127  if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
3128  BaseReg = nullptr;
3129  if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
3130  ScaledReg = nullptr;
3131 
3132  // If folding this instruction (and its subexprs) didn't extend any live
3133  // ranges, we're ok with it.
3134  if (!BaseReg && !ScaledReg)
3135  return true;
3136 
3137  // If all uses of this instruction are ultimately load/store/inlineasm's,
3138  // check to see if their addressing modes will include this instruction. If
3139  // so, we can fold it into all uses, so it doesn't matter if it has multiple
3140  // uses.
3141  SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
3142  SmallPtrSet<Instruction*, 16> ConsideredInsts;
3143  if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TM))
3144  return false; // Has a non-memory, non-foldable use!
3145 
3146  // Now that we know that all uses of this instruction are part of a chain of
3147  // computation involving only operations that could theoretically be folded
3148  // into a memory use, loop over each of these uses and see if they could
3149  // *actually* fold the instruction.
3150  SmallVector<Instruction*, 32> MatchedAddrModeInsts;
3151  for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
3152  Instruction *User = MemoryUses[i].first;
3153  unsigned OpNo = MemoryUses[i].second;
3154 
3155  // Get the access type of this use. If the use isn't a pointer, we don't
3156  // know what it accesses.
3157  Value *Address = User->getOperand(OpNo);
3158  PointerType *AddrTy = dyn_cast<PointerType>(Address->getType());
3159  if (!AddrTy)
3160  return false;
3161  Type *AddressAccessTy = AddrTy->getElementType();
3162  unsigned AS = AddrTy->getAddressSpace();
3163 
3164  // Do a match against the root of this address, ignoring profitability. This
3165  // will tell us if the addressing mode for the memory operation will
3166  // *actually* cover the shared instruction.
3167  ExtAddrMode Result;
3168  TypePromotionTransaction::ConstRestorationPt LastKnownGood =
3169  TPT.getRestorationPoint();
3170  AddressingModeMatcher Matcher(MatchedAddrModeInsts, TM, AddressAccessTy, AS,
3171  MemoryInst, Result, InsertedInsts,
3172  PromotedInsts, TPT);
3173  Matcher.IgnoreProfitability = true;
3174  bool Success = Matcher.MatchAddr(Address, 0);
3175  (void)Success; assert(Success && "Couldn't select *anything*?");
3176 
3177  // The match was only to check profitability; the changes made are not
3178  // part of the original matcher. Therefore, they should be dropped,
3179  // otherwise the original matcher will not be in the right state.
3180  TPT.rollback(LastKnownGood);
3181 
3182  // If the match didn't cover I, then it won't be shared by it.
3183  if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(),
3184  I) == MatchedAddrModeInsts.end())
3185  return false;
3186 
3187  MatchedAddrModeInsts.clear();
3188  }
3189 
3190  return true;
3191 }
3192 
3193 } // end anonymous namespace
3194 
3195 /// IsNonLocalValue - Return true if the specified value is defined in a
3196 /// different basic block than BB.
3197 static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
3198  if (Instruction *I = dyn_cast<Instruction>(V))
3199  return I->getParent() != BB;
3200  return false;
3201 }
3202 
3203 /// OptimizeMemoryInst - Load and Store Instructions often have
3204 /// addressing modes that can do significant amounts of computation. As such,
3205 /// instruction selection will try to get the load or store to do as much
3206 /// computation as possible for the program. The problem is that isel can only
3207 /// see within a single block. As such, we sink as much legal addressing mode
3208 /// stuff into the block as possible.
3209 ///
3210 /// This method is used to optimize both load/store and inline asms with memory
3211 /// operands.
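/// For illustration (a sketch with invented names), given an address computed
/// in a predecessor block:
/// \code
///   entry:
///     %addr = getelementptr i32* %base, i64 %idx
///     br label %use
///   use:
///     %v = load i32* %addr
/// \endcode
/// an equivalent "sunkaddr" computation is materialized next to the load in
/// %use, so that instruction selection can fold it into the load's addressing
/// mode.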
3212 bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
3213  Type *AccessTy, unsigned AddrSpace) {
3214  Value *Repl = Addr;
3215 
3216  // Try to collapse single-value PHI nodes. This is necessary to undo
3217  // unprofitable PRE transformations.
3218  SmallVector<Value*, 8> worklist;
3219  SmallPtrSet<Value*, 16> Visited;
3220  worklist.push_back(Addr);
3221 
3222  // Use a worklist to iteratively look through PHI nodes, and ensure that
3223  // the addressing mode obtained from the non-PHI roots of the graph
3224  // are equivalent.
3225  Value *Consensus = nullptr;
3226  unsigned NumUsesConsensus = 0;
3227  bool IsNumUsesConsensusValid = false;
3228  SmallVector<Instruction*, 16> AddrModeInsts;
3229  ExtAddrMode AddrMode;
3230  TypePromotionTransaction TPT;
3231  TypePromotionTransaction::ConstRestorationPt LastKnownGood =
3232  TPT.getRestorationPoint();
3233  while (!worklist.empty()) {
3234  Value *V = worklist.back();
3235  worklist.pop_back();
3236 
3237  // Break use-def graph loops.
3238  if (!Visited.insert(V).second) {
3239  Consensus = nullptr;
3240  break;
3241  }
3242 
3243  // For a PHI node, push all of its incoming values.
3244  if (PHINode *P = dyn_cast<PHINode>(V)) {
3245  for (Value *IncValue : P->incoming_values())
3246  worklist.push_back(IncValue);
3247  continue;
3248  }
3249 
3250  // For non-PHIs, determine the addressing mode being computed.
3251  SmallVector<Instruction*, 16> NewAddrModeInsts;
3252  ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
3253  V, AccessTy, AddrSpace, MemoryInst, NewAddrModeInsts, *TM,
3254  InsertedInsts, PromotedInsts, TPT);
3255 
3256  // This check is broken into two cases with very similar code to avoid using
3257  // getNumUses() as much as possible. Some values have a lot of uses, so
3258  // calling getNumUses() unconditionally caused a significant compile-time
3259  // regression.
3260  if (!Consensus) {
3261  Consensus = V;
3262  AddrMode = NewAddrMode;
3263  AddrModeInsts = NewAddrModeInsts;
3264  continue;
3265  } else if (NewAddrMode == AddrMode) {
3266  if (!IsNumUsesConsensusValid) {
3267  NumUsesConsensus = Consensus->getNumUses();
3268  IsNumUsesConsensusValid = true;
3269  }
3270 
3271  // Ensure that the obtained addressing mode is equivalent to that obtained
3272  // for all other roots of the PHI traversal. Also, when choosing one
3273  // such root as representative, select the one with the most uses in order
3274  // to keep the cost modeling heuristics in AddressingModeMatcher
3275  // applicable.
3276  unsigned NumUses = V->getNumUses();
3277  if (NumUses > NumUsesConsensus) {
3278  Consensus = V;
3279  NumUsesConsensus = NumUses;
3280  AddrModeInsts = NewAddrModeInsts;
3281  }
3282  continue;
3283  }
3284 
3285  Consensus = nullptr;
3286  break;
3287  }
3288 
3289  // If the addressing mode couldn't be determined, or if multiple different
3290  // ones were determined, bail out now.
3291  if (!Consensus) {
3292  TPT.rollback(LastKnownGood);
3293  return false;
3294  }
3295  TPT.commit();
3296 
3297  // Check to see if any of the instructions subsumed by this addr mode are
3298  // non-local to I's BB.
3299  bool AnyNonLocal = false;
3300  for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) {
3301  if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) {
3302  AnyNonLocal = true;
3303  break;
3304  }
3305  }
3306 
3307  // If all the instructions matched are already in this BB, don't do anything.
3308  if (!AnyNonLocal) {
3309  DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n");
3310  return false;
3311  }
3312 
3313  // Insert this computation right after this user. Since our caller is
3314  // scanning from the top of the BB to the bottom, reuses of the expr are
3315  // guaranteed to happen later.
3316  IRBuilder<> Builder(MemoryInst);
3317 
3318  // Now that we've determined the addressing expression we want to use and
3319  // know that we have to sink it into this block, check to see if we have
3320  // already done this for some other load/store instr in this block. If so,
3321  // reuse the computation.
3322  Value *&SunkAddr = SunkAddrs[Addr];
3323  if (SunkAddr) {
3324  DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
3325  << *MemoryInst << "\n");
3326  if (SunkAddr->getType() != Addr->getType())
3327  SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
3328  } else if (AddrSinkUsingGEPs ||
3329  (!AddrSinkUsingGEPs.getNumOccurrences() && TM &&
3330  TM->getSubtargetImpl(*MemoryInst->getParent()->getParent())
3331  ->useAA())) {
3332  // By default, we use the GEP-based method when AA is used later. This
3333  // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
3334  DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
3335  << *MemoryInst << "\n");
3336  Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
3337  Value *ResultPtr = nullptr, *ResultIndex = nullptr;
3338 
3339  // First, find the pointer.
3340  if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) {
3341  ResultPtr = AddrMode.BaseReg;
3342  AddrMode.BaseReg = nullptr;
3343  }
3344 
3345  if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) {
3346  // We can't add more than one pointer together, nor can we scale a
3347  // pointer (both of which seem meaningless).
3348  if (ResultPtr || AddrMode.Scale != 1)
3349  return false;
3350 
3351  ResultPtr = AddrMode.ScaledReg;
3352  AddrMode.Scale = 0;
3353  }
3354 
3355  if (AddrMode.BaseGV) {
3356  if (ResultPtr)
3357  return false;
3358 
3359  ResultPtr = AddrMode.BaseGV;
3360  }
3361 
3362  // If the real base value actually came from an inttoptr, then the matcher
3363  // will look through it and provide only the integer value. In that case,
3364  // use it here.
3365  if (!ResultPtr && AddrMode.BaseReg) {
3366  ResultPtr =
3367  Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(), "sunkaddr");
3368  AddrMode.BaseReg = nullptr;
3369  } else if (!ResultPtr && AddrMode.Scale == 1) {
3370  ResultPtr =
3371  Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(), "sunkaddr");
3372  AddrMode.Scale = 0;
3373  }
3374 
3375  if (!ResultPtr &&
3376  !AddrMode.BaseReg && !AddrMode.Scale && !AddrMode.BaseOffs) {
3377  SunkAddr = Constant::getNullValue(Addr->getType());
3378  } else if (!ResultPtr) {
3379  return false;
3380  } else {
3381  Type *I8PtrTy =
3382  Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace());
3383  Type *I8Ty = Builder.getInt8Ty();
3384 
3385  // Start with the base register. Do this first so that subsequent address
3386  // matching finds it last, which will prevent it from trying to match it
3387  // as the scaled value in case it happens to be a mul. That would be
3388  // problematic if we've sunk a different mul for the scale, because then
3389  // we'd end up sinking both muls.
3390  if (AddrMode.BaseReg) {
3391  Value *V = AddrMode.BaseReg;
3392  if (V->getType() != IntPtrTy)
3393  V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
3394 
3395  ResultIndex = V;
3396  }
3397 
3398  // Add the scale value.
3399  if (AddrMode.Scale) {
3400  Value *V = AddrMode.ScaledReg;
3401  if (V->getType() == IntPtrTy) {
3402  // done.
3403  } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
3404  cast<IntegerType>(V->getType())->getBitWidth()) {
3405  V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
3406  } else {
3407  // It is only safe to sign extend the BaseReg if we know that the math
3408  // required to create it did not overflow before we extend it. Since
3409  // the original IR value was tossed in favor of a constant back when
3410  // the AddrMode was created we need to bail out gracefully if widths
3411  // do not match instead of extending it.
3412  Instruction *I = dyn_cast_or_null<Instruction>(ResultIndex);
3413  if (I && (ResultIndex != AddrMode.BaseReg))
3414  I->eraseFromParent();
3415  return false;
3416  }
3417 
3418  if (AddrMode.Scale != 1)
3419  V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
3420  "sunkaddr");
3421  if (ResultIndex)
3422  ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr");
3423  else
3424  ResultIndex = V;
3425  }
3426 
3427  // Add in the Base Offset if present.
3428  if (AddrMode.BaseOffs) {
3429  Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
3430  if (ResultIndex) {
3431  // We need to add this separately from the scale above to help with
3432  // SDAG consecutive load/store merging.
3433  if (ResultPtr->getType() != I8PtrTy)
3434  ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
3435  ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
3436  }
3437 
3438  ResultIndex = V;
3439  }
3440 
3441  if (!ResultIndex) {
3442  SunkAddr = ResultPtr;
3443  } else {
3444  if (ResultPtr->getType() != I8PtrTy)
3445  ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
3446  SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
3447  }
3448 
3449  if (SunkAddr->getType() != Addr->getType())
3450  SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
3451  }
3452  } else {
3453  DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
3454  << *MemoryInst << "\n");
3455  Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
3456  Value *Result = nullptr;
3457 
3458  // Start with the base register. Do this first so that subsequent address
3459  // matching finds it last, which will prevent it from trying to match it
3460  // as the scaled value in case it happens to be a mul. That would be
3461  // problematic if we've sunk a different mul for the scale, because then
3462  // we'd end up sinking both muls.
3463  if (AddrMode.BaseReg) {
3464  Value *V = AddrMode.BaseReg;
3465  if (V->getType()->isPointerTy())
3466  V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
3467  if (V->getType() != IntPtrTy)
3468  V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
3469  Result = V;
3470  }
3471 
3472  // Add the scale value.
3473  if (AddrMode.Scale) {
3474  Value *V = AddrMode.ScaledReg;
3475  if (V->getType() == IntPtrTy) {
3476  // done.
3477  } else if (V->getType()->isPointerTy()) {
3478  V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
3479  } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
3480  cast<IntegerType>(V->getType())->getBitWidth()) {
3481  V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
3482  } else {
3483  // It is only safe to sign extend the BaseReg if we know that the math
3484  // required to create it did not overflow before we extend it. Since
3485  // the original IR value was tossed in favor of a constant back when
3486  // the AddrMode was created we need to bail out gracefully if widths
3487  // do not match instead of extending it.
3488  Instruction *I = dyn_cast_or_null<Instruction>(Result);
3489  if (I && (Result != AddrMode.BaseReg))
3490  I->eraseFromParent();
3491  return false;
3492  }
3493  if (AddrMode.Scale != 1)
3494  V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
3495  "sunkaddr");
3496  if (Result)
3497  Result = Builder.CreateAdd(Result, V, "sunkaddr");
3498  else
3499  Result = V;
3500  }
3501 
3502  // Add in the BaseGV if present.
3503  if (AddrMode.BaseGV) {
3504  Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr");
3505  if (Result)
3506  Result = Builder.CreateAdd(Result, V, "sunkaddr");
3507  else
3508  Result = V;
3509  }
3510 
3511  // Add in the Base Offset if present.
3512  if (AddrMode.BaseOffs) {
3513  Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
3514  if (Result)
3515  Result = Builder.CreateAdd(Result, V, "sunkaddr");
3516  else
3517  Result = V;
3518  }
3519 
3520  if (!Result)
3521  SunkAddr = Constant::getNullValue(Addr->getType());
3522  else
3523  SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
3524  }
3525 
3526  MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
3527 
3528  // If we have no uses, recursively delete the value and all dead instructions
3529  // using it.
3530  if (Repl->use_empty()) {
3531  // This can cause recursive deletion, which can invalidate our iterator.
3532  // Use a WeakVH to hold onto it in case this happens.
3533  WeakVH IterHandle(CurInstIterator);
3534  BasicBlock *BB = CurInstIterator->getParent();
3535 
3536  RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo);
3537 
3538  if (IterHandle != CurInstIterator) {
3539  // If the iterator instruction was recursively deleted, start over at the
3540  // start of the block.
3541  CurInstIterator = BB->begin();
3542  SunkAddrs.clear();
3543  }
3544  }
3545  ++NumMemoryInsts;
3546  return true;
3547 }
3548 
3549 /// OptimizeInlineAsmInst - If there are any memory operands, use
3550 /// OptimizeMemoryInst to sink their address computing into the block when
3551 /// possible / profitable.
3552 bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
3553  bool MadeChange = false;
3554 
3555  const TargetRegisterInfo *TRI =
3556  TM->getSubtargetImpl(*CS->getParent()->getParent())->getRegisterInfo();
3557  TargetLowering::AsmOperandInfoVector TargetConstraints =
3558  TLI->ParseConstraints(*DL, TRI, CS);
3559  unsigned ArgNo = 0;
3560  for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
3561  TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
3562 
3563  // Compute the constraint code and ConstraintType to use.
3564  TLI->ComputeConstraintToUse(OpInfo, SDValue());
3565 
3566  if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
3567  OpInfo.isIndirect) {
3568  Value *OpVal = CS->getArgOperand(ArgNo++);
3569  MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u);
3570  } else if (OpInfo.Type == InlineAsm::isInput)
3571  ArgNo++;
3572  }
3573 
3574  return MadeChange;
3575 }
3576 
3577 /// \brief Check if all the uses of \p Inst are equivalent (or free) zero or
3578 /// sign extensions.
3579 static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) {
3580  assert(!Inst->use_empty() && "Input must have at least one use");
3581  const Instruction *FirstUser = cast<Instruction>(*Inst->user_begin());
3582  bool IsSExt = isa<SExtInst>(FirstUser);
3583  Type *ExtTy = FirstUser->getType();
3584  for (const User *U : Inst->users()) {
3585  const Instruction *UI = cast<Instruction>(U);
3586  if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
3587  return false;
3588  Type *CurTy = UI->getType();
3589  // Same input and output types: Same instruction after CSE.
3590  if (CurTy == ExtTy)
3591  continue;
3592 
3593  // If IsSExt is true, we are in this situation:
3594  // a = Inst
3595  // b = sext ty1 a to ty2
3596  // c = sext ty1 a to ty3
3597  // Assuming ty2 is shorter than ty3, this could be turned into:
3598  // a = Inst
3599  // b = sext ty1 a to ty2
3600  // c = sext ty2 b to ty3
3601  // However, the last sext is not free.
3602  if (IsSExt)
3603  return false;
3604 
3605  // This is a ZExt, maybe this is free to extend from one type to another.
3606  // In that case, we would not account for a different use.
3607  Type *NarrowTy;
3608  Type *LargeTy;
3609  if (ExtTy->getScalarType()->getIntegerBitWidth() >
3610  CurTy->getScalarType()->getIntegerBitWidth()) {
3611  NarrowTy = CurTy;
3612  LargeTy = ExtTy;
3613  } else {
3614  NarrowTy = ExtTy;
3615  LargeTy = CurTy;
3616  }
3617 
3618  if (!TLI.isZExtFree(NarrowTy, LargeTy))
3619  return false;
3620  }
3621  // All uses are the same or can be derived from one another for free.
3622  return true;
3623 }
3624 
3625 /// \brief Try to form ExtLd by promoting \p Exts until they reach a
3626 /// load instruction.
3627 /// If an ext(load) can be formed, it is returned via \p LI for the load
3628 /// and \p Inst for the extension.
3629 /// Otherwise LI == nullptr and Inst == nullptr.
3630 /// When some promotion happened, \p TPT contains the proper state to
3631 /// revert them.
3632 ///
3633 /// \return true when promoting was necessary to expose the ext(load)
3634 /// opportunity, false otherwise.
3635 ///
3636 /// Example:
3637 /// \code
3638 /// %ld = load i32* %addr
3639 /// %add = add nuw i32 %ld, 4
3640 /// %zext = zext i32 %add to i64
3641 /// \endcode
3642 /// =>
3643 /// \code
3644 /// %ld = load i32* %addr
3645 /// %zext = zext i32 %ld to i64
3646 /// %add = add nuw i64 %zext, 4
3647 /// \endcode
3648 /// Thanks to the promotion, we can match zext(load i32*) to i64.
3649 bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT,
3650  LoadInst *&LI, Instruction *&Inst,
3651  const SmallVectorImpl<Instruction *> &Exts,
3652  unsigned CreatedInstsCost = 0) {
3653  // Iterate over all the extensions to see if one forms an ext(load).
3654  for (auto I : Exts) {
3655  // Check if we directly have ext(load).
3656  if ((LI = dyn_cast<LoadInst>(I->getOperand(0)))) {
3657  Inst = I;
3658  // No promotion happened here.
3659  return false;
3660  }
3661  // Check whether or not we want to do any promotion.
3662  if (!TLI || !TLI->enableExtLdPromotion() || DisableExtLdPromotion)
3663  continue;
3664  // Get the action to perform the promotion.
3665  TypePromotionHelper::Action TPH = TypePromotionHelper::getAction(
3666  I, InsertedInsts, *TLI, PromotedInsts);
3667  // Check if we can promote.
3668  if (!TPH)
3669  continue;
3670  // Save the current state.
3671  TypePromotionTransaction::ConstRestorationPt LastKnownGood =
3672  TPT.getRestorationPoint();
3673  SmallVector<Instruction *, 4> NewExts;
3674  unsigned NewCreatedInstsCost = 0;
3675  unsigned ExtCost = !TLI->isExtFree(I);
3676  // Promote.
3677  Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost,
3678  &NewExts, nullptr, *TLI);
3679  assert(PromotedVal &&
3680  "TypePromotionHelper should have filtered out those cases");
3681 
3682  // We will be able to merge only one extension into a load.
3683  // Therefore, if we have more than one new extension we heuristically
3684  // cut this search path, because it means we degrade the code quality.
3685  // With exactly 2, the transformation is neutral, because we will merge
3686  // one extension but leave one. However, we optimistically keep going,
3687  // because the new extension may be removed too.
3688  long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
3689  TotalCreatedInstsCost -= ExtCost;
3690  if (!StressExtLdPromotion &&
3691  (TotalCreatedInstsCost > 1 ||
3692  !isPromotedInstructionLegal(*TLI, *DL, PromotedVal))) {
3693  // The promotion is not profitable, rollback to the previous state.
3694  TPT.rollback(LastKnownGood);
3695  continue;
3696  }
3697  // The promotion is profitable.
3698  // Check if it exposes an ext(load).
3699  (void)ExtLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInstsCost);
3700  if (LI && (StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
3701  // If we have created a new extension, i.e., now we have two
3702  // extensions. We must make sure one of them is merged with
3703  // the load, otherwise we may degrade the code quality.
3704  (LI->hasOneUse() || hasSameExtUse(LI, *TLI))))
3705  // Promotion happened.
3706  return true;
3707  // If this does not help to expose an ext(load) then, rollback.
3708  TPT.rollback(LastKnownGood);
3709  }
3710  // None of the extensions can form an ext(load).
3711  LI = nullptr;
3712  Inst = nullptr;
3713  return false;
3714 }
3715 
3716 /// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same
3717 /// basic block as the load, unless conditions are unfavorable. This allows
3718 /// SelectionDAG to fold the extend into the load.
3719 /// \p I[in/out] the extension may be modified during the process if some
3720 /// promotions apply.
3721 ///
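/// For illustration (invented names), an extension sitting in a different
/// block from its load:
/// \code
///   bb1:
///     %ld = load i16* %p
///     br label %bb2
///   bb2:
///     %ext = sext i16 %ld to i32
/// \endcode
/// is moved so that %ext immediately follows %ld in bb1, letting SelectionDAG
/// select a single sign-extending load.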
3722 bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *&I) {
3723  // Try to promote a chain of computation if doing so allows us to form
3724  // an extended load.
3725  TypePromotionTransaction TPT;
3726  TypePromotionTransaction::ConstRestorationPt LastKnownGood =
3727  TPT.getRestorationPoint();
3728  SmallVector<Instruction *, 1> Exts;
3729  Exts.push_back(I);
3730  // Look for a load being extended.
3731  LoadInst *LI = nullptr;
3732  Instruction *OldExt = I;
3733  bool HasPromoted = ExtLdPromotion(TPT, LI, I, Exts);
3734  if (!LI || !I) {
3735  assert(!HasPromoted && !LI && "If we did not match any load instruction "
3736  "the code must remain the same");
3737  I = OldExt;
3738  return false;
3739  }
3740 
3741  // If they're already in the same block, there's nothing to do.
3742  // Make the cheap checks first if we did not promote.
3743  // If we promoted, we need to check if it is indeed profitable.
3744  if (!HasPromoted && LI->getParent() == I->getParent())
3745  return false;
3746 
3747  EVT VT = TLI->getValueType(*DL, I->getType());
3748  EVT LoadVT = TLI->getValueType(*DL, LI->getType());
3749 
3750  // If the load has other users and the truncate is not free, this probably
3751  // isn't worthwhile.
3752  if (!LI->hasOneUse() && TLI &&
3753  (TLI->isTypeLegal(LoadVT) || !TLI->isTypeLegal(VT)) &&
3754  !TLI->isTruncateFree(I->getType(), LI->getType())) {
3755  I = OldExt;
3756  TPT.rollback(LastKnownGood);
3757  return false;
3758  }
3759 
3760  // Check whether the target supports casts folded into loads.
3761  unsigned LType;
3762  if (isa<ZExtInst>(I))
3763  LType = ISD::ZEXTLOAD;
3764  else {
3765  assert(isa<SExtInst>(I) && "Unexpected ext type!");
3766  LType = ISD::SEXTLOAD;
3767  }
3768  if (TLI && !TLI->isLoadExtLegal(LType, VT, LoadVT)) {
3769  I = OldExt;
3770  TPT.rollback(LastKnownGood);
3771  return false;
3772  }
3773 
3774  // Move the extend into the same block as the load, so that SelectionDAG
3775  // can fold it.
3776  TPT.commit();
3777  I->removeFromParent();
3778  I->insertAfter(LI);
3779  ++NumExtsMoved;
3780  return true;
3781 }
3782 
3783 bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
3784  BasicBlock *DefBB = I->getParent();
3785 
3786  // If the result of a {s|z}ext and its source are both live out, rewrite all
3787  // other uses of the source with result of extension.
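  // For illustration (a sketch with invented names): if %src and
  //   %ext = zext i32 %src to i64
  // are both live out of this block, a use of %src in another block is
  // rewritten to use "trunc i64 %ext to i32" instead, so only the extended
  // value has to stay live across the block boundary.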
3788  Value *Src = I->getOperand(0);
3789  if (Src->hasOneUse())
3790  return false;
3791 
3792  // Only do this xform if truncating is free.
3793  if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
3794  return false;
3795 
3796  // Only safe to perform the optimization if the source is also defined in
3797  // this block.
3798  if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
3799  return false;
3800 
3801  bool DefIsLiveOut = false;
3802  for (User *U : I->users()) {
3803  Instruction *UI = cast<Instruction>(U);
3804 
3805  // Figure out which BB this ext is used in.
3806  BasicBlock *UserBB = UI->getParent();
3807  if (UserBB == DefBB) continue;
3808  DefIsLiveOut = true;
3809  break;
3810  }
3811  if (!DefIsLiveOut)
3812  return false;
3813 
3814  // Make sure none of the uses are PHI nodes.
3815  for (User *U : Src->users()) {
3816  Instruction *UI = cast<Instruction>(U);
3817  BasicBlock *UserBB = UI->getParent();
3818  if (UserBB == DefBB) continue;
3819  // Be conservative. We don't want this xform to end up introducing
3820  // reloads just before load / store instructions.
3821  if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI))
3822  return false;
3823  }
3824 
3825  // InsertedTruncs - Only insert one trunc in each block once.
3826  DenseMap<BasicBlock*, Instruction*> InsertedTruncs;
3827 
3828  bool MadeChange = false;
3829  for (Use &U : Src->uses()) {
3830  Instruction *User = cast<Instruction>(U.getUser());
3831 
3832  // Figure out which BB this ext is used in.
3833  BasicBlock *UserBB = User->getParent();
3834  if (UserBB == DefBB) continue;
3835 
3836  // Both src and def are live in this block. Rewrite the use.
3837  Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
3838 
3839  if (!InsertedTrunc) {
3840  BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
3841  InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
3842  InsertedInsts.insert(InsertedTrunc);
3843  }
3844 
3845  // Replace a use of the {s|z}ext source with a use of the result.
3846  U = InsertedTrunc;
3847  ++NumExtUses;
3848  MadeChange = true;
3849  }
3850 
3851  return MadeChange;
3852 }
3853 
3854 /// isFormingBranchFromSelectProfitable - Returns true if a SelectInst should be
3855 /// turned into an explicit branch.
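/// For illustration (invented names), a select fed by a compare of a freshly
/// loaded value, e.g.
/// \code
///   %v = load i32* %p
///   %c = icmp eq i32 %v, 0
///   %r = select i1 %c, i32 %a, i32 %b
/// \endcode
/// is the kind of pattern considered profitable here: a correctly predicted
/// branch avoids stalling on the load, whereas a cmov would have to wait for
/// it.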
3856 static bool isFormingBranchFromSelectProfitable(SelectInst *SI) {
3857  // FIXME: This should use the same heuristics as IfConversion to determine
3858  // whether a select is better represented as a branch. This requires that
3859  // branch probability metadata is preserved for the select, which is not the
3860  // case currently.
3861 
3862  CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
3863 
3864  // If the branch is predicted right, an out of order CPU can avoid blocking on
3865  // the compare. Emit cmovs on compares with a memory operand as branches to
3866  // avoid stalls on the load from memory. If the compare has more than one use
3867  // there's probably another cmov or setcc around so it's not worth emitting a
3868  // branch.
3869  if (!Cmp)
3870  return false;
3871 
3872  Value *CmpOp0 = Cmp->getOperand(0);
3873  Value *CmpOp1 = Cmp->getOperand(1);
3874 
3875  // We check that the memory operand has one use to avoid uses of the loaded
3876  // value directly after the compare, making branches unprofitable.
3877  return Cmp->hasOneUse() &&
3878  ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) ||
3879  (isa<LoadInst>(CmpOp1) && CmpOp1->hasOneUse()));
3880 }
3881 
3882 
3883 /// If we have a SelectInst that will likely profit from branch prediction,
3884 /// turn it into a branch.
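/// Sketch of the rewrite performed below (illustrative; %start stands for the
/// block that originally held the select):
/// \code
///   %r = select i1 %c, i32 %a, i32 %b
/// \endcode
/// becomes
/// \code
///     br i1 %c, label %select.end, label %select.mid
///   select.mid:
///     br label %select.end
///   select.end:
///     %r = phi i32 [ %a, %start ], [ %b, %select.mid ]
/// \endcode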
3885 bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) {
3886  bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
3887 
3888  // Can we convert the 'select' to CF ?
3889  if (DisableSelectToBranch || OptSize || !TLI || VectorCond)
3890  return false;
3891 
3892  TargetLowering::SelectSupportKind SelectKind;
3893  if (VectorCond)
3894  SelectKind = TargetLowering::VectorMaskSelect;
3895  else if (SI->getType()->isVectorTy())
3896  SelectKind = TargetLowering::ScalarCondVectorVal;
3897  else
3898  SelectKind = TargetLowering::ScalarValSelect;
3899 
3900  // Do we have efficient codegen support for this kind of 'select'?
3901  if (TLI->isSelectSupported(SelectKind)) {
3902  // We have efficient codegen support for the select instruction.
3903  // Check if it is profitable to keep this 'select'.
3904  if (!TLI->isPredictableSelectExpensive() ||
3905  !isFormingBranchFromSelectProfitable(SI))
3906  return false;
3907  }
3908 
3909  ModifiedDT = true;
3910 
3911  // First, we split the block containing the select into 2 blocks.
3912  BasicBlock *StartBlock = SI->getParent();
3913  BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(SI));
3914  BasicBlock *NextBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
3915 
3916  // Create a new block serving as the landing pad for the branch.
3917  BasicBlock *SmallBlock = BasicBlock::Create(SI->getContext(), "select.mid",
3918  NextBlock->getParent(), NextBlock);
3919 
3920  // Move the unconditional branch from the block with the select in it into our
3921  // landing pad block.
3922  StartBlock->getTerminator()->eraseFromParent();
3923  BranchInst::Create(NextBlock, SmallBlock);
3924 
3925  // Insert the real conditional branch based on the original condition.
3926  BranchInst::Create(NextBlock, SmallBlock, SI->getCondition(), SI);
3927 
3928  // The select itself is replaced with a PHI Node.
3929  PHINode *PN = PHINode::Create(SI->getType(), 2, "", NextBlock->begin());
3930  PN->takeName(SI);
3931  PN->addIncoming(SI->getTrueValue(), StartBlock);
3932  PN->addIncoming(SI->getFalseValue(), SmallBlock);
3933  SI->replaceAllUsesWith(PN);
3934  SI->eraseFromParent();
3935 
3936  // Instruct OptimizeBlock to skip to the next block.
3937  CurInstIterator = StartBlock->end();
3938  ++NumSelectsExpanded;
3939  return true;
3940 }
3941 
3942 static bool isBroadcastShuffle(ShuffleVectorInst *SVI) {
3943  SmallVector<int, 16> Mask(SVI->getShuffleMask());
3944  int SplatElem = -1;
3945  for (unsigned i = 0; i < Mask.size(); ++i) {
3946  if (SplatElem != -1 && Mask[i] != -1 && Mask[i] != SplatElem)
3947  return false;
3948  SplatElem = Mask[i];
3949  }
3950 
3951  return true;
3952 }
3953 
3954 /// Some targets have expensive vector shifts if the lanes aren't all the same
3955 /// (e.g. x86 only introduced "vpsllvd" and friends with AVX2). In these cases
3956 /// it's often worth sinking a shufflevector splat down to its use so that
3957 /// codegen can spot all lanes are identical.
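/// For illustration (invented names), given a splat defined in another block:
/// \code
///   def:
///     %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
///     br label %use
///   use:
///     %s = shl <4 x i32> %x, %splat
/// \endcode
/// a copy of the shuffle is recreated in %use next to the shift, so codegen in
/// that block can see that every shift amount is the same lane value.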
3958 bool CodeGenPrepare::OptimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
3959  BasicBlock *DefBB = SVI->getParent();
3960 
3961  // Only do this xform if variable vector shifts are particularly expensive.
3962  if (!TLI || !TLI->isVectorShiftByScalarCheap(SVI->getType()))
3963  return false;
3964 
3965  // We only expect better codegen by sinking a shuffle if we can recognise a
3966  // constant splat.
3967  if (!isBroadcastShuffle(SVI))
3968  return false;
3969 
3970  // InsertedShuffles - Only insert a shuffle in each block once.
3971  DenseMap<BasicBlock*, Instruction*> InsertedShuffles;
3972 
3973  bool MadeChange = false;
3974  for (User *U : SVI->users()) {
3975  Instruction *UI = cast<Instruction>(U);
3976 
3977  // Figure out which BB this shuffle is used in.
3978  BasicBlock *UserBB = UI->getParent();
3979  if (UserBB == DefBB) continue;
3980 
3981  // For now only apply this when the splat is used by a shift instruction.
3982  if (!UI->isShift()) continue;
3983 
3984  // Everything checks out, sink the shuffle if the user's block doesn't
3985  // already have a copy.
3986  Instruction *&InsertedShuffle = InsertedShuffles[UserBB];
3987 
3988  if (!InsertedShuffle) {
3989  BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
3990  InsertedShuffle = new ShuffleVectorInst(SVI->getOperand(0),
3991  SVI->getOperand(1),
3992  SVI->getOperand(2), "", InsertPt);
3993  }
3994 
3995  UI->replaceUsesOfWith(SVI, InsertedShuffle);
3996  MadeChange = true;
3997  }
3998 
3999  // If we removed all uses, nuke the shuffle.
4000  if (SVI->use_empty()) {
4001  SVI->eraseFromParent();
4002  MadeChange = true;
4003  }
4004 
4005  return MadeChange;
4006 }
4007 
4008 namespace {
4009 /// \brief Helper class to promote a scalar operation to a vector one.
4010 /// This class is used to move an extractelement transition downward.
4011 /// E.g.,
4012 /// a = vector_op <2 x i32>
4013 /// b = extractelement <2 x i32> a, i32 0
4014 /// c = scalar_op b
4015 /// store c
4016 ///
4017 /// =>
4018 /// a = vector_op <2 x i32>
4019 /// c = vector_op a (equivalent to scalar_op on the related lane)
4020 /// * d = extractelement <2 x i32> c, i32 0
4021 /// * store d
4022 /// Assuming both extractelement and store can be combined, we get rid of the
4023 /// transition.
4024 class VectorPromoteHelper {
4025  /// DataLayout associated with the current module.
4026  const DataLayout &DL;
4027 
4028  /// Used to perform some checks on the legality of vector operations.
4029  const TargetLowering &TLI;
4030 
4031  /// Used to estimate the cost of the promoted chain.
4032  const TargetTransformInfo &TTI;
4033 
4034  /// The transition being moved downwards.
4035  Instruction *Transition;
4036  /// The sequence of instructions to be promoted.
4037  SmallVector<Instruction *, 4> InstsToBePromoted;
4038  /// Cost of combining a store and an extract.
4039  unsigned StoreExtractCombineCost;
4040  /// Instruction that will be combined with the transition.
4041  Instruction *CombineInst;
4042 
4043  /// \brief The instruction that represents the current end of the transition.
4044  /// Since we are faking the promotion until we reach the end of the chain
4045  /// of computation, we need a way to get the current end of the transition.
4046  Instruction *getEndOfTransition() const {
4047  if (InstsToBePromoted.empty())
4048  return Transition;
4049  return InstsToBePromoted.back();
4050  }
4051 
4052  /// \brief Return the index of the original value in the transition.
4053  /// E.g., for "extractelement <2 x i32> c, i32 1" the original value,
4054  /// c, is at index 0.
4055  unsigned getTransitionOriginalValueIdx() const {
4056  assert(isa<ExtractElementInst>(Transition) &&
4057  "Other kind of transitions are not supported yet");
4058  return 0;
4059  }
4060 
4061  /// \brief Return the index of the index in the transition.
4062  /// E.g., for "extractelement <2 x i32> c, i32 0" the index
4063  /// is at index 1.
4064  unsigned getTransitionIdx() const {
4065  assert(isa<ExtractElementInst>(Transition) &&
4066  "Other kind of transitions are not supported yet");
4067  return 1;
4068  }
4069 
4070  /// \brief Get the type of the transition.
4071  /// This is the type of the original value.
4072  /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the
4073  /// transition is <2 x i32>.
4074  Type *getTransitionType() const {
4075  return Transition->getOperand(getTransitionOriginalValueIdx())->getType();
4076  }
4077 
4078  /// \brief Promote \p ToBePromoted by moving \p Def downward through it.
4079  /// I.e., we have the following sequence:
4080  /// Def = Transition <ty1> a to <ty2>
4081  /// b = ToBePromoted <ty2> Def, ...
4082  /// =>
4083  /// b = ToBePromoted <ty1> a, ...
4084  /// Def = Transition <ty1> ToBePromoted to <ty2>
4085  void promoteImpl(Instruction *ToBePromoted);
4086 
4087  /// \brief Check whether or not it is profitable to promote all the
4088  /// instructions enqueued to be promoted.
4089  bool isProfitableToPromote() {
4090  Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx());
4091  unsigned Index = isa<ConstantInt>(ValIdx)
4092  ? cast<ConstantInt>(ValIdx)->getZExtValue()
4093  : -1;
4094  Type *PromotedType = getTransitionType();
4095 
4096  StoreInst *ST = cast<StoreInst>(CombineInst);
4097  unsigned AS = ST->getPointerAddressSpace();
4098  unsigned Align = ST->getAlignment();
4099  // Check if this store is supported.
4100  if (!TLI.allowsMisalignedMemoryAccesses(
4101  TLI.getValueType(DL, ST->getValueOperand()->getType()), AS,
4102  Align)) {
4103  // If this is not supported, there is no way we can combine
4104  // the extract with the store.
4105  return false;
4106  }
4107 
4108  // The scalar chain of computation has to pay for the transition from
4109  // scalar to vector.
4110  // The vector chain has to account for the combining cost.
4111  uint64_t ScalarCost =
4112  TTI.getVectorInstrCost(Transition->getOpcode(), PromotedType, Index);
4113  uint64_t VectorCost = StoreExtractCombineCost;
4114  for (const auto &Inst : InstsToBePromoted) {
4115  // Compute the cost.
4116  // By construction, all instructions being promoted are arithmetic ones.
4117  // Moreover, one argument is a constant that can be viewed as a splat
4118  // constant.
4119  Value *Arg0 = Inst->getOperand(0);
4120  bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) ||
4121  isa<ConstantFP>(Arg0);
4122  TargetTransformInfo::OperandValueKind Arg0OVK = IsArg0Constant ?
4123  TargetTransformInfo::OK_UniformConstantValue : TargetTransformInfo::OK_AnyValue;
4124  Value *Arg1 = Inst->getOperand(1);
4125  bool IsArg1Constant = isa<UndefValue>(Arg1) || isa<ConstantInt>(Arg1) || isa<ConstantFP>(Arg1);
4126  TargetTransformInfo::OperandValueKind Arg1OVK = IsArg1Constant ?
4127  TargetTransformInfo::OK_UniformConstantValue : TargetTransformInfo::OK_AnyValue;
4128  ScalarCost += TTI.getArithmeticInstrCost(
4129  Inst->getOpcode(), Inst->getType(), Arg0OVK, Arg1OVK);
4130  VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType,
4131  Arg0OVK, Arg1OVK);
4132  }
4133  DEBUG(dbgs() << "Estimated cost of computation to be promoted:\nScalar: "
4134  << ScalarCost << "\nVector: " << VectorCost << '\n');
4135  return ScalarCost > VectorCost;
4136  }
4137 
4138  /// \brief Generate a constant vector with \p Val with the same
4139  /// number of elements as the transition.
4140  /// \p UseSplat defines whether or not \p Val should be replicated
4141  /// across the whole vector.
4142  /// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>,
4143  /// otherwise we generate a vector with as many undef as possible:
4144  /// <undef, ..., undef, Val, undef, ..., undef> where \p Val is only
4145  /// used at the index of the extract.
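  /// For example (illustrative), with a <4 x i32> transition and Val == 7:
  ///   UseSplat == true                      -> <i32 7, i32 7, i32 7, i32 7>
  ///   UseSplat == false and ExtractIdx == 2 -> <i32 undef, i32 undef, i32 7, i32 undef>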
4146  Value *getConstantVector(Constant *Val, bool UseSplat) const {
4147  unsigned ExtractIdx = UINT_MAX;
4148  if (!UseSplat) {
4149  // If we cannot determine where the constant must be, we have to
4150  // use a splat constant.
4151  Value *ValExtractIdx = Transition->getOperand(getTransitionIdx());
4152  if (ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx))
4153  ExtractIdx = CstVal->getSExtValue();
4154  else
4155  UseSplat = true;
4156  }
4157 
4158  unsigned End = getTransitionType()->getVectorNumElements();
4159  if (UseSplat)
4160  return ConstantVector::getSplat(End, Val);
4161 
4162  SmallVector<Constant *, 4> ConstVec;
4163  UndefValue *UndefVal = UndefValue::get(Val->getType());
4164  for (unsigned Idx = 0; Idx != End; ++Idx) {
4165  if (Idx == ExtractIdx)
4166  ConstVec.push_back(Val);
4167  else
4168  ConstVec.push_back(UndefVal);
4169  }
4170  return ConstantVector::get(ConstVec);
4171  }
4172 
4173  /// \brief Check whether promoting the operand at \p OperandIdx of \p Use
4174  /// to a vector type can trigger undefined behavior.
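  /// For example (illustrative), promoting the scalar operand %b in
  ///   %d = udiv i32 %a, %b
  /// to a vector whose other lanes are undef could introduce a division by
  /// zero, so a splat constant has to be used for those lanes instead.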
4175  static bool canCauseUndefinedBehavior(const Instruction *Use,
4176  unsigned OperandIdx) {
4177  // This is not safe to introduce undef when the operand is on
4178  // the right hand side of a division-like instruction.
4179  if (OperandIdx != 1)
4180  return false;
4181  switch (Use->getOpcode()) {
4182  default:
4183  return false;
4184  case Instruction::SDiv:
4185  case Instruction::UDiv:
4186  case Instruction::SRem:
4187  case Instruction::URem:
4188  return true;
4189  case Instruction::FDiv:
4190  case Instruction::FRem:
4191  return !Use->hasNoNaNs();
4192  }
4193  llvm_unreachable(nullptr);
4194  }
4195 
4196 public:
4197  VectorPromoteHelper(const DataLayout &DL, const TargetLowering &TLI,
4198  const TargetTransformInfo &TTI, Instruction *Transition,
4199  unsigned CombineCost)
4200  : DL(DL), TLI(TLI), TTI(TTI), Transition(Transition),
4201  StoreExtractCombineCost(CombineCost), CombineInst(nullptr) {
4202  assert(Transition && "Do not know how to promote null");
4203  }
4204 
4205  /// \brief Check if we can promote \p ToBePromoted to \p Type.
4206  bool canPromote(const Instruction *ToBePromoted) const {
4207  // We could support CastInst too.
4208  return isa<BinaryOperator>(ToBePromoted);
4209  }
4210 
4211  /// \brief Check if it is profitable to promote \p ToBePromoted
4212  /// by moving the transition downward through it.
4213  bool shouldPromote(const Instruction *ToBePromoted) const {
4214  // Promote only if all the operands can be statically expanded.
4215  // Indeed, we do not want to introduce any new kind of transitions.
4216  for (const Use &U : ToBePromoted->operands()) {
4217  const Value *Val = U.get();
4218  if (Val == getEndOfTransition()) {
4219  // If the use is a division and the transition is on the rhs,
4220  // we cannot promote the operation, otherwise we may create a
4221  // division by zero.
4222  if (canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()))
4223  return false;
4224  continue;
4225  }
4226  if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) &&
4227  !isa<ConstantFP>(Val))
4228  return false;
4229  }
4230  // Check that the resulting operation is legal.
4231  int ISDOpcode = TLI.InstructionOpcodeToISD(ToBePromoted->getOpcode());
4232  if (!ISDOpcode)
4233  return false;
4234  return StressStoreExtract ||
4235  TLI.isOperationLegalOrCustom(
4236  ISDOpcode, TLI.getValueType(DL, getTransitionType(), true));
4237  }
4238 
4239  /// \brief Check whether or not \p Use can be combined
4240  /// with the transition.
4241  /// I.e., is it possible to do Use(Transition) => AnotherUse?
4242  bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); }
4243 
4244  /// \brief Record \p ToBePromoted as part of the chain to be promoted.
4245  void enqueueForPromotion(Instruction *ToBePromoted) {
4246  InstsToBePromoted.push_back(ToBePromoted);
4247  }
4248 
4249  /// \brief Set the instruction that will be combined with the transition.
4250  void recordCombineInstruction(Instruction *ToBeCombined) {
4251  assert(canCombine(ToBeCombined) && "Unsupported instruction to combine");
4252  CombineInst = ToBeCombined;
4253  }
4254 
4255  /// \brief Promote all the instructions enqueued for promotion if it
4256  /// is profitable.
4257  /// \return True if the promotion happened, false otherwise.
4258  bool promote() {
4259  // Check if there is something to promote.
4260  // Right now, if we do not have anything to combine with,
4261  // we assume the promotion is not profitable.
4262  if (InstsToBePromoted.empty() || !CombineInst)
4263  return false;
4264 
4265  // Check cost.
4266  if (!StressStoreExtract && !isProfitableToPromote())
4267  return false;
4268 
4269  // Promote.
4270  for (auto &ToBePromoted : InstsToBePromoted)
4271  promoteImpl(ToBePromoted);
4272  InstsToBePromoted.clear();
4273  return true;
4274  }
4275 };
4276 } // End of anonymous namespace.
4277 
4278 void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
4279  // At this point, we know that all the operands of ToBePromoted but Def
4280  // can be statically promoted.
4281  // For Def, we need to use its parameter in ToBePromoted:
4282  // b = ToBePromoted ty1 a
4283  // Def = Transition ty1 b to ty2
4284  // Move the transition down.
4285  // 1. Replace all uses of the promoted operation by the transition.
4286  // = ... b => = ... Def.
4287  assert(ToBePromoted->getType() == Transition->getType() &&
4288  "The type of the result of the transition does not match "
4289  "the final type");
4290  ToBePromoted->replaceAllUsesWith(Transition);
4291  // 2. Update the type of the uses.
4292  // b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def.
4293  Type *TransitionTy = getTransitionType();
4294  ToBePromoted->mutateType(TransitionTy);
4295  // 3. Update all the operands of the promoted operation with promoted
4296  // operands.
4297  // b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a.
4298  for (Use &U : ToBePromoted->operands()) {
4299  Value *Val = U.get();
4300  Value *NewVal = nullptr;
4301  if (Val == Transition)
4302  NewVal = Transition->getOperand(getTransitionOriginalValueIdx());
4303  else if (isa<UndefValue>(Val) || isa<ConstantInt>(Val) ||
4304  isa<ConstantFP>(Val)) {
4305  // Use a splat constant if it is not safe to use undef.
4306  NewVal = getConstantVector(
4307  cast<Constant>(Val),
4308  isa<UndefValue>(Val) ||
4309  canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()));
4310  } else
4311  llvm_unreachable("Did you modified shouldPromote and forgot to update "
4312  "this?");
4313  ToBePromoted->setOperand(U.getOperandNo(), NewVal);
4314  }
4315  Transition->removeFromParent();
4316  Transition->insertAfter(ToBePromoted);
4317  Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted);
4318 }
4319 
4320 /// Some targets can do store(extractelement) with one instruction.
4321 /// Try to push the extractelement towards the stores when the target
4322 /// has this feature and this is profitable.
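/// For illustration (invented names), on such a target the chain
/// \code
///   %b = extractelement <2 x i32> %a, i32 0
///   %c = add i32 %b, 4
///   store i32 %c, i32* %p
/// \endcode
/// can be rewritten so the add is done as a <2 x i32> operation and the
/// extractelement sits right next to the store, where the pair can be selected
/// as a single instruction.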
4323 bool CodeGenPrepare::OptimizeExtractElementInst(Instruction *Inst) {
4324  unsigned CombineCost = UINT_MAX;
4325  if (DisableStoreExtract || !TLI ||
4326  (!StressStoreExtract &&
4327  !TLI->canCombineStoreAndExtract(Inst->getOperand(0)->getType(),
4328  Inst->getOperand(1), CombineCost)))
4329  return false;
4330 
4331  // At this point we know that Inst is a vector to scalar transition.
4332  // Try to move it down the def-use chain, until:
4333  // - We can combine the transition with its single use
4334  // => we got rid of the transition.
4335  // - We escape the current basic block
4336  // => we would need to check that we are moving it at a cheaper place and
4337  // we do not do that for now.
4338  BasicBlock *Parent = Inst->getParent();
4339  DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n');
4340  VectorPromoteHelper VPH(*DL, *TLI, *TTI, Inst, CombineCost);
4341  // If the transition has more than one use, assume this is not going to be
4342  // beneficial.
4343  while (Inst->hasOneUse()) {
4344  Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin());
4345  DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n');
4346 
4347  if (ToBePromoted->getParent() != Parent) {
4348  DEBUG(dbgs() << "Instruction to promote is in a different block ("
4349  << ToBePromoted->getParent()->getName()
4350  << ") than the transition (" << Parent->getName() << ").\n");
4351  return false;
4352  }
4353 
4354  if (VPH.canCombine(ToBePromoted)) {
4355  DEBUG(dbgs() << "Assume " << *Inst << '\n'
4356  << "will be combined with: " << *ToBePromoted << '\n');
4357  VPH.recordCombineInstruction(ToBePromoted);
4358  bool Changed = VPH.promote();
4359  NumStoreExtractExposed += Changed;
4360  return Changed;
4361  }
4362 
4363  DEBUG(dbgs() << "Try promoting.\n");
4364  if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted))
4365  return false;
4366 
4367  DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n");
4368 
4369  VPH.enqueueForPromotion(ToBePromoted);
4370  Inst = ToBePromoted;
4371  }
4372  return false;
4373 }
4374 
4375 bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) {
4376  // Bail out if we inserted the instruction to prevent optimizations from
4377  // stepping on each other's toes.
4378  if (InsertedInsts.count(I))
4379  return false;
4380 
4381  if (PHINode *P = dyn_cast<PHINode>(I)) {
4382  // It is possible for very late stage optimizations (such as SimplifyCFG)
4383  // to introduce PHI nodes too late to be cleaned up. If we detect such a
4384  // trivial PHI, go ahead and zap it here.
4385  if (Value *V = SimplifyInstruction(P, *DL, TLInfo, nullptr)) {
4386  P->replaceAllUsesWith(V);
4387  P->eraseFromParent();
4388  ++NumPHIsElim;
4389  return true;
4390  }
4391  return false;
4392  }
4393 
4394  if (CastInst *CI = dyn_cast<CastInst>(I)) {
4395  // If the source of the cast is a constant, then this should have
4396  // already been constant folded. The only reason NOT to constant fold
4397  // it is if something (e.g. LSR) was careful to place the constant
4398  // evaluation in a block other than the one that uses it (e.g. to hoist
4399  // the address of globals out of a loop). If this is the case, we don't
4400  // want to forward-subst the cast.
4401  if (isa<Constant>(CI->getOperand(0)))
4402  return false;
4403 
4404  if (TLI && OptimizeNoopCopyExpression(CI, *TLI, *DL))
4405  return true;
4406 
4407  if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
4408  /// Sink a zext or sext into its user blocks if the target type doesn't
4409  /// fit in one register
4410  if (TLI &&
4411  TLI->getTypeAction(CI->getContext(),
4412  TLI->getValueType(*DL, CI->getType())) ==
4413  TargetLowering::TypeExpandInteger) {
4414  return SinkCast(CI);
4415  } else {
4416  bool MadeChange = MoveExtToFormExtLoad(I);
4417  return MadeChange | OptimizeExtUses(I);
4418  }
4419  }
4420  return false;
4421  }
4422 
4423  if (CmpInst *CI = dyn_cast<CmpInst>(I))
4424  if (!TLI || !TLI->hasMultipleConditionRegisters())
4425  return OptimizeCmpExpression(CI);
4426 
4427  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
4428  if (TLI) {
4429  unsigned AS = LI->getPointerAddressSpace();
4430  return OptimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS);
4431  }
4432  return false;
4433  }
4434 
4435  if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
4436  if (TLI) {
4437  unsigned AS = SI->getPointerAddressSpace();
4438  return OptimizeMemoryInst(I, SI->getOperand(1),
4439  SI->getOperand(0)->getType(), AS);
4440  }
4441  return false;
4442  }
4443 
4444  BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
4445 
4446  if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
4447  BinOp->getOpcode() == Instruction::LShr)) {
4448  ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
4449  if (TLI && CI && TLI->hasExtractBitsInsn())
4450  return OptimizeExtractBits(BinOp, CI, *TLI, *DL);
4451 
4452  return false;
4453  }
4454 
4455  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
4456  if (GEPI->hasAllZeroIndices()) {
4457  /// The GEP operand must be a pointer, so must its result -> BitCast
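      // Illustrative example (hypothetical IR, not part of the original source):
      //   %f = getelementptr inbounds { i32, i32 }, { i32, i32 }* %s, i64 0, i32 0
      // is rewritten to the equivalent
      //   %f = bitcast { i32, i32 }* %s to i32*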
4458  Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
4459  GEPI->getName(), GEPI);
4460  GEPI->replaceAllUsesWith(NC);
4461  GEPI->eraseFromParent();
4462  ++NumGEPsElim;
4463  OptimizeInst(NC, ModifiedDT);
4464  return true;
4465  }
4466  return false;
4467  }
4468 
4469  if (CallInst *CI = dyn_cast<CallInst>(I))
4470  return OptimizeCallInst(CI, ModifiedDT);
4471 
4472  if (SelectInst *SI = dyn_cast<SelectInst>(I))
4473  return OptimizeSelectInst(SI);
4474 
4475  if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
4476  return OptimizeShuffleVectorInst(SVI);
4477 
4478  if (isa<ExtractElementInst>(I))
4479  return OptimizeExtractElementInst(I);
4480 
4481  return false;
4482 }
4483 
4484 // In this pass we look for GEP and cast instructions that are used
4485 // across basic blocks and rewrite them to improve basic-block-at-a-time
4486 // selection.
4487 bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB, bool& ModifiedDT) {
4488  SunkAddrs.clear();
4489  bool MadeChange = false;
4490 
4491  CurInstIterator = BB.begin();
4492  while (CurInstIterator != BB.end()) {
4493  MadeChange |= OptimizeInst(CurInstIterator++, ModifiedDT);
4494  if (ModifiedDT)
4495  return true;
4496  }
4497  MadeChange |= DupRetToEnableTailCallOpts(&BB);
4498 
4499  return MadeChange;
4500 }
4501 
4502 // If llvm.dbg.value is far away from the value, then ISel may not be able
4503 // to handle it properly. ISel will drop llvm.dbg.value if it cannot
4504 // find a node corresponding to the value.
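// For illustration (hypothetical IR), a dbg.value that has drifted away from
// its value, e.g.
//   %x = add i32 %a, %b
//   ...                                 ; many unrelated instructions
//   call void @llvm.dbg.value(metadata i32 %x, ...)
// is moved to sit directly after the definition of %x so that ISel can still
// find a node for it.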
4505 bool CodeGenPrepare::PlaceDbgValues(Function &F) {
4506  bool MadeChange = false;
4507  for (BasicBlock &BB : F) {
4508  Instruction *PrevNonDbgInst = nullptr;
4509  for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
4510  Instruction *Insn = BI++;
4511  DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
4512  // Leave dbg.values that refer to an alloca alone. These
4513  // intrinsics describe the address of a variable (= the alloca)
4514  // being taken. They should not be moved next to the alloca
4515  // (and to the beginning of the scope), but rather stay close to
4516  // where said address is used.
4517  if (!DVI || (DVI->getValue() && isa<AllocaInst>(DVI->getValue()))) {
4518  PrevNonDbgInst = Insn;
4519  continue;
4520  }
4521 
4522  Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue());
4523  if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) {
4524  DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI);
4525  DVI->removeFromParent();
4526  if (isa<PHINode>(VI))
4527  DVI->insertBefore(VI->getParent()->getFirstInsertionPt());
4528  else
4529  DVI->insertAfter(VI);
4530  MadeChange = true;
4531  ++NumDbgValueMoved;
4532  }
4533  }
4534  }
4535  return MadeChange;
4536 }
4537 
4538 // If there is a sequence that branches based on comparing a single bit
4539 // against zero that can be combined into a single instruction, and the
4540 // target supports folding these into a single instruction, sink the
4541 // mask and compare into the branch uses. Do this before OptimizeBlock ->
4542 // OptimizeInst -> OptimizeCmpExpression, which perturbs the pattern being
4543 // searched for.
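// For illustration (hypothetical IR): if another block %user also ends in
//   br i1 %icmpVal, label %next1, label %next2
// the pass re-creates the pair right before that branch, e.g. with a mask of 8:
//   %andVal.sunk = and i32 %val, 8
//   %icmpVal.sunk = icmp eq i32 %andVal.sunk, 0
//   br i1 %icmpVal.sunk, label %next1, label %next2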
4544 bool CodeGenPrepare::sinkAndCmp(Function &F) {
4545  if (!EnableAndCmpSinking)
4546  return false;
4547  if (!TLI || !TLI->isMaskAndBranchFoldingLegal())
4548  return false;
4549  bool MadeChange = false;
4550  for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
4551  BasicBlock *BB = I++;
4552 
4553  // Does this BB end with the following?
4554  // %andVal = and %val, #single-bit-set
4555  // %icmpVal = icmp %andVal, 0
4556  // br i1 %icmpVal, label %dest1, label %dest2
4557  BranchInst *Brcc = dyn_cast<BranchInst>(BB->getTerminator());
4558  if (!Brcc || !Brcc->isConditional())
4559  continue;
4560  ICmpInst *Cmp = dyn_cast<ICmpInst>(Brcc->getOperand(0));
4561  if (!Cmp || Cmp->getParent() != BB)
4562  continue;
4563  ConstantInt *Zero = dyn_cast<ConstantInt>(Cmp->getOperand(1));
4564  if (!Zero || !Zero->isZero())
4565  continue;
4566  Instruction *And = dyn_cast<Instruction>(Cmp->getOperand(0));
4567  if (!And || And->getOpcode() != Instruction::And || And->getParent() != BB)
4568  continue;
4569  ConstantInt* Mask = dyn_cast<ConstantInt>(And->getOperand(1));
4570  if (!Mask || !Mask->getUniqueInteger().isPowerOf2())
4571  continue;
4572  DEBUG(dbgs() << "found and; icmp ?,0; brcc\n"); DEBUG(BB->dump());
4573 
4574  // Push the "and; icmp" for any users that are conditional branches.
4575  // Since there can only be one branch use per BB, we don't need to keep
4576  // track of which BBs we insert into.
4577  for (Value::use_iterator UI = Cmp->use_begin(), E = Cmp->use_end();
4578  UI != E; ) {
4579  Use &TheUse = *UI;
4580  // Find brcc use.
4581  BranchInst *BrccUser = dyn_cast<BranchInst>(*UI);
4582  ++UI;
4583  if (!BrccUser || !BrccUser->isConditional())
4584  continue;
4585  BasicBlock *UserBB = BrccUser->getParent();
4586  if (UserBB == BB) continue;
4587  DEBUG(dbgs() << "found Brcc use\n");
4588 
4589  // Sink the "and; icmp" to the use.
4590  MadeChange = true;
4591  BinaryOperator *NewAnd =
4592  BinaryOperator::CreateAnd(And->getOperand(0), And->getOperand(1), "",
4593  BrccUser);
4594  CmpInst *NewCmp =
4595  CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(), NewAnd, Zero,
4596  "", BrccUser);
4597  TheUse = NewCmp;
4598  ++NumAndCmpsMoved;
4599  DEBUG(BrccUser->getParent()->dump());
4600  }
4601  }
4602  return MadeChange;
4603 }
4604 
4605 /// \brief Retrieve the probabilities of a conditional branch. Returns true on
4606 /// success, or returns false if no or invalid metadata was found.
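/// For example (illustrative values), branch metadata of the form
///   !{!"branch_weights", i32 64, i32 4}
/// yields ProbTrue = 64 and ProbFalse = 4.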
4607 static bool extractBranchMetadata(BranchInst *BI,
4608  uint64_t &ProbTrue, uint64_t &ProbFalse) {
4609  assert(BI->isConditional() &&
4610  "Looking for probabilities on unconditional branch?");
4611  auto *ProfileData = BI->getMetadata(LLVMContext::MD_prof);
4612  if (!ProfileData || ProfileData->getNumOperands() != 3)
4613  return false;
4614 
4615  const auto *CITrue =
4616  mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(1));
4617  const auto *CIFalse =
4618  mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(2));
4619  if (!CITrue || !CIFalse)
4620  return false;
4621 
4622  ProbTrue = CITrue->getValue().getZExtValue();
4623  ProbFalse = CIFalse->getValue().getZExtValue();
4624 
4625  return true;
4626 }
4627 
4628 /// \brief Scale down both weights to fit into uint32_t.
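/// For example (illustrative numbers), NewTrue = 6,000,000,000 and
/// NewFalse = 2,000,000,000 give Scale = (6,000,000,000 / UINT32_MAX) + 1 = 2,
/// so the weights are scaled down to 3,000,000,000 and 1,000,000,000, both of
/// which fit into uint32_t.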
4629 static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
4630  uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
4631  uint32_t Scale = (NewMax / UINT32_MAX) + 1;
4632  NewTrue = NewTrue / Scale;
4633  NewFalse = NewFalse / Scale;
4634 }
4635 
4636 /// \brief Some targets prefer to split a conditional branch like:
4637 /// \code
4638 /// %0 = icmp ne i32 %a, 0
4639 /// %1 = icmp ne i32 %b, 0
4640 /// %or.cond = or i1 %0, %1
4641 /// br i1 %or.cond, label %TrueBB, label %FalseBB
4642 /// \endcode
4643 /// into multiple branch instructions like:
4644 /// \code
4645 /// bb1:
4646 /// %0 = icmp ne i32 %a, 0
4647 /// br i1 %0, label %TrueBB, label %bb2
4648 /// bb2:
4649 /// %1 = icmp ne i32 %b, 0
4650 /// br i1 %1, label %TrueBB, label %FalseBB
4651 /// \endcode
4652 /// This usually allows instruction selection to do even further optimizations
4653 /// and combine the compare with the branch instruction. Currently this is
4654 /// applied for targets which have "cheap" jump instructions.
4655 ///
4656 /// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
4657 ///
4658 bool CodeGenPrepare::splitBranchCondition(Function &F) {
4659  if (!TM || !TM->Options.EnableFastISel || !TLI || TLI->isJumpExpensive())
4660  return false;
4661 
4662  bool MadeChange = false;
4663  for (auto &BB : F) {
4664  // Does this BB end with the following?
4665  // %cond1 = icmp|fcmp|binary instruction ...
4666  // %cond2 = icmp|fcmp|binary instruction ...
4667  // %cond.or = or|and i1 %cond1, %cond2
4668  // br i1 %cond.or, label %dest1, label %dest2
4669  BinaryOperator *LogicOp;
4670  BasicBlock *TBB, *FBB;
4671  if (!match(BB.getTerminator(), m_Br(m_OneUse(m_BinOp(LogicOp)), TBB, FBB)))
4672  continue;
4673 
4674  unsigned Opc;
4675  Value *Cond1, *Cond2;
4676  if (match(LogicOp, m_And(m_OneUse(m_Value(Cond1)),
4677  m_OneUse(m_Value(Cond2)))))
4678  Opc = Instruction::And;
4679  else if (match(LogicOp, m_Or(m_OneUse(m_Value(Cond1)),
4680  m_OneUse(m_Value(Cond2)))))
4681  Opc = Instruction::Or;
4682  else
4683  continue;
4684 
4685  if (!match(Cond1, m_CombineOr(m_Cmp(), m_BinOp())) ||
4686  !match(Cond2, m_CombineOr(m_Cmp(), m_BinOp())) )
4687  continue;
4688 
4689  DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());
4690 
4691  // Create a new BB.
4692  auto *InsertBefore = std::next(Function::iterator(BB))
4693  .getNodePtrUnchecked();
4694  auto TmpBB = BasicBlock::Create(BB.getContext(),
4695  BB.getName() + ".cond.split",
4696  BB.getParent(), InsertBefore);
4697 
4698  // Update the original basic block: use the first condition directly in the
4699  // branch instruction and remove the no-longer-needed and/or instruction.
4700  auto *Br1 = cast<BranchInst>(BB.getTerminator());
4701  Br1->setCondition(Cond1);
4702  LogicOp->eraseFromParent();
4703 
4704  // Depending on the condition we have to replace either the true or the false
4705  // successor of the original branch instruction.
4706  if (Opc == Instruction::And)
4707  Br1->setSuccessor(0, TmpBB);
4708  else
4709  Br1->setSuccessor(1, TmpBB);
4710 
4711  // Fill in the new basic block.
4712  auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond2, TBB, FBB);
4713  if (auto *I = dyn_cast<Instruction>(Cond2)) {
4714  I->removeFromParent();
4715  I->insertBefore(Br2);
4716  }
4717 
4718  // Update PHI nodes in both successors. The original BB needs to be
4719  // replaced in one successor's PHI nodes, because the branch now comes from
4720  // the newly generated BB (TmpBB). In the other successor we need to add one
4721  // incoming edge to the PHI nodes, because both branch instructions now
4722  // target the same successor. Depending on the original branch condition
4723  // (and/or) we have to swap the successors (TrueDest, FalseDest), so that
4724  // we perform the correct update for the PHI nodes.
4725  // This doesn't change the successor order of the just created branch
4726  // instruction (or any other instruction).
4727  if (Opc == Instruction::Or)
4728  std::swap(TBB, FBB);
4729 
4730  // Replace the old BB with the new BB in TBB's PHI nodes.
4731  for (auto &I : *TBB) {
4732  PHINode *PN = dyn_cast<PHINode>(&I);
4733  if (!PN)
4734  break;
4735  int i;
4736  while ((i = PN->getBasicBlockIndex(&BB)) >= 0)
4737  PN->setIncomingBlock(i, TmpBB);
4738  }
4739 
4740  // Add another incoming edge from the new BB.
4741  for (auto &I : *FBB) {
4742  PHINode *PN = dyn_cast<PHINode>(&I);
4743  if (!PN)
4744  break;
4745  auto *Val = PN->getIncomingValueForBlock(&BB);
4746  PN->addIncoming(Val, TmpBB);
4747  }
4748 
4749  // Update the branch weights (from SelectionDAGBuilder::
4750  // FindMergedConditions).
4751  if (Opc == Instruction::Or) {
4752  // Codegen X | Y as:
4753  // BB1:
4754  // jmp_if_X TBB
4755  // jmp TmpBB
4756  // TmpBB:
4757  // jmp_if_Y TBB
4758  // jmp FBB
4759  //
4760 
4761  // We have flexibility in setting Prob for BB1 and Prob for NewBB.
4762  // The requirement is that
4763  // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
4764  // = TrueProb for original BB.
4765  // Assuming the original weights are A and B, one choice is to set BB1's
4766  // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
4767  // assumes that
4768  // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
4769  // Another choice is to assume TrueProb for BB1 equals to TrueProb for
4770  // TmpBB, but the math is more complicated.
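      // For example (illustrative weights): with original weights A = 3 and
      // B = 1, Br1 gets weights (3, 3 + 2*1 = 5) and Br2 gets (3, 2*1 = 2),
      // so TrueProb = 3/8 + (5/8)*(3/5) = 3/4, matching the original 3/4.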
4771  uint64_t TrueWeight, FalseWeight;
4772  if (extractBranchMetadata(Br1, TrueWeight, FalseWeight)) {
4773  uint64_t NewTrueWeight = TrueWeight;
4774  uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight;
4775  scaleWeights(NewTrueWeight, NewFalseWeight);
4776  Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext())
4777  .createBranchWeights(NewTrueWeight, NewFalseWeight));
4778 
4779  NewTrueWeight = TrueWeight;
4780  NewFalseWeight = 2 * FalseWeight;
4781  scaleWeights(NewTrueWeight, NewFalseWeight);
4782  Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext())
4783  .createBranchWeights(NewTrueWeight, NewFalseWeight));
4784  }
4785  } else {
4786  // Codegen X & Y as:
4787  // BB1:
4788  // jmp_if_X TmpBB
4789  // jmp FBB
4790  // TmpBB:
4791  // jmp_if_Y TBB
4792  // jmp FBB
4793  //
4794  // This requires creation of TmpBB after CurBB.
4795 
4796  // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
4797  // The requirement is that
4798  // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
4799  // = FalseProb for original BB.
4800  // Assuming the original weights are A and B, one choice is to set BB1's
4801  // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
4802  // assumes that
4803  // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
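      // For example (illustrative weights): with original weights A = 3 and
      // B = 1, Br1 gets weights (2*3 + 1 = 7, 1) and Br2 gets (2*3 = 6, 1),
      // so FalseProb = 1/8 + (7/8)*(1/7) = 1/4, matching the original 1/4.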
4804  uint64_t TrueWeight, FalseWeight;
4805  if (extractBranchMetadata(Br1, TrueWeight, FalseWeight)) {
4806  uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight;
4807  uint64_t NewFalseWeight = FalseWeight;
4808  scaleWeights(NewTrueWeight, NewFalseWeight);
4809  Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext())
4810  .createBranchWeights(NewTrueWeight, NewFalseWeight));
4811 
4812  NewTrueWeight = 2 * TrueWeight;
4813  NewFalseWeight = FalseWeight;
4814  scaleWeights(NewTrueWeight, NewFalseWeight);
4815  Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext())
4816  .createBranchWeights(NewTrueWeight, NewFalseWeight));
4817  }
4818  }
4819 
4820  // Note: No point in getting fancy here, since the DT info is never
4821  // available to CodeGenPrepare.
4822  ModifiedDT = true;
4823 
4824  MadeChange = true;
4825 
4826  DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();
4827  TmpBB->dump());
4828  }
4829  return MadeChange;
4830 }