CodeGenPrepare.cpp
1 //===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass munges the code in the input function to better prepare it for
11 // SelectionDAG-based code generation. This works around limitations in its
12 // basic-block-at-a-time approach. It should eventually be removed.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/CodeGen/Passes.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/SmallSet.h"
19 #include "llvm/ADT/Statistic.h"
23 #include "llvm/Analysis/LoopInfo.h"
29 #include "llvm/CodeGen/Analysis.h"
30 #include "llvm/IR/CallSite.h"
31 #include "llvm/IR/Constants.h"
32 #include "llvm/IR/DataLayout.h"
33 #include "llvm/IR/DerivedTypes.h"
34 #include "llvm/IR/Dominators.h"
35 #include "llvm/IR/Function.h"
37 #include "llvm/IR/IRBuilder.h"
38 #include "llvm/IR/InlineAsm.h"
39 #include "llvm/IR/Instructions.h"
40 #include "llvm/IR/IntrinsicInst.h"
41 #include "llvm/IR/MDBuilder.h"
42 #include "llvm/IR/PatternMatch.h"
43 #include "llvm/IR/Statepoint.h"
44 #include "llvm/IR/ValueHandle.h"
45 #include "llvm/IR/ValueMap.h"
46 #include "llvm/Pass.h"
49 #include "llvm/Support/Debug.h"
58 using namespace llvm;
59 using namespace llvm::PatternMatch;
60 
61 #define DEBUG_TYPE "codegenprepare"
62 
63 STATISTIC(NumBlocksElim, "Number of blocks eliminated");
64 STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
65 STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
66 STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
67  "sunken Cmps");
68 STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
69  "of sunken Casts");
70 STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
71  "computations were sunk");
72 STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
73 STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
74 STATISTIC(NumAndsAdded,
75  "Number of and mask instructions added to form ext loads");
76 STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
77 STATISTIC(NumRetsDup, "Number of return instructions duplicated");
78 STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
79 STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
80 STATISTIC(NumAndCmpsMoved, "Number of and/cmp's pushed into branches");
81 STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
82 
83 static cl::opt<bool> DisableBranchOpts(
84  "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
85  cl::desc("Disable branch optimizations in CodeGenPrepare"));
86 
87 static cl::opt<bool>
88  DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
89  cl::desc("Disable GC optimizations in CodeGenPrepare"));
90 
91 static cl::opt<bool> DisableSelectToBranch(
92  "disable-cgp-select2branch", cl::Hidden, cl::init(false),
93  cl::desc("Disable select to branch conversion."));
94 
95 static cl::opt<bool> AddrSinkUsingGEPs(
96  "addr-sink-using-gep", cl::Hidden, cl::init(false),
97  cl::desc("Address sinking in CGP using GEPs."));
98 
99 static cl::opt<bool> EnableAndCmpSinking(
100  "enable-andcmp-sinking", cl::Hidden, cl::init(true),
101  cl::desc("Enable sinking and/cmp into branches."));
102 
103 static cl::opt<bool> DisableStoreExtract(
104  "disable-cgp-store-extract", cl::Hidden, cl::init(false),
105  cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));
106 
107 static cl::opt<bool> StressStoreExtract(
108  "stress-cgp-store-extract", cl::Hidden, cl::init(false),
109  cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));
110 
111 static cl::opt<bool> DisableExtLdPromotion(
112  "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
113  cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
114  "CodeGenPrepare"));
115 
116 static cl::opt<bool> StressExtLdPromotion(
117  "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
118  cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
119  "optimization in CodeGenPrepare"));
120 
121 static cl::opt<bool> DisablePreheaderProtect(
122  "disable-preheader-prot", cl::Hidden, cl::init(false),
123  cl::desc("Disable protection against removing loop preheaders"));
124 
125 static cl::opt<bool> ProfileGuidedSectionPrefix(
126  "profile-guided-section-prefix", cl::Hidden, cl::init(true),
127  cl::desc("Use profile info to add section prefix for hot/cold functions"));
128 
129 static cl::opt<unsigned> FreqRatioToSkipMerge(
130  "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2),
131  cl::desc("Skip merging empty blocks if (frequency of empty block) / "
132  "(frequency of destination block) is greater than this ratio"));
133 
134 static cl::opt<bool> ForceSplitStore(
135  "force-split-store", cl::Hidden, cl::init(false),
136  cl::desc("Force store splitting no matter what the target query says."));
137 
138 namespace {
139 typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
140 typedef PointerIntPair<Type *, 1, bool> TypeIsSExt;
141 typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy;
142 class TypePromotionTransaction;
143 
144  class CodeGenPrepare : public FunctionPass {
145  const TargetMachine *TM;
146  const TargetLowering *TLI;
147  const TargetTransformInfo *TTI;
148  const TargetLibraryInfo *TLInfo;
149  const LoopInfo *LI;
150  std::unique_ptr<BlockFrequencyInfo> BFI;
151  std::unique_ptr<BranchProbabilityInfo> BPI;
152 
153  /// As we scan instructions optimizing them, this is the next instruction
154  /// to optimize. Transforms that can invalidate this should update it.
155  BasicBlock::iterator CurInstIterator;
156 
157  /// Keeps track of non-local addresses that have been sunk into a block.
158  /// This allows us to avoid inserting duplicate code for blocks with
159  /// multiple load/stores of the same address.
160  ValueMap<Value*, Value*> SunkAddrs;
161 
162  /// Keeps track of all instructions inserted for the current function.
163  SetOfInstrs InsertedInsts;
164  /// Keeps track of the types of the related instructions before their
165  /// promotion for the current function.
166  InstrToOrigTy PromotedInsts;
167 
168  /// True if CFG is modified in any way.
169  bool ModifiedDT;
170 
171  /// True if optimizing for size.
172  bool OptSize;
173 
174  /// DataLayout for the Function being processed.
175  const DataLayout *DL;
176 
177  public:
178  static char ID; // Pass identification, replacement for typeid
179  explicit CodeGenPrepare(const TargetMachine *TM = nullptr)
180  : FunctionPass(ID), TM(TM), TLI(nullptr), TTI(nullptr), DL(nullptr) {
181  initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
182  }
183  bool runOnFunction(Function &F) override;
184 
185  StringRef getPassName() const override { return "CodeGen Prepare"; }
186 
187  void getAnalysisUsage(AnalysisUsage &AU) const override {
188  // FIXME: When we can selectively preserve passes, preserve the domtree.
189  AU.addRequired<ProfileSummaryInfoWrapperPass>();
190  AU.addRequired<TargetLibraryInfoWrapperPass>();
191  AU.addRequired<TargetTransformInfoWrapperPass>();
192  AU.addRequired<LoopInfoWrapperPass>();
193  }
194 
195  private:
196  bool eliminateFallThrough(Function &F);
197  bool eliminateMostlyEmptyBlocks(Function &F);
198  BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
199  bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
200  void eliminateMostlyEmptyBlock(BasicBlock *BB);
201  bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
202  bool isPreheader);
203  bool optimizeBlock(BasicBlock &BB, bool& ModifiedDT);
204  bool optimizeInst(Instruction *I, bool& ModifiedDT);
205  bool optimizeMemoryInst(Instruction *I, Value *Addr,
206  Type *AccessTy, unsigned AS);
207  bool optimizeInlineAsmInst(CallInst *CS);
208  bool optimizeCallInst(CallInst *CI, bool& ModifiedDT);
209  bool moveExtToFormExtLoad(Instruction *&I);
210  bool optimizeExtUses(Instruction *I);
211  bool optimizeLoadExt(LoadInst *I);
212  bool optimizeSelectInst(SelectInst *SI);
213  bool optimizeShuffleVectorInst(ShuffleVectorInst *SI);
214  bool optimizeSwitchInst(SwitchInst *CI);
215  bool optimizeExtractElementInst(Instruction *Inst);
216  bool dupRetToEnableTailCallOpts(BasicBlock *BB);
217  bool placeDbgValues(Function &F);
218  bool sinkAndCmp(Function &F);
219  bool extLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI,
220  Instruction *&Inst,
221  const SmallVectorImpl<Instruction *> &Exts,
222  unsigned CreatedInstCost);
223  bool splitBranchCondition(Function &F);
224  bool simplifyOffsetableRelocate(Instruction &I);
225  };
226 }
227 
228 char CodeGenPrepare::ID = 0;
229 INITIALIZE_TM_PASS_BEGIN(CodeGenPrepare, "codegenprepare",
230  "Optimize for code generation", false, false)
231 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
232 INITIALIZE_TM_PASS_END(CodeGenPrepare, "codegenprepare",
233  "Optimize for code generation", false, false)
234 
235 FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) {
236  return new CodeGenPrepare(TM);
237 }
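// Editor's note (illustrative, not from the original file): the standard
// backend pipeline creates this pass via createCodeGenPreparePass(TM) when
// IR-level optimization is enabled; it can also be run standalone, e.g.
//
//   opt -codegenprepare -mtriple=x86_64-unknown-linux-gnu input.ll -S
//
// which exercises the same runOnFunction entry point shown below.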
238 
239 bool CodeGenPrepare::runOnFunction(Function &F) {
240  if (skipFunction(F))
241  return false;
242 
243  DL = &F.getParent()->getDataLayout();
244 
245  bool EverMadeChange = false;
246  // Clear per function information.
247  InsertedInsts.clear();
248  PromotedInsts.clear();
249  BFI.reset();
250  BPI.reset();
251 
252  ModifiedDT = false;
253  if (TM)
254  TLI = TM->getSubtargetImpl(F)->getTargetLowering();
255  TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
256  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
257  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
258  OptSize = F.optForSize();
259 
260  if (ProfileGuidedSectionPrefix) {
261  ProfileSummaryInfo *PSI =
262  getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
263  if (PSI->isFunctionEntryHot(&F))
264  F.setSectionPrefix(".hot");
265  else if (PSI->isFunctionEntryCold(&F))
266  F.setSectionPrefix(".cold");
267  }
268 
269  /// This optimization identifies DIV instructions that can be
270  /// profitably bypassed and carried out with a shorter, faster divide.
271  if (!OptSize && TLI && TLI->isSlowDivBypassed()) {
272  const DenseMap<unsigned int, unsigned int> &BypassWidths =
273  TLI->getBypassSlowDivWidths();
274  BasicBlock* BB = &*F.begin();
275  while (BB != nullptr) {
276  // bypassSlowDivision may create new BBs, but we don't want to reapply the
277  // optimization to those blocks.
278  BasicBlock* Next = BB->getNextNode();
279  EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
280  BB = Next;
281  }
282  }
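  // Editor's note (illustrative, not from the original file): for a target
  // where getBypassSlowDivWidths() maps 64 -> 32, bypassSlowDivision roughly
  // rewrites
  //   %r = udiv i64 %a, %b
  // into a runtime test of whether both operands fit in 32 bits, a cheaper
  // 32-bit udiv for that case, the original 64-bit udiv otherwise, and a phi
  // merging the two results; the newly created blocks are skipped above.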
283 
284  // Eliminate blocks that contain only PHI nodes and an
285  // unconditional branch.
286  EverMadeChange |= eliminateMostlyEmptyBlocks(F);
287 
288  // If llvm.dbg.value is far away from the value then ISel may not be able
289  // to handle it properly. ISel will drop llvm.dbg.value if it cannot
290  // find a node corresponding to the value.
291  EverMadeChange |= placeDbgValues(F);
292 
293  // If there is a mask, compare against zero, and branch that can be combined
294  // into a single target instruction, push the mask and compare into branch
295  // users. Do this before OptimizeBlock -> OptimizeInst ->
296  // OptimizeCmpExpression, which perturbs the pattern being searched for.
297  if (!DisableBranchOpts) {
298  EverMadeChange |= sinkAndCmp(F);
299  EverMadeChange |= splitBranchCondition(F);
300  }
301 
302  bool MadeChange = true;
303  while (MadeChange) {
304  MadeChange = false;
305  for (Function::iterator I = F.begin(); I != F.end(); ) {
306  BasicBlock *BB = &*I++;
307  bool ModifiedDTOnIteration = false;
308  MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration);
309 
310  // Restart BB iteration if the dominator tree of the Function was changed
311  if (ModifiedDTOnIteration)
312  break;
313  }
314  EverMadeChange |= MadeChange;
315  }
316 
317  SunkAddrs.clear();
318 
319  if (!DisableBranchOpts) {
320  MadeChange = false;
321  SmallPtrSet<BasicBlock*, 8> WorkList;
322  for (BasicBlock &BB : F) {
323  SmallVector<BasicBlock *, 2> Successors(succ_begin(&BB), succ_end(&BB));
324  MadeChange |= ConstantFoldTerminator(&BB, true);
325  if (!MadeChange) continue;
326 
327  for (SmallVectorImpl<BasicBlock*>::iterator
328  II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
329  if (pred_begin(*II) == pred_end(*II))
330  WorkList.insert(*II);
331  }
332 
333  // Delete the dead blocks and any of their dead successors.
334  MadeChange |= !WorkList.empty();
335  while (!WorkList.empty()) {
336  BasicBlock *BB = *WorkList.begin();
337  WorkList.erase(BB);
338  SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
339 
340  DeleteDeadBlock(BB);
341 
342  for (SmallVectorImpl<BasicBlock*>::iterator
343  II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
344  if (pred_begin(*II) == pred_end(*II))
345  WorkList.insert(*II);
346  }
347 
348  // Merge pairs of basic blocks with unconditional branches, connected by
349  // a single edge.
350  if (EverMadeChange || MadeChange)
351  MadeChange |= eliminateFallThrough(F);
352 
353  EverMadeChange |= MadeChange;
354  }
355 
356  if (!DisableGCOpts) {
357  SmallVector<Instruction *, 2> Statepoints;
358  for (BasicBlock &BB : F)
359  for (Instruction &I : BB)
360  if (isStatepoint(I))
361  Statepoints.push_back(&I);
362  for (auto &I : Statepoints)
363  EverMadeChange |= simplifyOffsetableRelocate(*I);
364  }
365 
366  return EverMadeChange;
367 }
368 
369 /// Merge basic blocks which are connected by a single edge, where one of the
370 /// basic blocks has a single successor pointing to the other basic block,
371 /// which has a single predecessor.
372 bool CodeGenPrepare::eliminateFallThrough(Function &F) {
373  bool Changed = false;
374  // Scan all of the blocks in the function, except for the entry block.
375  for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) {
376  BasicBlock *BB = &*I++;
377  // If the destination block has a single pred, then this is a trivial
378  // edge, just collapse it.
379  BasicBlock *SinglePred = BB->getSinglePredecessor();
380 
381  // Don't merge if BB's address is taken.
382  if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue;
383 
384  BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
385  if (Term && !Term->isConditional()) {
386  Changed = true;
387  DEBUG(dbgs() << "To merge:\n"<< *SinglePred << "\n\n\n");
388  // Remember if SinglePred was the entry block of the function.
389  // If so, we will need to move BB back to the entry position.
390  bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
391  MergeBasicBlockIntoOnlyPred(BB, nullptr);
392 
393  if (isEntry && BB != &BB->getParent()->getEntryBlock())
394  BB->moveBefore(&BB->getParent()->getEntryBlock());
395 
396  // We have erased a block. Update the iterator.
397  I = BB->getIterator();
398  }
399  }
400  return Changed;
401 }
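// Editor's note (illustrative, not from the original file): the shape this
// collapses looks like
//   bb1:
//     ...
//     br label %bb2          ; unconditional, and bb2 has no other predecessors
//   bb2:
//     ...
// After MergeBasicBlockIntoOnlyPred, bb2's instructions follow bb1's directly
// in a single block.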
402 
403 /// Find a destination block from BB if BB is a mergeable empty block.
404 BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
405  // If this block doesn't end with an uncond branch, ignore it.
406  BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
407  if (!BI || !BI->isUnconditional())
408  return nullptr;
409 
410  // If the instruction before the branch (skipping debug info) isn't a phi
411  // node, then other stuff is happening here.
412  BasicBlock::iterator BBI = BI->getIterator();
413  if (BBI != BB->begin()) {
414  --BBI;
415  while (isa<DbgInfoIntrinsic>(BBI)) {
416  if (BBI == BB->begin())
417  break;
418  --BBI;
419  }
420  if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
421  return nullptr;
422  }
423 
424  // Do not break infinite loops.
425  BasicBlock *DestBB = BI->getSuccessor(0);
426  if (DestBB == BB)
427  return nullptr;
428 
429  if (!canMergeBlocks(BB, DestBB))
430  DestBB = nullptr;
431 
432  return DestBB;
433 }
434 
435 /// Eliminate blocks that contain only PHI nodes, debug info directives, and an
436 /// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
437 /// edges in ways that are non-optimal for isel. Start by eliminating these
438 /// blocks so we can split them the way we want them.
439 bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
440  SmallPtrSet<BasicBlock *, 16> Preheaders;
441  SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end());
442  while (!LoopList.empty()) {
443  Loop *L = LoopList.pop_back_val();
444  LoopList.insert(LoopList.end(), L->begin(), L->end());
445  if (BasicBlock *Preheader = L->getLoopPreheader())
446  Preheaders.insert(Preheader);
447  }
448 
449  bool MadeChange = false;
450  // Note that this intentionally skips the entry block.
451  for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) {
452  BasicBlock *BB = &*I++;
453  BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB);
454  if (!DestBB ||
455  !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB)))
456  continue;
457 
458  eliminateMostlyEmptyBlock(BB);
459  MadeChange = true;
460  }
461  return MadeChange;
462 }
463 
464 bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
465  BasicBlock *DestBB,
466  bool isPreheader) {
467  // Do not delete loop preheaders if doing so would create a critical edge.
468  // Loop preheaders can be good locations to spill registers. If the
469  // preheader is deleted and we create a critical edge, registers may be
470  // spilled in the loop body instead.
471  if (!DisablePreheaderProtect && isPreheader &&
472  !(BB->getSinglePredecessor() &&
473  BB->getSinglePredecessor()->getSingleSuccessor()))
474  return false;
475 
476  // Try to skip merging if the unique predecessor of BB is terminated by a
477  // switch or indirect branch instruction, and BB is used as an incoming block
478  // of PHIs in DestBB. In such case, merging BB and DestBB would cause ISel to
479  // add COPY instructions in the predecessor of BB instead of BB (if it is not
480  // merged). Note that the critical edge created by merging such blocks won't be
481  // split in MachineSink because the jump table is not analyzable. By keeping
482  // such empty block (BB), ISel will place COPY instructions in BB, not in the
483  // predecessor of BB.
484  BasicBlock *Pred = BB->getUniquePredecessor();
485  if (!Pred ||
486  !(isa<SwitchInst>(Pred->getTerminator()) ||
487  isa<IndirectBrInst>(Pred->getTerminator())))
488  return true;
489 
490  if (BB->getTerminator() != BB->getFirstNonPHI())
491  return true;
492 
493  // We use a simple cost heuristic which determines that skipping merging is
494  // profitable if the cost of skipping merging is less than the cost of
495  // merging : Cost(skipping merging) < Cost(merging BB), where the
496  // Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)), and
497  // the Cost(merging BB) is Freq(Pred) * Cost(Copy).
498  // Assuming Cost(Copy) == Cost(Branch), we could simplify it to :
499  // Freq(Pred) / Freq(BB) > 2.
500  // Note that if there are multiple empty blocks sharing the same incoming
501  // value for the PHIs in the DestBB, we consider them together. In such
502  // case, Cost(merging BB) will be the sum of their frequencies.
503 
504  if (!isa<PHINode>(DestBB->begin()))
505  return true;
506 
507  SmallPtrSet<BasicBlock *, 16> SameIncomingValueBBs;
508 
509  // Find all other incoming blocks from which incoming values of all PHIs in
510  // DestBB are the same as the ones from BB.
511  for (pred_iterator PI = pred_begin(DestBB), E = pred_end(DestBB); PI != E;
512  ++PI) {
513  BasicBlock *DestBBPred = *PI;
514  if (DestBBPred == BB)
515  continue;
516 
517  bool HasAllSameValue = true;
518  BasicBlock::const_iterator DestBBI = DestBB->begin();
519  while (const PHINode *DestPN = dyn_cast<PHINode>(DestBBI++)) {
520  if (DestPN->getIncomingValueForBlock(BB) !=
521  DestPN->getIncomingValueForBlock(DestBBPred)) {
522  HasAllSameValue = false;
523  break;
524  }
525  }
526  if (HasAllSameValue)
527  SameIncomingValueBBs.insert(DestBBPred);
528  }
529 
530  // See if all of BB's incoming values are the same as the value from Pred.
531  // In this case, there is no reason to skip merging because COPYs are
532  // expected to be placed in Pred already.
533  if (SameIncomingValueBBs.count(Pred))
534  return true;
535 
536  if (!BFI) {
537  Function &F = *BB->getParent();
538  LoopInfo LI{DominatorTree(F)};
539  BPI.reset(new BranchProbabilityInfo(F, LI));
540  BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
541  }
542 
543  BlockFrequency PredFreq = BFI->getBlockFreq(Pred);
544  BlockFrequency BBFreq = BFI->getBlockFreq(BB);
545 
546  for (auto SameValueBB : SameIncomingValueBBs)
547  if (SameValueBB->getUniquePredecessor() == Pred &&
548  DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB))
549  BBFreq += BFI->getBlockFreq(SameValueBB);
550 
551  return PredFreq.getFrequency() <=
552  BBFreq.getFrequency() * FreqRatioToSkipMerge;
553 }
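// Editor's note (worked example, not from the original file): with the default
// cgp-freq-ratio-to-skip-merge of 2, Freq(Pred) = 8 and Freq(BB) = 3 gives
// 8 <= 3 * 2, which is false, so merging is deemed unprofitable and BB is kept
// as a separate empty block; with Freq(BB) = 5 the check 8 <= 10 holds and BB
// is merged as usual.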
554 
555 /// Return true if we can merge BB into DestBB if there is a single
556 /// unconditional branch between them, and BB contains no other non-phi
557 /// instructions.
558 bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
559  const BasicBlock *DestBB) const {
560  // We only want to eliminate blocks whose phi nodes are used by phi nodes in
561  // the successor. If there are more complex conditions (e.g. preheaders),
562  // don't mess around with them.
563  BasicBlock::const_iterator BBI = BB->begin();
564  while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
565  for (const User *U : PN->users()) {
566  const Instruction *UI = cast<Instruction>(U);
567  if (UI->getParent() != DestBB || !isa<PHINode>(UI))
568  return false;
569  // If User is inside DestBB block and it is a PHINode then check
570  // incoming value. If incoming value is not from BB then this is
571  // a complex condition (e.g. preheaders) we want to avoid here.
572  if (UI->getParent() == DestBB) {
573  if (const PHINode *UPN = dyn_cast<PHINode>(UI))
574  for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
575  Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
576  if (Insn && Insn->getParent() == BB &&
577  Insn->getParent() != UPN->getIncomingBlock(I))
578  return false;
579  }
580  }
581  }
582  }
583 
584  // If BB and DestBB contain any common predecessors, then the phi nodes in BB
585  // and DestBB may have conflicting incoming values for the block. If so, we
586  // can't merge the block.
587  const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
588  if (!DestBBPN) return true; // no conflict.
589 
590  // Collect the preds of BB.
591  SmallPtrSet<const BasicBlock*, 16> BBPreds;
592  if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
593  // It is faster to get preds from a PHI than with pred_iterator.
594  for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
595  BBPreds.insert(BBPN->getIncomingBlock(i));
596  } else {
597  BBPreds.insert(pred_begin(BB), pred_end(BB));
598  }
599 
600  // Walk the preds of DestBB.
601  for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
602  BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
603  if (BBPreds.count(Pred)) { // Common predecessor?
604  BBI = DestBB->begin();
605  while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
606  const Value *V1 = PN->getIncomingValueForBlock(Pred);
607  const Value *V2 = PN->getIncomingValueForBlock(BB);
608 
609  // If V2 is a phi node in BB, look up what the mapped value will be.
610  if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
611  if (V2PN->getParent() == BB)
612  V2 = V2PN->getIncomingValueForBlock(Pred);
613 
614  // If there is a conflict, bail out.
615  if (V1 != V2) return false;
616  }
617  }
618  }
619 
620  return true;
621 }
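// Editor's note (illustrative, not from the original file): a conflict that
// makes canMergeBlocks return false looks like
//   common:
//     br i1 %c, label %bb, label %dest
//   bb:                              ; mostly empty, branches to %dest
//     br label %dest
//   dest:
//     %p = phi i32 [ 0, %bb ], [ 1, %common ]
// Merging %bb into %dest would leave %p needing two different values (0 and 1)
// for the single incoming edge from %common.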
622 
623 
624 /// Eliminate a basic block that has only phi's and an unconditional branch in
625 /// it.
626 void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
627  BranchInst *BI = cast<BranchInst>(BB->getTerminator());
628  BasicBlock *DestBB = BI->getSuccessor(0);
629 
630  DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB);
631 
632  // If the destination block has a single pred, then this is a trivial edge,
633  // just collapse it.
634  if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
635  if (SinglePred != DestBB) {
636  // Remember if SinglePred was the entry block of the function. If so, we
637  // will need to move BB back to the entry position.
638  bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
639  MergeBasicBlockIntoOnlyPred(DestBB, nullptr);
640 
641  if (isEntry && BB != &BB->getParent()->getEntryBlock())
642  BB->moveBefore(&BB->getParent()->getEntryBlock());
643 
644  DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
645  return;
646  }
647  }
648 
649  // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
650  // to handle the new incoming edges it is about to have.
651  PHINode *PN;
652  for (BasicBlock::iterator BBI = DestBB->begin();
653  (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
654  // Remove the incoming value for BB, and remember it.
655  Value *InVal = PN->removeIncomingValue(BB, false);
656 
657  // Two options: either the InVal is a phi node defined in BB or it is some
658  // value that dominates BB.
659  PHINode *InValPhi = dyn_cast<PHINode>(InVal);
660  if (InValPhi && InValPhi->getParent() == BB) {
661  // Add all of the input values of the input PHI as inputs of this phi.
662  for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
663  PN->addIncoming(InValPhi->getIncomingValue(i),
664  InValPhi->getIncomingBlock(i));
665  } else {
666  // Otherwise, add one instance of the dominating value for each edge that
667  // we will be adding.
668  if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
669  for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
670  PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
671  } else {
672  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
673  PN->addIncoming(InVal, *PI);
674  }
675  }
676  }
677 
678  // The PHIs are now updated, change everything that refers to BB to use
679  // DestBB and remove BB.
680  BB->replaceAllUsesWith(DestBB);
681  BB->eraseFromParent();
682  ++NumBlocksElim;
683 
684  DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
685 }
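// Editor's note (illustrative, not from the original file): a typical
// "mostly empty" block handled above is
//   bb:
//     %v = phi i32 [ 0, %a ], [ 1, %b ]
//     br label %dest
//   dest:
//     %u = phi i32 [ %v, %bb ], [ %w, %c ]
// After elimination, %u receives [ 0, %a ] and [ 1, %b ] directly and %bb is
// erased.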
686 
687 // Computes a map of base pointer relocation instructions to corresponding
688 // derived pointer relocation instructions given a vector of all relocate calls
689 static void computeBaseDerivedRelocateMap(
690  const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls,
691  DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>>
692  &RelocateInstMap) {
693  // Collect information in two maps: one primarily for locating the base object
694  // while filling the second map; the second map is the final structure holding
695  // a mapping between Base and corresponding Derived relocate calls
696  DenseMap<std::pair<unsigned, unsigned>, GCRelocateInst *> RelocateIdxMap;
697  for (auto *ThisRelocate : AllRelocateCalls) {
698  auto K = std::make_pair(ThisRelocate->getBasePtrIndex(),
699  ThisRelocate->getDerivedPtrIndex());
700  RelocateIdxMap.insert(std::make_pair(K, ThisRelocate));
701  }
702  for (auto &Item : RelocateIdxMap) {
703  std::pair<unsigned, unsigned> Key = Item.first;
704  if (Key.first == Key.second)
705  // Base relocation: nothing to insert
706  continue;
707 
708  GCRelocateInst *I = Item.second;
709  auto BaseKey = std::make_pair(Key.first, Key.first);
710 
711  // We're iterating over RelocateIdxMap so we cannot modify it.
712  auto MaybeBase = RelocateIdxMap.find(BaseKey);
713  if (MaybeBase == RelocateIdxMap.end())
714  // TODO: We might want to insert a new base object relocate and gep off
715  // that, if there are enough derived object relocates.
716  continue;
717 
718  RelocateInstMap[MaybeBase->second].push_back(I);
719  }
720 }
721 
722 // Accepts a GEP and extracts the operands into a vector provided they're all
723 // small integer constants
724 static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP,
725  SmallVectorImpl<Value *> &OffsetV) {
726  for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
727  // Only accept small constant integer operands
728  auto Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
729  if (!Op || Op->getZExtValue() > 20)
730  return false;
731  }
732 
733  for (unsigned i = 1; i < GEP->getNumOperands(); i++)
734  OffsetV.push_back(GEP->getOperand(i));
735  return true;
736 }
737 
738 // Takes a RelocatedBase (base pointer relocation instruction) and Targets to
739 // replace, computes a replacement, and applies it.
740 static bool
741 simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase,
742  const SmallVectorImpl<GCRelocateInst *> &Targets) {
743  bool MadeChange = false;
744  for (GCRelocateInst *ToReplace : Targets) {
745  assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() &&
746  "Not relocating a derived object of the original base object");
747  if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) {
748  // A duplicate relocate call. TODO: coalesce duplicates.
749  continue;
750  }
751 
752  if (RelocatedBase->getParent() != ToReplace->getParent()) {
753  // Base and derived relocates are in different basic blocks.
754  // In this case transform is only valid when base dominates derived
755  // relocate. However it would be too expensive to check dominance
756  // for each such relocate, so we skip the whole transformation.
757  continue;
758  }
759 
760  Value *Base = ToReplace->getBasePtr();
761  auto Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr());
762  if (!Derived || Derived->getPointerOperand() != Base)
763  continue;
764 
765  SmallVector<Value *, 2> OffsetV;
766  if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
767  continue;
768 
769  // Create a Builder and replace the target callsite with a gep
770  assert(RelocatedBase->getNextNode() &&
771  "Should always have one since it's not a terminator");
772 
773  // Insert after RelocatedBase
774  IRBuilder<> Builder(RelocatedBase->getNextNode());
775  Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
776 
777  // If gc_relocate does not match the actual type, cast it to the right type.
778  // In theory, there must be a bitcast after gc_relocate if the type does not
779  // match, and we should reuse it to get the derived pointer. But it could be
780  // cases like this:
781  // bb1:
782  // ...
783  // %g1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
784  // br label %merge
785  //
786  // bb2:
787  // ...
788  // %g2 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
789  // br label %merge
790  //
791  // merge:
792  // %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ]
793  // %cast = bitcast i8 addrspace(1)* %p1 in to i32 addrspace(1)*
794  //
795  // In this case, we can no longer find the bitcast. So we insert a new
796  // bitcast whether or not one already exists. This way we can handle all
797  // cases, and the extra bitcast should be optimized away in later passes.
798  Value *ActualRelocatedBase = RelocatedBase;
799  if (RelocatedBase->getType() != Base->getType()) {
800  ActualRelocatedBase =
801  Builder.CreateBitCast(RelocatedBase, Base->getType());
802  }
803  Value *Replacement = Builder.CreateGEP(
804  Derived->getSourceElementType(), ActualRelocatedBase, makeArrayRef(OffsetV));
805  Replacement->takeName(ToReplace);
806  // If the newly generated derived pointer's type does not match the original derived
807  // pointer's type, cast the new derived pointer to match it. Same reasoning as above.
808  Value *ActualReplacement = Replacement;
809  if (Replacement->getType() != ToReplace->getType()) {
810  ActualReplacement =
811  Builder.CreateBitCast(Replacement, ToReplace->getType());
812  }
813  ToReplace->replaceAllUsesWith(ActualReplacement);
814  ToReplace->eraseFromParent();
815 
816  MadeChange = true;
817  }
818  return MadeChange;
819 }
820 
821 // Turns this:
822 //
823 // %base = ...
824 // %ptr = gep %base + 15
825 // %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
826 // %base' = relocate(%tok, i32 4, i32 4)
827 // %ptr' = relocate(%tok, i32 4, i32 5)
828 // %val = load %ptr'
829 //
830 // into this:
831 //
832 // %base = ...
833 // %ptr = gep %base + 15
834 // %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
835 // %base' = gc.relocate(%tok, i32 4, i32 4)
836 // %ptr' = gep %base' + 15
837 // %val = load %ptr'
838 bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) {
839  bool MadeChange = false;
840  SmallVector<GCRelocateInst *, 2> AllRelocateCalls;
841 
842  for (auto *U : I.users())
843  if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U))
844  // Collect all the relocate calls associated with a statepoint
845  AllRelocateCalls.push_back(Relocate);
846 
847  // We need at least one base pointer relocation + one derived pointer
848  // relocation to mangle
849  if (AllRelocateCalls.size() < 2)
850  return false;
851 
852  // RelocateInstMap is a mapping from the base relocate instruction to the
853  // corresponding derived relocate instructions
854  DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>> RelocateInstMap;
855  computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
856  if (RelocateInstMap.empty())
857  return false;
858 
859  for (auto &Item : RelocateInstMap)
860  // Item.first is the RelocatedBase to offset against
861  // Item.second is the vector of Targets to replace
862  MadeChange = simplifyRelocatesOffABase(Item.first, Item.second);
863  return MadeChange;
864 }
865 
866 /// SinkCast - Sink the specified cast instruction into its user blocks
867 static bool SinkCast(CastInst *CI) {
868  BasicBlock *DefBB = CI->getParent();
869 
870  /// InsertedCasts - Only insert a cast in each block once.
871  DenseMap<BasicBlock*, CastInst*> InsertedCasts;
872 
873  bool MadeChange = false;
874  for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
875  UI != E; ) {
876  Use &TheUse = UI.getUse();
877  Instruction *User = cast<Instruction>(*UI);
878 
879  // Figure out which BB this cast is used in. For PHI's this is the
880  // appropriate predecessor block.
881  BasicBlock *UserBB = User->getParent();
882  if (PHINode *PN = dyn_cast<PHINode>(User)) {
883  UserBB = PN->getIncomingBlock(TheUse);
884  }
885 
886  // Preincrement use iterator so we don't invalidate it.
887  ++UI;
888 
889  // The first insertion point of a block containing an EH pad is after the
890  // pad. If the pad is the user, we cannot sink the cast past the pad.
891  if (User->isEHPad())
892  continue;
893 
894  // If the block selected to receive the cast is an EH pad that does not
895  // allow non-PHI instructions before the terminator, we can't sink the
896  // cast.
897  if (UserBB->getTerminator()->isEHPad())
898  continue;
899 
900  // If this user is in the same block as the cast, don't change the cast.
901  if (UserBB == DefBB) continue;
902 
903  // If we have already inserted a cast into this block, use it.
904  CastInst *&InsertedCast = InsertedCasts[UserBB];
905 
906  if (!InsertedCast) {
907  BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
908  assert(InsertPt != UserBB->end());
909  InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0),
910  CI->getType(), "", &*InsertPt);
911  }
912 
913  // Replace a use of the cast with a use of the new cast.
914  TheUse = InsertedCast;
915  MadeChange = true;
916  ++NumCastUses;
917  }
918 
919  // If we removed all uses, nuke the cast.
920  if (CI->use_empty()) {
921  CI->eraseFromParent();
922  MadeChange = true;
923  }
924 
925  return MadeChange;
926 }
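// Editor's note (illustrative, not from the original file): SinkCast turns
//   def:
//     %c = bitcast i8* %p to i32*
//     br i1 %cond, label %use1, label %use2
//   use1:
//     %v = load i32, i32* %c
// into a copy of the bitcast placed at the start of %use1 (and of any other
// user block), after which the original cast is erased if it has no uses left,
// so the value no longer lives in a cross-block virtual register.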
927 
928 /// If the specified cast instruction is a noop copy (e.g. it's casting from
929 /// one pointer type to another, i32->i8 on PPC), sink it into user blocks to
930 /// reduce the number of virtual registers that must be created and coalesced.
931 ///
932 /// Return true if any changes are made.
933 ///
934 static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
935  const DataLayout &DL) {
936  // Sink only "cheap" (or nop) address-space casts. This is a weaker condition
937  // than sinking only nop casts, but is helpful on some platforms.
938  if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) {
939  if (!TLI.isCheapAddrSpaceCast(ASC->getSrcAddressSpace(),
940  ASC->getDestAddressSpace()))
941  return false;
942  }
943 
944  // If this is a noop copy,
945  EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType());
946  EVT DstVT = TLI.getValueType(DL, CI->getType());
947 
948  // If this is an fp<->int conversion, it is not a noop copy.
949  if (SrcVT.isInteger() != DstVT.isInteger())
950  return false;
951 
952  // If this is an extension, it will be a zero or sign extension, which
953  // isn't a noop.
954  if (SrcVT.bitsLT(DstVT)) return false;
955 
956  // If these values will be promoted, find out what they will be promoted
957  // to. This helps us consider truncates on PPC as noop copies when they
958  // are.
959  if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
960  TargetLowering::TypePromoteInteger)
961  SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
962  if (TLI.getTypeAction(CI->getContext(), DstVT) ==
963  TargetLowering::TypePromoteInteger)
964  DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
965 
966  // If, after promotion, these are the same types, this is a noop copy.
967  if (SrcVT != DstVT)
968  return false;
969 
970  return SinkCast(CI);
971 }
972 
973 /// Try to combine CI into a call to the llvm.uadd.with.overflow intrinsic if
974 /// possible.
975 ///
976 /// Return true if any changes were made.
977 static bool CombineUAddWithOverflow(CmpInst *CI) {
978  Value *A, *B;
979  Instruction *AddI;
980  if (!match(CI,
981  m_UAddWithOverflow(m_Value(A), m_Value(B), m_Instruction(AddI))))
982  return false;
983 
984  Type *Ty = AddI->getType();
985  if (!isa<IntegerType>(Ty))
986  return false;
987 
988  // We don't want to move around uses of condition values this late, so we
989  // check if it is legal to create the call to the intrinsic in the basic
990  // block containing the icmp:
991 
992  if (AddI->getParent() != CI->getParent() && !AddI->hasOneUse())
993  return false;
994 
995 #ifndef NDEBUG
996  // Someday m_UAddWithOverflow may get smarter, but this is a safe assumption
997  // for now:
998  if (AddI->hasOneUse())
999  assert(*AddI->user_begin() == CI && "expected!");
1000 #endif
1001 
1002  Module *M = CI->getModule();
1003  Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
1004 
1005  auto *InsertPt = AddI->hasOneUse() ? CI : AddI;
1006 
1007  auto *UAddWithOverflow =
1008  CallInst::Create(F, {A, B}, "uadd.overflow", InsertPt);
1009  auto *UAdd = ExtractValueInst::Create(UAddWithOverflow, 0, "uadd", InsertPt);
1010  auto *Overflow =
1011  ExtractValueInst::Create(UAddWithOverflow, 1, "overflow", InsertPt);
1012 
1013  CI->replaceAllUsesWith(Overflow);
1014  AddI->replaceAllUsesWith(UAdd);
1015  CI->eraseFromParent();
1016  AddI->eraseFromParent();
1017  return true;
1018 }
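// Editor's note (illustrative, not from the original file): the matched
// pattern is the canonical unsigned overflow check
//   %add = add i64 %a, %b
//   %cmp = icmp ult i64 %add, %a
// which becomes
//   %uadd.overflow = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
//   %uadd          = extractvalue { i64, i1 } %uadd.overflow, 0
//   %overflow      = extractvalue { i64, i1 } %uadd.overflow, 1
// letting the backend select a single add that also produces the carry flag.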
1019 
1020 /// Sink the given CmpInst into user blocks to reduce the number of virtual
1021 /// registers that must be created and coalesced. This is a clear win except on
1022 /// targets with multiple condition code registers (PowerPC), where it might
1023 /// lose; some adjustment may be wanted there.
1024 ///
1025 /// Return true if any changes are made.
1026 static bool SinkCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
1027  BasicBlock *DefBB = CI->getParent();
1028 
1029  // Avoid sinking soft-FP comparisons, since this can move them into a loop.
1030  if (TLI && TLI->useSoftFloat() && isa<FCmpInst>(CI))
1031  return false;
1032 
1033  // Only insert a cmp in each block once.
1034  DenseMap<BasicBlock*, CmpInst*> InsertedCmps;
1035 
1036  bool MadeChange = false;
1037  for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
1038  UI != E; ) {
1039  Use &TheUse = UI.getUse();
1040  Instruction *User = cast<Instruction>(*UI);
1041 
1042  // Preincrement use iterator so we don't invalidate it.
1043  ++UI;
1044 
1045  // Don't bother for PHI nodes.
1046  if (isa<PHINode>(User))
1047  continue;
1048 
1049  // Figure out which BB this cmp is used in.
1050  BasicBlock *UserBB = User->getParent();
1051 
1052  // If this user is in the same block as the cmp, don't change the cmp.
1053  if (UserBB == DefBB) continue;
1054 
1055  // If we have already inserted a cmp into this block, use it.
1056  CmpInst *&InsertedCmp = InsertedCmps[UserBB];
1057 
1058  if (!InsertedCmp) {
1059  BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1060  assert(InsertPt != UserBB->end());
1061  InsertedCmp =
1062  CmpInst::Create(CI->getOpcode(), CI->getPredicate(),
1063  CI->getOperand(0), CI->getOperand(1), "", &*InsertPt);
1064  // Propagate the debug info.
1065  InsertedCmp->setDebugLoc(CI->getDebugLoc());
1066  }
1067 
1068  // Replace a use of the cmp with a use of the new cmp.
1069  TheUse = InsertedCmp;
1070  MadeChange = true;
1071  ++NumCmpUses;
1072  }
1073 
1074  // If we removed all uses, nuke the cmp.
1075  if (CI->use_empty()) {
1076  CI->eraseFromParent();
1077  MadeChange = true;
1078  }
1079 
1080  return MadeChange;
1081 }
1082 
1083 static bool OptimizeCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
1084  if (SinkCmpExpression(CI, TLI))
1085  return true;
1086 
1087  if (CombineUAddWithOverflow(CI))
1088  return true;
1089 
1090  return false;
1091 }
1092 
1093 /// Check if the candidates could be combined with a shift instruction, which
1094 /// includes:
1095 /// 1. Truncate instruction
1096 /// 2. And instruction and the imm is a mask of the low bits:
1097 /// imm & (imm+1) == 0
1098 static bool isExtractBitsCandidateUse(Instruction *User) {
1099  if (!isa<TruncInst>(User)) {
1100  if (User->getOpcode() != Instruction::And ||
1101  !isa<ConstantInt>(User->getOperand(1)))
1102  return false;
1103 
1104  const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue();
1105 
1106  if ((Cimm & (Cimm + 1)).getBoolValue())
1107  return false;
1108  }
1109  return true;
1110 }
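// Editor's note (worked example, not from the original file): for imm = 0xff,
// imm & (imm + 1) == 0xff & 0x100 == 0, so the 'and' is a low-bit mask and a
// candidate; for imm = 0xf0, imm & (imm + 1) == 0xf0 & 0xf1 == 0xf0 != 0, so
// it is rejected.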
1111 
1112 /// Sink both shift and truncate instruction to the use of truncate's BB.
1113 static bool
1114 SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
1115  DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
1116  const TargetLowering &TLI, const DataLayout &DL) {
1117  BasicBlock *UserBB = User->getParent();
1118  DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
1119  TruncInst *TruncI = dyn_cast<TruncInst>(User);
1120  bool MadeChange = false;
1121 
1122  for (Value::user_iterator TruncUI = TruncI->user_begin(),
1123  TruncE = TruncI->user_end();
1124  TruncUI != TruncE;) {
1125 
1126  Use &TruncTheUse = TruncUI.getUse();
1127  Instruction *TruncUser = cast<Instruction>(*TruncUI);
1128  // Preincrement use iterator so we don't invalidate it.
1129 
1130  ++TruncUI;
1131 
1132  int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
1133  if (!ISDOpcode)
1134  continue;
1135 
1136  // If the use is actually a legal node, there will not be an
1137  // implicit truncate.
1138  // FIXME: always querying the result type is just an
1139  // approximation; some nodes' legality is determined by the
1140  // operand or other means. There's no good way to find out though.
1141  if (TLI.isOperationLegalOrCustom(
1142  ISDOpcode, TLI.getValueType(DL, TruncUser->getType(), true)))
1143  continue;
1144 
1145  // Don't bother for PHI nodes.
1146  if (isa<PHINode>(TruncUser))
1147  continue;
1148 
1149  BasicBlock *TruncUserBB = TruncUser->getParent();
1150 
1151  if (UserBB == TruncUserBB)
1152  continue;
1153 
1154  BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
1155  CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
1156 
1157  if (!InsertedShift && !InsertedTrunc) {
1158  BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
1159  assert(InsertPt != TruncUserBB->end());
1160  // Sink the shift
1161  if (ShiftI->getOpcode() == Instruction::AShr)
1162  InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI,
1163  "", &*InsertPt);
1164  else
1165  InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,
1166  "", &*InsertPt);
1167 
1168  // Sink the trunc
1169  BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
1170  TruncInsertPt++;
1171  assert(TruncInsertPt != TruncUserBB->end());
1172 
1173  InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
1174  TruncI->getType(), "", &*TruncInsertPt);
1175 
1176  MadeChange = true;
1177 
1178  TruncTheUse = InsertedTrunc;
1179  }
1180  }
1181  return MadeChange;
1182 }
1183 
1184 /// Sink the shift *right* instruction into user blocks if the uses could
1185 /// potentially be combined with this shift instruction and generate BitExtract
1186 /// instruction. It will only be applied if the architecture supports BitExtract
1187 /// instruction. Here is an example:
1188 /// BB1:
1189 /// %x.extract.shift = lshr i64 %arg1, 32
1190 /// BB2:
1191 /// %x.extract.trunc = trunc i64 %x.extract.shift to i16
1192 /// ==>
1193 ///
1194 /// BB2:
1195 /// %x.extract.shift.1 = lshr i64 %arg1, 32
1196 /// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
1197 ///
1198 /// CodeGen will recognize the pattern in BB2 and generate BitExtract
1199 /// instruction.
1200 /// Return true if any changes are made.
1201 static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
1202  const TargetLowering &TLI,
1203  const DataLayout &DL) {
1204  BasicBlock *DefBB = ShiftI->getParent();
1205 
1206  /// Only insert instructions in each block once.
1207  DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;
1208 
1209  bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(DL, ShiftI->getType()));
1210 
1211  bool MadeChange = false;
1212  for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
1213  UI != E;) {
1214  Use &TheUse = UI.getUse();
1215  Instruction *User = cast<Instruction>(*UI);
1216  // Preincrement use iterator so we don't invalidate it.
1217  ++UI;
1218 
1219  // Don't bother for PHI nodes.
1220  if (isa<PHINode>(User))
1221  continue;
1222 
1223  if (!isExtractBitsCandidateUse(User))
1224  continue;
1225 
1226  BasicBlock *UserBB = User->getParent();
1227 
1228  if (UserBB == DefBB) {
1229  // If the shift and truncate instructions are in the same BB, the use of
1230  // the truncate (TruncUse) may still introduce another truncate if it is not
1231  // legal. In this case, we would like to sink both the shift and the
1232  // truncate instructions to the BB of TruncUse.
1233  // for example:
1234  // BB1:
1235  // i64 shift.result = lshr i64 opnd, imm
1236  // trunc.result = trunc shift.result to i16
1237  //
1238  // BB2:
1239  // ----> We will have an implicit truncate here if the architecture does
1240  // not have i16 compare.
1241  // cmp i16 trunc.result, opnd2
1242  //
1243  if (isa<TruncInst>(User) && shiftIsLegal
1244  // If the type of the truncate is legal, no truncate will be
1245  // introduced in other basic blocks.
1246  &&
1247  (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType()))))
1248  MadeChange =
1249  SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL);
1250 
1251  continue;
1252  }
1253  // If we have already inserted a shift into this block, use it.
1254  BinaryOperator *&InsertedShift = InsertedShifts[UserBB];
1255 
1256  if (!InsertedShift) {
1257  BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1258  assert(InsertPt != UserBB->end());
1259 
1260  if (ShiftI->getOpcode() == Instruction::AShr)
1261  InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI,
1262  "", &*InsertPt);
1263  else
1264  InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,
1265  "", &*InsertPt);
1266 
1267  MadeChange = true;
1268  }
1269 
1270  // Replace a use of the shift with a use of the new shift.
1271  TheUse = InsertedShift;
1272  }
1273 
1274  // If we removed all uses, nuke the shift.
1275  if (ShiftI->use_empty())
1276  ShiftI->eraseFromParent();
1277 
1278  return MadeChange;
1279 }
1280 
1281 // Translate a masked load intrinsic like
1282 // <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
1283 // <16 x i1> %mask, <16 x i32> %passthru)
1284 // to a chain of basic blocks, loading the elements one by one if
1285 // the appropriate mask bit is set
1286 //
1287 // %1 = bitcast i8* %addr to i32*
1288 // %2 = extractelement <16 x i1> %mask, i32 0
1289 // %3 = icmp eq i1 %2, true
1290 // br i1 %3, label %cond.load, label %else
1291 //
1292 //cond.load: ; preds = %0
1293 // %4 = getelementptr i32* %1, i32 0
1294 // %5 = load i32* %4
1295 // %6 = insertelement <16 x i32> undef, i32 %5, i32 0
1296 // br label %else
1297 //
1298 //else: ; preds = %0, %cond.load
1299 // %res.phi.else = phi <16 x i32> [ %6, %cond.load ], [ undef, %0 ]
1300 // %7 = extractelement <16 x i1> %mask, i32 1
1301 // %8 = icmp eq i1 %7, true
1302 // br i1 %8, label %cond.load1, label %else2
1303 //
1304 //cond.load1: ; preds = %else
1305 // %9 = getelementptr i32* %1, i32 1
1306 // %10 = load i32* %9
1307 // %11 = insertelement <16 x i32> %res.phi.else, i32 %10, i32 1
1308 // br label %else2
1309 //
1310 //else2: ; preds = %else, %cond.load1
1311 // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
1312 // %12 = extractelement <16 x i1> %mask, i32 2
1313 // %13 = icmp eq i1 %12, true
1314 // br i1 %13, label %cond.load4, label %else5
1315 //
1316 static void scalarizeMaskedLoad(CallInst *CI) {
1317  Value *Ptr = CI->getArgOperand(0);
1318  Value *Alignment = CI->getArgOperand(1);
1319  Value *Mask = CI->getArgOperand(2);
1320  Value *Src0 = CI->getArgOperand(3);
1321 
1322  unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
1323  VectorType *VecType = dyn_cast<VectorType>(CI->getType());
1324  assert(VecType && "Unexpected return type of masked load intrinsic");
1325 
1326  Type *EltTy = CI->getType()->getVectorElementType();
1327 
1328  IRBuilder<> Builder(CI->getContext());
1329  Instruction *InsertPt = CI;
1330  BasicBlock *IfBlock = CI->getParent();
1331  BasicBlock *CondBlock = nullptr;
1332  BasicBlock *PrevIfBlock = CI->getParent();
1333 
1334  Builder.SetInsertPoint(InsertPt);
1335  Builder.SetCurrentDebugLocation(CI->getDebugLoc());
1336 
1337  // Short-cut if the mask is all-true.
1338  bool IsAllOnesMask = isa<Constant>(Mask) &&
1339  cast<Constant>(Mask)->isAllOnesValue();
1340 
1341  if (IsAllOnesMask) {
1342  Value *NewI = Builder.CreateAlignedLoad(Ptr, AlignVal);
1343  CI->replaceAllUsesWith(NewI);
1344  CI->eraseFromParent();
1345  return;
1346  }
1347 
1348  // Adjust alignment for the scalar instruction.
1349  AlignVal = std::min(AlignVal, VecType->getScalarSizeInBits()/8);
1350  // Bitcast %addr from i8* to EltTy*
1351  Type *NewPtrType =
1352  EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
1353  Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
1354  unsigned VectorWidth = VecType->getNumElements();
1355 
1356  Value *UndefVal = UndefValue::get(VecType);
1357 
1358  // The result vector
1359  Value *VResult = UndefVal;
1360 
1361  if (isa<ConstantVector>(Mask)) {
1362  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
1363  if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
1364  continue;
1365  Value *Gep =
1366  Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
1367  LoadInst* Load = Builder.CreateAlignedLoad(Gep, AlignVal);
1368  VResult = Builder.CreateInsertElement(VResult, Load,
1369  Builder.getInt32(Idx));
1370  }
1371  Value *NewI = Builder.CreateSelect(Mask, VResult, Src0);
1372  CI->replaceAllUsesWith(NewI);
1373  CI->eraseFromParent();
1374  return;
1375  }
1376 
1377  PHINode *Phi = nullptr;
1378  Value *PrevPhi = UndefVal;
1379 
1380  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
1381 
1382  // Fill the "else" block, created in the previous iteration
1383  //
1384  // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
1385  // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
1386  // %to_load = icmp eq i1 %mask_1, true
1387  // br i1 %to_load, label %cond.load, label %else
1388  //
1389  if (Idx > 0) {
1390  Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
1391  Phi->addIncoming(VResult, CondBlock);
1392  Phi->addIncoming(PrevPhi, PrevIfBlock);
1393  PrevPhi = Phi;
1394  VResult = Phi;
1395  }
1396 
1397  Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
1398  Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
1399  ConstantInt::get(Predicate->getType(), 1));
1400 
1401  // Create "cond" block
1402  //
1403  // %EltAddr = getelementptr i32* %1, i32 0
1404  // %Elt = load i32* %EltAddr
1405  // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
1406  //
1407  CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.load");
1408  Builder.SetInsertPoint(InsertPt);
1409 
1410  Value *Gep =
1411  Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
1412  LoadInst *Load = Builder.CreateAlignedLoad(Gep, AlignVal);
1413  VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
1414 
1415  // Create "else" block, fill it in the next iteration
1416  BasicBlock *NewIfBlock =
1417  CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
1418  Builder.SetInsertPoint(InsertPt);
1419  Instruction *OldBr = IfBlock->getTerminator();
1420  BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
1421  OldBr->eraseFromParent();
1422  PrevIfBlock = IfBlock;
1423  IfBlock = NewIfBlock;
1424  }
1425 
1426  Phi = Builder.CreatePHI(VecType, 2, "res.phi.select");
1427  Phi->addIncoming(VResult, CondBlock);
1428  Phi->addIncoming(PrevPhi, PrevIfBlock);
1429  Value *NewI = Builder.CreateSelect(Mask, Phi, Src0);
1430  CI->replaceAllUsesWith(NewI);
1431  CI->eraseFromParent();
1432 }
1433 
1434 // Translate a masked store intrinsic, like
1435 // void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
1436 // <16 x i1> %mask)
1437 // to a chain of basic blocks, storing the elements one by one if
1438 // the appropriate mask bit is set
1439 //
1440 // %1 = bitcast i8* %addr to i32*
1441 // %2 = extractelement <16 x i1> %mask, i32 0
1442 // %3 = icmp eq i1 %2, true
1443 // br i1 %3, label %cond.store, label %else
1444 //
1445 // cond.store: ; preds = %0
1446 // %4 = extractelement <16 x i32> %val, i32 0
1447 // %5 = getelementptr i32* %1, i32 0
1448 // store i32 %4, i32* %5
1449 // br label %else
1450 //
1451 // else: ; preds = %0, %cond.store
1452 // %6 = extractelement <16 x i1> %mask, i32 1
1453 // %7 = icmp eq i1 %6, true
1454 // br i1 %7, label %cond.store1, label %else2
1455 //
1456 // cond.store1: ; preds = %else
1457 // %8 = extractelement <16 x i32> %val, i32 1
1458 // %9 = getelementptr i32* %1, i32 1
1459 // store i32 %8, i32* %9
1460 // br label %else2
1461 // . . .
1462 static void scalarizeMaskedStore(CallInst *CI) {
1463  Value *Src = CI->getArgOperand(0);
1464  Value *Ptr = CI->getArgOperand(1);
1465  Value *Alignment = CI->getArgOperand(2);
1466  Value *Mask = CI->getArgOperand(3);
1467 
1468  unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
1469  VectorType *VecType = dyn_cast<VectorType>(Src->getType());
1470  assert(VecType && "Unexpected data type in masked store intrinsic");
1471 
1472  Type *EltTy = VecType->getElementType();
1473 
1474  IRBuilder<> Builder(CI->getContext());
1475  Instruction *InsertPt = CI;
1476  BasicBlock *IfBlock = CI->getParent();
1477  Builder.SetInsertPoint(InsertPt);
1478  Builder.SetCurrentDebugLocation(CI->getDebugLoc());
1479 
1480  // Short-cut if the mask is all-true.
1481  bool IsAllOnesMask = isa<Constant>(Mask) &&
1482  cast<Constant>(Mask)->isAllOnesValue();
1483 
1484  if (IsAllOnesMask) {
1485  Builder.CreateAlignedStore(Src, Ptr, AlignVal);
1486  CI->eraseFromParent();
1487  return;
1488  }
1489 
1490  // Adjust alignment for the scalar instruction.
1491  AlignVal = std::max(AlignVal, VecType->getScalarSizeInBits()/8);
1492  // Bitcast %addr from i8* to EltTy*
1493  Type *NewPtrType =
1494  EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
1495  Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
1496  unsigned VectorWidth = VecType->getNumElements();
1497 
1498  if (isa<ConstantVector>(Mask)) {
1499  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
1500  if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
1501  continue;
1502  Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
1503  Value *Gep =
1504  Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
1505  Builder.CreateAlignedStore(OneElt, Gep, AlignVal);
1506  }
1507  CI->eraseFromParent();
1508  return;
1509  }
1510 
1511  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
1512 
1513  // Fill the "else" block, created in the previous iteration
1514  //
1515  // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
1516  // %to_store = icmp eq i1 %mask_1, true
1517  // br i1 %to_store, label %cond.store, label %else
1518  //
1519  Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
1520  Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
1521  ConstantInt::get(Predicate->getType(), 1));
1522 
1523  // Create "cond" block
1524  //
1525  // %OneElt = extractelement <16 x i32> %Src, i32 Idx
1526  // %EltAddr = getelementptr i32* %1, i32 0
1527  // %store i32 %OneElt, i32* %EltAddr
1528  //
1529  BasicBlock *CondBlock =
1530  IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store");
1531  Builder.SetInsertPoint(InsertPt);
1532 
1533  Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
1534  Value *Gep =
1535  Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
1536  Builder.CreateAlignedStore(OneElt, Gep, AlignVal);
1537 
1538  // Create "else" block, fill it in the next iteration
1539  BasicBlock *NewIfBlock =
1540  CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
1541  Builder.SetInsertPoint(InsertPt);
1542  Instruction *OldBr = IfBlock->getTerminator();
1543  BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
1544  OldBr->eraseFromParent();
1545  IfBlock = NewIfBlock;
1546  }
1547  CI->eraseFromParent();
1548 }
1549 
1550 // Translate a masked gather intrinsic like
1551 // <16 x i32 > @llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4,
1552 // <16 x i1> %Mask, <16 x i32> %Src)
1553 // to a chain of basic blocks, loading the elements one by one if
1554 // the appropriate mask bit is set
1555 //
1556 // % Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind
1557 // % Mask0 = extractelement <16 x i1> %Mask, i32 0
1558 // % ToLoad0 = icmp eq i1 % Mask0, true
1559 // br i1 % ToLoad0, label %cond.load, label %else
1560 //
1561 // cond.load:
1562 // % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
1563 // % Load0 = load i32, i32* % Ptr0, align 4
1564 // % Res0 = insertelement <16 x i32> undef, i32 % Load0, i32 0
1565 // br label %else
1566 //
1567 // else:
1568 // %res.phi.else = phi <16 x i32>[% Res0, %cond.load], [undef, % 0]
1569 // % Mask1 = extractelement <16 x i1> %Mask, i32 1
1570 // % ToLoad1 = icmp eq i1 % Mask1, true
1571 // br i1 % ToLoad1, label %cond.load1, label %else2
1572 //
1573 // cond.load1:
1574 // % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
1575 // % Load1 = load i32, i32* % Ptr1, align 4
1576 // % Res1 = insertelement <16 x i32> %res.phi.else, i32 % Load1, i32 1
1577 // br label %else2
1578 // . . .
1579 // % Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src
1580 // ret <16 x i32> %Result
1581 static void scalarizeMaskedGather(CallInst *CI) {
1582  Value *Ptrs = CI->getArgOperand(0);
1583  Value *Alignment = CI->getArgOperand(1);
1584  Value *Mask = CI->getArgOperand(2);
1585  Value *Src0 = CI->getArgOperand(3);
1586 
1587  VectorType *VecType = dyn_cast<VectorType>(CI->getType());
1588 
1589  assert(VecType && "Unexpected return type of masked load intrinsic");
1590 
1591  IRBuilder<> Builder(CI->getContext());
1592  Instruction *InsertPt = CI;
1593  BasicBlock *IfBlock = CI->getParent();
1594  BasicBlock *CondBlock = nullptr;
1595  BasicBlock *PrevIfBlock = CI->getParent();
1596  Builder.SetInsertPoint(InsertPt);
1597  unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
1598 
1599  Builder.SetCurrentDebugLocation(CI->getDebugLoc());
1600 
1601  Value *UndefVal = UndefValue::get(VecType);
1602 
1603  // The result vector
1604  Value *VResult = UndefVal;
1605  unsigned VectorWidth = VecType->getNumElements();
1606 
1607  // Shorten the way if the mask is a vector of constants.
1608  bool IsConstMask = isa<ConstantVector>(Mask);
1609 
1610  if (IsConstMask) {
1611  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
1612  if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
1613  continue;
1614  Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
1615  "Ptr" + Twine(Idx));
1616  LoadInst *Load = Builder.CreateAlignedLoad(Ptr, AlignVal,
1617  "Load" + Twine(Idx));
1618  VResult = Builder.CreateInsertElement(VResult, Load,
1619  Builder.getInt32(Idx),
1620  "Res" + Twine(Idx));
1621  }
1622  Value *NewI = Builder.CreateSelect(Mask, VResult, Src0);
1623  CI->replaceAllUsesWith(NewI);
1624  CI->eraseFromParent();
1625  return;
1626  }
1627 
1628  PHINode *Phi = nullptr;
1629  Value *PrevPhi = UndefVal;
1630 
1631  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
1632 
1633  // Fill the "else" block, created in the previous iteration
1634  //
1635  // %Mask1 = extractelement <16 x i1> %Mask, i32 1
1636  // %ToLoad1 = icmp eq i1 %Mask1, true
1637  // br i1 %ToLoad1, label %cond.load, label %else
1638  //
1639  if (Idx > 0) {
1640  Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
1641  Phi->addIncoming(VResult, CondBlock);
1642  Phi->addIncoming(PrevPhi, PrevIfBlock);
1643  PrevPhi = Phi;
1644  VResult = Phi;
1645  }
1646 
1647  Value *Predicate = Builder.CreateExtractElement(Mask,
1648  Builder.getInt32(Idx),
1649  "Mask" + Twine(Idx));
1650  Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
1651  ConstantInt::get(Predicate->getType(), 1),
1652  "ToLoad" + Twine(Idx));
1653 
1654  // Create "cond" block
1655  //
1656  // %EltAddr = getelementptr i32* %1, i32 0
1657  // %Elt = load i32* %EltAddr
1658  // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
1659  //
1660  CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
1661  Builder.SetInsertPoint(InsertPt);
1662 
1663  Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
1664  "Ptr" + Twine(Idx));
1665  LoadInst *Load = Builder.CreateAlignedLoad(Ptr, AlignVal,
1666  "Load" + Twine(Idx));
1667  VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx),
1668  "Res" + Twine(Idx));
1669 
1670  // Create "else" block, fill it in the next iteration
1671  BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
1672  Builder.SetInsertPoint(InsertPt);
1673  Instruction *OldBr = IfBlock->getTerminator();
1674  BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
1675  OldBr->eraseFromParent();
1676  PrevIfBlock = IfBlock;
1677  IfBlock = NewIfBlock;
1678  }
1679 
1680  Phi = Builder.CreatePHI(VecType, 2, "res.phi.select");
1681  Phi->addIncoming(VResult, CondBlock);
1682  Phi->addIncoming(PrevPhi, PrevIfBlock);
1683  Value *NewI = Builder.CreateSelect(Mask, Phi, Src0);
1684  CI->replaceAllUsesWith(NewI);
1685  CI->eraseFromParent();
1686 }
1687 
1688 // Translate a masked scatter intrinsic, like
1689 // void @llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*>* %Ptrs, i32 4,
1690 // <16 x i1> %Mask)
1691 // to a chain of basic blocks, storing the elements one by one if
1692 // the appropriate mask bit is set.
1693 //
1694 // % Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind
1695 // % Mask0 = extractelement <16 x i1> % Mask, i32 0
1696 // % ToStore0 = icmp eq i1 % Mask0, true
1697 // br i1 %ToStore0, label %cond.store, label %else
1698 //
1699 // cond.store:
1700 // % Elt0 = extractelement <16 x i32> %Src, i32 0
1701 // % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
1702 // store i32 %Elt0, i32* % Ptr0, align 4
1703 // br label %else
1704 //
1705 // else:
1706 // % Mask1 = extractelement <16 x i1> % Mask, i32 1
1707 // % ToStore1 = icmp eq i1 % Mask1, true
1708 // br i1 % ToStore1, label %cond.store1, label %else2
1709 //
1710 // cond.store1:
1711 // % Elt1 = extractelement <16 x i32> %Src, i32 1
1712 // % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
1713 // store i32 % Elt1, i32* % Ptr1, align 4
1714 // br label %else2
1715 // . . .
1716 static void scalarizeMaskedScatter(CallInst *CI) {
1717  Value *Src = CI->getArgOperand(0);
1718  Value *Ptrs = CI->getArgOperand(1);
1719  Value *Alignment = CI->getArgOperand(2);
1720  Value *Mask = CI->getArgOperand(3);
1721 
1722  assert(isa<VectorType>(Src->getType()) &&
1723  "Unexpected data type in masked scatter intrinsic");
1724  assert(isa<VectorType>(Ptrs->getType()) &&
1725  isa<PointerType>(Ptrs->getType()->getVectorElementType()) &&
1726  "Vector of pointers is expected in masked scatter intrinsic");
1727 
1728  IRBuilder<> Builder(CI->getContext());
1729  Instruction *InsertPt = CI;
1730  BasicBlock *IfBlock = CI->getParent();
1731  Builder.SetInsertPoint(InsertPt);
1732  Builder.SetCurrentDebugLocation(CI->getDebugLoc());
1733 
1734  unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
1735  unsigned VectorWidth = Src->getType()->getVectorNumElements();
1736 
1737  // Shorten the way if the mask is a vector of constants.
1738  bool IsConstMask = isa<ConstantVector>(Mask);
1739 
1740  if (IsConstMask) {
1741  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
1742  if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
1743  continue;
1744  Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx),
1745  "Elt" + Twine(Idx));
1746  Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
1747  "Ptr" + Twine(Idx));
1748  Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
1749  }
1750  CI->eraseFromParent();
1751  return;
1752  }
1753  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
1754  // Fill the "else" block, created in the previous iteration
1755  //
1756  // % Mask1 = extractelement <16 x i1> % Mask, i32 Idx
1757  // % ToStore = icmp eq i1 % Mask1, true
1758  // br i1 % ToStore, label %cond.store, label %else
1759  //
1760  Value *Predicate = Builder.CreateExtractElement(Mask,
1761  Builder.getInt32(Idx),
1762  "Mask" + Twine(Idx));
1763  Value *Cmp =
1764  Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
1765  ConstantInt::get(Predicate->getType(), 1),
1766  "ToStore" + Twine(Idx));
1767 
1768  // Create "cond" block
1769  //
1770  // % Elt1 = extractelement <16 x i32> %Src, i32 1
1771  // % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
1772  // store i32 % Elt1, i32* % Ptr1
1773  //
1774  BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store");
1775  Builder.SetInsertPoint(InsertPt);
1776 
1777  Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx),
1778  "Elt" + Twine(Idx));
1779  Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
1780  "Ptr" + Twine(Idx));
1781  Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
1782 
1783  // Create "else" block, fill it in the next iteration
1784  BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
1785  Builder.SetInsertPoint(InsertPt);
1786  Instruction *OldBr = IfBlock->getTerminator();
1787  BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
1788  OldBr->eraseFromParent();
1789  IfBlock = NewIfBlock;
1790  }
1791  CI->eraseFromParent();
1792 }
1793 
1794 /// If counting leading or trailing zeros is an expensive operation and a zero
1795 /// input is defined, add a check for zero to avoid calling the intrinsic.
1796 ///
1797 /// We want to transform:
1798 /// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
1799 ///
1800 /// into:
1801 /// entry:
1802 /// %cmpz = icmp eq i64 %A, 0
1803 /// br i1 %cmpz, label %cond.end, label %cond.false
1804 /// cond.false:
1805 /// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true)
1806 /// br label %cond.end
1807 /// cond.end:
1808 /// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
1809 ///
1810 /// If the transform is performed, return true and set ModifiedDT to true.
1811 static bool despeculateCountZeros(IntrinsicInst *CountZeros,
1812  const TargetLowering *TLI,
1813  const DataLayout *DL,
1814  bool &ModifiedDT) {
1815  if (!TLI || !DL)
1816  return false;
1817 
1818  // If a zero input is undefined, it doesn't make sense to despeculate that.
1819  if (match(CountZeros->getOperand(1), m_One()))
1820  return false;
1821 
1822  // If it's cheap to speculate, there's nothing to do.
1823  auto IntrinsicID = CountZeros->getIntrinsicID();
1824  if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz()) ||
1825  (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz()))
1826  return false;
1827 
1828  // Only handle legal scalar cases. Anything else requires too much work.
1829  Type *Ty = CountZeros->getType();
1830  unsigned SizeInBits = Ty->getPrimitiveSizeInBits();
1831  if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits())
1832  return false;
1833 
1834  // The intrinsic will be sunk behind a compare against zero and branch.
1835  BasicBlock *StartBlock = CountZeros->getParent();
1836  BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false");
1837 
1838  // Create another block after the count zero intrinsic. A PHI will be added
1839  // in this block to select the result of the intrinsic or the bit-width
1840  // constant if the input to the intrinsic is zero.
1841  BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(CountZeros));
1842  BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end");
1843 
1844  // Set up a builder to create a compare, conditional branch, and PHI.
1845  IRBuilder<> Builder(CountZeros->getContext());
1846  Builder.SetInsertPoint(StartBlock->getTerminator());
1847  Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc());
1848 
1849  // Replace the unconditional branch that was created by the first split with
1850  // a compare against zero and a conditional branch.
1851  Value *Zero = Constant::getNullValue(Ty);
1852  Value *Cmp = Builder.CreateICmpEQ(CountZeros->getOperand(0), Zero, "cmpz");
1853  Builder.CreateCondBr(Cmp, EndBlock, CallBlock);
1854  StartBlock->getTerminator()->eraseFromParent();
1855 
1856  // Create a PHI in the end block to select either the output of the intrinsic
1857  // or the bit width of the operand.
1858  Builder.SetInsertPoint(&EndBlock->front());
1859  PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz");
1860  CountZeros->replaceAllUsesWith(PN);
1861  Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits));
1862  PN->addIncoming(BitWidth, StartBlock);
1863  PN->addIncoming(CountZeros, CallBlock);
1864 
1865  // We are explicitly handling the zero case, so we can set the intrinsic's
1866  // undefined zero argument to 'true'. This will also prevent reprocessing the
1867  // intrinsic; we only despeculate when a zero input is defined.
1868  CountZeros->setArgOperand(1, Builder.getTrue());
1869  ModifiedDT = true;
1870  return true;
1871 }
1872 
1873 bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) {
1874  BasicBlock *BB = CI->getParent();
1875 
1876  // Lower inline assembly if we can.
1877  // If we found an inline asm expression, and if the target knows how to
1878  // lower it to normal LLVM code, do so now.
1879  if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
1880  if (TLI->ExpandInlineAsm(CI)) {
1881  // Avoid invalidating the iterator.
1882  CurInstIterator = BB->begin();
1883  // Avoid processing instructions out of order, which could cause
1884  // reuse before a value is defined.
1885  SunkAddrs.clear();
1886  return true;
1887  }
1888  // Sink address computing for memory operands into the block.
1889  if (optimizeInlineAsmInst(CI))
1890  return true;
1891  }
1892 
1893  // Align the pointer arguments to this call if the target thinks it's a good
1894  // idea
1895  unsigned MinSize, PrefAlign;
1896  if (TLI && TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
1897  for (auto &Arg : CI->arg_operands()) {
1898  // We want to align both objects whose address is used directly and
1899  // objects whose address is used in casts and GEPs, though it only makes
1900  // sense for GEPs if the offset is a multiple of the desired alignment and
1901  // if size - offset meets the size threshold.
1902  if (!Arg->getType()->isPointerTy())
1903  continue;
1904  APInt Offset(DL->getPointerSizeInBits(
1905  cast<PointerType>(Arg->getType())->getAddressSpace()),
1906  0);
1907  Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
1908  uint64_t Offset2 = Offset.getLimitedValue();
1909  if ((Offset2 & (PrefAlign-1)) != 0)
1910  continue;
1911  AllocaInst *AI;
1912  if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlignment() < PrefAlign &&
1913  DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
1914  AI->setAlignment(PrefAlign);
1915  // Global variables can only be aligned if they are defined in this
1916  // object (i.e. they are uniquely initialized in this object), and
1917  // over-aligning global variables that have an explicit section is
1918  // forbidden.
1919  GlobalVariable *GV;
1920  if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() &&
1921  GV->getPointerAlignment(*DL) < PrefAlign &&
1922  DL->getTypeAllocSize(GV->getValueType()) >=
1923  MinSize + Offset2)
1924  GV->setAlignment(PrefAlign);
1925  }
1926  // If this is a memcpy (or similar) then we may be able to improve the
1927  // alignment
1928  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
1929  unsigned Align = getKnownAlignment(MI->getDest(), *DL);
1930  if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI))
1931  Align = std::min(Align, getKnownAlignment(MTI->getSource(), *DL));
1932  if (Align > MI->getAlignment())
1933  MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), Align));
1934  }
1935  }
1936 
1937  // If we have a cold call site, try to sink addressing computation into the
1938  // cold block. This interacts with our handling for loads and stores to
1939  // ensure that we can fold all uses of a potential addressing computation
1940  // into their uses. TODO: generalize this to work over profiling data
1941  if (!OptSize && CI->hasFnAttr(Attribute::Cold))
1942  for (auto &Arg : CI->arg_operands()) {
1943  if (!Arg->getType()->isPointerTy())
1944  continue;
1945  unsigned AS = Arg->getType()->getPointerAddressSpace();
1946  return optimizeMemoryInst(CI, Arg, Arg->getType(), AS);
1947  }
1948 
1949  IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
1950  if (II) {
1951  switch (II->getIntrinsicID()) {
1952  default: break;
1953  case Intrinsic::objectsize: {
1954  // Lower all uses of llvm.objectsize.*
1955  ConstantInt *RetVal =
1956  lowerObjectSizeCall(II, *DL, TLInfo, /*MustSucceed=*/true);
1957  // Substituting this can cause recursive simplifications, which can
1958  // invalidate our iterator. Use a WeakVH to hold onto it in case this
1959  // happens.
1960  Value *CurValue = &*CurInstIterator;
1961  WeakVH IterHandle(CurValue);
1962 
1963  replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
1964 
1965  // If the iterator instruction was recursively deleted, start over at the
1966  // start of the block.
1967  if (IterHandle != CurValue) {
1968  CurInstIterator = BB->begin();
1969  SunkAddrs.clear();
1970  }
1971  return true;
1972  }
1973  case Intrinsic::masked_load: {
1974  // Scalarize unsupported vector masked load
1975  if (!TTI->isLegalMaskedLoad(CI->getType())) {
1976  scalarizeMaskedLoad(CI);
1977  ModifiedDT = true;
1978  return true;
1979  }
1980  return false;
1981  }
1982  case Intrinsic::masked_store: {
1983  if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType())) {
1984  scalarizeMaskedStore(CI);
1985  ModifiedDT = true;
1986  return true;
1987  }
1988  return false;
1989  }
1990  case Intrinsic::masked_gather: {
1991  if (!TTI->isLegalMaskedGather(CI->getType())) {
1992  scalarizeMaskedGather(CI);
1993  ModifiedDT = true;
1994  return true;
1995  }
1996  return false;
1997  }
1998  case Intrinsic::masked_scatter: {
1999  if (!TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType())) {
2000  scalarizeMaskedScatter(CI);
2001  ModifiedDT = true;
2002  return true;
2003  }
2004  return false;
2005  }
2006  case Intrinsic::aarch64_stlxr:
2007  case Intrinsic::aarch64_stxr: {
2008  ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
2009  if (!ExtVal || !ExtVal->hasOneUse() ||
2010  ExtVal->getParent() == CI->getParent())
2011  return false;
2012  // Sink a zext feeding stlxr/stxr before it, so it can be folded into it.
2013  ExtVal->moveBefore(CI);
2014  // Mark this instruction as "inserted by CGP", so that other
2015  // optimizations don't touch it.
2016  InsertedInsts.insert(ExtVal);
2017  return true;
2018  }
2019  case Intrinsic::invariant_group_barrier:
2020  II->replaceAllUsesWith(II->getArgOperand(0));
2021  II->eraseFromParent();
2022  return true;
2023 
2024  case Intrinsic::cttz:
2025  case Intrinsic::ctlz:
2026  // If counting zeros is expensive, try to avoid it.
2027  return despeculateCountZeros(II, TLI, DL, ModifiedDT);
2028  }
2029 
2030  if (TLI) {
2031  // Unknown address space.
2032  // TODO: Target hook to pick which address space the intrinsic cares
2033  // about?
2034  unsigned AddrSpace = ~0u;
2035  SmallVector<Value*, 2> PtrOps;
2036  Type *AccessTy;
2037  if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy, AddrSpace))
2038  while (!PtrOps.empty())
2039  if (optimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy, AddrSpace))
2040  return true;
2041  }
2042  }
2043 
2044  // From here on out we're working with named functions.
2045  if (!CI->getCalledFunction()) return false;
2046 
2047  // Lower all default uses of _chk calls. This is very similar
2048  // to what InstCombineCalls does, but here we are only lowering calls
2049  // to fortified library functions (e.g. __memcpy_chk) that have the default
2050  // "don't know" as the objectsize. Anything else should be left alone.
2051  FortifiedLibCallSimplifier Simplifier(TLInfo, true);
2052  if (Value *V = Simplifier.optimizeCall(CI)) {
2053  CI->replaceAllUsesWith(V);
2054  CI->eraseFromParent();
2055  return true;
2056  }
2057  return false;
2058 }
2059 
2060 /// Look for opportunities to duplicate return instructions to the predecessor
2061 /// to enable tail call optimizations. The case it is currently looking for is:
2062 /// @code
2063 /// bb0:
2064 /// %tmp0 = tail call i32 @f0()
2065 /// br label %return
2066 /// bb1:
2067 /// %tmp1 = tail call i32 @f1()
2068 /// br label %return
2069 /// bb2:
2070 /// %tmp2 = tail call i32 @f2()
2071 /// br label %return
2072 /// return:
2073 /// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
2074 /// ret i32 %retval
2075 /// @endcode
2076 ///
2077 /// =>
2078 ///
2079 /// @code
2080 /// bb0:
2081 /// %tmp0 = tail call i32 @f0()
2082 /// ret i32 %tmp0
2083 /// bb1:
2084 /// %tmp1 = tail call i32 @f1()
2085 /// ret i32 %tmp1
2086 /// bb2:
2087 /// %tmp2 = tail call i32 @f2()
2088 /// ret i32 %tmp2
2089 /// @endcode
2090 bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) {
2091  if (!TLI)
2092  return false;
2093 
2094  ReturnInst *RetI = dyn_cast<ReturnInst>(BB->getTerminator());
2095  if (!RetI)
2096  return false;
2097 
2098  PHINode *PN = nullptr;
2099  BitCastInst *BCI = nullptr;
2100  Value *V = RetI->getReturnValue();
2101  if (V) {
2102  BCI = dyn_cast<BitCastInst>(V);
2103  if (BCI)
2104  V = BCI->getOperand(0);
2105 
2106  PN = dyn_cast<PHINode>(V);
2107  if (!PN)
2108  return false;
2109  }
2110 
2111  if (PN && PN->getParent() != BB)
2112  return false;
2113 
2114  // Make sure there are no instructions between the PHI and return, or that the
2115  // return is the first instruction in the block.
2116  if (PN) {
2117  BasicBlock::iterator BI = BB->begin();
2118  do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
2119  if (&*BI == BCI)
2120  // Also skip over the bitcast.
2121  ++BI;
2122  if (&*BI != RetI)
2123  return false;
2124  } else {
2125  BasicBlock::iterator BI = BB->begin();
2126  while (isa<DbgInfoIntrinsic>(BI)) ++BI;
2127  if (&*BI != RetI)
2128  return false;
2129  }
2130 
2131  /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
2132  /// call.
2133  const Function *F = BB->getParent();
2134  SmallVector<CallInst*, 4> TailCalls;
2135  if (PN) {
2136  for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
2137  CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I));
2138  // Make sure the phi value is indeed produced by the tail call.
2139  if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) &&
2140  TLI->mayBeEmittedAsTailCall(CI) &&
2141  attributesPermitTailCall(F, CI, RetI, *TLI))
2142  TailCalls.push_back(CI);
2143  }
2144  } else {
2145  SmallPtrSet<BasicBlock*, 4> VisitedBBs;
2146  for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
2147  if (!VisitedBBs.insert(*PI).second)
2148  continue;
2149 
2150  BasicBlock::InstListType &InstList = (*PI)->getInstList();
2151  BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin();
2152  BasicBlock::InstListType::reverse_iterator RE = InstList.rend();
2153  do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI));
2154  if (RI == RE)
2155  continue;
2156 
2157  CallInst *CI = dyn_cast<CallInst>(&*RI);
2158  if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&
2159  attributesPermitTailCall(F, CI, RetI, *TLI))
2160  TailCalls.push_back(CI);
2161  }
2162  }
2163 
2164  bool Changed = false;
2165  for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) {
2166  CallInst *CI = TailCalls[i];
2167  CallSite CS(CI);
2168 
2169  // Conservatively require the attributes of the call to match those of the
2170  // return. Ignore noalias because it doesn't affect the call sequence.
2171  AttributeSet CalleeAttrs = CS.getAttributes();
2172  if (AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
2173  removeAttribute(Attribute::NoAlias) !=
2174  AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
2175  removeAttribute(Attribute::NoAlias))
2176  continue;
2177 
2178  // Make sure the call instruction is followed by an unconditional branch to
2179  // the return block.
2180  BasicBlock *CallBB = CI->getParent();
2181  BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator());
2182  if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
2183  continue;
2184 
2185  // Duplicate the return into CallBB.
2186  (void)FoldReturnIntoUncondBranch(RetI, BB, CallBB);
2187  ModifiedDT = Changed = true;
2188  ++NumRetsDup;
2189  }
2190 
2191  // If we eliminated all predecessors of the block, delete the block now.
2192  if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
2193  BB->eraseFromParent();
2194 
2195  return Changed;
2196 }
2197 
2198 //===----------------------------------------------------------------------===//
2199 // Memory Optimization
2200 //===----------------------------------------------------------------------===//
2201 
2202 namespace {
2203 
2204 /// This is an extended version of TargetLowering::AddrMode
2205 /// which holds actual Value*'s for register values.
2206 struct ExtAddrMode : public TargetLowering::AddrMode {
2207  Value *BaseReg;
2208  Value *ScaledReg;
2209  ExtAddrMode() : BaseReg(nullptr), ScaledReg(nullptr) {}
2210  void print(raw_ostream &OS) const;
2211  void dump() const;
2212 
2213  bool operator==(const ExtAddrMode& O) const {
2214  return (BaseReg == O.BaseReg) && (ScaledReg == O.ScaledReg) &&
2215  (BaseGV == O.BaseGV) && (BaseOffs == O.BaseOffs) &&
2216  (HasBaseReg == O.HasBaseReg) && (Scale == O.Scale);
2217  }
2218 };
2219 
2220 #ifndef NDEBUG
2221 static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
2222  AM.print(OS);
2223  return OS;
2224 }
2225 #endif
2226 
2227 void ExtAddrMode::print(raw_ostream &OS) const {
2228  bool NeedPlus = false;
2229  OS << "[";
2230  if (BaseGV) {
2231  OS << (NeedPlus ? " + " : "")
2232  << "GV:";
2233  BaseGV->printAsOperand(OS, /*PrintType=*/false);
2234  NeedPlus = true;
2235  }
2236 
2237  if (BaseOffs) {
2238  OS << (NeedPlus ? " + " : "")
2239  << BaseOffs;
2240  NeedPlus = true;
2241  }
2242 
2243  if (BaseReg) {
2244  OS << (NeedPlus ? " + " : "")
2245  << "Base:";
2246  BaseReg->printAsOperand(OS, /*PrintType=*/false);
2247  NeedPlus = true;
2248  }
2249  if (Scale) {
2250  OS << (NeedPlus ? " + " : "")
2251  << Scale << "*";
2252  ScaledReg->printAsOperand(OS, /*PrintType=*/false);
2253  }
2254 
2255  OS << ']';
2256 }
2257 
2258 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2259 LLVM_DUMP_METHOD void ExtAddrMode::dump() const {
2260  print(dbgs());
2261  dbgs() << '\n';
2262 }
2263 #endif
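// For reference, a fully populated ExtAddrMode prints (via the routine above)
// as something like "[GV:@global + 16 + Base:%base + 4*%index]", i.e. the
// matched address is BaseGV + BaseOffs + BaseReg + Scale*ScaledReg; the
// operand names here are illustrative only.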
2264 
2265 /// \brief This class provides transaction-based operations on the IR.
2266 /// Every change made through this class is recorded in the internal state and
2267 /// can be undone (rollback) until commit is called.
2268 class TypePromotionTransaction {
2269 
2270  /// \brief This represents the common interface of the individual transaction.
2271  /// Each class implements the logic for doing one specific modification on
2272  /// the IR via the TypePromotionTransaction.
2273  class TypePromotionAction {
2274  protected:
2275  /// The Instruction modified.
2276  Instruction *Inst;
2277 
2278  public:
2279  /// \brief Constructor of the action.
2280  /// The constructor performs the related action on the IR.
2281  TypePromotionAction(Instruction *Inst) : Inst(Inst) {}
2282 
2283  virtual ~TypePromotionAction() {}
2284 
2285  /// \brief Undo the modification done by this action.
2286  /// When this method is called, the IR must be in the same state as it was
2287  /// before this action was applied.
2288  /// \pre Undoing the action works if and only if the IR is in the exact same
2289  /// state as it was directly after this action was applied.
2290  virtual void undo() = 0;
2291 
2292  /// \brief Advocate every change made by this action.
2293  /// When the action's effects on the IR are to be kept, it is important to
2294  /// call this function; otherwise hidden information may be kept forever.
2295  virtual void commit() {
2296  // Nothing to be done, this action is not doing anything.
2297  }
2298  };
2299 
2300  /// \brief Utility to remember the position of an instruction.
2301  class InsertionHandler {
2302  /// Position of an instruction.
2303  /// Either an instruction:
2304  /// - Is the first in a basic block: BB is used.
2305  /// - Has a previous instruction: PrevInst is used.
2306  union {
2307  Instruction *PrevInst;
2308  BasicBlock *BB;
2309  } Point;
2310  /// Remember whether or not the instruction had a previous instruction.
2311  bool HasPrevInstruction;
2312 
2313  public:
2314  /// \brief Record the position of \p Inst.
2315  InsertionHandler(Instruction *Inst) {
2316  BasicBlock::iterator It = Inst->getIterator();
2317  HasPrevInstruction = (It != (Inst->getParent()->begin()));
2318  if (HasPrevInstruction)
2319  Point.PrevInst = &*--It;
2320  else
2321  Point.BB = Inst->getParent();
2322  }
2323 
2324  /// \brief Insert \p Inst at the recorded position.
2325  void insert(Instruction *Inst) {
2326  if (HasPrevInstruction) {
2327  if (Inst->getParent())
2328  Inst->removeFromParent();
2329  Inst->insertAfter(Point.PrevInst);
2330  } else {
2331  Instruction *Position = &*Point.BB->getFirstInsertionPt();
2332  if (Inst->getParent())
2333  Inst->moveBefore(Position);
2334  else
2335  Inst->insertBefore(Position);
2336  }
2337  }
2338  };
2339 
2340  /// \brief Move an instruction before another.
2341  class InstructionMoveBefore : public TypePromotionAction {
2342  /// Original position of the instruction.
2343  InsertionHandler Position;
2344 
2345  public:
2346  /// \brief Move \p Inst before \p Before.
2347  InstructionMoveBefore(Instruction *Inst, Instruction *Before)
2348  : TypePromotionAction(Inst), Position(Inst) {
2349  DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before << "\n");
2350  Inst->moveBefore(Before);
2351  }
2352 
2353  /// \brief Move the instruction back to its original position.
2354  void undo() override {
2355  DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n");
2356  Position.insert(Inst);
2357  }
2358  };
2359 
2360  /// \brief Set the operand of an instruction with a new value.
2361  class OperandSetter : public TypePromotionAction {
2362  /// Original operand of the instruction.
2363  Value *Origin;
2364  /// Index of the modified instruction.
2365  unsigned Idx;
2366 
2367  public:
2368  /// \brief Set \p Idx operand of \p Inst with \p NewVal.
2369  OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal)
2370  : TypePromotionAction(Inst), Idx(Idx) {
2371  DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n"
2372  << "for:" << *Inst << "\n"
2373  << "with:" << *NewVal << "\n");
2374  Origin = Inst->getOperand(Idx);
2375  Inst->setOperand(Idx, NewVal);
2376  }
2377 
2378  /// \brief Restore the original value of the instruction.
2379  void undo() override {
2380  DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n"
2381  << "for: " << *Inst << "\n"
2382  << "with: " << *Origin << "\n");
2383  Inst->setOperand(Idx, Origin);
2384  }
2385  };
2386 
2387  /// \brief Hide the operands of an instruction.
2388  /// Act as if this instruction were not using any of its operands.
2389  class OperandsHider : public TypePromotionAction {
2390  /// The list of original operands.
2391  SmallVector<Value *, 4> OriginalValues;
2392 
2393  public:
2394  /// \brief Remove \p Inst from the uses of the operands of \p Inst.
2395  OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) {
2396  DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n");
2397  unsigned NumOpnds = Inst->getNumOperands();
2398  OriginalValues.reserve(NumOpnds);
2399  for (unsigned It = 0; It < NumOpnds; ++It) {
2400  // Save the current operand.
2401  Value *Val = Inst->getOperand(It);
2402  OriginalValues.push_back(Val);
2403  // Set a dummy one.
2404  // We could use OperandSetter here, but that would imply an overhead
2405  // that we are not willing to pay.
2406  Inst->setOperand(It, UndefValue::get(Val->getType()));
2407  }
2408  }
2409 
2410  /// \brief Restore the original list of uses.
2411  void undo() override {
2412  DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n");
2413  for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It)
2414  Inst->setOperand(It, OriginalValues[It]);
2415  }
2416  };
2417 
2418  /// \brief Build a truncate instruction.
2419  class TruncBuilder : public TypePromotionAction {
2420  Value *Val;
2421  public:
2422  /// \brief Build a truncate instruction of \p Opnd producing a \p Ty
2423  /// result.
2424  /// trunc Opnd to Ty.
2425  TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
2426  IRBuilder<> Builder(Opnd);
2427  Val = Builder.CreateTrunc(Opnd, Ty, "promoted");
2428  DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n");
2429  }
2430 
2431  /// \brief Get the built value.
2432  Value *getBuiltValue() { return Val; }
2433 
2434  /// \brief Remove the built instruction.
2435  void undo() override {
2436  DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n");
2437  if (Instruction *IVal = dyn_cast<Instruction>(Val))
2438  IVal->eraseFromParent();
2439  }
2440  };
2441 
2442  /// \brief Build a sign extension instruction.
2443  class SExtBuilder : public TypePromotionAction {
2444  Value *Val;
2445  public:
2446  /// \brief Build a sign extension instruction of \p Opnd producing a \p Ty
2447  /// result.
2448  /// sext Opnd to Ty.
2449  SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
2450  : TypePromotionAction(InsertPt) {
2451  IRBuilder<> Builder(InsertPt);
2452  Val = Builder.CreateSExt(Opnd, Ty, "promoted");
2453  DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n");
2454  }
2455 
2456  /// \brief Get the built value.
2457  Value *getBuiltValue() { return Val; }
2458 
2459  /// \brief Remove the built instruction.
2460  void undo() override {
2461  DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n");
2462  if (Instruction *IVal = dyn_cast<Instruction>(Val))
2463  IVal->eraseFromParent();
2464  }
2465  };
2466 
2467  /// \brief Build a zero extension instruction.
2468  class ZExtBuilder : public TypePromotionAction {
2469  Value *Val;
2470  public:
2471  /// \brief Build a zero extension instruction of \p Opnd producing a \p Ty
2472  /// result.
2473  /// zext Opnd to Ty.
2474  ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
2475  : TypePromotionAction(InsertPt) {
2476  IRBuilder<> Builder(InsertPt);
2477  Val = Builder.CreateZExt(Opnd, Ty, "promoted");
2478  DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n");
2479  }
2480 
2481  /// \brief Get the built value.
2482  Value *getBuiltValue() { return Val; }
2483 
2484  /// \brief Remove the built instruction.
2485  void undo() override {
2486  DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n");
2487  if (Instruction *IVal = dyn_cast<Instruction>(Val))
2488  IVal->eraseFromParent();
2489  }
2490  };
2491 
2492  /// \brief Mutate an instruction to another type.
2493  class TypeMutator : public TypePromotionAction {
2494  /// Record the original type.
2495  Type *OrigTy;
2496 
2497  public:
2498  /// \brief Mutate the type of \p Inst into \p NewTy.
2499  TypeMutator(Instruction *Inst, Type *NewTy)
2500  : TypePromotionAction(Inst), OrigTy(Inst->getType()) {
2501  DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy
2502  << "\n");
2503  Inst->mutateType(NewTy);
2504  }
2505 
2506  /// \brief Mutate the instruction back to its original type.
2507  void undo() override {
2508  DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy
2509  << "\n");
2510  Inst->mutateType(OrigTy);
2511  }
2512  };
2513 
2514  /// \brief Replace the uses of an instruction by another instruction.
2515  class UsesReplacer : public TypePromotionAction {
2516  /// Helper structure to keep track of the replaced uses.
2517  struct InstructionAndIdx {
2518  /// The instruction using the instruction.
2519  Instruction *Inst;
2520  /// The index where this instruction is used for Inst.
2521  unsigned Idx;
2522  InstructionAndIdx(Instruction *Inst, unsigned Idx)
2523  : Inst(Inst), Idx(Idx) {}
2524  };
2525 
2526  /// Keep track of the original uses (pair Instruction, Index).
2527  SmallVector<InstructionAndIdx, 4> OriginalUses;
2528  typedef SmallVectorImpl<InstructionAndIdx>::iterator use_iterator;
2529 
2530  public:
2531  /// \brief Replace all the uses of \p Inst with \p New.
2532  UsesReplacer(Instruction *Inst, Value *New) : TypePromotionAction(Inst) {
2533  DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New
2534  << "\n");
2535  // Record the original uses.
2536  for (Use &U : Inst->uses()) {
2537  Instruction *UserI = cast<Instruction>(U.getUser());
2538  OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo()));
2539  }
2540  // Now, we can replace the uses.
2541  Inst->replaceAllUsesWith(New);
2542  }
2543 
2544  /// \brief Reassign the original uses of Inst to Inst.
2545  void undo() override {
2546  DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
2547  for (use_iterator UseIt = OriginalUses.begin(),
2548  EndIt = OriginalUses.end();
2549  UseIt != EndIt; ++UseIt) {
2550  UseIt->Inst->setOperand(UseIt->Idx, Inst);
2551  }
2552  }
2553  };
2554 
2555  /// \brief Remove an instruction from the IR.
2556  class InstructionRemover : public TypePromotionAction {
2557  /// Original position of the instruction.
2558  InsertionHandler Inserter;
2559  /// Helper structure to hide all the link to the instruction. In other
2560  /// words, this helps to do as if the instruction was removed.
2561  OperandsHider Hider;
2562  /// Keep track of the uses replaced, if any.
2563  UsesReplacer *Replacer;
2564 
2565  public:
2566  /// \brief Remove all references to \p Inst and optionally replace all its
2567  /// uses with New.
2568  /// \pre If !Inst->use_empty(), then New != nullptr
2569  InstructionRemover(Instruction *Inst, Value *New = nullptr)
2570  : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
2571  Replacer(nullptr) {
2572  if (New)
2573  Replacer = new UsesReplacer(Inst, New);
2574  DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
2575  Inst->removeFromParent();
2576  }
2577 
2578  ~InstructionRemover() override { delete Replacer; }
2579 
2580  /// \brief Really remove the instruction.
2581  void commit() override { delete Inst; }
2582 
2583  /// \brief Resurrect the instruction and reassign it to the proper uses if
2584  /// a new value was provided when this action was built.
2585  void undo() override {
2586  DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
2587  Inserter.insert(Inst);
2588  if (Replacer)
2589  Replacer->undo();
2590  Hider.undo();
2591  }
2592  };
2593 
2594 public:
2595  /// Restoration point.
2596  /// The restoration point is a pointer to an action instead of an iterator
2597  /// because the iterator may be invalidated but not the pointer.
2598  typedef const TypePromotionAction *ConstRestorationPt;
2599  /// Advocate every change made in that transaction.
2600  void commit();
2601  /// Undo all the changes made after the given point.
2602  void rollback(ConstRestorationPt Point);
2603  /// Get the current restoration point.
2604  ConstRestorationPt getRestorationPoint() const;
2605 
2606  /// \name API for IR modification with state keeping to support rollback.
2607  /// @{
2608  /// Same as Instruction::setOperand.
2609  void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
2610  /// Same as Instruction::eraseFromParent.
2611  void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
2612  /// Same as Value::replaceAllUsesWith.
2613  void replaceAllUsesWith(Instruction *Inst, Value *New);
2614  /// Same as Value::mutateType.
2615  void mutateType(Instruction *Inst, Type *NewTy);
2616  /// Same as IRBuilder::createTrunc.
2617  Value *createTrunc(Instruction *Opnd, Type *Ty);
2618  /// Same as IRBuilder::createSExt.
2619  Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
2620  /// Same as IRBuilder::createZExt.
2621  Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
2622  /// Same as Instruction::moveBefore.
2623  void moveBefore(Instruction *Inst, Instruction *Before);
2624  /// @}
2625 
2626 private:
2627  /// The ordered list of actions made so far.
2628  SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
2629  typedef SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator CommitPt;
2630 };
2631 
2632 void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
2633  Value *NewVal) {
2634  Actions.push_back(
2635  make_unique<TypePromotionTransaction::OperandSetter>(Inst, Idx, NewVal));
2636 }
2637 
2638 void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
2639  Value *NewVal) {
2640  Actions.push_back(
2641  make_unique<TypePromotionTransaction::InstructionRemover>(Inst, NewVal));
2642 }
2643 
2644 void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
2645  Value *New) {
2646  Actions.push_back(make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
2647 }
2648 
2649 void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
2650  Actions.push_back(make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
2651 }
2652 
2653 Value *TypePromotionTransaction::createTrunc(Instruction *Opnd,
2654  Type *Ty) {
2655  std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
2656  Value *Val = Ptr->getBuiltValue();
2657  Actions.push_back(std::move(Ptr));
2658  return Val;
2659 }
2660 
2661 Value *TypePromotionTransaction::createSExt(Instruction *Inst,
2662  Value *Opnd, Type *Ty) {
2663  std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
2664  Value *Val = Ptr->getBuiltValue();
2665  Actions.push_back(std::move(Ptr));
2666  return Val;
2667 }
2668 
2669 Value *TypePromotionTransaction::createZExt(Instruction *Inst,
2670  Value *Opnd, Type *Ty) {
2671  std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty));
2672  Value *Val = Ptr->getBuiltValue();
2673  Actions.push_back(std::move(Ptr));
2674  return Val;
2675 }
2676 
2677 void TypePromotionTransaction::moveBefore(Instruction *Inst,
2678  Instruction *Before) {
2679  Actions.push_back(
2680  make_unique<TypePromotionTransaction::InstructionMoveBefore>(Inst, Before));
2681 }
2682 
2683 TypePromotionTransaction::ConstRestorationPt
2684 TypePromotionTransaction::getRestorationPoint() const {
2685  return !Actions.empty() ? Actions.back().get() : nullptr;
2686 }
2687 
2688 void TypePromotionTransaction::commit() {
2689  for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt;
2690  ++It)
2691  (*It)->commit();
2692  Actions.clear();
2693 }
2694 
2695 void TypePromotionTransaction::rollback(
2696  TypePromotionTransaction::ConstRestorationPt Point) {
2697  while (!Actions.empty() && Point != Actions.back().get()) {
2698  std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
2699  Curr->undo();
2700  }
2701 }
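// Typical use of the transaction API, sketched with illustrative names (the
// real callers appear later in this file, e.g. in the addressing-mode
// matcher):
//
//   TypePromotionTransaction TPT;
//   TypePromotionTransaction::ConstRestorationPt LastKnownGood =
//       TPT.getRestorationPoint();
//   TPT.mutateType(SomeInst, NewTy);          // recorded, still undoable
//   TPT.replaceAllUsesWith(SomeInst, NewVal); // also recorded
//   if (!StillProfitable)
//     TPT.rollback(LastKnownGood);            // IR restored to the saved point
//   else
//     TPT.commit();                           // recorded changes become final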
2702 
2703 /// \brief A helper class for matching addressing modes.
2704 ///
2705 /// This encapsulates the logic for matching the target-legal addressing modes.
2706 class AddressingModeMatcher {
2707  SmallVectorImpl<Instruction*> &AddrModeInsts;
2708  const TargetMachine &TM;
2709  const TargetLowering &TLI;
2710  const DataLayout &DL;
2711 
2712  /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
2713  /// the memory instruction that we're computing this address for.
2714  Type *AccessTy;
2715  unsigned AddrSpace;
2716  Instruction *MemoryInst;
2717 
2718  /// This is the addressing mode that we're building up. This is
2719  /// part of the return value of this addressing mode matching stuff.
2720  ExtAddrMode &AddrMode;
2721 
2722  /// The instructions inserted by other CodeGenPrepare optimizations.
2723  const SetOfInstrs &InsertedInsts;
2724  /// A map from the instructions to their type before promotion.
2725  InstrToOrigTy &PromotedInsts;
2726  /// The ongoing transaction where every action should be registered.
2727  TypePromotionTransaction &TPT;
2728 
2729  /// This is set to true when we should not do profitability checks.
2730  /// When true, IsProfitableToFoldIntoAddressingMode always returns true.
2731  bool IgnoreProfitability;
2732 
2733  AddressingModeMatcher(SmallVectorImpl<Instruction *> &AMI,
2734  const TargetMachine &TM, Type *AT, unsigned AS,
2735  Instruction *MI, ExtAddrMode &AM,
2736  const SetOfInstrs &InsertedInsts,
2737  InstrToOrigTy &PromotedInsts,
2738  TypePromotionTransaction &TPT)
2739  : AddrModeInsts(AMI), TM(TM),
2740  TLI(*TM.getSubtargetImpl(*MI->getParent()->getParent())
2741  ->getTargetLowering()),
2742  DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS),
2743  MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts),
2744  PromotedInsts(PromotedInsts), TPT(TPT) {
2745  IgnoreProfitability = false;
2746  }
2747 public:
2748 
2749  /// Find the maximal addressing mode that a load/store of V can fold,
2750  /// given an access type of AccessTy. This returns a list of involved
2751  /// instructions in AddrModeInsts.
2752  /// \p InsertedInsts The instructions inserted by other CodeGenPrepare
2753  /// optimizations.
2754  /// \p PromotedInsts maps the instructions to their type before promotion.
2755  /// \p TPT The ongoing transaction where every action should be registered.
2756  static ExtAddrMode Match(Value *V, Type *AccessTy, unsigned AS,
2757  Instruction *MemoryInst,
2758  SmallVectorImpl<Instruction*> &AddrModeInsts,
2759  const TargetMachine &TM,
2760  const SetOfInstrs &InsertedInsts,
2761  InstrToOrigTy &PromotedInsts,
2762  TypePromotionTransaction &TPT) {
2763  ExtAddrMode Result;
2764 
2765  bool Success = AddressingModeMatcher(AddrModeInsts, TM, AccessTy, AS,
2766  MemoryInst, Result, InsertedInsts,
2767  PromotedInsts, TPT).matchAddr(V, 0);
2768  (void)Success; assert(Success && "Couldn't select *anything*?");
2769  return Result;
2770  }
2771 private:
2772  bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
2773  bool matchAddr(Value *V, unsigned Depth);
2774  bool matchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth,
2775  bool *MovedAway = nullptr);
2776  bool isProfitableToFoldIntoAddressingMode(Instruction *I,
2777  ExtAddrMode &AMBefore,
2778  ExtAddrMode &AMAfter);
2779  bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
2780  bool isPromotionProfitable(unsigned NewCost, unsigned OldCost,
2781  Value *PromotedOperand) const;
2782 };
2783 
2784 /// Try adding ScaleReg*Scale to the current addressing mode.
2785 /// Return true and update AddrMode if this addr mode is legal for the target,
2786 /// false if not.
2787 bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
2788  unsigned Depth) {
2789  // If Scale is 1, then this is the same as adding ScaleReg to the addressing
2790  // mode. Just process that directly.
2791  if (Scale == 1)
2792  return matchAddr(ScaleReg, Depth);
2793 
2794  // If the scale is 0, it takes nothing to add this.
2795  if (Scale == 0)
2796  return true;
2797 
2798  // If we already have a scale of this value, we can add to it, otherwise, we
2799  // need an available scale field.
2800  if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
2801  return false;
2802 
2803  ExtAddrMode TestAddrMode = AddrMode;
2804 
2805  // Add scale to turn X*4+X*3 -> X*7. This could also do things like
2806  // [A+B + A*7] -> [B+A*8].
2807  TestAddrMode.Scale += Scale;
2808  TestAddrMode.ScaledReg = ScaleReg;
2809 
2810  // If the new address isn't legal, bail out.
2811  if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace))
2812  return false;
2813 
2814  // It was legal, so commit it.
2815  AddrMode = TestAddrMode;
2816 
2817  // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
2818  // to see if ScaleReg is actually X+C. If so, we can turn this into adding
2819  // X*Scale + C*Scale to addr mode.
2820  ConstantInt *CI = nullptr; Value *AddLHS = nullptr;
2821  if (isa<Instruction>(ScaleReg) && // not a constant expr.
2822  match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
2823  TestAddrMode.ScaledReg = AddLHS;
2824  TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
2825 
2826  // If this addressing mode is legal, commit it and remember that we folded
2827  // this instruction.
2828  if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) {
2829  AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
2830  AddrMode = TestAddrMode;
2831  return true;
2832  }
2833  }
2834 
2835  // Otherwise, not (x+c)*scale, just return what we have.
2836  return true;
2837 }
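// Worked example (illustrative values): assuming no scale has been folded yet,
// if ScaleReg is "%t = add i64 %x, 4" and Scale is 2, the code above first
// requires {ScaledReg = %t, Scale = 2} to be a legal addressing mode, then
// additionally tries the folded form {ScaledReg = %x, Scale = 2, BaseOffs += 8}
// and prefers it when the target accepts that mode as well.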
2838 
2839 /// This is a little filter, which returns true if an addressing computation
2840 /// involving I might be folded into a load/store accessing it.
2841 /// This doesn't need to be perfect, but needs to accept at least
2842 /// the set of instructions that MatchOperationAddr can.
2843 static bool MightBeFoldableInst(Instruction *I) {
2844  switch (I->getOpcode()) {
2845  case Instruction::BitCast:
2846  case Instruction::AddrSpaceCast:
2847  // Don't touch identity bitcasts.
2848  if (I->getType() == I->getOperand(0)->getType())
2849  return false;
2850  return I->getType()->isPointerTy() || I->getType()->isIntegerTy();
2851  case Instruction::PtrToInt:
2852  // PtrToInt is always a noop, as we know that the int type is pointer sized.
2853  return true;
2854  case Instruction::IntToPtr:
2855  // We know the input is intptr_t, so this is foldable.
2856  return true;
2857  case Instruction::Add:
2858  return true;
2859  case Instruction::Mul:
2860  case Instruction::Shl:
2861  // Can only handle X*C and X << C.
2862  return isa<ConstantInt>(I->getOperand(1));
2863  case Instruction::GetElementPtr:
2864  return true;
2865  default:
2866  return false;
2867  }
2868 }
2869 
2870 /// \brief Check whether or not \p Val is a legal instruction for \p TLI.
2871 /// \note \p Val is assumed to be the product of some type promotion.
2872 /// Therefore if \p Val has an undefined state in \p TLI, this is assumed
2873 /// to be legal, as the non-promoted value would have had the same state.
2874 static bool isPromotedInstructionLegal(const TargetLowering &TLI,
2875  const DataLayout &DL, Value *Val) {
2876  Instruction *PromotedInst = dyn_cast<Instruction>(Val);
2877  if (!PromotedInst)
2878  return false;
2879  int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
2880  // If the ISDOpcode is undefined, it was undefined before the promotion.
2881  if (!ISDOpcode)
2882  return true;
2883  // Otherwise, check if the promoted instruction is legal or not.
2884  return TLI.isOperationLegalOrCustom(
2885  ISDOpcode, TLI.getValueType(DL, PromotedInst->getType()));
2886 }
2887 
2888 /// \brief Helper class to perform type promotion.
2889 class TypePromotionHelper {
2890  /// \brief Utility function to check whether or not a sign or zero extension
2891  /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by
2892  /// either using the operands of \p Inst or promoting \p Inst.
2893  /// The type of the extension is defined by \p IsSExt.
2894  /// In other words, check if:
2895  /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType.
2896  /// #1 Promotion applies:
2897  /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...).
2898  /// #2 Operand reuses:
2899  /// ext opnd1 to ConsideredExtType.
2900  /// \p PromotedInsts maps the instructions to their type before promotion.
2901  static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType,
2902  const InstrToOrigTy &PromotedInsts, bool IsSExt);
2903 
2904  /// \brief Utility function to determine if \p OpIdx should be promoted when
2905  /// promoting \p Inst.
2906  static bool shouldExtOperand(const Instruction *Inst, int OpIdx) {
2907  return !(isa<SelectInst>(Inst) && OpIdx == 0);
2908  }
2909 
2910  /// \brief Utility function to promote the operand of \p Ext when this
2911  /// operand is a promotable trunc or sext or zext.
2912  /// \p PromotedInsts maps the instructions to their type before promotion.
2913  /// \p CreatedInstsCost[out] contains the cost of all instructions
2914  /// created to promote the operand of Ext.
2915  /// Newly added extensions are inserted in \p Exts.
2916  /// Newly added truncates are inserted in \p Truncs.
2917  /// Should never be called directly.
2918  /// \return The promoted value which is used instead of Ext.
2919  static Value *promoteOperandForTruncAndAnyExt(
2920  Instruction *Ext, TypePromotionTransaction &TPT,
2921  InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
2922  SmallVectorImpl<Instruction *> *Exts,
2923  SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI);
2924 
2925  /// \brief Utility function to promote the operand of \p Ext when this
2926  /// operand is promotable and is not a supported trunc or sext.
2927  /// \p PromotedInsts maps the instructions to their type before promotion.
2928  /// \p CreatedInstsCost[out] contains the cost of all the instructions
2929  /// created to promote the operand of Ext.
2930  /// Newly added extensions are inserted in \p Exts.
2931  /// Newly added truncates are inserted in \p Truncs.
2932  /// Should never be called directly.
2933  /// \return The promoted value which is used instead of Ext.
2934  static Value *promoteOperandForOther(Instruction *Ext,
2935  TypePromotionTransaction &TPT,
2936  InstrToOrigTy &PromotedInsts,
2937  unsigned &CreatedInstsCost,
2938  SmallVectorImpl<Instruction *> *Exts,
2939  SmallVectorImpl<Instruction *> *Truncs,
2940  const TargetLowering &TLI, bool IsSExt);
2941 
2942  /// \see promoteOperandForOther.
2943  static Value *signExtendOperandForOther(
2944  Instruction *Ext, TypePromotionTransaction &TPT,
2945  InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
2946  SmallVectorImpl<Instruction *> *Exts,
2947  SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
2948  return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
2949  Exts, Truncs, TLI, true);
2950  }
2951 
2952  /// \see promoteOperandForOther.
2953  static Value *zeroExtendOperandForOther(
2954  Instruction *Ext, TypePromotionTransaction &TPT,
2955  InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
2956  SmallVectorImpl<Instruction *> *Exts,
2957  SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
2958  return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
2959  Exts, Truncs, TLI, false);
2960  }
2961 
2962 public:
2963  /// Type for the utility function that promotes the operand of Ext.
2964  typedef Value *(*Action)(Instruction *Ext, TypePromotionTransaction &TPT,
2965  InstrToOrigTy &PromotedInsts,
2966  unsigned &CreatedInstsCost,
2967  SmallVectorImpl<Instruction *> *Exts,
2968  SmallVectorImpl<Instruction *> *Truncs,
2969  const TargetLowering &TLI);
2970  /// \brief Given a sign/zero extend instruction \p Ext, return the appropriate
2971  /// action to promote the operand of \p Ext instead of using Ext.
2972  /// \return NULL if no promotable action is possible with the current
2973  /// sign extension.
2974  /// \p InsertedInsts keeps track of all the instructions inserted by the
2975  /// other CodeGenPrepare optimizations. This information is important
2976  /// because we do not want to promote these instructions as CodeGenPrepare
2977  /// will reinsert them later, thus creating an infinite create/remove loop.
2978  /// \p PromotedInsts maps the instructions to their type before promotion.
2979  static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedInsts,
2980  const TargetLowering &TLI,
2981  const InstrToOrigTy &PromotedInsts);
2982 };
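// A caller typically queries getAction() and, when an action is available,
// runs it to obtain the value that replaces Ext. Sketch with illustrative
// variable names (TPT, Exts, Truncs, etc. come from the caller's context):
//
//   if (TypePromotionHelper::Action Act = TypePromotionHelper::getAction(
//           Ext, InsertedInsts, TLI, PromotedInsts)) {
//     unsigned CreatedInstsCost = 0;
//     Value *Promoted =
//         Act(Ext, TPT, PromotedInsts, CreatedInstsCost, &Exts, &Truncs, TLI);
//   }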
2983 
2984 bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
2985  Type *ConsideredExtType,
2986  const InstrToOrigTy &PromotedInsts,
2987  bool IsSExt) {
2988  // The promotion helper does not know how to deal with vector types yet.
2989  // To be able to fix that, we would need to fix the places where we
2990  // statically extend, e.g., constants and such.
2991  if (Inst->getType()->isVectorTy())
2992  return false;
2993 
2994  // We can always get through zext.
2995  if (isa<ZExtInst>(Inst))
2996  return true;
2997 
2998  // sext(sext) is ok too.
2999  if (IsSExt && isa<SExtInst>(Inst))
3000  return true;
3001 
3002  // We can get through binary operator, if it is legal. In other words, the
3003  // binary operator must have a nuw or nsw flag.
3004  const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
3005  if (BinOp && isa<OverflowingBinaryOperator>(BinOp) &&
3006  ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
3007  (IsSExt && BinOp->hasNoSignedWrap())))
3008  return true;
3009 
3010  // Check if we can do the following simplification.
3011  // ext(trunc(opnd)) --> ext(opnd)
3012  if (!isa<TruncInst>(Inst))
3013  return false;
3014 
3015  Value *OpndVal = Inst->getOperand(0);
3016  // Check if we can use this operand in the extension.
3017  // If the type is larger than the result type of the extension, we cannot.
3018  if (!OpndVal->getType()->isIntegerTy() ||
3019  OpndVal->getType()->getIntegerBitWidth() >
3020  ConsideredExtType->getIntegerBitWidth())
3021  return false;
3022 
3023  // If the operand of the truncate is not an instruction, we will not have
3024  // any information on the dropped bits.
3025  // (Actually we could for constants, but it is not worth the extra logic).
3026  Instruction *Opnd = dyn_cast<Instruction>(OpndVal);
3027  if (!Opnd)
3028  return false;
3029 
3030  // Check if the source of the truncate is narrow enough.
3031  // I.e., check that the trunc just drops extended bits of the same kind as
3032  // the extension.
3033  // #1 get the type of the operand and check the kind of the extended bits.
3034  const Type *OpndType;
3035  InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
3036  if (It != PromotedInsts.end() && It->second.getInt() == IsSExt)
3037  OpndType = It->second.getPointer();
3038  else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
3039  OpndType = Opnd->getOperand(0)->getType();
3040  else
3041  return false;
3042 
3043  // #2 check that the truncate just drops extended bits.
3044  return Inst->getType()->getIntegerBitWidth() >=
3045  OpndType->getIntegerBitWidth();
3046 }
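// An illustrative case canGetThrough accepts (widths are only an example):
//
//   %w = sext i8 %a to i64     ; Opnd: high bits are known sign bits
//   %t = trunc i64 %w to i32   ; Inst: drops only those sign bits
//   %e = sext i32 %t to i64    ; Ext being matched
//
// Here the truncate removes nothing but bits of the same kind as the
// extension, so the promotion machinery may look through it.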
3047 
3048 TypePromotionHelper::Action TypePromotionHelper::getAction(
3049  Instruction *Ext, const SetOfInstrs &InsertedInsts,
3050  const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
3051  assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
3052  "Unexpected instruction type");
3053  Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0));
3054  Type *ExtTy = Ext->getType();
3055  bool IsSExt = isa<SExtInst>(Ext);
3056  // If the operand of the extension is not an instruction, we cannot
3057  // get through.
3058  // If it is one, check that we can get through it.
3059  if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt))
3060  return nullptr;
3061 
3062  // Do not promote if the operand has been added by codegenprepare.
3063  // Otherwise, it means we are undoing an optimization that is likely to be
3064  // redone, thus causing potential infinite loop.
3065  if (isa<TruncInst>(ExtOpnd) && InsertedInsts.count(ExtOpnd))
3066  return nullptr;
3067 
3068  // SExt or Trunc instructions.
3069  // Return the related handler.
3070  if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) ||
3071  isa<ZExtInst>(ExtOpnd))
3072  return promoteOperandForTruncAndAnyExt;
3073 
3074  // Regular instruction.
3075  // Abort early if we will have to insert non-free instructions.
3076  if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType()))
3077  return nullptr;
3078  return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther;
3079 }
3080 
3081 Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
3082  llvm::Instruction *SExt, TypePromotionTransaction &TPT,
3083  InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
3084  SmallVectorImpl<Instruction *> *Exts,
3085  SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
3086  // By construction, the operand of SExt is an instruction. Otherwise we cannot
3087  // get through it and this method should not be called.
3088  Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
3089  Value *ExtVal = SExt;
3090  bool HasMergedNonFreeExt = false;
3091  if (isa<ZExtInst>(SExtOpnd)) {
3092  // Replace s|zext(zext(opnd))
3093  // => zext(opnd).
3094  HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd);
3095  Value *ZExt =
3096  TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
3097  TPT.replaceAllUsesWith(SExt, ZExt);
3098  TPT.eraseInstruction(SExt);
3099  ExtVal = ZExt;
3100  } else {
3101  // Replace z|sext(trunc(opnd)) or sext(sext(opnd))
3102  // => z|sext(opnd).
3103  TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
3104  }
3105  CreatedInstsCost = 0;
3106 
3107  // Remove dead code.
3108  if (SExtOpnd->use_empty())
3109  TPT.eraseInstruction(SExtOpnd);
3110 
3111  // Check if the extension is still needed.
3112  Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
3113  if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
3114  if (ExtInst) {
3115  if (Exts)
3116  Exts->push_back(ExtInst);
3117  CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt;
3118  }
3119  return ExtVal;
3120  }
3121 
3122  // At this point we have: ext ty opnd to ty.
3123  // Reassign the uses of ExtInst to the opnd and remove ExtInst.
3124  Value *NextVal = ExtInst->getOperand(0);
3125  TPT.eraseInstruction(ExtInst, NextVal);
3126  return NextVal;
3127 }
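// For instance (hypothetical IR), the zext-of-zext path above turns
//
//   %z = zext i8 %a to i32
//   %s = sext i32 %z to i64
//
// into a single
//
//   %z2 = zext i8 %a to i64
//
// and reports a non-zero CreatedInstsCost only if the remaining extension is
// not free for the target and no non-free ext was merged away.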
3128 
3129 Value *TypePromotionHelper::promoteOperandForOther(
3130  Instruction *Ext, TypePromotionTransaction &TPT,
3131  InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
3132  SmallVectorImpl<Instruction *> *Exts,
3133  SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI,
3134  bool IsSExt) {
3135  // By construction, the operand of Ext is an instruction. Otherwise we cannot
3136  // get through it and this method should not be called.
3137  Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
3138  CreatedInstsCost = 0;
3139  if (!ExtOpnd->hasOneUse()) {
3140  // ExtOpnd will be promoted.
3141  // All its uses, but Ext, will need to use a truncated value of the
3142  // promoted version.
3143  // Create the truncate now.
3144  Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType());
3145  if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
3146  ITrunc->removeFromParent();
3147  // Insert it just after the definition.
3148  ITrunc->insertAfter(ExtOpnd);
3149  if (Truncs)
3150  Truncs->push_back(ITrunc);
3151  }
3152 
3153  TPT.replaceAllUsesWith(ExtOpnd, Trunc);
3154  // Restore the operand of Ext (which has been replaced by the previous call
3155  // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
3156  TPT.setOperand(Ext, 0, ExtOpnd);
3157  }
3158 
3159  // Get through the Instruction:
3160  // 1. Update its type.
3161  // 2. Replace the uses of Ext by ExtOpnd.
3162  // 3. Extend each operand that needs to be extended.
3163 
3164  // Remember the original type of the instruction before promotion.
3165  // This is useful to know whether the high bits are sign- or zero-extended bits.
3166  PromotedInsts.insert(std::pair<Instruction *, TypeIsSExt>(
3167  ExtOpnd, TypeIsSExt(ExtOpnd->getType(), IsSExt)));
3168  // Step #1.
3169  TPT.mutateType(ExtOpnd, Ext->getType());
3170  // Step #2.
3171  TPT.replaceAllUsesWith(Ext, ExtOpnd);
3172  // Step #3.
3173  Instruction *ExtForOpnd = Ext;
3174 
3175  DEBUG(dbgs() << "Propagate Ext to operands\n");
3176  for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
3177  ++OpIdx) {
3178  DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n');
3179  if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() ||
3180  !shouldExtOperand(ExtOpnd, OpIdx)) {
3181  DEBUG(dbgs() << "No need to propagate\n");
3182  continue;
3183  }
3184  // Check if we can statically extend the operand.
3185  Value *Opnd = ExtOpnd->getOperand(OpIdx);
3186  if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
3187  DEBUG(dbgs() << "Statically extend\n");
3188  unsigned BitWidth = Ext->getType()->getIntegerBitWidth();
3189  APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth)
3190  : Cst->getValue().zext(BitWidth);
3191  TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal));
3192  continue;
3193  }
3194  // UndefValues are typed, so we have to statically extend them.
3195  if (isa<UndefValue>(Opnd)) {
3196  DEBUG(dbgs() << "Statically extend\n");
3197  TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType()));
3198  continue;
3199  }
3200 
3201  // Otherwise we have to explicitly sign/zero extend the operand.
3202  // Check if Ext was already reused to extend another operand.
3203  if (!ExtForOpnd) {
3204  // If so, create a new one.
3205  DEBUG(dbgs() << "More operands to ext\n");
3206  Value *ValForExtOpnd = IsSExt ? TPT.createSExt(Ext, Opnd, Ext->getType())
3207  : TPT.createZExt(Ext, Opnd, Ext->getType());
3208  if (!isa<Instruction>(ValForExtOpnd)) {
3209  TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
3210  continue;
3211  }
3212  ExtForOpnd = cast<Instruction>(ValForExtOpnd);
3213  }
3214  if (Exts)
3215  Exts->push_back(ExtForOpnd);
3216  TPT.setOperand(ExtForOpnd, 0, Opnd);
3217 
3218  // Move the sign extension before the insertion point.
3219  TPT.moveBefore(ExtForOpnd, ExtOpnd);
3220  TPT.setOperand(ExtOpnd, OpIdx, ExtForOpnd);
3221  CreatedInstsCost += !TLI.isExtFree(ExtForOpnd);
3222  // If more sext are required, new instructions will have to be created.
3223  ExtForOpnd = nullptr;
3224  }
3225  if (ExtForOpnd == Ext) {
3226  DEBUG(dbgs() << "Extension is useless now\n");
3227  TPT.eraseInstruction(Ext);
3228  }
3229  return ExtOpnd;
3230 }
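// A sketch of the effect on hypothetical IR: for
//
//   %add = add nsw i32 %opnd, 1
//   %ext = sext i32 %add to i64
//
// the routine mutates %add to i64, rewrites the uses of %ext to %add, and
// sign-extends the operands that need it, ending up with roughly
//
//   %promoted = sext i32 %opnd to i64
//   %add = add nsw i64 %promoted, 1
//
// where the original %ext instruction is reused (or erased, if it becomes
// useless) to build %promoted.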
3231 
3232 /// Check whether or not promoting an instruction to a wider type is profitable.
3233 /// \p NewCost gives the cost of extension instructions created by the
3234 /// promotion.
3235 /// \p OldCost gives the cost of extension instructions before the promotion
3236 /// plus the number of instructions that have been
3237 /// matched in the addressing mode thanks to the promotion.
3238 /// \p PromotedOperand is the value that has been promoted.
3239 /// \return True if the promotion is profitable, false otherwise.
3240 bool AddressingModeMatcher::isPromotionProfitable(
3241  unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
3242  DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost << '\n');
3243  // The cost of the new extensions is greater than the cost of the
3244  // old extension plus what we folded.
3245  // This is not profitable.
3246  if (NewCost > OldCost)
3247  return false;
3248  if (NewCost < OldCost)
3249  return true;
3250  // The promotion is neutral but it may help folding the sign extension in
3251  // loads for instance.
3252  // Check that we did not create an illegal instruction.
3253  return isPromotedInstructionLegal(TLI, DL, PromotedOperand);
3254 }
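// Worked example of the comparison (numbers are illustrative): if promoting an
// operand creates one non-free extension (NewCost = 1) while the original
// extension cost 1 and one extra instruction got folded into the addressing
// mode (OldCost = 1 + 1 = 2), the promotion is profitable. When NewCost equals
// OldCost, the decision falls back to whether the promoted instruction is still
// legal for the target.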
3255 
3256 /// Given an instruction or constant expr, see if we can fold the operation
3257 /// into the addressing mode. If so, update the addressing mode and return
3258 /// true, otherwise return false without modifying AddrMode.
3259 /// If \p MovedAway is not NULL, it contains the information of whether or
3260 /// not AddrInst has to be folded into the addressing mode on success.
3261 /// If \p MovedAway == true, \p AddrInst will not be part of the addressing
3262 /// mode because it has been moved away.
3263 /// Thus AddrInst must not be added to the matched instructions.
3264 /// This state can happen when AddrInst is a sext, since it may be moved away.
3265 /// Therefore, AddrInst may not be valid when MovedAway is true and it must
3266 /// not be referenced anymore.
3267 bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
3268  unsigned Depth,
3269  bool *MovedAway) {
3270  // Avoid exponential behavior on extremely deep expression trees.
3271  if (Depth >= 5) return false;
3272 
3273  // By default, all matched instructions stay in place.
3274  if (MovedAway)
3275  *MovedAway = false;
3276 
3277  switch (Opcode) {
3278  case Instruction::PtrToInt:
3279  // PtrToInt is always a noop, as we know that the int type is pointer sized.
3280  return matchAddr(AddrInst->getOperand(0), Depth);
3281  case Instruction::IntToPtr: {
3282  auto AS = AddrInst->getType()->getPointerAddressSpace();
3283  auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
3284  // This inttoptr is a no-op if the integer type is pointer sized.
3285  if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy)
3286  return matchAddr(AddrInst->getOperand(0), Depth);
3287  return false;
3288  }
3289  case Instruction::BitCast:
3290  // BitCast is always a noop, and we can handle it as long as it is
3291  // int->int or pointer->pointer (we don't want int<->fp or something).
3292  if ((AddrInst->getOperand(0)->getType()->isPointerTy() ||
3293  AddrInst->getOperand(0)->getType()->isIntegerTy()) &&
3294  // Don't touch identity bitcasts. These were probably put here by LSR,
3295  // and we don't want to mess around with them. Assume it knows what it
3296  // is doing.
3297  AddrInst->getOperand(0)->getType() != AddrInst->getType())
3298  return matchAddr(AddrInst->getOperand(0), Depth);
3299  return false;
3300  case Instruction::AddrSpaceCast: {
3301  unsigned SrcAS
3302  = AddrInst->getOperand(0)->getType()->getPointerAddressSpace();
3303  unsigned DestAS = AddrInst->getType()->getPointerAddressSpace();
3304  if (TLI.isNoopAddrSpaceCast(SrcAS, DestAS))
3305  return matchAddr(AddrInst->getOperand(0), Depth);
3306  return false;
3307  }
3308  case Instruction::Add: {
3309  // Check to see if we can merge in the RHS then the LHS. If so, we win.
3310  ExtAddrMode BackupAddrMode = AddrMode;
3311  unsigned OldSize = AddrModeInsts.size();
3312  // Start a transaction at this point.
3313  // The LHS may match but not the RHS.
3314  // Therefore, we need a higher level restoration point to undo partially
3315  // matched operation.
3316  TypePromotionTransaction::ConstRestorationPt LastKnownGood =
3317  TPT.getRestorationPoint();
3318 
3319  if (matchAddr(AddrInst->getOperand(1), Depth+1) &&
3320  matchAddr(AddrInst->getOperand(0), Depth+1))
3321  return true;
3322 
3323  // Restore the old addr mode info.
3324  AddrMode = BackupAddrMode;
3325  AddrModeInsts.resize(OldSize);
3326  TPT.rollback(LastKnownGood);
3327 
3328  // Otherwise this was over-aggressive. Try merging in the LHS then the RHS.
3329  if (matchAddr(AddrInst->getOperand(0), Depth+1) &&
3330  matchAddr(AddrInst->getOperand(1), Depth+1))
3331  return true;
3332 
3333  // Otherwise we definitely can't merge the ADD in.
3334  AddrMode = BackupAddrMode;
3335  AddrModeInsts.resize(OldSize);
3336  TPT.rollback(LastKnownGood);
3337  break;
3338  }
3339  //case Instruction::Or:
3340  // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
3341  //break;
3342  case Instruction::Mul:
3343  case Instruction::Shl: {
3344  // Can only handle X*C and X << C.
3345  ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
3346  if (!RHS)
3347  return false;
3348  int64_t Scale = RHS->getSExtValue();
3349  if (Opcode == Instruction::Shl)
3350  Scale = 1LL << Scale;
3351 
3352  return matchScaledValue(AddrInst->getOperand(0), Scale, Depth);
3353  }
3354  case Instruction::GetElementPtr: {
3355  // Scan the GEP. We check whether it contains constant offsets and at most
3356  // one variable offset.
3357  int VariableOperand = -1;
3358  unsigned VariableScale = 0;
3359 
3360  int64_t ConstantOffset = 0;
3361  gep_type_iterator GTI = gep_type_begin(AddrInst);
3362  for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
3363  if (StructType *STy = GTI.getStructTypeOrNull()) {
3364  const StructLayout *SL = DL.getStructLayout(STy);
3365  unsigned Idx =
3366  cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
3367  ConstantOffset += SL->getElementOffset(Idx);
3368  } else {
3369  uint64_t TypeSize = DL.getTypeAllocSize(GTI.getIndexedType());
3370  if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
3371  ConstantOffset += CI->getSExtValue()*TypeSize;
3372  } else if (TypeSize) { // Scales of zero don't do anything.
3373  // We only allow one variable index at the moment.
3374  if (VariableOperand != -1)
3375  return false;
3376 
3377  // Remember the variable index.
3378  VariableOperand = i;
3379  VariableScale = TypeSize;
3380  }
3381  }
3382  }
3383 
3384  // A common case is for the GEP to only do a constant offset. In this case,
3385  // just add it to the disp field and check validity.
3386  if (VariableOperand == -1) {
3387  AddrMode.BaseOffs += ConstantOffset;
3388  if (ConstantOffset == 0 ||
3389  TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) {
3390  // Check to see if we can fold the base pointer in too.
3391  if (matchAddr(AddrInst->getOperand(0), Depth+1))
3392  return true;
3393  }
3394  AddrMode.BaseOffs -= ConstantOffset;
3395  return false;
3396  }
3397 
3398  // Save the valid addressing mode in case we can't match.
3399  ExtAddrMode BackupAddrMode = AddrMode;
3400  unsigned OldSize = AddrModeInsts.size();
3401 
3402  // See if the scale and offset amount is valid for this target.
3403  AddrMode.BaseOffs += ConstantOffset;
3404 
3405  // Match the base operand of the GEP.
3406  if (!matchAddr(AddrInst->getOperand(0), Depth+1)) {
3407  // If it couldn't be matched, just stuff the value in a register.
3408  if (AddrMode.HasBaseReg) {
3409  AddrMode = BackupAddrMode;
3410  AddrModeInsts.resize(OldSize);
3411  return false;
3412  }
3413  AddrMode.HasBaseReg = true;
3414  AddrMode.BaseReg = AddrInst->getOperand(0);
3415  }
3416 
3417  // Match the remaining variable portion of the GEP.
3418  if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
3419  Depth)) {
3420  // If it couldn't be matched, try stuffing the base into a register
3421  // instead of matching it, and retrying the match of the scale.
3422  AddrMode = BackupAddrMode;
3423  AddrModeInsts.resize(OldSize);
3424  if (AddrMode.HasBaseReg)
3425  return false;
3426  AddrMode.HasBaseReg = true;
3427  AddrMode.BaseReg = AddrInst->getOperand(0);
3428  AddrMode.BaseOffs += ConstantOffset;
3429  if (!matchScaledValue(AddrInst->getOperand(VariableOperand),
3430  VariableScale, Depth)) {
3431  // If even that didn't work, bail.
3432  AddrMode = BackupAddrMode;
3433  AddrModeInsts.resize(OldSize);
3434  return false;
3435  }
3436  }
3437 
3438  return true;
3439  }
3440  case Instruction::SExt:
3441  case Instruction::ZExt: {
3442  Instruction *Ext = dyn_cast<Instruction>(AddrInst);
3443  if (!Ext)
3444  return false;
3445 
3446  // Try to move this ext out of the way of the addressing mode.
3447  // Ask for a method for doing so.
3448  TypePromotionHelper::Action TPH =
3449  TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts);
3450  if (!TPH)
3451  return false;
3452 
3453  TypePromotionTransaction::ConstRestorationPt LastKnownGood =
3454  TPT.getRestorationPoint();
3455  unsigned CreatedInstsCost = 0;
3456  unsigned ExtCost = !TLI.isExtFree(Ext);
3457  Value *PromotedOperand =
3458  TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI);
3459  // SExt has been moved away.
3460  // Thus either it will be rematched later in the recursive calls or it is
3461  // gone. Anyway, we must not fold it into the addressing mode at this point.
3462  // E.g.,
3463  // op = add opnd, 1
3464  // idx = ext op
3465  // addr = gep base, idx
3466  // is now:
3467  // promotedOpnd = ext opnd <- no match here
3468  // op = promoted_add promotedOpnd, 1 <- match (later in recursive calls)
3469  // addr = gep base, op <- match
3470  if (MovedAway)
3471  *MovedAway = true;
3472 
3473  assert(PromotedOperand &&
3474  "TypePromotionHelper should have filtered out those cases");
3475 
3476  ExtAddrMode BackupAddrMode = AddrMode;
3477  unsigned OldSize = AddrModeInsts.size();
3478 
3479  if (!matchAddr(PromotedOperand, Depth) ||
3480  // The total of the new cost is equal to the cost of the created
3481  // instructions.
3482  // The total of the old cost is equal to the cost of the extension plus
3483  // what we have saved in the addressing mode.
3484  !isPromotionProfitable(CreatedInstsCost,
3485  ExtCost + (AddrModeInsts.size() - OldSize),
3486  PromotedOperand)) {
3487  AddrMode = BackupAddrMode;
3488  AddrModeInsts.resize(OldSize);
3489  DEBUG(dbgs() << "Sign extension does not pay off: rollback\n");
3490  TPT.rollback(LastKnownGood);
3491  return false;
3492  }
3493  return true;
3494  }
3495  }
3496  return false;
3497 }
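// As an illustration of the GetElementPtr case (hypothetical IR):
//
//   %addr = getelementptr [16 x i32], [16 x i32]* %base, i64 0, i64 %idx
//
// folds the constant part into AddrMode.BaseOffs (here 0), matches %base as the
// base, and then tries matchScaledValue(%idx, /*Scale=*/4, ...), i.e. the
// classic [base + 4*idx] form, provided the target reports that mode as legal.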
3498 
3499 /// If we can, try to add the value of 'Addr' into the current addressing mode.
3500 /// If Addr can't be added to AddrMode this returns false and leaves AddrMode
3501 /// unmodified. This assumes that Addr is either a pointer type or intptr_t
3502 /// for the target.
3503 ///
3504 bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) {
3505  // Start a transaction at this point that we will rollback if the matching
3506  // fails.
3507  TypePromotionTransaction::ConstRestorationPt LastKnownGood =
3508  TPT.getRestorationPoint();
3509  if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
3510  // Fold in immediates if legal for the target.
3511  AddrMode.BaseOffs += CI->getSExtValue();
3512  if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
3513  return true;
3514  AddrMode.BaseOffs -= CI->getSExtValue();
3515  } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
3516  // If this is a global variable, try to fold it into the addressing mode.
3517  if (!AddrMode.BaseGV) {
3518  AddrMode.BaseGV = GV;
3519  if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
3520  return true;
3521  AddrMode.BaseGV = nullptr;
3522  }
3523  } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
3524  ExtAddrMode BackupAddrMode = AddrMode;
3525  unsigned OldSize = AddrModeInsts.size();
3526 
3527  // Check to see if it is possible to fold this operation.
3528  bool MovedAway = false;
3529  if (matchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
3530  // This instruction may have been moved away. If so, there is nothing
3531  // to check here.
3532  if (MovedAway)
3533  return true;
3534  // Okay, it's possible to fold this. Check to see if it is actually
3535  // *profitable* to do so. We use a simple cost model to avoid increasing
3536  // register pressure too much.
3537  if (I->hasOneUse() ||
3538  isProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
3539  AddrModeInsts.push_back(I);
3540  return true;
3541  }
3542 
3543  // It isn't profitable to do this, roll back.
3544  //cerr << "NOT FOLDING: " << *I;
3545  AddrMode = BackupAddrMode;
3546  AddrModeInsts.resize(OldSize);
3547  TPT.rollback(LastKnownGood);
3548  }
3549  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
3550  if (matchOperationAddr(CE, CE->getOpcode(), Depth))
3551  return true;
3552  TPT.rollback(LastKnownGood);
3553  } else if (isa<ConstantPointerNull>(Addr)) {
3554  // Null pointer gets folded without affecting the addressing mode.
3555  return true;
3556  }
3557 
3558  // Worst case, the target should support [reg] addressing modes. :)
3559  if (!AddrMode.HasBaseReg) {
3560  AddrMode.HasBaseReg = true;
3561  AddrMode.BaseReg = Addr;
3562  // Still check for legality in case the target supports [imm] but not [i+r].
3563  if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
3564  return true;
3565  AddrMode.HasBaseReg = false;
3566  AddrMode.BaseReg = nullptr;
3567  }
3568 
3569  // If the base register is already taken, see if we can do [r+r].
3570  if (AddrMode.Scale == 0) {
3571  AddrMode.Scale = 1;
3572  AddrMode.ScaledReg = Addr;
3573  if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
3574  return true;
3575  AddrMode.Scale = 0;
3576  AddrMode.ScaledReg = nullptr;
3577  }
3578  // Couldn't match.
3579  TPT.rollback(LastKnownGood);
3580  return false;
3581 }
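// For example, matching a plain pointer value when nothing has been folded yet
// simply sets HasBaseReg/BaseReg (the [reg] form); if the base register slot is
// already taken, the same value can still be matched as ScaledReg with
// Scale = 1 (the [r + r] form), assuming the target accepts that mode.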
3582 
3583 /// Check to see if all uses of OpVal by the specified inline asm call are due
3584 /// to memory operands. If so, return true, otherwise return false.
3585 static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
3586  const TargetMachine &TM) {
3587  const Function *F = CI->getParent()->getParent();
3588  const TargetLowering *TLI = TM.getSubtargetImpl(*F)->getTargetLowering();
3589  const TargetRegisterInfo *TRI = TM.getSubtargetImpl(*F)->getRegisterInfo();
3590  TargetLowering::AsmOperandInfoVector TargetConstraints =
3591  TLI->ParseConstraints(F->getParent()->getDataLayout(), TRI,
3592  ImmutableCallSite(CI));
3593  for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
3594  TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
3595 
3596  // Compute the constraint code and ConstraintType to use.
3597  TLI->ComputeConstraintToUse(OpInfo, SDValue());
3598 
3599  // If this asm operand is our Value*, and if it isn't an indirect memory
3600  // operand, we can't fold it!
3601  if (OpInfo.CallOperandVal == OpVal &&
3602  (OpInfo.ConstraintType != TargetLowering::C_Memory ||
3603  !OpInfo.isIndirect))
3604  return false;
3605  }
3606 
3607  return true;
3608 }
3609 
3610 /// Recursively walk all the uses of I until we find a memory use.
3611 /// If we find an obviously non-foldable instruction, return true.
3612 /// Add the ultimately found memory instructions to MemoryUses.
3613 static bool FindAllMemoryUses(
3614  Instruction *I,
3615  SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses,
3616  SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetMachine &TM) {
3617  // If we already considered this instruction, we're done.
3618  if (!ConsideredInsts.insert(I).second)
3619  return false;
3620 
3621  // If this is an obviously unfoldable instruction, bail out.
3622  if (!MightBeFoldableInst(I))
3623  return true;
3624 
3625  const bool OptSize = I->getFunction()->optForSize();
3626 
3627  // Loop over all the uses, recursively processing them.
3628  for (Use &U : I->uses()) {
3629  Instruction *UserI = cast<Instruction>(U.getUser());
3630 
3631  if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
3632  MemoryUses.push_back(std::make_pair(LI, U.getOperandNo()));
3633  continue;
3634  }
3635 
3636  if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
3637  unsigned opNo = U.getOperandNo();
3638  if (opNo == 0) return true; // Storing addr, not into addr.
3639  MemoryUses.push_back(std::make_pair(SI, opNo));
3640  continue;
3641  }
3642 
3643  if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
3644  // If this is a cold call, we can sink the addressing calculation into
3645  // the cold path. See optimizeCallInst
3646  if (!OptSize && CI->hasFnAttr(Attribute::Cold))
3647  continue;
3648 
3649  InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
3650  if (!IA) return true;
3651 
3652  // If this is a memory operand, we're cool, otherwise bail out.
3653  if (!IsOperandAMemoryOperand(CI, IA, I, TM))
3654  return true;
3655  continue;
3656  }
3657 
3658  if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TM))
3659  return true;
3660  }
3661 
3662  return false;
3663 }
3664 
3665 /// Return true if Val is already known to be live at the use site that we're
3666 /// folding it into. If so, there is no cost to include it in the addressing
3667 /// mode. KnownLive1 and KnownLive2 are two values that we know are live at the
3668 /// instruction already.
3669 bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
3670  Value *KnownLive2) {
3671  // If Val is either of the known-live values, we know it is live!
3672  if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2)
3673  return true;
3674 
3675  // All values other than instructions and arguments (e.g. constants) are live.
3676  if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;
3677 
3678  // If Val is a constant-sized alloca in the entry block, it is live; this is
3679  // true because it is just a reference to the stack/frame pointer, which is
3680  // live for the whole function.
3681  if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
3682  if (AI->isStaticAlloca())
3683  return true;
3684 
3685  // Check to see if this value is already used in the memory instruction's
3686  // block. If so, it's already live into the block at the very least, so we
3687  // can reasonably fold it.
3688  return Val->isUsedInBasicBlock(MemoryInst->getParent());
3689 }
3690 
3691 /// It is possible for the addressing mode of the machine to fold the specified
3692 /// instruction into a load or store that ultimately uses it.
3693 /// However, the specified instruction has multiple uses.
3694 /// Given this, it may actually increase register pressure to fold it
3695 /// into the load. For example, consider this code:
3696 ///
3697 /// X = ...
3698 /// Y = X+1
3699 /// use(Y) -> nonload/store
3700 /// Z = Y+1
3701 /// load Z
3702 ///
3703 /// In this case, Y has multiple uses, and can be folded into the load of Z
3704 /// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
3705 /// be live at the use(Y) line. If we don't fold Y into load Z, we use one
3706 /// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
3707 /// number of computations either.
3708 ///
3709 /// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
3710 /// X was live across 'load Z' for other reasons, we actually *would* want to
3711 /// fold the addressing mode in the Z case. This would make Y die earlier.
3712 bool AddressingModeMatcher::
3713 isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
3714  ExtAddrMode &AMAfter) {
3715  if (IgnoreProfitability) return true;
3716 
3717  // AMBefore is the addressing mode before this instruction was folded into it,
3718  // and AMAfter is the addressing mode after the instruction was folded. Get
3719  // the set of registers referenced by AMAfter and subtract out those
3720  // referenced by AMBefore: this is the set of values which folding in this
3721  // address extends the lifetime of.
3722  //
3723  // Note that there are only two potential values being referenced here,
3724  // BaseReg and ScaleReg (global addresses are always available, as are any
3725  // folded immediates).
3726  Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
3727 
3728  // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
3729  // lifetime wasn't extended by adding this instruction.
3730  if (valueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
3731  BaseReg = nullptr;
3732  if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
3733  ScaledReg = nullptr;
3734 
3735  // If folding this instruction (and its subexprs) didn't extend any live
3736  // ranges, we're ok with it.
3737  if (!BaseReg && !ScaledReg)
3738  return true;
3739 
3740  // If all uses of this instruction can have the address mode sunk into them,
3741  // we can remove the addressing mode and effectively trade one live register
3742  // for another (at worst.) In this context, folding an addressing mode into
3743  // the use is just a particularly nice way of sinking it.
3744  SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
3745  SmallPtrSet<Instruction*, 16> ConsideredInsts;
3746  if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TM))
3747  return false; // Has a non-memory, non-foldable use!
3748 
3749  // Now that we know that all uses of this instruction are part of a chain of
3750  // computation involving only operations that could theoretically be folded
3751  // into a memory use, loop over each of these memory operation uses and see
3752  // if they could *actually* fold the instruction. The assumption is that
3753  // addressing modes are cheap and that duplicating the computation involved
3754  // many times is worthwhile, even on a fastpath. For sinking candidates
3755  // (i.e. cold call sites), this serves as a way to prevent excessive code
3756  // growth since most architectures have some reasonably small and fast way to
3757  // compute an effective address (e.g., LEA on x86).
3758  SmallVector<Instruction*, 32> MatchedAddrModeInsts;
3759  for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
3760  Instruction *User = MemoryUses[i].first;
3761  unsigned OpNo = MemoryUses[i].second;
3762 
3763  // Get the access type of this use. If the use isn't a pointer, we don't
3764  // know what it accesses.
3765  Value *Address = User->getOperand(OpNo);
3766  PointerType *AddrTy = dyn_cast<PointerType>(Address->getType());
3767  if (!AddrTy)
3768  return false;
3769  Type *AddressAccessTy = AddrTy->getElementType();
3770  unsigned AS = AddrTy->getAddressSpace();
3771 
3772  // Do a match against the root of this address, ignoring profitability. This
3773  // will tell us if the addressing mode for the memory operation will
3774  // *actually* cover the shared instruction.
3775  ExtAddrMode Result;
3776  TypePromotionTransaction::ConstRestorationPt LastKnownGood =
3777  TPT.getRestorationPoint();
3778  AddressingModeMatcher Matcher(MatchedAddrModeInsts, TM, AddressAccessTy, AS,
3779  MemoryInst, Result, InsertedInsts,
3780  PromotedInsts, TPT);
3781  Matcher.IgnoreProfitability = true;
3782  bool Success = Matcher.matchAddr(Address, 0);
3783  (void)Success; assert(Success && "Couldn't select *anything*?");
3784 
3785  // The match was to check the profitability, the changes made are not
3786  // part of the original matcher. Therefore, they should be dropped
3787  // otherwise the original matcher will not present the right state.
3788  TPT.rollback(LastKnownGood);
3789 
3790  // If the match didn't cover I, then it won't be shared by it.
3791  if (!is_contained(MatchedAddrModeInsts, I))
3792  return false;
3793 
3794  MatchedAddrModeInsts.clear();
3795  }
3796 
3797  return true;
3798 }
3799 
3800 } // end anonymous namespace
3801 
3802 /// Return true if the specified values are defined in a
3803 /// different basic block than BB.
3804 static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
3805  if (Instruction *I = dyn_cast<Instruction>(V))
3806  return I->getParent() != BB;
3807  return false;
3808 }
3809 
3810 /// Sink addressing mode computation immediately before MemoryInst if doing so
3811 /// can be done without increasing register pressure. The need for the
3812 /// register pressure constraint means this can end up being an all or nothing
3813 /// decision for all uses of the same addressing computation.
3814 ///
3815 /// Load and Store Instructions often have addressing modes that can do
3816 /// significant amounts of computation. As such, instruction selection will try
3817 /// to get the load or store to do as much computation as possible for the
3818 /// program. The problem is that isel can only see within a single block. As
3819 /// such, we sink as much legal addressing mode work into the block as possible.
3820 ///
3821 /// This method is used to optimize both load/store and inline asms with memory
3822 /// operands. It's also used to sink addressing computations feeding into cold
3823 /// call sites into their (cold) basic block.
3824 ///
3825 /// The motivation for handling sinking into cold blocks is that doing so can
3826 /// both enable other address mode sinking (by satisfying the register pressure
3827 /// constraint above), and reduce register pressure globally (by removing the
3828 /// addressing mode computation from the fast path entirely).
3829 bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
3830  Type *AccessTy, unsigned AddrSpace) {
3831  Value *Repl = Addr;
3832 
3833  // Try to collapse single-value PHI nodes. This is necessary to undo
3834  // unprofitable PRE transformations.
3835  SmallVector<Value*, 8> worklist;
3836  SmallPtrSet<Value*, 16> Visited;
3837  worklist.push_back(Addr);
3838 
3839  // Use a worklist to iteratively look through PHI nodes, and ensure that
3840  // the addressing mode obtained from the non-PHI roots of the graph
3841  // are equivalent.
3842  Value *Consensus = nullptr;
3843  unsigned NumUsesConsensus = 0;
3844  bool IsNumUsesConsensusValid = false;
3845  SmallVector<Instruction*, 16> AddrModeInsts;
3846  ExtAddrMode AddrMode;
3847  TypePromotionTransaction TPT;
3848  TypePromotionTransaction::ConstRestorationPt LastKnownGood =
3849  TPT.getRestorationPoint();
3850  while (!worklist.empty()) {
3851  Value *V = worklist.back();
3852  worklist.pop_back();
3853 
3854  // Break use-def graph loops.
3855  if (!Visited.insert(V).second) {
3856  Consensus = nullptr;
3857  break;
3858  }
3859 
3860  // For a PHI node, push all of its incoming values.
3861  if (PHINode *P = dyn_cast<PHINode>(V)) {
3862  for (Value *IncValue : P->incoming_values())
3863  worklist.push_back(IncValue);
3864  continue;
3865  }
3866 
3867  // For non-PHIs, determine the addressing mode being computed. Note that
3868  // the result may differ depending on what other uses our candidate
3869  // addressing instructions might have.
3870  SmallVector<Instruction*, 16> NewAddrModeInsts;
3871  ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
3872  V, AccessTy, AddrSpace, MemoryInst, NewAddrModeInsts, *TM,
3873  InsertedInsts, PromotedInsts, TPT);
3874 
3875  // This check is broken into two cases with very similar code to avoid using
3876  // getNumUses() as much as possible. Some values have a lot of uses, so
3877  // calling getNumUses() unconditionally caused a significant compile-time
3878  // regression.
3879  if (!Consensus) {
3880  Consensus = V;
3881  AddrMode = NewAddrMode;
3882  AddrModeInsts = NewAddrModeInsts;
3883  continue;
3884  } else if (NewAddrMode == AddrMode) {
3885  if (!IsNumUsesConsensusValid) {
3886  NumUsesConsensus = Consensus->getNumUses();
3887  IsNumUsesConsensusValid = true;
3888  }
3889 
3890  // Ensure that the obtained addressing mode is equivalent to that obtained
3891  // for all other roots of the PHI traversal. Also, when choosing one
3892  // such root as representative, select the one with the most uses in order
3893  // to keep the cost modeling heuristics in AddressingModeMatcher
3894  // applicable.
3895  unsigned NumUses = V->getNumUses();
3896  if (NumUses > NumUsesConsensus) {
3897  Consensus = V;
3898  NumUsesConsensus = NumUses;
3899  AddrModeInsts = NewAddrModeInsts;
3900  }
3901  continue;
3902  }
3903 
3904  Consensus = nullptr;
3905  break;
3906  }
3907 
3908  // If the addressing mode couldn't be determined, or if multiple different
3909  // ones were determined, bail out now.
3910  if (!Consensus) {
3911  TPT.rollback(LastKnownGood);
3912  return false;
3913  }
3914  TPT.commit();
3915 
3916  // If all the instructions matched are already in this BB, don't do anything.
3917  if (none_of(AddrModeInsts, [&](Value *V) {
3918  return IsNonLocalValue(V, MemoryInst->getParent());
3919  })) {
3920  DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n");
3921  return false;
3922  }
3923 
3924  // Insert this computation right after this user. Since our caller is
3925  // scanning from the top of the BB to the bottom, reuses of the expr are
3926  // guaranteed to happen later.
3927  IRBuilder<> Builder(MemoryInst);
3928 
3929  // Now that we've determined the addressing expression we want to use and
3930  // know that we have to sink it into this block, check to see if we have already
3931  // done this for some other load/store instr in this block. If so, reuse the
3932  // computation.
3933  Value *&SunkAddr = SunkAddrs[Addr];
3934  if (SunkAddr) {
3935  DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
3936  << *MemoryInst << "\n");
3937  if (SunkAddr->getType() != Addr->getType())
3938  SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
3939  } else if (AddrSinkUsingGEPs ||
3940  (!AddrSinkUsingGEPs.getNumOccurrences() && TM &&
3941  TM->getSubtargetImpl(*MemoryInst->getParent()->getParent())
3942  ->useAA())) {
3943  // By default, we use the GEP-based method when AA is used later. This
3944  // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
3945  DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
3946  << *MemoryInst << "\n");
3947  Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
3948  Value *ResultPtr = nullptr, *ResultIndex = nullptr;
3949 
3950  // First, find the pointer.
3951  if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) {
3952  ResultPtr = AddrMode.BaseReg;
3953  AddrMode.BaseReg = nullptr;
3954  }
3955 
3956  if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) {
3957  // We can't add more than one pointer together, nor can we scale a
3958  // pointer (both of which seem meaningless).
3959  if (ResultPtr || AddrMode.Scale != 1)
3960  return false;
3961 
3962  ResultPtr = AddrMode.ScaledReg;
3963  AddrMode.Scale = 0;
3964  }
3965 
3966  if (AddrMode.BaseGV) {
3967  if (ResultPtr)
3968  return false;
3969 
3970  ResultPtr = AddrMode.BaseGV;
3971  }
3972 
3973  // If the real base value actually came from an inttoptr, then the matcher
3974  // will look through it and provide only the integer value. In that case,
3975  // use it here.
3976  if (!ResultPtr && AddrMode.BaseReg) {
3977  ResultPtr =
3978  Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(), "sunkaddr");
3979  AddrMode.BaseReg = nullptr;
3980  } else if (!ResultPtr && AddrMode.Scale == 1) {
3981  ResultPtr =
3982  Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(), "sunkaddr");
3983  AddrMode.Scale = 0;
3984  }
3985 
3986  if (!ResultPtr &&
3987  !AddrMode.BaseReg && !AddrMode.Scale && !AddrMode.BaseOffs) {
3988  SunkAddr = Constant::getNullValue(Addr->getType());
3989  } else if (!ResultPtr) {
3990  return false;
3991  } else {
3992  Type *I8PtrTy =
3993  Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace());
3994  Type *I8Ty = Builder.getInt8Ty();
3995 
3996  // Start with the base register. Do this first so that subsequent address
3997  // matching finds it last, which will prevent it from trying to match it
3998  // as the scaled value in case it happens to be a mul. That would be
3999  // problematic if we've sunk a different mul for the scale, because then
4000  // we'd end up sinking both muls.
4001  if (AddrMode.BaseReg) {
4002  Value *V = AddrMode.BaseReg;
4003  if (V->getType() != IntPtrTy)
4004  V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
4005 
4006  ResultIndex = V;
4007  }
4008 
4009  // Add the scale value.
4010  if (AddrMode.Scale) {
4011  Value *V = AddrMode.ScaledReg;
4012  if (V->getType() == IntPtrTy) {
4013  // done.
4014  } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
4015  cast<IntegerType>(V->getType())->getBitWidth()) {
4016  V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
4017  } else {
4018  // It is only safe to sign extend the BaseReg if we know that the math
4019  // required to create it did not overflow before we extend it. Since
4020  // the original IR value was tossed in favor of a constant back when
4021  // the AddrMode was created we need to bail out gracefully if widths
4022  // do not match instead of extending it.
4023  Instruction *I = dyn_cast_or_null<Instruction>(ResultIndex);
4024  if (I && (ResultIndex != AddrMode.BaseReg))
4025  I->eraseFromParent();
4026  return false;
4027  }
4028 
4029  if (AddrMode.Scale != 1)
4030  V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
4031  "sunkaddr");
4032  if (ResultIndex)
4033  ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr");
4034  else
4035  ResultIndex = V;
4036  }
4037 
4038  // Add in the Base Offset if present.
4039  if (AddrMode.BaseOffs) {
4040  Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
4041  if (ResultIndex) {
4042  // We need to add this separately from the scale above to help with
4043  // SDAG consecutive load/store merging.
4044  if (ResultPtr->getType() != I8PtrTy)
4045  ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
4046  ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
4047  }
4048 
4049  ResultIndex = V;
4050  }
4051 
4052  if (!ResultIndex) {
4053  SunkAddr = ResultPtr;
4054  } else {
4055  if (ResultPtr->getType() != I8PtrTy)
4056  ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
4057  SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
4058  }
4059 
4060  if (SunkAddr->getType() != Addr->getType())
4061  SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
4062  }
4063  } else {
4064  DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
4065  << *MemoryInst << "\n");
4066  Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
4067  Value *Result = nullptr;
4068 
4069  // Start with the base register. Do this first so that subsequent address
4070  // matching finds it last, which will prevent it from trying to match it
4071  // as the scaled value in case it happens to be a mul. That would be
4072  // problematic if we've sunk a different mul for the scale, because then
4073  // we'd end up sinking both muls.
4074  if (AddrMode.BaseReg) {
4075  Value *V = AddrMode.BaseReg;
4076  if (V->getType()->isPointerTy())
4077  V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
4078  if (V->getType() != IntPtrTy)
4079  V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
4080  Result = V;
4081  }
4082 
4083  // Add the scale value.
4084  if (AddrMode.Scale) {
4085  Value *V = AddrMode.ScaledReg;
4086  if (V->getType() == IntPtrTy) {
4087  // done.
4088  } else if (V->getType()->isPointerTy()) {
4089  V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
4090  } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
4091  cast<IntegerType>(V->getType())->getBitWidth()) {
4092  V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
4093  } else {
4094  // It is only safe to sign extend the BaseReg if we know that the math
4095  // required to create it did not overflow before we extend it. Since
4096  // the original IR value was tossed in favor of a constant back when
4097  // the AddrMode was created we need to bail out gracefully if widths
4098  // do not match instead of extending it.
4099  Instruction *I = dyn_cast_or_null<Instruction>(Result);
4100  if (I && (Result != AddrMode.BaseReg))
4101  I->eraseFromParent();
4102  return false;
4103  }
4104  if (AddrMode.Scale != 1)
4105  V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
4106  "sunkaddr");
4107  if (Result)
4108  Result = Builder.CreateAdd(Result, V, "sunkaddr");
4109  else
4110  Result = V;
4111  }
4112 
4113  // Add in the BaseGV if present.
4114  if (AddrMode.BaseGV) {
4115  Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr");
4116  if (Result)
4117  Result = Builder.CreateAdd(Result, V, "sunkaddr");
4118  else
4119  Result = V;
4120  }
4121 
4122  // Add in the Base Offset if present.
4123  if (AddrMode.BaseOffs) {
4124  Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
4125  if (Result)
4126  Result = Builder.CreateAdd(Result, V, "sunkaddr");
4127  else
4128  Result = V;
4129  }
4130 
4131  if (!Result)
4132  SunkAddr = Constant::getNullValue(Addr->getType());
4133  else
4134  SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
4135  }
4136 
4137  MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
4138 
4139  // If we have no uses, recursively delete the value and all dead instructions
4140  // using it.
4141  if (Repl->use_empty()) {
4142  // This can cause recursive deletion, which can invalidate our iterator.
4143  // Use a WeakVH to hold onto it in case this happens.
4144  Value *CurValue = &*CurInstIterator;
4145  WeakVH IterHandle(CurValue);
4146  BasicBlock *BB = CurInstIterator->getParent();
4147 
4148  RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo);
4149 
4150  if (IterHandle != CurValue) {
4151  // If the iterator instruction was recursively deleted, start over at the
4152  // start of the block.
4153  CurInstIterator = BB->begin();
4154  SunkAddrs.clear();
4155  }
4156  }
4157  ++NumMemoryInsts;
4158  return true;
4159 }
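// A sketch of the rewrite on hypothetical IR: if the matched mode for a load in
// another block is { BaseReg = %base, BaseOffs = 16 }, the address is
// rematerialized right before the load either in GEP form,
//
//   %sunkaddr = getelementptr i8, i8* %base.i8, i64 16
//
// or, on the non-GEP path, in integer form,
//
//   %sunkaddr = inttoptr i64 %base.plus.16 to i32*
//
// and the memory instruction's address operand is replaced with %sunkaddr.
// The %base.i8 / %base.plus.16 names above merely stand for the bitcast/add
// values that the IRBuilder emits to get the operands into the right types.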
4160 
4161 /// If there are any memory operands, use OptimizeMemoryInst to sink their
4162 /// address computing into the block when possible / profitable.
4163 bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
4164  bool MadeChange = false;
4165 
4166  const TargetRegisterInfo *TRI =
4167  TM->getSubtargetImpl(*CS->getParent()->getParent())->getRegisterInfo();
4168  TargetLowering::AsmOperandInfoVector TargetConstraints =
4169  TLI->ParseConstraints(*DL, TRI, CS);
4170  unsigned ArgNo = 0;
4171  for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
4172  TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
4173 
4174  // Compute the constraint code and ConstraintType to use.
4175  TLI->ComputeConstraintToUse(OpInfo, SDValue());
4176 
4177  if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
4178  OpInfo.isIndirect) {
4179  Value *OpVal = CS->getArgOperand(ArgNo++);
4180  MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u);
4181  } else if (OpInfo.Type == InlineAsm::isInput)
4182  ArgNo++;
4183  }
4184 
4185  return MadeChange;
4186 }
4187 
4188 /// \brief Check if all the uses of \p Inst are equivalent (or free) zero or
4189 /// sign extensions.
4190 static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) {
4191  assert(!Inst->use_empty() && "Input must have at least one use");
4192  const Instruction *FirstUser = cast<Instruction>(*Inst->user_begin());
4193  bool IsSExt = isa<SExtInst>(FirstUser);
4194  Type *ExtTy = FirstUser->getType();
4195  for (const User *U : Inst->users()) {
4196  const Instruction *UI = cast<Instruction>(U);
4197  if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
4198  return false;
4199  Type *CurTy = UI->getType();
4200  // Same input and output types: Same instruction after CSE.
4201  if (CurTy == ExtTy)
4202  continue;
4203 
4204  // If IsSExt is true, we are in this situation:
4205  // a = Inst
4206  // b = sext ty1 a to ty2
4207  // c = sext ty1 a to ty3
4208  // Assuming ty2 is shorter than ty3, this could be turned into:
4209  // a = Inst
4210  // b = sext ty1 a to ty2
4211  // c = sext ty2 b to ty3
4212  // However, the last sext is not free.
4213  if (IsSExt)
4214  return false;
4215 
4216  // This is a ZExt; maybe it is free to extend from one type to another.
4217  // In that case, we would not account for a different use.
4218  Type *NarrowTy;
4219  Type *LargeTy;
4220  if (ExtTy->getScalarType()->getIntegerBitWidth() >
4221  CurTy->getScalarType()->getIntegerBitWidth()) {
4222  NarrowTy = CurTy;
4223  LargeTy = ExtTy;
4224  } else {
4225  NarrowTy = ExtTy;
4226  LargeTy = CurTy;
4227  }
4228 
4229  if (!TLI.isZExtFree(NarrowTy, LargeTy))
4230  return false;
4231  }
4232  // All uses are the same or can be derived from one another for free.
4233  return true;
4234 }
4235 
4236 /// \brief Try to form ExtLd by promoting \p Exts until they reach a
4237 /// load instruction.
4238 /// If an ext(load) can be formed, it is returned via \p LI for the load
4239 /// and \p Inst for the extension.
4240 /// Otherwise LI == nullptr and Inst == nullptr.
4241 /// When some promotion happened, \p TPT contains the proper state to
4242 /// revert them.
4243 ///
4244 /// \return true when promoting was necessary to expose the ext(load)
4245 /// opportunity, false otherwise.
4246 ///
4247 /// Example:
4248 /// \code
4249 /// %ld = load i32* %addr
4250 /// %add = add nuw i32 %ld, 4
4251 /// %zext = zext i32 %add to i64
4252 /// \endcode
4253 /// =>
4254 /// \code
4255 /// %ld = load i32* %addr
4256 /// %zext = zext i32 %ld to i64
4257 /// %add = add nuw i64 %zext, 4
4258 /// \endcode
4259 /// Thanks to the promotion, we can match zext(load i32*) to i64.
4260 bool CodeGenPrepare::extLdPromotion(TypePromotionTransaction &TPT,
4261  LoadInst *&LI, Instruction *&Inst,
4262  const SmallVectorImpl<Instruction *> &Exts,
4263  unsigned CreatedInstsCost = 0) {
4264  // Iterate over all the extensions to see if one forms an ext(load).
4265  for (auto I : Exts) {
4266  // Check if we directly have ext(load).
4267  if ((LI = dyn_cast<LoadInst>(I->getOperand(0)))) {
4268  Inst = I;
4269  // No promotion happened here.
4270  return false;
4271  }
4272  // Check whether or not we want to do any promotion.
4273  if (!TLI || !TLI->enableExtLdPromotion() || DisableExtLdPromotion)
4274  continue;
4275  // Get the action to perform the promotion.
4276  TypePromotionHelper::Action TPH = TypePromotionHelper::getAction(
4277  I, InsertedInsts, *TLI, PromotedInsts);
4278  // Check if we can promote.
4279  if (!TPH)
4280  continue;
4281  // Save the current state.
4282  TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4283  TPT.getRestorationPoint();
4284  SmallVector<Instruction *, 4> NewExts;
4285  unsigned NewCreatedInstsCost = 0;
4286  unsigned ExtCost = !TLI->isExtFree(I);
4287  // Promote.
4288  Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost,
4289  &NewExts, nullptr, *TLI);
4290  assert(PromotedVal &&
4291  "TypePromotionHelper should have filtered out those cases");
4292 
4293  // We would be able to merge only one extension into a load.
4294  // Therefore, if we have more than 1 new extension we heuristically
4295  // cut this search path, because it means we degrade the code quality.
4296  // With exactly 2, the transformation is neutral, because we will merge
4297  // one extension but leave one. However, we optimistically keep going,
4298  // because the new extension may be removed too.
4299  long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
4300  TotalCreatedInstsCost -= ExtCost;
4301  if (!StressExtLdPromotion &&
4302  (TotalCreatedInstsCost > 1 ||
4303  !isPromotedInstructionLegal(*TLI, *DL, PromotedVal))) {
4304  // The promotion is not profitable, rollback to the previous state.
4305  TPT.rollback(LastKnownGood);
4306  continue;
4307  }
4308  // The promotion is profitable.
4309  // Check if it exposes an ext(load).
4310  (void)extLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInstsCost);
4311  if (LI && (StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
4312  // If we have created a new extension, i.e., now we have two
4313  // extensions. We must make sure one of them is merged with
4314  // the load, otherwise we may degrade the code quality.
4315  (LI->hasOneUse() || hasSameExtUse(LI, *TLI))))
4316  // Promotion happened.
4317  return true;
4318  // If this does not help to expose an ext(load) then, rollback.
4319  TPT.rollback(LastKnownGood);
4320  }
4321  // None of the extensions can form an ext(load).
4322  LI = nullptr;
4323  Inst = nullptr;
4324  return false;
4325 }
4326 
4327 /// Move a zext or sext fed by a load into the same basic block as the load,
4328 /// unless conditions are unfavorable. This allows SelectionDAG to fold the
4329 /// extend into the load.
4330 /// \p I[in/out] the extension may be modified during the process if some
4331 /// promotions apply.
4332 ///
4333 bool CodeGenPrepare::moveExtToFormExtLoad(Instruction *&I) {
4334  // ExtLoad formation infrastructure requires TLI to be effective.
4335  if (!TLI)
4336  return false;
4337 
4338  // Try to promote a chain of computation if it allows us to form
4339  // an extended load.
4340  TypePromotionTransaction TPT;
4341  TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4342  TPT.getRestorationPoint();
4343  SmallVector<Instruction *, 1> Exts;
4344  Exts.push_back(I);
4345  // Look for a load being extended.
4346  LoadInst *LI = nullptr;
4347  Instruction *OldExt = I;
4348  bool HasPromoted = extLdPromotion(TPT, LI, I, Exts);
4349  if (!LI || !I) {
4350  assert(!HasPromoted && !LI && "If we did not match any load instruction "
4351  "the code must remain the same");
4352  I = OldExt;
4353  return false;
4354  }
4355 
4356  // If they're already in the same block, there's nothing to do.
4357  // Make the cheap checks first if we did not promote.
4358  // If we promoted, we need to check if it is indeed profitable.
4359  if (!HasPromoted && LI->getParent() == I->getParent())
4360  return false;
4361 
4362  EVT VT = TLI->getValueType(*DL, I->getType());
4363  EVT LoadVT = TLI->getValueType(*DL, LI->getType());
4364 
4365  // If the load has other users and the truncate is not free, this probably
4366  // isn't worthwhile.
4367  if (!LI->hasOneUse() &&
4368  (TLI->isTypeLegal(LoadVT) || !TLI->isTypeLegal(VT)) &&
4369  !TLI->isTruncateFree(I->getType(), LI->getType())) {
4370  I = OldExt;
4371  TPT.rollback(LastKnownGood);
4372  return false;
4373  }
4374 
4375  // Check whether the target supports casts folded into loads.
4376  unsigned LType;
4377  if (isa<ZExtInst>(I))
4378  LType = ISD::ZEXTLOAD;
4379  else {
4380  assert(isa<SExtInst>(I) && "Unexpected ext type!");
4381  LType = ISD::SEXTLOAD;
4382  }
4383  if (!TLI->isLoadExtLegal(LType, VT, LoadVT)) {
4384  I = OldExt;
4385  TPT.rollback(LastKnownGood);
4386  return false;
4387  }
4388 
4389  // Move the extend into the same block as the load, so that SelectionDAG
4390  // can fold it.
4391  TPT.commit();
4392  I->removeFromParent();
4393  I->insertAfter(LI);
4394  // CGP does not check if the zext would be speculatively executed when moved
4395  // to the same basic block as the load. Preserving its original location would
4396  // pessimize the debugging experience, as well as negatively impact the
4397  // quality of sample pgo. We don't want to use "line 0" as that has a
4398  // size cost in the line-table section and logically the zext can be seen as
4399  // part of the load. Therefore we conservatively reuse the same debug location
4400  // for the load and the zext.
4401  I->setDebugLoc(LI->getDebugLoc());
4402  ++NumExtsMoved;
4403  return true;
4404 }
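// The transformation in picture form (hypothetical IR): starting from
//
//   bb0:  %ld = load i16, i16* %p
//         br label %bb1
//   bb1:  %z = zext i16 %ld to i32
//
// the zext is moved next to the load,
//
//   bb0:  %ld = load i16, i16* %p
//         %z = zext i16 %ld to i32
//
// so that SelectionDAG, which works a block at a time, can select a single
// extending load (ISD::ZEXTLOAD) when the target supports it.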
4405 
4406 bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
4407  BasicBlock *DefBB = I->getParent();
4408 
4409  // If the result of a {s|z}ext and its source are both live out, rewrite all
4410  // other uses of the source with result of extension.
4411  Value *Src = I->getOperand(0);
4412  if (Src->hasOneUse())
4413  return false;
4414 
4415  // Only do this xform if truncating is free.
4416  if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
4417  return false;
4418 
4419  // Only safe to perform the optimization if the source is also defined in
4420  // this block.
4421  if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
4422  return false;
4423 
4424  bool DefIsLiveOut = false;
4425  for (User *U : I->users()) {
4426  Instruction *UI = cast<Instruction>(U);
4427 
4428  // Figure out which BB this ext is used in.
4429  BasicBlock *UserBB = UI->getParent();
4430  if (UserBB == DefBB) continue;
4431  DefIsLiveOut = true;
4432  break;
4433  }
4434  if (!DefIsLiveOut)
4435  return false;
4436 
4437  // Make sure none of the uses are PHI nodes.
4438  for (User *U : Src->users()) {
4439  Instruction *UI = cast<Instruction>(U);
4440  BasicBlock *UserBB = UI->getParent();
4441  if (UserBB == DefBB) continue;
4442  // Be conservative. We don't want this xform to end up introducing
4443  // reloads just before load / store instructions.
4444  if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI))
4445  return false;
4446  }
4447 
4448   // InsertedTruncs - Only insert one trunc in each block.
4449  DenseMap<BasicBlock*, Instruction*> InsertedTruncs;
4450 
4451  bool MadeChange = false;
4452  for (Use &U : Src->uses()) {
4453  Instruction *User = cast<Instruction>(U.getUser());
4454 
4455  // Figure out which BB this ext is used in.
4456  BasicBlock *UserBB = User->getParent();
4457  if (UserBB == DefBB) continue;
4458 
4459  // Both src and def are live in this block. Rewrite the use.
4460  Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
4461 
4462  if (!InsertedTrunc) {
4463  BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
4464  assert(InsertPt != UserBB->end());
4465  InsertedTrunc = new TruncInst(I, Src->getType(), "", &*InsertPt);
4466  InsertedInsts.insert(InsertedTrunc);
4467  }
4468 
4469  // Replace a use of the {s|z}ext source with a use of the result.
4470  U = InsertedTrunc;
4471  ++NumExtUses;
4472  MadeChange = true;
4473  }
4474 
4475  return MadeChange;
4476 }
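// Hedged sketch of the rewrite above (hypothetical IR, assuming the i32->i16
// truncate is free on the target): if both %src and %ext are live out of b0,
//   b0:
//     %src = ...
//     %ext = zext i16 %src to i32
//   b1:
//     use i16 %src
// the use in b1 is rewritten to
//   b1:
//     %t = trunc i32 %ext to i16
//     use i16 %t
// so only %ext needs to stay live across the block boundary.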
4477 
4478 // Find loads whose uses only use some of the loaded value's bits. Add an "and"
4479 // just after the load if the target can fold this into one extload instruction,
4480 // with the hope of eliminating some of the other later "and" instructions using
4481 // the loaded value. "and"s that are made trivially redundant by the insertion
4482 // of the new "and" are removed by this function, while others (e.g. those whose
4483 // path from the load goes through a phi) are left for isel to potentially
4484 // remove.
4485 //
4486 // For example:
4487 //
4488 // b0:
4489 // x = load i32
4490 // ...
4491 // b1:
4492 // y = and x, 0xff
4493 // z = use y
4494 //
4495 // becomes:
4496 //
4497 // b0:
4498 // x = load i32
4499 // x' = and x, 0xff
4500 // ...
4501 // b1:
4502 // z = use x'
4503 //
4504 // whereas:
4505 //
4506 // b0:
4507 // x1 = load i32
4508 // ...
4509 // b1:
4510 // x2 = load i32
4511 // ...
4512 // b2:
4513 // x = phi x1, x2
4514 // y = and x, 0xff
4515 //
4516 // becomes (after a call to optimizeLoadExt for each load):
4517 //
4518 // b0:
4519 // x1 = load i32
4520 // x1' = and x1, 0xff
4521 // ...
4522 // b1:
4523 // x2 = load i32
4524 // x2' = and x2, 0xff
4525 // ...
4526 // b2:
4527 // x = phi x1', x2'
4528 // y = and x, 0xff
4529 //
4530 
4531 bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
4532 
4533  if (!Load->isSimple() ||
4534  !(Load->getType()->isIntegerTy() || Load->getType()->isPointerTy()))
4535  return false;
4536 
4537  // Skip loads we've already transformed or have no reason to transform.
4538  if (Load->hasOneUse()) {
4539  User *LoadUser = *Load->user_begin();
4540  if (cast<Instruction>(LoadUser)->getParent() == Load->getParent() &&
4541  !dyn_cast<PHINode>(LoadUser))
4542  return false;
4543  }
4544 
4545  // Look at all uses of Load, looking through phis, to determine how many bits
4546  // of the loaded value are needed.
4547   SmallVector<Instruction *, 8> WorkList;
4548   SmallPtrSet<Instruction *, 16> Visited;
4549   SmallVector<Instruction *, 8> AndsToMaybeRemove;
4550  for (auto *U : Load->users())
4551  WorkList.push_back(cast<Instruction>(U));
4552 
4553  EVT LoadResultVT = TLI->getValueType(*DL, Load->getType());
4554  unsigned BitWidth = LoadResultVT.getSizeInBits();
4555  APInt DemandBits(BitWidth, 0);
4556  APInt WidestAndBits(BitWidth, 0);
4557 
4558  while (!WorkList.empty()) {
4559  Instruction *I = WorkList.back();
4560  WorkList.pop_back();
4561 
4562  // Break use-def graph loops.
4563  if (!Visited.insert(I).second)
4564  continue;
4565 
4566  // For a PHI node, push all of its users.
4567  if (auto *Phi = dyn_cast<PHINode>(I)) {
4568  for (auto *U : Phi->users())
4569  WorkList.push_back(cast<Instruction>(U));
4570  continue;
4571  }
4572 
4573  switch (I->getOpcode()) {
4574  case llvm::Instruction::And: {
4575  auto *AndC = dyn_cast<ConstantInt>(I->getOperand(1));
4576  if (!AndC)
4577  return false;
4578  APInt AndBits = AndC->getValue();
4579  DemandBits |= AndBits;
4580  // Keep track of the widest and mask we see.
4581  if (AndBits.ugt(WidestAndBits))
4582  WidestAndBits = AndBits;
4583  if (AndBits == WidestAndBits && I->getOperand(0) == Load)
4584  AndsToMaybeRemove.push_back(I);
4585  break;
4586  }
4587 
4588  case llvm::Instruction::Shl: {
4589  auto *ShlC = dyn_cast<ConstantInt>(I->getOperand(1));
4590  if (!ShlC)
4591  return false;
4592  uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1);
4593  auto ShlDemandBits = APInt::getAllOnesValue(BitWidth).lshr(ShiftAmt);
4594  DemandBits |= ShlDemandBits;
4595  break;
4596  }
4597 
4598  case llvm::Instruction::Trunc: {
4599  EVT TruncVT = TLI->getValueType(*DL, I->getType());
4600  unsigned TruncBitWidth = TruncVT.getSizeInBits();
4601  auto TruncBits = APInt::getAllOnesValue(TruncBitWidth).zext(BitWidth);
4602  DemandBits |= TruncBits;
4603  break;
4604  }
4605 
4606  default:
4607  return false;
4608  }
4609  }
4610 
4611  uint32_t ActiveBits = DemandBits.getActiveBits();
4612  // Avoid hoisting (and (load x) 1) since it is unlikely to be folded by the
4613  // target even if isLoadExtLegal says an i1 EXTLOAD is valid. For example,
4614  // for the AArch64 target isLoadExtLegal(ZEXTLOAD, i32, i1) returns true, but
4615  // (and (load x) 1) is not matched as a single instruction, rather as a LDR
4616  // followed by an AND.
4617  // TODO: Look into removing this restriction by fixing backends to either
4618  // return false for isLoadExtLegal for i1 or have them select this pattern to
4619  // a single instruction.
4620  //
4621  // Also avoid hoisting if we didn't see any ands with the exact DemandBits
4622  // mask, since these are the only ands that will be removed by isel.
4623  if (ActiveBits <= 1 || !APIntOps::isMask(ActiveBits, DemandBits) ||
4624  WidestAndBits != DemandBits)
4625  return false;
4626 
4627  LLVMContext &Ctx = Load->getType()->getContext();
4628  Type *TruncTy = Type::getIntNTy(Ctx, ActiveBits);
4629  EVT TruncVT = TLI->getValueType(*DL, TruncTy);
4630 
4631  // Reject cases that won't be matched as extloads.
4632  if (!LoadResultVT.bitsGT(TruncVT) || !TruncVT.isRound() ||
4633  !TLI->isLoadExtLegal(ISD::ZEXTLOAD, LoadResultVT, TruncVT))
4634  return false;
4635 
4636  IRBuilder<> Builder(Load->getNextNode());
4637  auto *NewAnd = dyn_cast<Instruction>(
4638  Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits)));
4639 
4640  // Replace all uses of load with new and (except for the use of load in the
4641  // new and itself).
4642  Load->replaceAllUsesWith(NewAnd);
4643  NewAnd->setOperand(0, Load);
4644 
4645  // Remove any and instructions that are now redundant.
4646  for (auto *And : AndsToMaybeRemove)
4647  // Check that the and mask is the same as the one we decided to put on the
4648  // new and.
4649  if (cast<ConstantInt>(And->getOperand(1))->getValue() == DemandBits) {
4650  And->replaceAllUsesWith(NewAnd);
4651  if (&*CurInstIterator == And)
4652  CurInstIterator = std::next(And->getIterator());
4653  And->eraseFromParent();
4654  ++NumAndUses;
4655  }
4656 
4657  ++NumAndsAdded;
4658  return true;
4659 }
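// Worked example for the demanded-bits bookkeeping above (hypothetical values):
// for a load of i32 whose only users are (and i32 %x, 255) and
// (trunc i32 %x to i8), both users demand the low 8 bits, so DemandBits and
// WidestAndBits are both 0xff, ActiveBits is 8, and 0xff passes the isMask
// check. If an i32/i8 ZEXTLOAD is legal, "and i32 %x, 255" is inserted right
// after the load and the original, now redundant, and is removed.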
4660 
4661 /// Check if V (an operand of a select instruction) is an expensive instruction
4662 /// that is only used once.
4663 static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) {
4664  auto *I = dyn_cast<Instruction>(V);
4665  // If it's safe to speculatively execute, then it should not have side
4666  // effects; therefore, it's safe to sink and possibly *not* execute.
4667  return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) &&
4668          TTI->getUserCost(I) == TargetTransformInfo::TCC_Expensive;
4669 }
4670 
4671 /// Returns true if a SelectInst should be turned into an explicit branch.
4672 static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI,
4673  const TargetLowering *TLI,
4674  SelectInst *SI) {
4675  // If even a predictable select is cheap, then a branch can't be cheaper.
4676  if (!TLI->isPredictableSelectExpensive())
4677  return false;
4678 
4679  // FIXME: This should use the same heuristics as IfConversion to determine
4680  // whether a select is better represented as a branch.
4681 
4682  // If metadata tells us that the select condition is obviously predictable,
4683  // then we want to replace the select with a branch.
4684  uint64_t TrueWeight, FalseWeight;
4685  if (SI->extractProfMetadata(TrueWeight, FalseWeight)) {
4686  uint64_t Max = std::max(TrueWeight, FalseWeight);
4687  uint64_t Sum = TrueWeight + FalseWeight;
4688  if (Sum != 0) {
4689  auto Probability = BranchProbability::getBranchProbability(Max, Sum);
4690  if (Probability > TLI->getPredictableBranchThreshold())
4691  return true;
4692  }
4693  }
4694 
4695  CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
4696 
4697  // If a branch is predictable, an out-of-order CPU can avoid blocking on its
4698  // comparison condition. If the compare has more than one use, there's
4699  // probably another cmov or setcc around, so it's not worth emitting a branch.
4700  if (!Cmp || !Cmp->hasOneUse())
4701  return false;
4702 
4703  // If either operand of the select is expensive and only needed on one side
4704  // of the select, we should form a branch.
4705  if (sinkSelectOperand(TTI, SI->getTrueValue()) ||
4706  sinkSelectOperand(TTI, SI->getFalseValue()))
4707  return true;
4708 
4709  return false;
4710 }
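// Worked example for the profile check above (hypothetical weights): with
// !prof branch_weights of 2000 (true) and 1 (false), Max = 2000 and
// Sum = 2001, giving a probability of roughly 99.95%. If that exceeds the
// target's predictable-branch threshold, the select is converted to a branch
// without the compare-based heuristics ever being consulted.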
4711 
4712 /// If \p isTrue is true, return the true value of \p SI, otherwise return
4713 /// false value of \p SI. If the true/false value of \p SI is defined by any
4714 /// select instructions in \p Selects, look through the defining select
4715 /// instruction until the true/false value is not defined in \p Selects.
4716 static Value *getTrueOrFalseValue(
4717  SelectInst *SI, bool isTrue,
4718  const SmallPtrSet<const Instruction *, 2> &Selects) {
4719  Value *V;
4720 
4721  for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI);
4722  DefSI = dyn_cast<SelectInst>(V)) {
4723  assert(DefSI->getCondition() == SI->getCondition() &&
4724  "The condition of DefSI does not match with SI");
4725  V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
4726  }
4727  return V;
4728 }
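// Sketch of how the helper above walks a select chain (hypothetical IR): for
//   %s1 = select i1 %c, i32 %a, i32 %b
//   %s2 = select i1 %c, i32 %s1, i32 %d
// with Selects = {%s1, %s2}, getTrueOrFalseValue(%s2, /*isTrue=*/true, Selects)
// looks through %s1 and returns %a, while the false value of %s2 is simply %d.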
4729 
4730 /// If we have a SelectInst that will likely profit from branch prediction,
4731 /// turn it into a branch.
4732 bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
4733  // Find all consecutive select instructions that share the same condition.
4734   SmallVector<SelectInst *, 2> ASI;
4735   ASI.push_back(SI);
4736   for (BasicBlock::iterator It = ++BasicBlock::iterator(SI);
4737        It != SI->getParent()->end(); ++It) {
4738  SelectInst *I = dyn_cast<SelectInst>(&*It);
4739  if (I && SI->getCondition() == I->getCondition()) {
4740  ASI.push_back(I);
4741  } else {
4742  break;
4743  }
4744  }
4745 
4746  SelectInst *LastSI = ASI.back();
4747   // Increment the current iterator to skip all the remaining select
4748   // instructions, because either all of them or none of them will be lowered to branches.
4749  CurInstIterator = std::next(LastSI->getIterator());
4750 
4751  bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
4752 
4753  // Can we convert the 'select' to CF ?
4754  if (DisableSelectToBranch || OptSize || !TLI || VectorCond ||
4755       SI->getMetadata(LLVMContext::MD_unpredictable))
4756     return false;
4757 
4758   TargetLowering::SelectSupportKind SelectKind;
4759   if (VectorCond)
4760     SelectKind = TargetLowering::VectorMaskSelect;
4761   else if (SI->getType()->isVectorTy())
4762     SelectKind = TargetLowering::ScalarCondVectorVal;
4763   else
4764  SelectKind = TargetLowering::ScalarValSelect;
4765 
4766  if (TLI->isSelectSupported(SelectKind) &&
4767  !isFormingBranchFromSelectProfitable(TTI, TLI, SI))
4768  return false;
4769 
4770  ModifiedDT = true;
4771 
4772  // Transform a sequence like this:
4773  // start:
4774  // %cmp = cmp uge i32 %a, %b
4775  // %sel = select i1 %cmp, i32 %c, i32 %d
4776  //
4777  // Into:
4778  // start:
4779  // %cmp = cmp uge i32 %a, %b
4780  // br i1 %cmp, label %select.true, label %select.false
4781  // select.true:
4782  // br label %select.end
4783  // select.false:
4784  // br label %select.end
4785  // select.end:
4786  // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
4787  //
4788  // In addition, we may sink instructions that produce %c or %d from
4789  // the entry block into the destination(s) of the new branch.
4790  // If the true or false blocks do not contain a sunken instruction, that
4791  // block and its branch may be optimized away. In that case, one side of the
4792  // first branch will point directly to select.end, and the corresponding PHI
4793  // predecessor block will be the start block.
4794 
4795  // First, we split the block containing the select into 2 blocks.
4796  BasicBlock *StartBlock = SI->getParent();
4797  BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI));
4798  BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
4799 
4800  // Delete the unconditional branch that was just created by the split.
4801  StartBlock->getTerminator()->eraseFromParent();
4802 
4803  // These are the new basic blocks for the conditional branch.
4804  // At least one will become an actual new basic block.
4805  BasicBlock *TrueBlock = nullptr;
4806  BasicBlock *FalseBlock = nullptr;
4807  BranchInst *TrueBranch = nullptr;
4808  BranchInst *FalseBranch = nullptr;
4809 
4810  // Sink expensive instructions into the conditional blocks to avoid executing
4811  // them speculatively.
4812  for (SelectInst *SI : ASI) {
4813  if (sinkSelectOperand(TTI, SI->getTrueValue())) {
4814  if (TrueBlock == nullptr) {
4815  TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink",
4816  EndBlock->getParent(), EndBlock);
4817  TrueBranch = BranchInst::Create(EndBlock, TrueBlock);
4818  }
4819  auto *TrueInst = cast<Instruction>(SI->getTrueValue());
4820  TrueInst->moveBefore(TrueBranch);
4821  }
4822  if (sinkSelectOperand(TTI, SI->getFalseValue())) {
4823  if (FalseBlock == nullptr) {
4824  FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink",
4825  EndBlock->getParent(), EndBlock);
4826  FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
4827  }
4828  auto *FalseInst = cast<Instruction>(SI->getFalseValue());
4829  FalseInst->moveBefore(FalseBranch);
4830  }
4831  }
4832 
4833  // If there was nothing to sink, then arbitrarily choose the 'false' side
4834  // for a new input value to the PHI.
4835  if (TrueBlock == FalseBlock) {
4836  assert(TrueBlock == nullptr &&
4837  "Unexpected basic block transform while optimizing select");
4838 
4839  FalseBlock = BasicBlock::Create(SI->getContext(), "select.false",
4840  EndBlock->getParent(), EndBlock);
4841  BranchInst::Create(EndBlock, FalseBlock);
4842  }
4843 
4844  // Insert the real conditional branch based on the original condition.
4845  // If we did not create a new block for one of the 'true' or 'false' paths
4846  // of the condition, it means that side of the branch goes to the end block
4847  // directly and the path originates from the start block from the point of
4848  // view of the new PHI.
4849  BasicBlock *TT, *FT;
4850  if (TrueBlock == nullptr) {
4851  TT = EndBlock;
4852  FT = FalseBlock;
4853  TrueBlock = StartBlock;
4854  } else if (FalseBlock == nullptr) {
4855  TT = TrueBlock;
4856  FT = EndBlock;
4857  FalseBlock = StartBlock;
4858  } else {
4859  TT = TrueBlock;
4860  FT = FalseBlock;
4861  }
4862  IRBuilder<>(SI).CreateCondBr(SI->getCondition(), TT, FT, SI);
4863 
4864   SmallPtrSet<const Instruction *, 2> INS;
4865   INS.insert(ASI.begin(), ASI.end());
4866   // Use a reverse iterator, because a later select may use the value of an
4867   // earlier select, and we need to propagate the value through the earlier
4868   // select to get the PHI operand.
4869  for (auto It = ASI.rbegin(); It != ASI.rend(); ++It) {
4870  SelectInst *SI = *It;
4871  // The select itself is replaced with a PHI Node.
4872  PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front());
4873  PN->takeName(SI);
4874  PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
4875  PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
4876 
4877  SI->replaceAllUsesWith(PN);
4878  SI->eraseFromParent();
4879  INS.erase(SI);
4880  ++NumSelectsExpanded;
4881  }
4882 
4883  // Instruct OptimizeBlock to skip to the next block.
4884  CurInstIterator = StartBlock->end();
4885  return true;
4886 }
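// Hedged sketch of the operand-sinking case handled above (hypothetical IR):
//   %div = fdiv float %a, %b                      ; expensive, single use
//   %sel = select i1 %cmp, float %div, float 1.0
// becomes a conditional branch whose true edge goes to a new
// "select.true.sink" block containing the fdiv, while the false edge goes
// straight to select.end; the PHI there takes %div from the sink block and
// 1.0 from the original (start) block, so the fdiv only executes when %cmp
// is true.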
4887 
4888 static bool isBroadcastShuffle(ShuffleVectorInst *SVI) {
4889   SmallVector<int, 16> Mask(SVI->getShuffleMask());
4890   int SplatElem = -1;
4891  for (unsigned i = 0; i < Mask.size(); ++i) {
4892  if (SplatElem != -1 && Mask[i] != -1 && Mask[i] != SplatElem)
4893  return false;
4894  SplatElem = Mask[i];
4895  }
4896 
4897  return true;
4898 }
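// For example (hypothetical masks): a shuffle mask of <0, 0, 0, 0> is accepted
// as a broadcast, while <0, 1, 0, 1> is rejected because it reads two
// different source lanes (undef lanes appear as -1 in the mask and do not by
// themselves disqualify a splat).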
4899 
4900 /// Some targets have expensive vector shifts if the lanes aren't all the same
4901 /// (e.g. x86 only introduced "vpsllvd" and friends with AVX2). In these cases
4902 /// it's often worth sinking a shufflevector splat down to its use so that
4903 /// codegen can spot all lanes are identical.
4904 bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
4905  BasicBlock *DefBB = SVI->getParent();
4906 
4907  // Only do this xform if variable vector shifts are particularly expensive.
4908  if (!TLI || !TLI->isVectorShiftByScalarCheap(SVI->getType()))
4909  return false;
4910 
4911  // We only expect better codegen by sinking a shuffle if we can recognise a
4912  // constant splat.
4913  if (!isBroadcastShuffle(SVI))
4914  return false;
4915 
4916  // InsertedShuffles - Only insert a shuffle in each block once.
4917  DenseMap<BasicBlock*, Instruction*> InsertedShuffles;
4918 
4919  bool MadeChange = false;
4920  for (User *U : SVI->users()) {
4921  Instruction *UI = cast<Instruction>(U);
4922 
4923   // Figure out which BB this shuffle is used in.
4924  BasicBlock *UserBB = UI->getParent();
4925  if (UserBB == DefBB) continue;
4926 
4927  // For now only apply this when the splat is used by a shift instruction.
4928  if (!UI->isShift()) continue;
4929 
4930  // Everything checks out, sink the shuffle if the user's block doesn't
4931  // already have a copy.
4932  Instruction *&InsertedShuffle = InsertedShuffles[UserBB];
4933 
4934  if (!InsertedShuffle) {
4935  BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
4936  assert(InsertPt != UserBB->end());
4937  InsertedShuffle =
4938  new ShuffleVectorInst(SVI->getOperand(0), SVI->getOperand(1),
4939  SVI->getOperand(2), "", &*InsertPt);
4940  }
4941 
4942  UI->replaceUsesOfWith(SVI, InsertedShuffle);
4943  MadeChange = true;
4944  }
4945 
4946  // If we removed all uses, nuke the shuffle.
4947  if (SVI->use_empty()) {
4948  SVI->eraseFromParent();
4949  MadeChange = true;
4950  }
4951 
4952  return MadeChange;
4953 }
4954 
4955 bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
4956  if (!TLI || !DL)
4957  return false;
4958 
4959  Value *Cond = SI->getCondition();
4960  Type *OldType = Cond->getType();
4961  LLVMContext &Context = Cond->getContext();
4962  MVT RegType = TLI->getRegisterType(Context, TLI->getValueType(*DL, OldType));
4963  unsigned RegWidth = RegType.getSizeInBits();
4964 
4965  if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth())
4966  return false;
4967 
4968  // If the register width is greater than the type width, expand the condition
4969  // of the switch instruction and each case constant to the width of the
4970  // register. By widening the type of the switch condition, subsequent
4971  // comparisons (for case comparisons) will not need to be extended to the
4972  // preferred register width, so we will potentially eliminate N-1 extends,
4973  // where N is the number of cases in the switch.
4974  auto *NewType = Type::getIntNTy(Context, RegWidth);
4975 
4976  // Zero-extend the switch condition and case constants unless the switch
4977  // condition is a function argument that is already being sign-extended.
4978  // In that case, we can avoid an unnecessary mask/extension by sign-extending
4979  // everything instead.
4980  Instruction::CastOps ExtType = Instruction::ZExt;
4981  if (auto *Arg = dyn_cast<Argument>(Cond))
4982  if (Arg->hasSExtAttr())
4983  ExtType = Instruction::SExt;
4984 
4985  auto *ExtInst = CastInst::Create(ExtType, Cond, NewType);
4986  ExtInst->insertBefore(SI);
4987  SI->setCondition(ExtInst);
4988  for (SwitchInst::CaseIt Case : SI->cases()) {
4989  APInt NarrowConst = Case.getCaseValue()->getValue();
4990  APInt WideConst = (ExtType == Instruction::ZExt) ?
4991  NarrowConst.zext(RegWidth) : NarrowConst.sext(RegWidth);
4992  Case.setValue(ConstantInt::get(Context, WideConst));
4993  }
4994 
4995  return true;
4996 }
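// Sketch of the widening above (hypothetical IR, assuming 32-bit registers):
//   switch i8 %c, label %def [ i8 1, label %a
//                              i8 2, label %b ]
// becomes
//   %c.wide = zext i8 %c to i32   ; sext instead if %c is a signext argument
//   switch i32 %c.wide, label %def [ i32 1, label %a
//                                    i32 2, label %b ]
// so the per-case comparisons no longer each need their own extend.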
4997 
4998 namespace {
4999 /// \brief Helper class to promote a scalar operation to a vector one.
5000 /// This class is used to move an extractelement transition downward.
5001 /// E.g.,
5002 /// a = vector_op <2 x i32>
5003 /// b = extractelement <2 x i32> a, i32 0
5004 /// c = scalar_op b
5005 /// store c
5006 ///
5007 /// =>
5008 /// a = vector_op <2 x i32>
5009 /// c = vector_op a (equivalent to scalar_op on the related lane)
5010 /// * d = extractelement <2 x i32> c, i32 0
5011 /// * store d
5012 /// Assuming the extractelement and the store can be combined, we get rid of the
5013 /// transition.
5014 class VectorPromoteHelper {
5015  /// DataLayout associated with the current module.
5016  const DataLayout &DL;
5017 
5018  /// Used to perform some checks on the legality of vector operations.
5019  const TargetLowering &TLI;
5020 
5021   /// Used to estimate the cost of the promoted chain.
5022  const TargetTransformInfo &TTI;
5023 
5024  /// The transition being moved downwards.
5025  Instruction *Transition;
5026  /// The sequence of instructions to be promoted.
5027  SmallVector<Instruction *, 4> InstsToBePromoted;
5028  /// Cost of combining a store and an extract.
5029  unsigned StoreExtractCombineCost;
5030  /// Instruction that will be combined with the transition.
5031  Instruction *CombineInst;
5032 
5033  /// \brief The instruction that represents the current end of the transition.
5034  /// Since we are faking the promotion until we reach the end of the chain
5035  /// of computation, we need a way to get the current end of the transition.
5036  Instruction *getEndOfTransition() const {
5037  if (InstsToBePromoted.empty())
5038  return Transition;
5039  return InstsToBePromoted.back();
5040  }
5041 
5042  /// \brief Return the index of the original value in the transition.
5043  /// E.g., for "extractelement <2 x i32> c, i32 1" the original value,
5044  /// c, is at index 0.
5045  unsigned getTransitionOriginalValueIdx() const {
5046  assert(isa<ExtractElementInst>(Transition) &&
5047  "Other kind of transitions are not supported yet");
5048  return 0;
5049  }
5050 
5051  /// \brief Return the index of the index in the transition.
5052  /// E.g., for "extractelement <2 x i32> c, i32 0" the index
5053  /// is at index 1.
5054  unsigned getTransitionIdx() const {
5055  assert(isa<ExtractElementInst>(Transition) &&
5056  "Other kind of transitions are not supported yet");
5057  return 1;
5058  }
5059 
5060  /// \brief Get the type of the transition.
5061  /// This is the type of the original value.
5062  /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the
5063  /// transition is <2 x i32>.
5064  Type *getTransitionType() const {
5065  return Transition->getOperand(getTransitionOriginalValueIdx())->getType();
5066  }
5067 
5068  /// \brief Promote \p ToBePromoted by moving \p Def downward through.
5069  /// I.e., we have the following sequence:
5070  /// Def = Transition <ty1> a to <ty2>
5071  /// b = ToBePromoted <ty2> Def, ...
5072  /// =>
5073  /// b = ToBePromoted <ty1> a, ...
5074  /// Def = Transition <ty1> ToBePromoted to <ty2>
5075  void promoteImpl(Instruction *ToBePromoted);
5076 
5077  /// \brief Check whether or not it is profitable to promote all the
5078  /// instructions enqueued to be promoted.
5079  bool isProfitableToPromote() {
5080  Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx());
5081  unsigned Index = isa<ConstantInt>(ValIdx)
5082  ? cast<ConstantInt>(ValIdx)->getZExtValue()
5083  : -1;
5084  Type *PromotedType = getTransitionType();
5085 
5086  StoreInst *ST = cast<StoreInst>(CombineInst);
5087  unsigned AS = ST->getPointerAddressSpace();
5088  unsigned Align = ST->getAlignment();
5089  // Check if this store is supported.
5090   if (!TLI.allowsMisalignedMemoryAccesses(
5091           TLI.getValueType(DL, ST->getValueOperand()->getType()), AS,
5092  Align)) {
5093  // If this is not supported, there is no way we can combine
5094  // the extract with the store.
5095  return false;
5096  }
5097 
5098  // The scalar chain of computation has to pay for the transition
5099  // scalar to vector.
5100  // The vector chain has to account for the combining cost.
5101  uint64_t ScalarCost =
5102  TTI.getVectorInstrCost(Transition->getOpcode(), PromotedType, Index);
5103  uint64_t VectorCost = StoreExtractCombineCost;
5104  for (const auto &Inst : InstsToBePromoted) {
5105  // Compute the cost.
5106  // By construction, all instructions being promoted are arithmetic ones.
5107  // Moreover, one argument is a constant that can be viewed as a splat
5108  // constant.
5109  Value *Arg0 = Inst->getOperand(0);
5110  bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) ||
5111  isa<ConstantFP>(Arg0);
5112       TargetTransformInfo::OperandValueKind Arg0OVK =
5113           IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue
5114                          : TargetTransformInfo::OK_AnyValue;
5115       TargetTransformInfo::OperandValueKind Arg1OVK =
5116           !IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue
5117                           : TargetTransformInfo::OK_AnyValue;
5118       ScalarCost += TTI.getArithmeticInstrCost(
5119  Inst->getOpcode(), Inst->getType(), Arg0OVK, Arg1OVK);
5120  VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType,
5121  Arg0OVK, Arg1OVK);
5122  }
5123  DEBUG(dbgs() << "Estimated cost of computation to be promoted:\nScalar: "
5124  << ScalarCost << "\nVector: " << VectorCost << '\n');
5125  return ScalarCost > VectorCost;
5126  }
5127 
5128  /// \brief Generate a constant vector with \p Val with the same
5129  /// number of elements as the transition.
5130  /// \p UseSplat defines whether or not \p Val should be replicated
5131  /// across the whole vector.
5132  /// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>,
5133  /// otherwise we generate a vector with as many undef as possible:
5134  /// <undef, ..., undef, Val, undef, ..., undef> where \p Val is only
5135  /// used at the index of the extract.
5136  Value *getConstantVector(Constant *Val, bool UseSplat) const {
5137  unsigned ExtractIdx = UINT_MAX;
5138  if (!UseSplat) {
5139  // If we cannot determine where the constant must be, we have to
5140  // use a splat constant.
5141  Value *ValExtractIdx = Transition->getOperand(getTransitionIdx());
5142  if (ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx))
5143  ExtractIdx = CstVal->getSExtValue();
5144  else
5145  UseSplat = true;
5146  }
5147 
5148  unsigned End = getTransitionType()->getVectorNumElements();
5149  if (UseSplat)
5150  return ConstantVector::getSplat(End, Val);
5151 
5152  SmallVector<Constant *, 4> ConstVec;
5153  UndefValue *UndefVal = UndefValue::get(Val->getType());
5154  for (unsigned Idx = 0; Idx != End; ++Idx) {
5155  if (Idx == ExtractIdx)
5156  ConstVec.push_back(Val);
5157  else
5158  ConstVec.push_back(UndefVal);
5159  }
5160  return ConstantVector::get(ConstVec);
5161  }
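  // For instance (hypothetical values): with the transition
  // "extractelement <2 x i32> %c, i32 1" and Val = i32 7, UseSplat == false
  // produces <i32 undef, i32 7> (7 only at the extracted lane), whereas
  // UseSplat == true, or a non-constant extract index, produces <i32 7, i32 7>.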
5162 
5163  /// \brief Check if promoting to a vector type an operand at \p OperandIdx
5164  /// in \p Use can trigger undefined behavior.
5165  static bool canCauseUndefinedBehavior(const Instruction *Use,
5166  unsigned OperandIdx) {
5167   // It is not safe to introduce undef when the operand is on
5168   // the right-hand side of a division-like instruction.
5169  if (OperandIdx != 1)
5170  return false;
5171  switch (Use->getOpcode()) {
5172  default:
5173  return false;
5174  case Instruction::SDiv:
5175  case Instruction::UDiv:
5176  case Instruction::SRem:
5177  case Instruction::URem:
5178  return true;
5179  case Instruction::FDiv:
5180  case Instruction::FRem:
5181  return !Use->hasNoNaNs();
5182  }
5183  llvm_unreachable(nullptr);
5184  }
5185 
5186 public:
5187  VectorPromoteHelper(const DataLayout &DL, const TargetLowering &TLI,
5188  const TargetTransformInfo &TTI, Instruction *Transition,
5189  unsigned CombineCost)
5190  : DL(DL), TLI(TLI), TTI(TTI), Transition(Transition),
5191  StoreExtractCombineCost(CombineCost), CombineInst(nullptr) {
5192  assert(Transition && "Do not know how to promote null");
5193  }
5194 
5195  /// \brief Check if we can promote \p ToBePromoted to \p Type.
5196  bool canPromote(const Instruction *ToBePromoted) const {
5197  // We could support CastInst too.
5198  return isa<BinaryOperator>(ToBePromoted);
5199  }
5200 
5201  /// \brief Check if it is profitable to promote \p ToBePromoted
5202  /// by moving downward the transition through.
5203  bool shouldPromote(const Instruction *ToBePromoted) const {
5204  // Promote only if all the operands can be statically expanded.
5205  // Indeed, we do not want to introduce any new kind of transitions.
5206  for (const Use &U : ToBePromoted->operands()) {
5207  const Value *Val = U.get();
5208  if (Val == getEndOfTransition()) {
5209  // If the use is a division and the transition is on the rhs,
5210  // we cannot promote the operation, otherwise we may create a
5211  // division by zero.
5212  if (canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()))
5213  return false;
5214  continue;
5215  }
5216  if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) &&
5217  !isa<ConstantFP>(Val))
5218  return false;
5219  }
5220  // Check that the resulting operation is legal.
5221  int ISDOpcode = TLI.InstructionOpcodeToISD(ToBePromoted->getOpcode());
5222  if (!ISDOpcode)
5223  return false;
5224  return StressStoreExtract ||
5225            TLI.isOperationLegalOrCustom(
5226                ISDOpcode, TLI.getValueType(DL, getTransitionType(), true));
5227  }
5228 
5229  /// \brief Check whether or not \p Use can be combined
5230  /// with the transition.
5231  /// I.e., is it possible to do Use(Transition) => AnotherUse?
5232  bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); }
5233 
5234  /// \brief Record \p ToBePromoted as part of the chain to be promoted.
5235  void enqueueForPromotion(Instruction *ToBePromoted) {
5236  InstsToBePromoted.push_back(ToBePromoted);
5237  }
5238 
5239  /// \brief Set the instruction that will be combined with the transition.
5240  void recordCombineInstruction(Instruction *ToBeCombined) {
5241  assert(canCombine(ToBeCombined) && "Unsupported instruction to combine");
5242  CombineInst = ToBeCombined;
5243  }
5244 
5245   /// \brief Promote all the instructions enqueued for promotion if doing so
5246   /// is profitable.
5247  /// \return True if the promotion happened, false otherwise.
5248  bool promote() {
5249  // Check if there is something to promote.
5250  // Right now, if we do not have anything to combine with,
5251  // we assume the promotion is not profitable.
5252  if (InstsToBePromoted.empty() || !CombineInst)
5253  return false;
5254 
5255  // Check cost.
5256  if (!StressStoreExtract && !isProfitableToPromote())
5257  return false;
5258 
5259  // Promote.
5260  for (auto &ToBePromoted : InstsToBePromoted)
5261  promoteImpl(ToBePromoted);
5262  InstsToBePromoted.clear();
5263  return true;
5264  }
5265 };
5266 } // End of anonymous namespace.
5267 
5268 void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
5269  // At this point, we know that all the operands of ToBePromoted but Def
5270  // can be statically promoted.
5271  // For Def, we need to use its parameter in ToBePromoted:
5272  // b = ToBePromoted ty1 a
5273  // Def = Transition ty1 b to ty2
5274  // Move the transition down.
5275  // 1. Replace all uses of the promoted operation by the transition.
5276  // = ... b => = ... Def.
5277  assert(ToBePromoted->getType() == Transition->getType() &&
5278  "The type of the result of the transition does not match "
5279  "the final type");
5280  ToBePromoted->replaceAllUsesWith(Transition);
5281  // 2. Update the type of the uses.
5282  // b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def.
5283  Type *TransitionTy = getTransitionType();
5284  ToBePromoted->mutateType(TransitionTy);
5285  // 3. Update all the operands of the promoted operation with promoted
5286  // operands.
5287  // b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a.
5288  for (Use &U : ToBePromoted->operands()) {
5289  Value *Val = U.get();
5290  Value *NewVal = nullptr;
5291  if (Val == Transition)
5292  NewVal = Transition->getOperand(getTransitionOriginalValueIdx());
5293  else if (isa<UndefValue>(Val) || isa<ConstantInt>(Val) ||
5294  isa<ConstantFP>(Val)) {
5295  // Use a splat constant if it is not safe to use undef.
5296  NewVal = getConstantVector(
5297  cast<Constant>(Val),
5298  isa<UndefValue>(Val) ||
5299  canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()));
5300  } else
5301       llvm_unreachable("Did you modify shouldPromote and forget to update "
5302                        "this?");
5303  ToBePromoted->setOperand(U.getOperandNo(), NewVal);
5304  }
5305  Transition->removeFromParent();
5306  Transition->insertAfter(ToBePromoted);
5307  Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted);
5308 }
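// Hedged sketch of one promoteImpl step (hypothetical IR):
//   %ee  = extractelement <2 x i32> %v, i32 1
//   %mul = mul i32 %ee, 3
// becomes
//   %mul = mul <2 x i32> %v, <i32 undef, i32 3>
//   %ee  = extractelement <2 x i32> %mul, i32 1
// The scalar constant is expanded with getConstantVector; a full splat would
// be used instead of the undef lane if the undef could trigger UB (e.g. on the
// right-hand side of a udiv).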
5309 
5310 /// Some targets can do store(extractelement) with one instruction.
5311 /// Try to push the extractelement towards the stores when the target
5312 /// has this feature and this is profitable.
5313 bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) {
5314  unsigned CombineCost = UINT_MAX;
5315  if (DisableStoreExtract || !TLI ||
5316  (!StressStoreExtract &&
5317  !TLI->canCombineStoreAndExtract(Inst->getOperand(0)->getType(),
5318  Inst->getOperand(1), CombineCost)))
5319  return false;
5320 
5321  // At this point we know that Inst is a vector to scalar transition.
5322  // Try to move it down the def-use chain, until:
5323  // - We can combine the transition with its single use
5324  // => we got rid of the transition.
5325  // - We escape the current basic block
5326  // => we would need to check that we are moving it at a cheaper place and
5327  // we do not do that for now.
5328  BasicBlock *Parent = Inst->getParent();
5329  DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n');
5330  VectorPromoteHelper VPH(*DL, *TLI, *TTI, Inst, CombineCost);
5331  // If the transition has more than one use, assume this is not going to be
5332  // beneficial.
5333  while (Inst->hasOneUse()) {
5334  Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin());
5335  DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n');
5336 
5337  if (ToBePromoted->getParent() != Parent) {
5338  DEBUG(dbgs() << "Instruction to promote is in a different block ("
5339  << ToBePromoted->getParent()->getName()
5340  << ") than the transition (" << Parent->getName() << ").\n");
5341  return false;
5342  }
5343 
5344  if (VPH.canCombine(ToBePromoted)) {
5345  DEBUG(dbgs() << "Assume " << *Inst << '\n'
5346  << "will be combined with: " << *ToBePromoted << '\n');
5347  VPH.recordCombineInstruction(ToBePromoted);
5348  bool Changed = VPH.promote();
5349  NumStoreExtractExposed += Changed;
5350  return Changed;
5351  }
5352 
5353  DEBUG(dbgs() << "Try promoting.\n");
5354  if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted))
5355  return false;
5356 
5357  DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n");
5358 
5359  VPH.enqueueForPromotion(ToBePromoted);
5360  Inst = ToBePromoted;
5361  }
5362  return false;
5363 }
5364 
5365 /// For the instruction sequence of store below, F and I values
5366 /// are bundled together as an i64 value before being stored into memory.
5367 /// Sometimes it is more efficient to generate separate stores for F and I,
5368 /// which can remove the bitwise instructions or sink them to colder places.
5369 ///
5370 /// (store (or (zext (bitcast F to i32) to i64),
5371 /// (shl (zext I to i64), 32)), addr) -->
5372 /// (store F, addr) and (store I, addr+4)
5373 ///
5374 /// Similarly, splitting for other merged store can also be beneficial, like:
5375 /// For pair of {i32, i32}, i64 store --> two i32 stores.
5376 /// For pair of {i32, i16}, i64 store --> two i32 stores.
5377 /// For pair of {i16, i16}, i32 store --> two i16 stores.
5378 /// For pair of {i16, i8}, i32 store --> two i16 stores.
5379 /// For pair of {i8, i8}, i16 store --> two i8 stores.
5380 ///
5381 /// We allow each target to determine specifically which kind of splitting is
5382 /// supported.
5383 ///
5384 /// The store patterns are commonly seen from the simple code snippet below
5385 /// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
5386 /// void goo(const std::pair<int, float> &);
5387 /// hoo() {
5388 /// ...
5389 /// goo(std::make_pair(tmp, ftmp));
5390 /// ...
5391 /// }
5392 ///
5393 /// Although we already have similar splitting in DAG Combine, we duplicate
5394 /// it in CodeGenPrepare to catch the case in which the pattern is spread across
5395 /// multiple BBs. The logic in DAG Combine is kept to catch cases generated
5396 /// during code expansion.
5397 static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
5398  const TargetLowering &TLI) {
5399  // Handle simple but common cases only.
5400  Type *StoreType = SI.getValueOperand()->getType();
5401  if (DL.getTypeStoreSizeInBits(StoreType) != DL.getTypeSizeInBits(StoreType) ||
5402  DL.getTypeSizeInBits(StoreType) == 0)
5403  return false;
5404 
5405  unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2;
5406  Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize);
5407  if (DL.getTypeStoreSizeInBits(SplitStoreType) !=
5408  DL.getTypeSizeInBits(SplitStoreType))
5409  return false;
5410 
5411   // Match the following patterns:
5412   // (store (or (zext LValue to i64),
5413   //            (shl (zext HValue to i64), HalfValBitSize)), addr)
5414   // or
5415   // (store (or (shl (zext HValue to i64), HalfValBitSize),
5416   //            (zext LValue to i64)), addr)
5417   // Expect both operands of the OR and the first operand of the SHL to have
5418   // only one use.
5419  Value *LValue, *HValue;
5420  if (!match(SI.getValueOperand(),
5421  m_c_Or(m_OneUse(m_ZExt(m_Value(LValue))),
5422  m_OneUse(m_Shl(m_OneUse(m_ZExt(m_Value(HValue))),
5423  m_SpecificInt(HalfValBitSize))))))
5424  return false;
5425 
5426  // Check LValue and HValue are int with size less or equal than 32.
5427  if (!LValue->getType()->isIntegerTy() ||
5428  DL.getTypeSizeInBits(LValue->getType()) > HalfValBitSize ||
5429  !HValue->getType()->isIntegerTy() ||
5430  DL.getTypeSizeInBits(HValue->getType()) > HalfValBitSize)
5431  return false;
5432 
5433  // If LValue/HValue is a bitcast instruction, use the EVT before bitcast
5434  // as the input of target query.
5435  auto *LBC = dyn_cast<BitCastInst>(LValue);
5436  auto *HBC = dyn_cast<BitCastInst>(HValue);
5437  EVT LowTy = LBC ? EVT::getEVT(LBC->getOperand(0)->getType())
5438  : EVT::getEVT(LValue->getType());
5439  EVT HighTy = HBC ? EVT::getEVT(HBC->getOperand(0)->getType())
5440  : EVT::getEVT(HValue->getType());
5441  if (!ForceSplitStore && !TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
5442  return false;
5443 
5444  // Start to split store.
5445  IRBuilder<> Builder(SI.getContext());
5446  Builder.SetInsertPoint(&SI);
5447 
5448   // If LValue/HValue is a bitcast in another BB, create a new one in the
5449   // current BB so it may be merged with the split stores by the DAG combiner.
5450  if (LBC && LBC->getParent() != SI.getParent())
5451  LValue = Builder.CreateBitCast(LBC->getOperand(0), LBC->getType());
5452  if (HBC && HBC->getParent() != SI.getParent())
5453  HValue = Builder.CreateBitCast(HBC->getOperand(0), HBC->getType());
5454 
5455  auto CreateSplitStore = [&](Value *V, bool Upper) {
5456  V = Builder.CreateZExtOrBitCast(V, SplitStoreType);
5457  Value *Addr = Builder.CreateBitCast(
5458  SI.getOperand(1),
5459  SplitStoreType->getPointerTo(SI.getPointerAddressSpace()));
5460  if (Upper)
5461  Addr = Builder.CreateGEP(
5462  SplitStoreType, Addr,
5463           ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1));
5464     Builder.CreateAlignedStore(
5465  V, Addr, Upper ? SI.getAlignment() / 2 : SI.getAlignment());
5466  };
5467 
5468  CreateSplitStore(LValue, false);
5469  CreateSplitStore(HValue, true);
5470 
5471  // Delete the old store.
5472  SI.eraseFromParent();
5473  return true;
5474 }
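// Concrete sketch of the split performed above (hypothetical IR, i64 store of
// two i32 halves):
//   %lo64 = zext i32 %lo to i64
//   %hi64 = zext i32 %hi to i64
//   %hish = shl i64 %hi64, 32
//   %or   = or i64 %lo64, %hish
//   store i64 %or, i64* %p, align 8
// becomes two i32 stores through a bitcast of %p: the low half at element
// offset 0 with the original alignment, and the high half at element offset 1
// with half of the original alignment.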
5475 
5476 bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) {
5477  // Bail out if we inserted the instruction to prevent optimizations from
5478  // stepping on each other's toes.
5479  if (InsertedInsts.count(I))
5480  return false;
5481 
5482  if (PHINode *P = dyn_cast<PHINode>(I)) {
5483  // It is possible for very late stage optimizations (such as SimplifyCFG)
5484  // to introduce PHI nodes too late to be cleaned up. If we detect such a
5485  // trivial PHI, go ahead and zap it here.
5486  if (Value *V = SimplifyInstruction(P, *DL, TLInfo, nullptr)) {
5487  P->replaceAllUsesWith(V);
5488  P->eraseFromParent();
5489  ++NumPHIsElim;
5490  return true;
5491  }
5492  return false;
5493  }
5494 
5495  if (CastInst *CI = dyn_cast<CastInst>(I)) {
5496  // If the source of the cast is a constant, then this should have
5497  // already been constant folded. The only reason NOT to constant fold
5498  // it is if something (e.g. LSR) was careful to place the constant
5499  // evaluation in a block other than then one that uses it (e.g. to hoist
5500  // the address of globals out of a loop). If this is the case, we don't
5501  // want to forward-subst the cast.
5502  if (isa<Constant>(CI->getOperand(0)))
5503  return false;
5504 
5505  if (TLI && OptimizeNoopCopyExpression(CI, *TLI, *DL))
5506  return true;
5507 
5508  if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
5509  /// Sink a zext or sext into its user blocks if the target type doesn't
5510  /// fit in one register
5511  if (TLI &&
5512  TLI->getTypeAction(CI->getContext(),
5513  TLI->getValueType(*DL, CI->getType())) ==
5514               TargetLowering::TypeExpandInteger) {
5515         return SinkCast(CI);
5516  } else {
5517  bool MadeChange = moveExtToFormExtLoad(I);
5518  return MadeChange | optimizeExtUses(I);
5519  }
5520  }
5521  return false;
5522  }
5523 
5524  if (CmpInst *CI = dyn_cast<CmpInst>(I))
5525  if (!TLI || !TLI->hasMultipleConditionRegisters())
5526  return OptimizeCmpExpression(CI, TLI);
5527 
5528  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
5529     stripInvariantGroupMetadata(*LI);
5530     if (TLI) {
5531  bool Modified = optimizeLoadExt(LI);
5532  unsigned AS = LI->getPointerAddressSpace();
5533  Modified |= optimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS);
5534  return Modified;
5535  }
5536  return false;
5537  }
5538 
5539  if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
5540  if (TLI && splitMergedValStore(*SI, *DL, *TLI))
5541  return true;
5542     stripInvariantGroupMetadata(*SI);
5543     if (TLI) {
5544  unsigned AS = SI->getPointerAddressSpace();
5545  return optimizeMemoryInst(I, SI->getOperand(1),
5546  SI->getOperand(0)->getType(), AS);
5547  }
5548  return false;
5549  }
5550 
5551   BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
5552 
5553  if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
5554  BinOp->getOpcode() == Instruction::LShr)) {
5555  ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
5556  if (TLI && CI && TLI->hasExtractBitsInsn())
5557  return OptimizeExtractBits(BinOp, CI, *TLI, *DL);
5558 
5559  return false;
5560  }
5561 
5562  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
5563  if (GEPI->hasAllZeroIndices()) {
5564  /// The GEP operand must be a pointer, so must its result -> BitCast
5565  Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
5566  GEPI->getName(), GEPI);
5567  GEPI->replaceAllUsesWith(NC);
5568  GEPI->eraseFromParent();
5569  ++NumGEPsElim;
5570  optimizeInst(NC, ModifiedDT);
5571  return true;
5572  }
5573  return false;
5574  }
5575 
5576  if (CallInst *CI = dyn_cast<CallInst>(I))
5577  return optimizeCallInst(CI, ModifiedDT);
5578 
5579  if (SelectInst *SI = dyn_cast<SelectInst>(I))
5580  return optimizeSelectInst(SI);
5581 
5582  if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
5583  return optimizeShuffleVectorInst(SVI);
5584 
5585  if (auto *Switch = dyn_cast<SwitchInst>(I))
5586  return optimizeSwitchInst(Switch);
5587 
5588  if (isa<ExtractElementInst>(I))
5589  return optimizeExtractElementInst(I);
5590 
5591  return false;
5592 }
5593 
5594 /// Given an OR instruction, check to see if this is a bitreverse
5595 /// idiom. If so, insert the new intrinsic and return true.
5596 static bool makeBitReverse(Instruction &I, const DataLayout &DL,
5597  const TargetLowering &TLI) {
5598  if (!I.getType()->isIntegerTy() ||
5599       !TLI.isOperationLegalOrCustom(ISD::BITREVERSE,
5600                                     TLI.getValueType(DL, I.getType(), true)))
5601  return false;
5602 
5603   SmallVector<Instruction*, 4> Insts;
5604   if (!recognizeBSwapOrBitReverseIdiom(&I, false, true, Insts))
5605  return false;
5606  Instruction *LastInst = Insts.back();
5607  I.replaceAllUsesWith(LastInst);
5608   RecursivelyDeleteTriviallyDeadInstructions(&I);
5609   return true;
5610 }
5611 
5612 // In this pass we look for GEP and cast instructions that are used
5613 // across basic blocks and rewrite them to improve basic-block-at-a-time
5614 // selection.
5615 bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool& ModifiedDT) {
5616  SunkAddrs.clear();
5617  bool MadeChange = false;
5618 
5619  CurInstIterator = BB.begin();
5620  while (CurInstIterator != BB.end()) {
5621  MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT);
5622  if (ModifiedDT)
5623  return true;
5624  }
5625 
5626  bool MadeBitReverse = true;
5627  while (TLI && MadeBitReverse) {
5628  MadeBitReverse = false;
5629  for (auto &I : reverse(BB)) {
5630  if (makeBitReverse(I, *DL, *TLI)) {
5631  MadeBitReverse = MadeChange = true;
5632  ModifiedDT = true;
5633  break;
5634  }
5635  }
5636  }
5637  MadeChange |= dupRetToEnableTailCallOpts(&BB);
5638 
5639  return MadeChange;
5640 }
5641 
5642 // If llvm.dbg.value is far away from the value, then ISel may not be able to
5643 // handle it properly. ISel will drop the llvm.dbg.value if it cannot
5644 // find a node corresponding to the value.
5645 bool CodeGenPrepare::placeDbgValues(Function &F) {
5646  bool MadeChange = false;
5647  for (BasicBlock &BB : F) {
5648  Instruction *PrevNonDbgInst = nullptr;
5649  for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
5650  Instruction *Insn = &*BI++;
5651  DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
5652  // Leave dbg.values that refer to an alloca alone. These
5653       // intrinsics describe the address of a variable (= the alloca)
5654  // being taken. They should not be moved next to the alloca
5655  // (and to the beginning of the scope), but rather stay close to
5656  // where said address is used.
5657  if (!DVI || (DVI->getValue() && isa<AllocaInst>(DVI->getValue()))) {
5658  PrevNonDbgInst = Insn;
5659  continue;
5660  }
5661 
5662  Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue());
5663  if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) {
5664  // If VI is a phi in a block with an EHPad terminator, we can't insert
5665  // after it.
5666  if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())
5667  continue;
5668  DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI);
5669  DVI->removeFromParent();
5670  if (isa<PHINode>(VI))
5671  DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt());
5672  else
5673  DVI->insertAfter(VI);
5674  MadeChange = true;
5675  ++NumDbgValueMoved;
5676  }
5677  }
5678  }
5679  return MadeChange;
5680 }
5681 
5682 // If there is a sequence that branches based on comparing a single bit
5683 // against zero that can be combined into a single instruction, and the
5684 // target supports folding these into a single instruction, sink the
5685 // mask and compare into the branch uses. Do this before OptimizeBlock ->
5686 // OptimizeInst -> OptimizeCmpExpression, which perturbs the pattern being
5687 // searched for.
5688 bool CodeGenPrepare::sinkAndCmp(Function &F) {
5689  if (!EnableAndCmpSinking)
5690  return false;
5691  if (!TLI || !TLI->isMaskAndBranchFoldingLegal())
5692  return false;
5693  bool MadeChange = false;
5694  for (BasicBlock &BB : F) {
5695  // Does this BB end with the following?
5696  // %andVal = and %val, #single-bit-set
5697  // %icmpVal = icmp %andResult, 0
5698  // br i1 %cmpVal label %dest1, label %dest2"
5699     BranchInst *Brcc = dyn_cast<BranchInst>(BB.getTerminator());
5700     if (!Brcc || !Brcc->isConditional())
5701  continue;
5702  ICmpInst *Cmp = dyn_cast<ICmpInst>(Brcc->getOperand(0));
5703  if (!Cmp || Cmp->getParent() != &BB)
5704  continue;
5705     ConstantInt *Zero = dyn_cast<ConstantInt>(Cmp->getOperand(1));
5706     if (!Zero || !Zero->isZero())
5707  continue;
5708     Instruction *And = dyn_cast<Instruction>(Cmp->getOperand(0));
5709     if (!And || And->getOpcode() != Instruction::And || And->getParent() != &BB)
5710  continue;
5711     ConstantInt *Mask = dyn_cast<ConstantInt>(And->getOperand(1));
5712     if (!Mask || !Mask->getUniqueInteger().isPowerOf2())
5713  continue;
5714  DEBUG(dbgs() << "found and; icmp ?,0; brcc\n"); DEBUG(BB.dump());
5715 
5716  // Push the "and; icmp" for any users that are conditional branches.
5717  // Since there can only be one branch use per BB, we don't need to keep
5718  // track of which BBs we insert into.
5719  for (Use &TheUse : Cmp->uses()) {
5720  // Find brcc use.
5721  BranchInst *BrccUser = dyn_cast<BranchInst>(TheUse);
5722  if (!BrccUser || !BrccUser->isConditional())
5723  continue;
5724  BasicBlock *UserBB = BrccUser->getParent();
5725  if (UserBB == &BB) continue;
5726  DEBUG(dbgs() << "found Brcc use\n");
5727 
5728  // Sink the "and; icmp" to use.
5729  MadeChange = true;
5730  BinaryOperator *NewAnd =
5731  BinaryOperator::CreateAnd(And->getOperand(0), And->getOperand(1), "",
5732  BrccUser);
5733  CmpInst *NewCmp =
5734  CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(), NewAnd, Zero,
5735  "", BrccUser);
5736  TheUse = NewCmp;
5737  ++NumAndCmpsMoved;
5738  DEBUG(BrccUser->getParent()->dump());
5739  }
5740  }
5741  return MadeChange;
5742 }
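// Sketch of the pattern handled above (hypothetical IR): a block ending in
//   %andVal = and i32 %val, 4              ; single bit set
//   %cmpVal = icmp eq i32 %andVal, 0
//   br i1 %cmpVal, label %d1, label %d2
// where %cmpVal is also used by a conditional branch in another block: that
// other branch gets its own freshly created and/icmp immediately before it,
// so each branch can be matched to a single test-bit-and-branch instruction.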
5743 
5744 /// \brief Scale down both weights to fit into uint32_t.
5745 static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
5746  uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
5747  uint32_t Scale = (NewMax / UINT32_MAX) + 1;
5748  NewTrue = NewTrue / Scale;
5749  NewFalse = NewFalse / Scale;
5750 }
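// Worked example (hypothetical weights): NewTrue = 2^33 and NewFalse = 2^31
// give Scale = (2^33 / UINT32_MAX) + 1 = 3, so the weights become 2863311530
// and 715827882; both now fit in uint32_t and the roughly 4:1 ratio between
// them is preserved.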
5751 
5752 /// \brief Some targets prefer to split a conditional branch like:
5753 /// \code
5754 /// %0 = icmp ne i32 %a, 0
5755 /// %1 = icmp ne i32 %b, 0
5756 /// %or.cond = or i1 %0, %1
5757 /// br i1 %or.cond, label %TrueBB, label %FalseBB
5758 /// \endcode
5759 /// into multiple branch instructions like:
5760 /// \code
5761 /// bb1:
5762 /// %0 = icmp ne i32 %a, 0
5763 /// br i1 %0, label %TrueBB, label %bb2
5764 /// bb2:
5765 /// %1 = icmp ne i32 %b, 0
5766 /// br i1 %1, label %TrueBB, label %FalseBB
5767 /// \endcode
5768 /// This usually allows instruction selection to do even further optimizations
5769 /// and combine the compare with the branch instruction. Currently this is
5770 /// applied for targets which have "cheap" jump instructions.
5771 ///
5772 /// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
5773 ///
5774 bool CodeGenPrepare::splitBranchCondition(Function &F) {
5775  if (!TM || !TM->Options.EnableFastISel || !TLI || TLI->isJumpExpensive())
5776  return false;
5777 
5778  bool MadeChange = false;
5779  for (auto &BB : F) {
5780  // Does this BB end with the following?
5781  // %cond1 = icmp|fcmp|binary instruction ...
5782  // %cond2 = icmp|fcmp|binary instruction ...
5783  // %cond.or = or|and i1 %cond1, cond2
5784  // br i1 %cond.or label %dest1, label %dest2"
5785  BinaryOperator *LogicOp;
5786  BasicBlock *TBB, *FBB;
5787  if (!match(BB.getTerminator(), m_Br(m_OneUse(m_BinOp(LogicOp)), TBB, FBB)))
5788  continue;
5789 
5790  auto *Br1 = cast<BranchInst>(BB.getTerminator());
5791  if (Br1->getMetadata(LLVMContext::MD_unpredictable))
5792  continue;
5793 
5794  unsigned Opc;
5795  Value *Cond1, *Cond2;
5796  if (match(LogicOp, m_And(m_OneUse(m_Value(Cond1)),
5797  m_OneUse(m_Value(Cond2)))))
5798  Opc = Instruction::And;
5799  else if (match(LogicOp, m_Or(m_OneUse(m_Value(Cond1)),
5800  m_OneUse(m_Value(Cond2)))))
5801  Opc = Instruction::Or;
5802  else
5803  continue;
5804 
5805  if (!match(Cond1, m_CombineOr(m_Cmp(), m_BinOp())) ||
5806  !match(Cond2, m_CombineOr(m_Cmp(), m_BinOp())) )
5807  continue;
5808 
5809  DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());
5810 
5811  // Create a new BB.
5812  auto TmpBB =
5813  BasicBlock::Create(BB.getContext(), BB.getName() + ".cond.split",
5814  BB.getParent(), BB.getNextNode());
5815 
5816  // Update original basic block by using the first condition directly by the
5817  // branch instruction and removing the no longer needed and/or instruction.
5818  Br1->setCondition(Cond1);
5819  LogicOp->eraseFromParent();
5820 
5821   // Depending on the condition, we have to replace either the true or the
5822   // false successor of the original branch instruction.
5823  if (Opc == Instruction::And)
5824  Br1->setSuccessor(0, TmpBB);
5825  else
5826  Br1->setSuccessor(1, TmpBB);
5827 
5828  // Fill in the new basic block.
5829  auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond2, TBB, FBB);
5830  if (auto *I = dyn_cast<Instruction>(Cond2)) {
5831  I->removeFromParent();
5832  I->insertBefore(Br2);
5833  }
5834 
5835  // Update PHI nodes in both successors. The original BB needs to be
5836   // replaced in one successor's PHI nodes, because the branch now comes from
5837   // the newly generated BB (TmpBB). In the other successor we need to add one
5838  // incoming edge to the PHI nodes, because both branch instructions target
5839  // now the same successor. Depending on the original branch condition
5840  // (and/or) we have to swap the successors (TrueDest, FalseDest), so that
5841  // we perform the correct update for the PHI nodes.
5842  // This doesn't change the successor order of the just created branch
5843  // instruction (or any other instruction).
5844  if (Opc == Instruction::Or)
5845  std::swap(TBB, FBB);
5846 
5847  // Replace the old BB with the new BB.
5848  for (auto &I : *TBB) {
5849  PHINode *PN = dyn_cast<PHINode>(&I);
5850  if (!PN)
5851  break;
5852  int i;
5853  while ((i = PN->getBasicBlockIndex(&BB)) >= 0)
5854  PN->setIncomingBlock(i, TmpBB);
5855  }
5856 
5857   // Add another incoming edge from the new BB.
5858  for (auto &I : *FBB) {
5859  PHINode *PN = dyn_cast<PHINode>(&I);
5860  if (!PN)
5861  break;
5862  auto *Val = PN->getIncomingValueForBlock(&BB);
5863  PN->addIncoming(Val, TmpBB);
5864  }
5865 
5866  // Update the branch weights (from SelectionDAGBuilder::
5867  // FindMergedConditions).
5868  if (Opc == Instruction::Or) {
5869  // Codegen X | Y as:
5870  // BB1:
5871  // jmp_if_X TBB
5872  // jmp TmpBB
5873  // TmpBB:
5874  // jmp_if_Y TBB
5875  // jmp FBB
5876  //
5877 
5878  // We have flexibility in setting Prob for BB1 and Prob for NewBB.
5879  // The requirement is that
5880  // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
5881   //   = TrueProb for original BB.
5882   //  Assuming the original weights are A and B, one choice is to set BB1's
5883  // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
5884  // assumes that
5885  // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
5886  // Another choice is to assume TrueProb for BB1 equals to TrueProb for
5887  // TmpBB, but the math is more complicated.
5888  uint64_t TrueWeight, FalseWeight;
5889  if (Br1->extractProfMetadata(TrueWeight, FalseWeight)) {
5890  uint64_t NewTrueWeight = TrueWeight;
5891  uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight;
5892  scaleWeights(NewTrueWeight, NewFalseWeight);
5893  Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext())
5894                          .createBranchWeights(NewTrueWeight, NewFalseWeight));
5895 
5896  NewTrueWeight = TrueWeight;
5897  NewFalseWeight = 2 * FalseWeight;
5898  scaleWeights(NewTrueWeight, NewFalseWeight);
5899  Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext())
5900                          .createBranchWeights(NewTrueWeight, NewFalseWeight));
5901  }
5902  } else {
5903  // Codegen X & Y as:
5904  // BB1:
5905  // jmp_if_X TmpBB
5906  // jmp FBB
5907  // TmpBB:
5908  // jmp_if_Y TBB
5909  // jmp FBB
5910  //
5911  // This requires creation of TmpBB after CurBB.
5912 
5913  // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
5914  // The requirement is that
5915  // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
5916   //   = FalseProb for original BB.
5917   //  Assuming the original weights are A and B, one choice is to set BB1's
5918  // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
5919  // assumes that
5920  // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
5921  uint64_t TrueWeight, FalseWeight;
5922  if (Br1->extractProfMetadata(TrueWeight, FalseWeight)) {
5923  uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight;
5924  uint64_t NewFalseWeight = FalseWeight;
5925  scaleWeights(NewTrueWeight, NewFalseWeight);
5926  Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext())
5927  .createBranchWeights(NewTrueWeight, NewFalseWeight));
5928 
5929  NewTrueWeight = 2 * TrueWeight;
5930  NewFalseWeight = FalseWeight;
5931  scaleWeights(NewTrueWeight, NewFalseWeight);
5932  Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext())
5933  .createBranchWeights(NewTrueWeight, NewFalseWeight));
5934  }
5935  }
5936 
5937  // Note: No point in getting fancy here, since the DT info is never
5938  // available to CodeGenPrepare.
5939  ModifiedDT = true;
5940 
5941  MadeChange = true;
5942 
5943  DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();
5944  TmpBB->dump());
5945  }
5946  return MadeChange;
5947 }
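
The two PHI updates above can be seen in isolation on a toy CFG. The sketch below is not part of CodeGenPrepare: it builds the post-split control flow for the 'or' case with the LLVM C++ API and then applies the same two updates. The function, block, and value names (demo, bb, bb.cond.split, true.dest, false.dest) are invented for illustration only.

// Standalone illustration (not part of CodeGenPrepare) of the two PHI updates
// above, using the 'or' case: after the split, true.dest is reached from both
// bb and bb.cond.split (add an incoming edge), while false.dest is reached
// only from bb.cond.split (replace the incoming block).  All names invented.
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("phi-update-demo", Ctx);
  IRBuilder<> B(Ctx);

  auto *FTy =
      FunctionType::get(B.getInt32Ty(), {B.getInt1Ty(), B.getInt1Ty()}, false);
  Function *F = Function::Create(FTy, Function::ExternalLinkage, "demo", &M);
  auto AI = F->arg_begin();
  Value *X = &*AI++;
  Value *Y = &*AI;

  BasicBlock *BB = BasicBlock::Create(Ctx, "bb", F);
  BasicBlock *TmpBB = BasicBlock::Create(Ctx, "bb.cond.split", F);
  BasicBlock *TrueDest = BasicBlock::Create(Ctx, "true.dest", F);
  BasicBlock *FalseDest = BasicBlock::Create(Ctx, "false.dest", F);

  // Post-split control flow for 'br (or X, Y)':
  //   bb:            br X, true.dest, bb.cond.split
  //   bb.cond.split: br Y, true.dest, false.dest
  B.SetInsertPoint(BB);
  B.CreateCondBr(X, TrueDest, TmpBB);
  B.SetInsertPoint(TmpBB);
  B.CreateCondBr(Y, TrueDest, FalseDest);

  // PHIs as they looked *before* the split: one incoming edge each, from bb.
  B.SetInsertPoint(TrueDest);
  PHINode *TruePN = B.CreatePHI(B.getInt32Ty(), 2, "t");
  TruePN->addIncoming(B.getInt32(1), BB);
  B.CreateRet(TruePN);

  B.SetInsertPoint(FalseDest);
  PHINode *FalsePN = B.CreatePHI(B.getInt32Ty(), 1, "f");
  FalsePN->addIncoming(B.getInt32(2), BB);
  B.CreateRet(FalsePN);

  // Update 1: false.dest is now reached from bb.cond.split instead of bb, so
  // rewrite the incoming block in place (mirrors the first loop above).
  int Idx;
  while ((Idx = FalsePN->getBasicBlockIndex(BB)) >= 0)
    FalsePN->setIncomingBlock(Idx, TmpBB);

  // Update 2: true.dest is now reached from bb *and* bb.cond.split, so add a
  // second incoming edge reusing the value that came from bb (second loop).
  TruePN->addIncoming(TruePN->getIncomingValueForBlock(BB), TmpBB);

  // The module only becomes well-formed once both updates have been applied.
  if (verifyModule(M, &errs()))
    return 1;
  M.print(outs(), nullptr);
  return 0;
}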
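
The weight identity stated in the comments for the 'or' case can be verified with plain arithmetic. This is a minimal standalone check, not derived from the pass itself; the weight pairs are arbitrary illustrative values.

// Numeric check of the 'or'-case weight choice described in the comments:
// BB1 gets weights (A, A+2B), TmpBB gets weights (A, 2B).  Then
//   TrueProb(BB1) + FalseProb(BB1) * TrueProb(TmpBB) == TrueProb(original BB)
// and the stated assumption TrueProb(BB1) == FalseProb(BB1) * TrueProb(TmpBB)
// holds as well.  The A/B pairs are illustrative values only.
#include <cassert>
#include <cmath>
#include <cstdio>

int main() {
  const double Weights[][2] = {{3, 1}, {1, 3}, {7, 5}};
  for (const auto &W : Weights) {
    const double A = W[0], B = W[1];

    const double TrueProbOrig = A / (A + B);

    // BB1: jmp_if_X TBB (weight A) / jmp TmpBB (weight A + 2B).
    const double TrueProbBB1 = A / (2 * A + 2 * B);
    const double FalseProbBB1 = (A + 2 * B) / (2 * A + 2 * B);

    // TmpBB: jmp_if_Y TBB (weight A) / jmp FBB (weight 2B).
    const double TrueProbTmpBB = A / (A + 2 * B);

    const double Combined = TrueProbBB1 + FalseProbBB1 * TrueProbTmpBB;
    std::printf("A=%g B=%g: combined=%g original=%g\n", A, B, Combined,
                TrueProbOrig);
    assert(std::fabs(Combined - TrueProbOrig) < 1e-12);
    assert(std::fabs(TrueProbBB1 - FalseProbBB1 * TrueProbTmpBB) < 1e-12);
  }
  return 0;
}

Algebraically the identity holds for any positive A and B; the scaleWeights helper defined earlier in this file only exists to keep the 64-bit products within the 32-bit range that createBranchWeights expects.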
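
The analogous identity for the 'and' case checks out the same way; again the weight pairs are illustrative only.

// Numeric check of the 'and'-case weight choice described in the comments:
// BB1 gets weights (2A+B, B), TmpBB gets weights (2A, B).  Then
//   FalseProb(BB1) + TrueProb(BB1) * FalseProb(TmpBB) == FalseProb(original BB)
// and the stated assumption FalseProb(BB1) == TrueProb(BB1) * FalseProb(TmpBB)
// holds as well.  The A/B pairs are illustrative values only.
#include <cassert>
#include <cmath>
#include <cstdio>

int main() {
  const double Weights[][2] = {{3, 1}, {1, 3}, {7, 5}};
  for (const auto &W : Weights) {
    const double A = W[0], B = W[1];

    const double FalseProbOrig = B / (A + B);

    // BB1: jmp_if_X TmpBB (weight 2A + B) / jmp FBB (weight B).
    const double TrueProbBB1 = (2 * A + B) / (2 * A + 2 * B);
    const double FalseProbBB1 = B / (2 * A + 2 * B);

    // TmpBB: jmp_if_Y TBB (weight 2A) / jmp FBB (weight B).
    const double FalseProbTmpBB = B / (2 * A + B);

    const double Combined = FalseProbBB1 + TrueProbBB1 * FalseProbTmpBB;
    std::printf("A=%g B=%g: combined=%g original=%g\n", A, B, Combined,
                FalseProbOrig);
    assert(std::fabs(Combined - FalseProbOrig) < 1e-12);
    assert(std::fabs(FalseProbBB1 - TrueProbBB1 * FalseProbTmpBB) < 1e-12);
  }
  return 0;
}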