CodeGenPrepare.cpp
1 //===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass munges the code in the input function to better prepare it for
10 // SelectionDAG-based code generation. This works around limitations in its
11 // basic-block-at-a-time approach. It should eventually be removed.
12 //
13 //===----------------------------------------------------------------------===//
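// How the pass is typically driven (an illustrative sketch, not taken from this
// file): targets schedule it from TargetPassConfig, but it can also be run
// directly through the legacy pass manager via createCodeGenPreparePass(),
// which is declared in llvm/CodeGen/Passes.h and defined below:
//
//   legacy::FunctionPassManager FPM(&M);   // M is some llvm::Module
//   FPM.add(createCodeGenPreparePass());   // required analyses are auto-scheduled
//   FPM.doInitialization();
//   for (Function &F : M)
//     FPM.run(F);
//   FPM.doFinalization();
//
// Without a TargetPassConfig, the TLI/TM-dependent transforms below are skipped,
// since runOnFunction only queries them with getAnalysisIfAvailable.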
14 
15 #include "llvm/ADT/APInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/DenseMap.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallPtrSet.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/Statistic.h"
27 #include "llvm/Analysis/LoopInfo.h"
34 #include "llvm/CodeGen/Analysis.h"
41 #include "llvm/Config/llvm-config.h"
42 #include "llvm/IR/Argument.h"
43 #include "llvm/IR/Attributes.h"
44 #include "llvm/IR/BasicBlock.h"
45 #include "llvm/IR/CallSite.h"
46 #include "llvm/IR/Constant.h"
47 #include "llvm/IR/Constants.h"
48 #include "llvm/IR/DataLayout.h"
49 #include "llvm/IR/DerivedTypes.h"
50 #include "llvm/IR/Dominators.h"
51 #include "llvm/IR/Function.h"
53 #include "llvm/IR/GlobalValue.h"
54 #include "llvm/IR/GlobalVariable.h"
55 #include "llvm/IR/IRBuilder.h"
56 #include "llvm/IR/InlineAsm.h"
57 #include "llvm/IR/InstrTypes.h"
58 #include "llvm/IR/Instruction.h"
59 #include "llvm/IR/Instructions.h"
60 #include "llvm/IR/IntrinsicInst.h"
61 #include "llvm/IR/Intrinsics.h"
62 #include "llvm/IR/LLVMContext.h"
63 #include "llvm/IR/MDBuilder.h"
64 #include "llvm/IR/Module.h"
65 #include "llvm/IR/Operator.h"
66 #include "llvm/IR/PatternMatch.h"
67 #include "llvm/IR/Statepoint.h"
68 #include "llvm/IR/Type.h"
69 #include "llvm/IR/Use.h"
70 #include "llvm/IR/User.h"
71 #include "llvm/IR/Value.h"
72 #include "llvm/IR/ValueHandle.h"
73 #include "llvm/IR/ValueMap.h"
74 #include "llvm/Pass.h"
77 #include "llvm/Support/Casting.h"
79 #include "llvm/Support/Compiler.h"
80 #include "llvm/Support/Debug.h"
90 #include <algorithm>
91 #include <cassert>
92 #include <cstdint>
93 #include <iterator>
94 #include <limits>
95 #include <memory>
96 #include <utility>
97 #include <vector>
98 
99 using namespace llvm;
100 using namespace llvm::PatternMatch;
101 
102 #define DEBUG_TYPE "codegenprepare"
103 
104 STATISTIC(NumBlocksElim, "Number of blocks eliminated");
105 STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
106 STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
107 STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
108  "sunken Cmps");
109 STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
110  "of sunken Casts");
111 STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
112  "computations were sunk");
113 STATISTIC(NumMemoryInstsPhiCreated,
114  "Number of phis created when address "
115  "computations were sunk to memory instructions");
116 STATISTIC(NumMemoryInstsSelectCreated,
117  "Number of selects created when address "
118  "computations were sunk to memory instructions");
119 STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
120 STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
121 STATISTIC(NumAndsAdded,
122  "Number of and mask instructions added to form ext loads");
123 STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
124 STATISTIC(NumRetsDup, "Number of return instructions duplicated");
125 STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
126 STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
127 STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
128 
129 static cl::opt<bool> DisableBranchOpts(
130  "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
131  cl::desc("Disable branch optimizations in CodeGenPrepare"));
132 
133 static cl::opt<bool>
134  DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
135  cl::desc("Disable GC optimizations in CodeGenPrepare"));
136 
137 static cl::opt<bool> DisableSelectToBranch(
138  "disable-cgp-select2branch", cl::Hidden, cl::init(false),
139  cl::desc("Disable select to branch conversion."));
140 
141 static cl::opt<bool> AddrSinkUsingGEPs(
142  "addr-sink-using-gep", cl::Hidden, cl::init(true),
143  cl::desc("Address sinking in CGP using GEPs."));
144 
145 static cl::opt<bool> EnableAndCmpSinking(
146  "enable-andcmp-sinking", cl::Hidden, cl::init(true),
147  cl::desc("Enable sinking and/cmp into branches."));
148 
149 static cl::opt<bool> DisableStoreExtract(
150  "disable-cgp-store-extract", cl::Hidden, cl::init(false),
151  cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));
152 
153 static cl::opt<bool> StressStoreExtract(
154  "stress-cgp-store-extract", cl::Hidden, cl::init(false),
155  cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));
156 
157 static cl::opt<bool> DisableExtLdPromotion(
158  "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
159  cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
160  "CodeGenPrepare"));
161 
162 static cl::opt<bool> StressExtLdPromotion(
163  "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
164  cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
165  "optimization in CodeGenPrepare"));
166 
167 static cl::opt<bool> DisablePreheaderProtect(
168  "disable-preheader-prot", cl::Hidden, cl::init(false),
169  cl::desc("Disable protection against removing loop preheaders"));
170 
171 static cl::opt<bool> ProfileGuidedSectionPrefix(
172  "profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::ZeroOrMore,
173  cl::desc("Use profile info to add section prefix for hot/cold functions"));
174 
175 static cl::opt<unsigned> FreqRatioToSkipMerge(
176  "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2),
177  cl::desc("Skip merging empty blocks if (frequency of empty block) / "
178  "(frequency of destination block) is greater than this ratio"));
179 
180 static cl::opt<bool> ForceSplitStore(
181  "force-split-store", cl::Hidden, cl::init(false),
182  cl::desc("Force store splitting no matter what the target query says."));
183 
184 static cl::opt<bool>
185 EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden,
186  cl::desc("Enable merging of redundant sexts when one is dominating"
187  " the other."), cl::init(true));
188 
189 static cl::opt<bool> DisableComplexAddrModes(
190  "disable-complex-addr-modes", cl::Hidden, cl::init(false),
191  cl::desc("Disables combining addressing modes with different parts "
192  "in optimizeMemoryInst."));
193 
194 static cl::opt<bool>
195 AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false),
196  cl::desc("Allow creation of Phis in Address sinking."));
197 
198 static cl::opt<bool>
199 AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(true),
200  cl::desc("Allow creation of selects in Address sinking."));
201 
202 static cl::opt<bool> AddrSinkCombineBaseReg(
203  "addr-sink-combine-base-reg", cl::Hidden, cl::init(true),
204  cl::desc("Allow combining of BaseReg field in Address sinking."));
205 
206 static cl::opt<bool> AddrSinkCombineBaseGV(
207  "addr-sink-combine-base-gv", cl::Hidden, cl::init(true),
208  cl::desc("Allow combining of BaseGV field in Address sinking."));
209 
210 static cl::opt<bool> AddrSinkCombineBaseOffs(
211  "addr-sink-combine-base-offs", cl::Hidden, cl::init(true),
212  cl::desc("Allow combining of BaseOffs field in Address sinking."));
213 
214 static cl::opt<bool> AddrSinkCombineScaledReg(
215  "addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true),
216  cl::desc("Allow combining of ScaledReg field in Address sinking."));
217 
218 static cl::opt<bool>
219  EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden,
220  cl::init(true),
221  cl::desc("Enable splitting large offset of GEP."));
222 
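// All of the options above are hidden cl::opts: they are not listed in plain
// -help output but show up with -help-hidden, and they can be passed to any
// tool that runs this pass. An illustrative invocation (the input file name is
// made up, and -debug-only requires an assertions-enabled build):
//
//   llc -O2 -debug-only=codegenprepare -disable-cgp-select2branch test.ll -o /dev/null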
223 namespace {
224 
225 enum ExtType {
226  ZeroExtension, // Zero extension has been seen.
227  SignExtension, // Sign extension has been seen.
228  BothExtension // This extension type is used if we saw sext after
229  // ZeroExtension had been set, or if we saw zext after
230  // SignExtension had been set. It makes the type
231  // information of a promoted instruction invalid.
232 };
233 
234 using SetOfInstrs = SmallPtrSet<Instruction *, 16>;
235 using TypeIsSExt = PointerIntPair<Type *, 2, ExtType>;
236 using InstrToOrigTy = DenseMap<Instruction *, TypeIsSExt>;
237 using SExts = SmallVector<Instruction *, 16>;
238 using ValueToSExts = DenseMap<Value *, SExts>;
239 
240 class TypePromotionTransaction;
241 
242  class CodeGenPrepare : public FunctionPass {
243  const TargetMachine *TM = nullptr;
244  const TargetSubtargetInfo *SubtargetInfo;
245  const TargetLowering *TLI = nullptr;
246  const TargetRegisterInfo *TRI;
247  const TargetTransformInfo *TTI = nullptr;
248  const TargetLibraryInfo *TLInfo;
249  const LoopInfo *LI;
250  std::unique_ptr<BlockFrequencyInfo> BFI;
251  std::unique_ptr<BranchProbabilityInfo> BPI;
252 
253  /// As we scan instructions optimizing them, this is the next instruction
254  /// to optimize. Transforms that can invalidate this should update it.
255  BasicBlock::iterator CurInstIterator;
256 
257  /// Keeps track of non-local addresses that have been sunk into a block.
258  /// This allows us to avoid inserting duplicate code for blocks with
259  /// multiple load/stores of the same address. The usage of WeakTrackingVH
260  /// enables SunkAddrs to be treated as a cache whose entries can be
261  /// invalidated if a sunken address computation has been erased.
262  ValueMap<Value*, WeakTrackingVH> SunkAddrs;
263 
264  /// Keeps track of all instructions inserted for the current function.
265  SetOfInstrs InsertedInsts;
266 
267  /// Keeps track of the type of the related instruction before their
268  /// promotion for the current function.
269  InstrToOrigTy PromotedInsts;
270 
271  /// Keep track of instructions removed during promotion.
272  SetOfInstrs RemovedInsts;
273 
274  /// Keep track of sext chains based on their initial value.
275  DenseMap<Value *, Instruction *> SeenChainsForSExt;
276 
277  /// Keep track of GEPs accessing the same data structures such as structs or
278  /// arrays that are candidates to be split later because of their large
279  /// size.
280  MapVector<
281  AssertingVH<Value>,
282  SmallVector<std::pair<AssertingVH<GetElementPtrInst>, int64_t>, 32>>
283  LargeOffsetGEPMap;
284 
285  /// Keep track of new GEP base after splitting the GEPs having large offset.
286  SmallSet<AssertingVH<Value>, 2> NewGEPBases;
287 
288  /// Map serial numbers to Large offset GEPs.
289  DenseMap<AssertingVH<GetElementPtrInst>, int> LargeOffsetGEPID;
290 
291  /// Keep track of SExt promoted.
292  ValueToSExts ValToSExtendedUses;
293 
294  /// True if CFG is modified in any way.
295  bool ModifiedDT;
296 
297  /// True if optimizing for size.
298  bool OptSize;
299 
300  /// DataLayout for the Function being processed.
301  const DataLayout *DL = nullptr;
302 
303  public:
304  static char ID; // Pass identification, replacement for typeid
305 
306  CodeGenPrepare() : FunctionPass(ID) {
307  initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
308  }
309 
310  bool runOnFunction(Function &F) override;
311 
312  StringRef getPassName() const override { return "CodeGen Prepare"; }
313 
314  void getAnalysisUsage(AnalysisUsage &AU) const override {
315  // FIXME: When we can selectively preserve passes, preserve the domtree.
316  AU.addRequired<ProfileSummaryInfoWrapperPass>();
317  AU.addRequired<TargetLibraryInfoWrapperPass>();
318  AU.addRequired<TargetTransformInfoWrapperPass>();
319  AU.addRequired<LoopInfoWrapperPass>();
320  }
321 
322  private:
323  template <typename F>
324  void resetIteratorIfInvalidatedWhileCalling(BasicBlock *BB, F f) {
325  // Substituting can cause recursive simplifications, which can invalidate
326  // our iterator. Use a WeakTrackingVH to hold onto it in case this
327  // happens.
328  Value *CurValue = &*CurInstIterator;
329  WeakTrackingVH IterHandle(CurValue);
330 
331  f();
332 
333  // If the iterator instruction was recursively deleted, start over at the
334  // start of the block.
335  if (IterHandle != CurValue) {
336  CurInstIterator = BB->begin();
337  SunkAddrs.clear();
338  }
339  }
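  // Illustrative sketch (the call site, CS and RetVal are hypothetical): a
  // caller that may trigger recursive simplification wraps the replacement so
  // that a deleted CurInstIterator restarts the scan at the top of the block:
  //
  //   resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
  //     replaceAndRecursivelySimplify(CS.getInstruction(), RetVal, TLInfo,
  //                                   nullptr);
  //   });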
340 
341  bool eliminateFallThrough(Function &F);
342  bool eliminateMostlyEmptyBlocks(Function &F);
343  BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
344  bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
345  void eliminateMostlyEmptyBlock(BasicBlock *BB);
346  bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
347  bool isPreheader);
348  bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT);
349  bool optimizeInst(Instruction *I, bool &ModifiedDT);
350  bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
351  Type *AccessTy, unsigned AddrSpace);
352  bool optimizeInlineAsmInst(CallInst *CS);
353  bool optimizeCallInst(CallInst *CI, bool &ModifiedDT);
354  bool optimizeExt(Instruction *&I);
355  bool optimizeExtUses(Instruction *I);
356  bool optimizeLoadExt(LoadInst *Load);
357  bool optimizeSelectInst(SelectInst *SI);
358  bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
359  bool optimizeSwitchInst(SwitchInst *SI);
360  bool optimizeExtractElementInst(Instruction *Inst);
361  bool dupRetToEnableTailCallOpts(BasicBlock *BB);
362  bool placeDbgValues(Function &F);
363  bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
364  LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
365  bool tryToPromoteExts(TypePromotionTransaction &TPT,
366  const SmallVectorImpl<Instruction *> &Exts,
367  SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
368  unsigned CreatedInstsCost = 0);
369  bool mergeSExts(Function &F);
370  bool splitLargeGEPOffsets();
371  bool performAddressTypePromotion(
372  Instruction *&Inst,
373  bool AllowPromotionWithoutCommonHeader,
374  bool HasPromoted, TypePromotionTransaction &TPT,
375  SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
376  bool splitBranchCondition(Function &F);
377  bool simplifyOffsetableRelocate(Instruction &I);
378  };
379 
380 } // end anonymous namespace
381 
382 char CodeGenPrepare::ID = 0;
383 
384 INITIALIZE_PASS_BEGIN(CodeGenPrepare, DEBUG_TYPE,
385  "Optimize for code generation", false, false)
386 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
387 INITIALIZE_PASS_END(CodeGenPrepare, DEBUG_TYPE,
388  "Optimize for code generation", false, false)
389 
390 FunctionPass *llvm::createCodeGenPreparePass() { return new CodeGenPrepare(); }
391 
392 bool CodeGenPrepare::runOnFunction(Function &F) {
393  if (skipFunction(F))
394  return false;
395 
396  DL = &F.getParent()->getDataLayout();
397 
398  bool EverMadeChange = false;
399  // Clear per function information.
400  InsertedInsts.clear();
401  PromotedInsts.clear();
402 
403  ModifiedDT = false;
404  if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) {
405  TM = &TPC->getTM<TargetMachine>();
406  SubtargetInfo = TM->getSubtargetImpl(F);
407  TLI = SubtargetInfo->getTargetLowering();
408  TRI = SubtargetInfo->getRegisterInfo();
409  }
410  TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
411  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
412  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
413  BPI.reset(new BranchProbabilityInfo(F, *LI));
414  BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
415  OptSize = F.optForSize();
416 
417  ProfileSummaryInfo *PSI =
418  &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
419  if (ProfileGuidedSectionPrefix) {
420  if (PSI->isFunctionHotInCallGraph(&F, *BFI))
421  F.setSectionPrefix(".hot");
422  else if (PSI->isFunctionColdInCallGraph(&F, *BFI))
423  F.setSectionPrefix(".unlikely");
424  }
425 
426  /// This optimization identifies DIV instructions that can be
427  /// profitably bypassed and carried out with a shorter, faster divide.
428  if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI &&
429  TLI->isSlowDivBypassed()) {
430  const DenseMap<unsigned int, unsigned int> &BypassWidths =
431  TLI->getBypassSlowDivWidths();
432  BasicBlock* BB = &*F.begin();
433  while (BB != nullptr) {
434  // bypassSlowDivision may create new BBs, but we don't want to reapply the
435  // optimization to those blocks.
436  BasicBlock* Next = BB->getNextNode();
437  EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
438  BB = Next;
439  }
440  }
441 
442  // Eliminate blocks that contain only PHI nodes and an
443  // unconditional branch.
444  EverMadeChange |= eliminateMostlyEmptyBlocks(F);
445 
446  if (!DisableBranchOpts)
447  EverMadeChange |= splitBranchCondition(F);
448 
449  // Split some critical edges where one of the sources is an indirect branch,
450  // to help generate sane code for PHIs involving such edges.
451  EverMadeChange |= SplitIndirectBrCriticalEdges(F);
452 
453  bool MadeChange = true;
454  while (MadeChange) {
455  MadeChange = false;
456  for (Function::iterator I = F.begin(); I != F.end(); ) {
457  BasicBlock *BB = &*I++;
458  bool ModifiedDTOnIteration = false;
459  MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration);
460 
461  // Restart BB iteration if the dominator tree of the Function was changed
462  if (ModifiedDTOnIteration)
463  break;
464  }
465  if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
466  MadeChange |= mergeSExts(F);
467  if (!LargeOffsetGEPMap.empty())
468  MadeChange |= splitLargeGEPOffsets();
469 
470  // Really free removed instructions during promotion.
471  for (Instruction *I : RemovedInsts)
472  I->deleteValue();
473 
474  EverMadeChange |= MadeChange;
475  SeenChainsForSExt.clear();
476  ValToSExtendedUses.clear();
477  RemovedInsts.clear();
478  LargeOffsetGEPMap.clear();
479  LargeOffsetGEPID.clear();
480  }
481 
482  SunkAddrs.clear();
483 
484  if (!DisableBranchOpts) {
485  MadeChange = false;
486  // Use a set vector to get deterministic iteration order. The order the
487  // blocks are removed may affect whether or not PHI nodes in successors
488  // are removed.
489  SmallSetVector<BasicBlock*, 8> WorkList;
490  for (BasicBlock &BB : F) {
491  SmallVector<BasicBlock *, 2> Successors(succ_begin(&BB), succ_end(&BB));
492  MadeChange |= ConstantFoldTerminator(&BB, true);
493  if (!MadeChange) continue;
494 
495  for (SmallVectorImpl<BasicBlock*>::iterator
496  II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
497  if (pred_begin(*II) == pred_end(*II))
498  WorkList.insert(*II);
499  }
500 
501  // Delete the dead blocks and any of their dead successors.
502  MadeChange |= !WorkList.empty();
503  while (!WorkList.empty()) {
504  BasicBlock *BB = WorkList.pop_back_val();
505  SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
506 
507  DeleteDeadBlock(BB);
508 
509  for (SmallVectorImpl<BasicBlock*>::iterator
510  II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
511  if (pred_begin(*II) == pred_end(*II))
512  WorkList.insert(*II);
513  }
514 
515  // Merge pairs of basic blocks with unconditional branches, connected by
516  // a single edge.
517  if (EverMadeChange || MadeChange)
518  MadeChange |= eliminateFallThrough(F);
519 
520  EverMadeChange |= MadeChange;
521  }
522 
523  if (!DisableGCOpts) {
524  SmallVector<Instruction *, 2> Statepoints;
525  for (BasicBlock &BB : F)
526  for (Instruction &I : BB)
527  if (isStatepoint(I))
528  Statepoints.push_back(&I);
529  for (auto &I : Statepoints)
530  EverMadeChange |= simplifyOffsetableRelocate(*I);
531  }
532 
533  // Do this last to clean up use-before-def scenarios introduced by other
534  // preparatory transforms.
535  EverMadeChange |= placeDbgValues(F);
536 
537  return EverMadeChange;
538 }
539 
540 /// Merge basic blocks which are connected by a single edge, where one of the
541 /// basic blocks has a single successor pointing to the other basic block,
542 /// which has a single predecessor.
543 bool CodeGenPrepare::eliminateFallThrough(Function &F) {
544  bool Changed = false;
545  // Scan all of the blocks in the function, except for the entry block.
546  // Use a temporary array to avoid iterator being invalidated when
547  // deleting blocks.
548  SmallVector<WeakTrackingVH, 16> Blocks;
549  for (auto &Block : llvm::make_range(std::next(F.begin()), F.end()))
550  Blocks.push_back(&Block);
551 
552  for (auto &Block : Blocks) {
553  auto *BB = cast_or_null<BasicBlock>(Block);
554  if (!BB)
555  continue;
556  // If the destination block has a single pred, then this is a trivial
557  // edge, just collapse it.
558  BasicBlock *SinglePred = BB->getSinglePredecessor();
559 
560  // Don't merge if BB's address is taken.
561  if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue;
562 
563  BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
564  if (Term && !Term->isConditional()) {
565  Changed = true;
566  LLVM_DEBUG(dbgs() << "To merge:\n" << *BB << "\n\n\n");
567 
568  // Merge BB into SinglePred and delete it.
569  MergeBlockIntoPredecessor(BB);
570  }
571  }
572  return Changed;
573 }
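// An illustrative IR sketch of the fall-through case handled above (block and
// value names are made up): a block whose only predecessor ends in an
// unconditional branch is folded into that predecessor, provided the block's
// address is not taken:
//
//   bb1:
//     %v = add i32 %a, 1
//     br label %bb2
//   bb2:                                ; single predecessor: %bb1
//     %w = mul i32 %v, 3
//     ret i32 %w
//
// After the merge, the add, mul and ret all live in one block.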
574 
575 /// Find a destination block from BB if BB is mergeable empty block.
576 BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
577  // If this block doesn't end with an uncond branch, ignore it.
578  BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
579  if (!BI || !BI->isUnconditional())
580  return nullptr;
581 
582  // If the instruction before the branch (skipping debug info) isn't a phi
583  // node, then other stuff is happening here.
584  BasicBlock::iterator BBI = BI->getIterator();
585  if (BBI != BB->begin()) {
586  --BBI;
587  while (isa<DbgInfoIntrinsic>(BBI)) {
588  if (BBI == BB->begin())
589  break;
590  --BBI;
591  }
592  if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
593  return nullptr;
594  }
595 
596  // Do not break infinite loops.
597  BasicBlock *DestBB = BI->getSuccessor(0);
598  if (DestBB == BB)
599  return nullptr;
600 
601  if (!canMergeBlocks(BB, DestBB))
602  DestBB = nullptr;
603 
604  return DestBB;
605 }
606 
607 /// Eliminate blocks that contain only PHI nodes, debug info directives, and an
608 /// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
609 /// edges in ways that are non-optimal for isel. Start by eliminating these
610 /// blocks so we can split them the way we want them.
611 bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
612  SmallPtrSet<BasicBlock *, 16> Preheaders;
613  SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end());
614  while (!LoopList.empty()) {
615  Loop *L = LoopList.pop_back_val();
616  LoopList.insert(LoopList.end(), L->begin(), L->end());
617  if (BasicBlock *Preheader = L->getLoopPreheader())
618  Preheaders.insert(Preheader);
619  }
620 
621  bool MadeChange = false;
622  // Copy blocks into a temporary array to avoid iterator invalidation issues
623  // as we remove them.
624  // Note that this intentionally skips the entry block.
625  SmallVector<WeakTrackingVH, 16> Blocks;
626  for (auto &Block : llvm::make_range(std::next(F.begin()), F.end()))
627  Blocks.push_back(&Block);
628 
629  for (auto &Block : Blocks) {
630  BasicBlock *BB = cast_or_null<BasicBlock>(Block);
631  if (!BB)
632  continue;
633  BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB);
634  if (!DestBB ||
635  !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB)))
636  continue;
637 
638  eliminateMostlyEmptyBlock(BB);
639  MadeChange = true;
640  }
641  return MadeChange;
642 }
643 
644 bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
645  BasicBlock *DestBB,
646  bool isPreheader) {
647  // Do not delete loop preheaders if doing so would create a critical edge.
648  // Loop preheaders can be good locations to spill registers. If the
649  // preheader is deleted and we create a critical edge, registers may be
650  // spilled in the loop body instead.
651  if (!DisablePreheaderProtect && isPreheader &&
652  !(BB->getSinglePredecessor() &&
653  BB->getSinglePredecessor()->getSingleSuccessor()))
654  return false;
655 
656  // Try to skip merging if the unique predecessor of BB is terminated by a
657  // switch or indirect branch instruction, and BB is used as an incoming block
658  // of PHIs in DestBB. In such case, merging BB and DestBB would cause ISel to
659  // add COPY instructions in the predecessor of BB instead of BB (if it is not
660  // merged). Note that the critical edge created by merging such blocks won't be
661  // split in MachineSink because the jump table is not analyzable. By keeping
662  // such empty block (BB), ISel will place COPY instructions in BB, not in the
663  // predecessor of BB.
664  BasicBlock *Pred = BB->getUniquePredecessor();
665  if (!Pred ||
666  !(isa<SwitchInst>(Pred->getTerminator()) ||
667  isa<IndirectBrInst>(Pred->getTerminator())))
668  return true;
669 
670  if (BB->getTerminator() != BB->getFirstNonPHIOrDbg())
671  return true;
672 
673  // We use a simple cost heuristic which determines that skipping merging is
674  // profitable if the cost of skipping merging is less than the cost of
675  // merging : Cost(skipping merging) < Cost(merging BB), where the
676  // Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)), and
677  // the Cost(merging BB) is Freq(Pred) * Cost(Copy).
678  // Assuming Cost(Copy) == Cost(Branch), we could simplify it to :
679  // Freq(Pred) / Freq(BB) > 2.
680  // Note that if there are multiple empty blocks sharing the same incoming
681  // value for the PHIs in the DestBB, we consider them together. In such
682  // case, Cost(merging BB) will be the sum of their frequencies.
683 
684  if (!isa<PHINode>(DestBB->begin()))
685  return true;
686 
687  SmallPtrSet<BasicBlock *, 16> SameIncomingValueBBs;
688 
689  // Find all other incoming blocks from which incoming values of all PHIs in
690  // DestBB are the same as the ones from BB.
691  for (pred_iterator PI = pred_begin(DestBB), E = pred_end(DestBB); PI != E;
692  ++PI) {
693  BasicBlock *DestBBPred = *PI;
694  if (DestBBPred == BB)
695  continue;
696 
697  if (llvm::all_of(DestBB->phis(), [&](const PHINode &DestPN) {
698  return DestPN.getIncomingValueForBlock(BB) ==
699  DestPN.getIncomingValueForBlock(DestBBPred);
700  }))
701  SameIncomingValueBBs.insert(DestBBPred);
702  }
703 
704  // See if all BB's incoming values are the same as the value from Pred. In this
705  // case, there is no reason to skip merging because COPYs are expected to be placed in
706  // Pred already.
707  if (SameIncomingValueBBs.count(Pred))
708  return true;
709 
710  BlockFrequency PredFreq = BFI->getBlockFreq(Pred);
711  BlockFrequency BBFreq = BFI->getBlockFreq(BB);
712 
713  for (auto SameValueBB : SameIncomingValueBBs)
714  if (SameValueBB->getUniquePredecessor() == Pred &&
715  DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB))
716  BBFreq += BFI->getBlockFreq(SameValueBB);
717 
718  return PredFreq.getFrequency() <=
719  BBFreq.getFrequency() * FreqRatioToSkipMerge;
720 }
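// A worked instance of the heuristic above (numbers are illustrative): with the
// default cgp-freq-ratio-to-skip-merge of 2,
//
//   PredFreq = 8, combined BBFreq = 3:  8 <= 3 * 2 is false -> keep the empty
//                                       block (skipping the merge pays off)
//   PredFreq = 8, combined BBFreq = 5:  8 <= 5 * 2 is true  -> merging BB into
//                                       DestBB is considered profitable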
721 
722 /// Return true if we can merge BB into DestBB if there is a single
723 /// unconditional branch between them, and BB contains no other non-phi
724 /// instructions.
725 bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
726  const BasicBlock *DestBB) const {
727  // We only want to eliminate blocks whose phi nodes are used by phi nodes in
728  // the successor. If there are more complex condition (e.g. preheaders),
729  // don't mess around with them.
730  for (const PHINode &PN : BB->phis()) {
731  for (const User *U : PN.users()) {
732  const Instruction *UI = cast<Instruction>(U);
733  if (UI->getParent() != DestBB || !isa<PHINode>(UI))
734  return false;
735  // If User is inside DestBB block and it is a PHINode then check
736  // incoming value. If incoming value is not from BB then this is
737  // a complex condition (e.g. preheaders) we want to avoid here.
738  if (UI->getParent() == DestBB) {
739  if (const PHINode *UPN = dyn_cast<PHINode>(UI))
740  for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
741  Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
742  if (Insn && Insn->getParent() == BB &&
743  Insn->getParent() != UPN->getIncomingBlock(I))
744  return false;
745  }
746  }
747  }
748  }
749 
750  // If BB and DestBB contain any common predecessors, then the phi nodes in BB
751  // and DestBB may have conflicting incoming values for the block. If so, we
752  // can't merge the block.
753  const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
754  if (!DestBBPN) return true; // no conflict.
755 
756  // Collect the preds of BB.
757  SmallPtrSet<const BasicBlock*, 16> BBPreds;
758  if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
759  // It is faster to get preds from a PHI than with pred_iterator.
760  for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
761  BBPreds.insert(BBPN->getIncomingBlock(i));
762  } else {
763  BBPreds.insert(pred_begin(BB), pred_end(BB));
764  }
765 
766  // Walk the preds of DestBB.
767  for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
768  BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
769  if (BBPreds.count(Pred)) { // Common predecessor?
770  for (const PHINode &PN : DestBB->phis()) {
771  const Value *V1 = PN.getIncomingValueForBlock(Pred);
772  const Value *V2 = PN.getIncomingValueForBlock(BB);
773 
774  // If V2 is a phi node in BB, look up what the mapped value will be.
775  if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
776  if (V2PN->getParent() == BB)
777  V2 = V2PN->getIncomingValueForBlock(Pred);
778 
779  // If there is a conflict, bail out.
780  if (V1 != V2) return false;
781  }
782  }
783  }
784 
785  return true;
786 }
787 
788 /// Eliminate a basic block that has only phi's and an unconditional branch in
789 /// it.
790 void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
791  BranchInst *BI = cast<BranchInst>(BB->getTerminator());
792  BasicBlock *DestBB = BI->getSuccessor(0);
793 
794  LLVM_DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n"
795  << *BB << *DestBB);
796 
797  // If the destination block has a single pred, then this is a trivial edge,
798  // just collapse it.
799  if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
800  if (SinglePred != DestBB) {
801  assert(SinglePred == BB &&
802  "Single predecessor not the same as predecessor");
803  // Merge DestBB into SinglePred/BB and delete it.
804  MergeBlockIntoPredecessor(DestBB);
805  // Note: BB(=SinglePred) will not be deleted on this path.
806  // DestBB(=its single successor) is the one that was deleted.
807  LLVM_DEBUG(dbgs() << "AFTER:\n" << *SinglePred << "\n\n\n");
808  return;
809  }
810  }
811 
812  // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
813  // to handle the new incoming edges it is about to have.
814  for (PHINode &PN : DestBB->phis()) {
815  // Remove the incoming value for BB, and remember it.
816  Value *InVal = PN.removeIncomingValue(BB, false);
817 
818  // Two options: either the InVal is a phi node defined in BB or it is some
819  // value that dominates BB.
820  PHINode *InValPhi = dyn_cast<PHINode>(InVal);
821  if (InValPhi && InValPhi->getParent() == BB) {
822  // Add all of the input values of the input PHI as inputs of this phi.
823  for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
824  PN.addIncoming(InValPhi->getIncomingValue(i),
825  InValPhi->getIncomingBlock(i));
826  } else {
827  // Otherwise, add one instance of the dominating value for each edge that
828  // we will be adding.
829  if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
830  for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
831  PN.addIncoming(InVal, BBPN->getIncomingBlock(i));
832  } else {
833  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
834  PN.addIncoming(InVal, *PI);
835  }
836  }
837  }
838 
839  // The PHIs are now updated, change everything that refers to BB to use
840  // DestBB and remove BB.
841  BB->replaceAllUsesWith(DestBB);
842  BB->eraseFromParent();
843  ++NumBlocksElim;
844 
845  LLVM_DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
846 }
847 
848 // Computes a map of base pointer relocation instructions to corresponding
849 // derived pointer relocation instructions given a vector of all relocate calls
850 static void computeBaseDerivedRelocateMap(
851  const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls,
852  DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>>
853  &RelocateInstMap) {
854  // Collect information in two maps: one primarily for locating the base object
855  // while filling the second map; the second map is the final structure holding
856  // a mapping between Base and corresponding Derived relocate calls
857  DenseMap<std::pair<unsigned, unsigned>, GCRelocateInst *> RelocateIdxMap;
858  for (auto *ThisRelocate : AllRelocateCalls) {
859  auto K = std::make_pair(ThisRelocate->getBasePtrIndex(),
860  ThisRelocate->getDerivedPtrIndex());
861  RelocateIdxMap.insert(std::make_pair(K, ThisRelocate));
862  }
863  for (auto &Item : RelocateIdxMap) {
864  std::pair<unsigned, unsigned> Key = Item.first;
865  if (Key.first == Key.second)
866  // Base relocation: nothing to insert
867  continue;
868 
869  GCRelocateInst *I = Item.second;
870  auto BaseKey = std::make_pair(Key.first, Key.first);
871 
872  // We're iterating over RelocateIdxMap so we cannot modify it.
873  auto MaybeBase = RelocateIdxMap.find(BaseKey);
874  if (MaybeBase == RelocateIdxMap.end())
875  // TODO: We might want to insert a new base object relocate and gep off
876  // that, if there are enough derived object relocates.
877  continue;
878 
879  RelocateInstMap[MaybeBase->second].push_back(I);
880  }
881 }
882 
883 // Accepts a GEP and extracts the operands into a vector provided they're all
884 // small integer constants
885 static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP,
886  SmallVectorImpl<Value *> &OffsetV) {
887  for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
888  // Only accept small constant integer operands
889  auto Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
890  if (!Op || Op->getZExtValue() > 20)
891  return false;
892  }
893 
894  for (unsigned i = 1; i < GEP->getNumOperands(); i++)
895  OffsetV.push_back(GEP->getOperand(i));
896  return true;
897 }
898 
899 // Takes a RelocatedBase (base pointer relocation instruction) and Targets to
900 // replace, computes a replacement, and applies it.
901 static bool
902 simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase,
903  const SmallVectorImpl<GCRelocateInst *> &Targets) {
904  bool MadeChange = false;
905  // We must ensure the relocation of derived pointer is defined after
906  // relocation of base pointer. If we find a relocation corresponding to base
907  // defined earlier than relocation of base then we move relocation of base
908  // right before found relocation. We consider only relocation in the same
909  // basic block as relocation of base. Relocations from other basic block will
910  // be skipped by optimization and we do not care about them.
911  for (auto R = RelocatedBase->getParent()->getFirstInsertionPt();
912  &*R != RelocatedBase; ++R)
913  if (auto RI = dyn_cast<GCRelocateInst>(R))
914  if (RI->getStatepoint() == RelocatedBase->getStatepoint())
915  if (RI->getBasePtrIndex() == RelocatedBase->getBasePtrIndex()) {
916  RelocatedBase->moveBefore(RI);
917  break;
918  }
919 
920  for (GCRelocateInst *ToReplace : Targets) {
921  assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() &&
922  "Not relocating a derived object of the original base object");
923  if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) {
924  // A duplicate relocate call. TODO: coalesce duplicates.
925  continue;
926  }
927 
928  if (RelocatedBase->getParent() != ToReplace->getParent()) {
929  // Base and derived relocates are in different basic blocks.
930  // In this case transform is only valid when base dominates derived
931  // relocate. However it would be too expensive to check dominance
932  // for each such relocate, so we skip the whole transformation.
933  continue;
934  }
935 
936  Value *Base = ToReplace->getBasePtr();
937  auto Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr());
938  if (!Derived || Derived->getPointerOperand() != Base)
939  continue;
940 
941  SmallVector<Value *, 2> OffsetV;
942  if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
943  continue;
944 
945  // Create a Builder and replace the target callsite with a gep
946  assert(RelocatedBase->getNextNode() &&
947  "Should always have one since it's not a terminator");
948 
949  // Insert after RelocatedBase
950  IRBuilder<> Builder(RelocatedBase->getNextNode());
951  Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
952 
953  // If gc_relocate does not match the actual type, cast it to the right type.
954  // In theory, there must be a bitcast after gc_relocate if the type does not
955  // match, and we should reuse it to get the derived pointer. But it could be
956  // cases like this:
957  // bb1:
958  // ...
959  // %g1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
960  // br label %merge
961  //
962  // bb2:
963  // ...
964  // %g2 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
965  // br label %merge
966  //
967  // merge:
968  // %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ]
969  // %cast = bitcast i8 addrspace(1)* %p1 to i32 addrspace(1)*
970  //
971  // In this case, we cannot find the bitcast any more. So we insert a new bitcast
972  // whether or not one already exists. In this way, we can handle all cases, and
973  // the extra bitcast should be optimized away in later passes.
974  Value *ActualRelocatedBase = RelocatedBase;
975  if (RelocatedBase->getType() != Base->getType()) {
976  ActualRelocatedBase =
977  Builder.CreateBitCast(RelocatedBase, Base->getType());
978  }
979  Value *Replacement = Builder.CreateGEP(
980  Derived->getSourceElementType(), ActualRelocatedBase, makeArrayRef(OffsetV));
981  Replacement->takeName(ToReplace);
982  // If the newly generated derived pointer's type does not match the original derived
983  // pointer's type, cast the new derived pointer to match it. Same reasoning as above.
984  Value *ActualReplacement = Replacement;
985  if (Replacement->getType() != ToReplace->getType()) {
986  ActualReplacement =
987  Builder.CreateBitCast(Replacement, ToReplace->getType());
988  }
989  ToReplace->replaceAllUsesWith(ActualReplacement);
990  ToReplace->eraseFromParent();
991 
992  MadeChange = true;
993  }
994  return MadeChange;
995 }
996 
997 // Turns this:
998 //
999 // %base = ...
1000 // %ptr = gep %base + 15
1001 // %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1002 // %base' = relocate(%tok, i32 4, i32 4)
1003 // %ptr' = relocate(%tok, i32 4, i32 5)
1004 // %val = load %ptr'
1005 //
1006 // into this:
1007 //
1008 // %base = ...
1009 // %ptr = gep %base + 15
1010 // %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1011 // %base' = gc.relocate(%tok, i32 4, i32 4)
1012 // %ptr' = gep %base' + 15
1013 // %val = load %ptr'
1014 bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) {
1015  bool MadeChange = false;
1016  SmallVector<GCRelocateInst *, 2> AllRelocateCalls;
1017 
1018  for (auto *U : I.users())
1019  if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U))
1020  // Collect all the relocate calls associated with a statepoint
1021  AllRelocateCalls.push_back(Relocate);
1022 
1023  // We need at least one base pointer relocation + one derived pointer
1024  // relocation to mangle
1025  if (AllRelocateCalls.size() < 2)
1026  return false;
1027 
1028  // RelocateInstMap is a mapping from the base relocate instruction to the
1029  // corresponding derived relocate instructions
1030  DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>> RelocateInstMap;
1031  computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
1032  if (RelocateInstMap.empty())
1033  return false;
1034 
1035  for (auto &Item : RelocateInstMap)
1036  // Item.first is the RelocatedBase to offset against
1037  // Item.second is the vector of Targets to replace
1038  MadeChange = simplifyRelocatesOffABase(Item.first, Item.second);
1039  return MadeChange;
1040 }
1041 
1042 /// SinkCast - Sink the specified cast instruction into its user blocks
1043 static bool SinkCast(CastInst *CI) {
1044  BasicBlock *DefBB = CI->getParent();
1045 
1046  /// InsertedCasts - Only insert a cast in each block once.
1047  DenseMap<BasicBlock*, CastInst*> InsertedCasts;
1048 
1049  bool MadeChange = false;
1050  for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
1051  UI != E; ) {
1052  Use &TheUse = UI.getUse();
1053  Instruction *User = cast<Instruction>(*UI);
1054 
1055  // Figure out which BB this cast is used in. For PHI's this is the
1056  // appropriate predecessor block.
1057  BasicBlock *UserBB = User->getParent();
1058  if (PHINode *PN = dyn_cast<PHINode>(User)) {
1059  UserBB = PN->getIncomingBlock(TheUse);
1060  }
1061 
1062  // Preincrement use iterator so we don't invalidate it.
1063  ++UI;
1064 
1065  // The first insertion point of a block containing an EH pad is after the
1066  // pad. If the pad is the user, we cannot sink the cast past the pad.
1067  if (User->isEHPad())
1068  continue;
1069 
1070  // If the block selected to receive the cast is an EH pad that does not
1071  // allow non-PHI instructions before the terminator, we can't sink the
1072  // cast.
1073  if (UserBB->getTerminator()->isEHPad())
1074  continue;
1075 
1076  // If this user is in the same block as the cast, don't change the cast.
1077  if (UserBB == DefBB) continue;
1078 
1079  // If we have already inserted a cast into this block, use it.
1080  CastInst *&InsertedCast = InsertedCasts[UserBB];
1081 
1082  if (!InsertedCast) {
1083  BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1084  assert(InsertPt != UserBB->end());
1085  InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0),
1086  CI->getType(), "", &*InsertPt);
1087  InsertedCast->setDebugLoc(CI->getDebugLoc());
1088  }
1089 
1090  // Replace a use of the cast with a use of the new cast.
1091  TheUse = InsertedCast;
1092  MadeChange = true;
1093  ++NumCastUses;
1094  }
1095 
1096  // If we removed all uses, nuke the cast.
1097  if (CI->use_empty()) {
1098  salvageDebugInfo(*CI);
1099  CI->eraseFromParent();
1100  MadeChange = true;
1101  }
1102 
1103  return MadeChange;
1104 }
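// An illustrative IR sketch of the sinking done above (names are made up): a
// no-op cast defined in one block but used in another is re-created in each
// user block, so no cross-block virtual register is needed to carry it:
//
//   entry:
//     %c = bitcast i8* %p to i32*
//     br i1 %cond, label %use, label %exit
//   use:
//     %v = load i32, i32* %c
//
// becomes
//
//   use:
//     %c1 = bitcast i8* %p to i32*
//     %v = load i32, i32* %c1
//
// with the original %c erased once it has no remaining uses.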
1105 
1106 /// If the specified cast instruction is a noop copy (e.g. it's casting from
1107 /// one pointer type to another, i32->i8 on PPC), sink it into user blocks to
1108 /// reduce the number of virtual registers that must be created and coalesced.
1109 ///
1110 /// Return true if any changes are made.
1111 static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
1112  const DataLayout &DL) {
1113  // Sink only "cheap" (or nop) address-space casts. This is a weaker condition
1114  // than sinking only nop casts, but is helpful on some platforms.
1115  if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) {
1116  if (!TLI.isCheapAddrSpaceCast(ASC->getSrcAddressSpace(),
1117  ASC->getDestAddressSpace()))
1118  return false;
1119  }
1120 
1121  // If this is a noop copy,
1122  EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType());
1123  EVT DstVT = TLI.getValueType(DL, CI->getType());
1124 
1125  // This is an fp<->int conversion?
1126  if (SrcVT.isInteger() != DstVT.isInteger())
1127  return false;
1128 
1129  // If this is an extension, it will be a zero or sign extension, which
1130  // isn't a noop.
1131  if (SrcVT.bitsLT(DstVT)) return false;
1132 
1133  // If these values will be promoted, find out what they will be promoted
1134  // to. This helps us consider truncates on PPC as noop copies when they
1135  // are.
1136  if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
1137  TargetLowering::TypePromoteInteger)
1138  SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
1139  if (TLI.getTypeAction(CI->getContext(), DstVT) ==
1140  TargetLowering::TypePromoteInteger)
1141  DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
1142 
1143  // If, after promotion, these are the same types, this is a noop copy.
1144  if (SrcVT != DstVT)
1145  return false;
1146 
1147  return SinkCast(CI);
1148 }
1149 
1150 /// Try to combine CI into a call to the llvm.uadd.with.overflow intrinsic if
1151 /// possible.
1152 ///
1153 /// Return true if any changes were made.
1155  Value *A, *B;
1156  Instruction *AddI;
1157  if (!match(CI,
1158  m_UAddWithOverflow(m_Value(A), m_Value(B), m_Instruction(AddI))))
1159  return false;
1160 
1161  Type *Ty = AddI->getType();
1162  if (!isa<IntegerType>(Ty))
1163  return false;
1164 
1165  // We don't want to move around uses of condition values this late, so we
1166  // check if it is legal to create the call to the intrinsic in the basic
1167  // block containing the icmp:
1168 
1169  if (AddI->getParent() != CI->getParent() && !AddI->hasOneUse())
1170  return false;
1171 
1172 #ifndef NDEBUG
1173  // Someday m_UAddWithOverflow may get smarter, but this is a safe assumption
1174  // for now:
1175  if (AddI->hasOneUse())
1176  assert(*AddI->user_begin() == CI && "expected!");
1177 #endif
1178 
1179  Module *M = CI->getModule();
1180  Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
1181 
1182  auto *InsertPt = AddI->hasOneUse() ? CI : AddI;
1183 
1184  DebugLoc Loc = CI->getDebugLoc();
1185  auto *UAddWithOverflow =
1186  CallInst::Create(F, {A, B}, "uadd.overflow", InsertPt);
1187  UAddWithOverflow->setDebugLoc(Loc);
1188  auto *UAdd = ExtractValueInst::Create(UAddWithOverflow, 0, "uadd", InsertPt);
1189  UAdd->setDebugLoc(Loc);
1190  auto *Overflow =
1191  ExtractValueInst::Create(UAddWithOverflow, 1, "overflow", InsertPt);
1192  Overflow->setDebugLoc(Loc);
1193 
1194  CI->replaceAllUsesWith(Overflow);
1195  AddI->replaceAllUsesWith(UAdd);
1196  CI->eraseFromParent();
1197  AddI->eraseFromParent();
1198  return true;
1199 }
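// Illustrative IR for the pattern matched above and the form it produces
// (value names are made up):
//
//   ; before
//   %add = add i32 %a, %b
//   %cmp = icmp ult i32 %add, %a                 ; unsigned-overflow check
//
//   ; after
//   %uadd.overflow = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
//   %uadd = extractvalue { i32, i1 } %uadd.overflow, 0
//   %overflow = extractvalue { i32, i1 } %uadd.overflow, 1
//
// Uses of %add are rewritten to %uadd and uses of %cmp to %overflow, then the
// original add and icmp are erased.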
1200 
1201 /// Sink the given CmpInst into user blocks to reduce the number of virtual
1202 /// registers that must be created and coalesced. This is a clear win except on
1203 /// targets with multiple condition code registers (PowerPC), where it might
1204 /// lose; some adjustment may be wanted there.
1205 ///
1206 /// Return true if any changes are made.
1207 static bool SinkCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
1208  BasicBlock *DefBB = CI->getParent();
1209 
1210  // Avoid sinking soft-FP comparisons, since this can move them into a loop.
1211  if (TLI && TLI->useSoftFloat() && isa<FCmpInst>(CI))
1212  return false;
1213 
1214  // Only insert a cmp in each block once.
1215  DenseMap<BasicBlock*, CmpInst*> InsertedCmps;
1216 
1217  bool MadeChange = false;
1218  for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
1219  UI != E; ) {
1220  Use &TheUse = UI.getUse();
1221  Instruction *User = cast<Instruction>(*UI);
1222 
1223  // Preincrement use iterator so we don't invalidate it.
1224  ++UI;
1225 
1226  // Don't bother for PHI nodes.
1227  if (isa<PHINode>(User))
1228  continue;
1229 
1230  // Figure out which BB this cmp is used in.
1231  BasicBlock *UserBB = User->getParent();
1232 
1233  // If this user is in the same block as the cmp, don't change the cmp.
1234  if (UserBB == DefBB) continue;
1235 
1236  // If we have already inserted a cmp into this block, use it.
1237  CmpInst *&InsertedCmp = InsertedCmps[UserBB];
1238 
1239  if (!InsertedCmp) {
1240  BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1241  assert(InsertPt != UserBB->end());
1242  InsertedCmp =
1243  CmpInst::Create(CI->getOpcode(), CI->getPredicate(),
1244  CI->getOperand(0), CI->getOperand(1), "", &*InsertPt);
1245  // Propagate the debug info.
1246  InsertedCmp->setDebugLoc(CI->getDebugLoc());
1247  }
1248 
1249  // Replace a use of the cmp with a use of the new cmp.
1250  TheUse = InsertedCmp;
1251  MadeChange = true;
1252  ++NumCmpUses;
1253  }
1254 
1255  // If we removed all uses, nuke the cmp.
1256  if (CI->use_empty()) {
1257  CI->eraseFromParent();
1258  MadeChange = true;
1259  }
1260 
1261  return MadeChange;
1262 }
1263 
1264 static bool OptimizeCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
1265  if (SinkCmpExpression(CI, TLI))
1266  return true;
1267 
1268  if (CombineUAddWithOverflow(CI))
1269  return true;
1270 
1271  return false;
1272 }
1273 
1274 /// Duplicate and sink the given 'and' instruction into user blocks where it is
1275 /// used in a compare to allow isel to generate better code for targets where
1276 /// this operation can be combined.
1277 ///
1278 /// Return true if any changes are made.
1279 static bool sinkAndCmp0Expression(Instruction *AndI,
1280  const TargetLowering &TLI,
1281  SetOfInstrs &InsertedInsts) {
1282  // Double-check that we're not trying to optimize an instruction that was
1283  // already optimized by some other part of this pass.
1284  assert(!InsertedInsts.count(AndI) &&
1285  "Attempting to optimize already optimized and instruction");
1286  (void) InsertedInsts;
1287 
1288  // Nothing to do for single use in same basic block.
1289  if (AndI->hasOneUse() &&
1290  AndI->getParent() == cast<Instruction>(*AndI->user_begin())->getParent())
1291  return false;
1292 
1293  // Try to avoid cases where sinking/duplicating is likely to increase register
1294  // pressure.
1295  if (!isa<ConstantInt>(AndI->getOperand(0)) &&
1296  !isa<ConstantInt>(AndI->getOperand(1)) &&
1297  AndI->getOperand(0)->hasOneUse() && AndI->getOperand(1)->hasOneUse())
1298  return false;
1299 
1300  for (auto *U : AndI->users()) {
1301  Instruction *User = cast<Instruction>(U);
1302 
1303  // Only sink for and mask feeding icmp with 0.
1304  if (!isa<ICmpInst>(User))
1305  return false;
1306 
1307  auto *CmpC = dyn_cast<ConstantInt>(User->getOperand(1));
1308  if (!CmpC || !CmpC->isZero())
1309  return false;
1310  }
1311 
1312  if (!TLI.isMaskAndCmp0FoldingBeneficial(*AndI))
1313  return false;
1314 
1315  LLVM_DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n");
1316  LLVM_DEBUG(AndI->getParent()->dump());
1317 
1318  // Push the 'and' into the same block as the icmp 0. There should only be
1319  // one (icmp (and, 0)) in each block, since CSE/GVN should have removed any
1320  // others, so we don't need to keep track of which BBs we insert into.
1321  for (Value::user_iterator UI = AndI->user_begin(), E = AndI->user_end();
1322  UI != E; ) {
1323  Use &TheUse = UI.getUse();
1324  Instruction *User = cast<Instruction>(*UI);
1325 
1326  // Preincrement use iterator so we don't invalidate it.
1327  ++UI;
1328 
1329  LLVM_DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n");
1330 
1331  // Keep the 'and' in the same place if the use is already in the same block.
1332  Instruction *InsertPt =
1333  User->getParent() == AndI->getParent() ? AndI : User;
1334  Instruction *InsertedAnd =
1335  BinaryOperator::Create(Instruction::And, AndI->getOperand(0),
1336  AndI->getOperand(1), "", InsertPt);
1337  // Propagate the debug info.
1338  InsertedAnd->setDebugLoc(AndI->getDebugLoc());
1339 
1340  // Replace a use of the 'and' with a use of the new 'and'.
1341  TheUse = InsertedAnd;
1342  ++NumAndUses;
1343  LLVM_DEBUG(User->getParent()->dump());
1344  }
1345 
1346  // We removed all uses, nuke the and.
1347  AndI->eraseFromParent();
1348  return true;
1349 }
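// An illustrative IR sketch of the duplication done above (names are made up):
// an 'and' with a constant mask whose only users are icmp-eq-0 instructions in
// other blocks is cloned next to each compare, so isel can fold the mask into
// a per-block test:
//
//   ; before
//   bb0:
//     %m = and i64 %x, 255
//     ...
//   bb1:
//     %c1 = icmp eq i64 %m, 0
//   bb2:
//     %c2 = icmp eq i64 %m, 0
//
//   ; after (the original %m is erased)
//   bb1:
//     %m1 = and i64 %x, 255
//     %c1 = icmp eq i64 %m1, 0
//   bb2:
//     %m2 = and i64 %x, 255
//     %c2 = icmp eq i64 %m2, 0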
1350 
1351 /// Check if the candidates could be combined with a shift instruction, which
1352 /// includes:
1353 /// 1. Truncate instruction
1354 /// 2. And instruction and the imm is a mask of the low bits:
1355 /// imm & (imm+1) == 0
1356 static bool isExtractBitsCandidateUse(Instruction *User) {
1357  if (!isa<TruncInst>(User)) {
1358  if (User->getOpcode() != Instruction::And ||
1359  !isa<ConstantInt>(User->getOperand(1)))
1360  return false;
1361 
1362  const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue();
1363 
1364  if ((Cimm & (Cimm + 1)).getBoolValue())
1365  return false;
1366  }
1367  return true;
1368 }
1369 
1370 /// Sink both shift and truncate instruction to the use of truncate's BB.
1371 static bool
1372 SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
1373  DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
1374  const TargetLowering &TLI, const DataLayout &DL) {
1375  BasicBlock *UserBB = User->getParent();
1376  DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
1377  TruncInst *TruncI = dyn_cast<TruncInst>(User);
1378  bool MadeChange = false;
1379 
1380  for (Value::user_iterator TruncUI = TruncI->user_begin(),
1381  TruncE = TruncI->user_end();
1382  TruncUI != TruncE;) {
1383 
1384  Use &TruncTheUse = TruncUI.getUse();
1385  Instruction *TruncUser = cast<Instruction>(*TruncUI);
1386  // Preincrement use iterator so we don't invalidate it.
1387 
1388  ++TruncUI;
1389 
1390  int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
1391  if (!ISDOpcode)
1392  continue;
1393 
1394  // If the use is actually a legal node, there will not be an
1395  // implicit truncate.
1396  // FIXME: always querying the result type is just an
1397  // approximation; some nodes' legality is determined by the
1398  // operand or other means. There's no good way to find out though.
1399  if (TLI.isOperationLegalOrCustom(
1400  ISDOpcode, TLI.getValueType(DL, TruncUser->getType(), true)))
1401  continue;
1402 
1403  // Don't bother for PHI nodes.
1404  if (isa<PHINode>(TruncUser))
1405  continue;
1406 
1407  BasicBlock *TruncUserBB = TruncUser->getParent();
1408 
1409  if (UserBB == TruncUserBB)
1410  continue;
1411 
1412  BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
1413  CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
1414 
1415  if (!InsertedShift && !InsertedTrunc) {
1416  BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
1417  assert(InsertPt != TruncUserBB->end());
1418  // Sink the shift
1419  if (ShiftI->getOpcode() == Instruction::AShr)
1420  InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI,
1421  "", &*InsertPt);
1422  else
1423  InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,
1424  "", &*InsertPt);
1425  InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
1426 
1427  // Sink the trunc
1428  BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
1429  TruncInsertPt++;
1430  assert(TruncInsertPt != TruncUserBB->end());
1431 
1432  InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
1433  TruncI->getType(), "", &*TruncInsertPt);
1434  InsertedTrunc->setDebugLoc(TruncI->getDebugLoc());
1435 
1436  MadeChange = true;
1437 
1438  TruncTheUse = InsertedTrunc;
1439  }
1440  }
1441  return MadeChange;
1442 }
1443 
1444 /// Sink the shift *right* instruction into user blocks if the uses could
1445 /// potentially be combined with this shift instruction and generate BitExtract
1446 /// instruction. It will only be applied if the architecture supports BitExtract
1447 /// instruction. Here is an example:
1448 /// BB1:
1449 /// %x.extract.shift = lshr i64 %arg1, 32
1450 /// BB2:
1451 /// %x.extract.trunc = trunc i64 %x.extract.shift to i16
1452 /// ==>
1453 ///
1454 /// BB2:
1455 /// %x.extract.shift.1 = lshr i64 %arg1, 32
1456 /// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
1457 ///
1458 /// CodeGen will recognize the pattern in BB2 and generate BitExtract
1459 /// instruction.
1460 /// Return true if any changes are made.
1461 static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
1462  const TargetLowering &TLI,
1463  const DataLayout &DL) {
1464  BasicBlock *DefBB = ShiftI->getParent();
1465 
1466  /// Only insert instructions in each block once.
1467  DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;
1468 
1469  bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(DL, ShiftI->getType()));
1470 
1471  bool MadeChange = false;
1472  for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
1473  UI != E;) {
1474  Use &TheUse = UI.getUse();
1475  Instruction *User = cast<Instruction>(*UI);
1476  // Preincrement use iterator so we don't invalidate it.
1477  ++UI;
1478 
1479  // Don't bother for PHI nodes.
1480  if (isa<PHINode>(User))
1481  continue;
1482 
1483  if (!isExtractBitsCandidateUse(User))
1484  continue;
1485 
1486  BasicBlock *UserBB = User->getParent();
1487 
1488  if (UserBB == DefBB) {
1489  // If the shift and truncate instructions are in the same BB, the use of
1490  // the truncate (TruncUse) may still introduce another truncate if not
1491  // legal. In this case, we would like to sink both shift and truncate
1492  // instruction to the BB of TruncUse.
1493  // for example:
1494  // BB1:
1495  // i64 shift.result = lshr i64 opnd, imm
1496  // trunc.result = trunc shift.result to i16
1497  //
1498  // BB2:
1499  // ----> We will have an implicit truncate here if the architecture does
1500  // not have i16 compare.
1501  // cmp i16 trunc.result, opnd2
1502  //
1503  if (isa<TruncInst>(User) && shiftIsLegal
1504  // If the type of the truncate is legal, no truncate will be
1505  // introduced in other basic blocks.
1506  &&
1507  (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType()))))
1508  MadeChange =
1509  SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL);
1510 
1511  continue;
1512  }
1513  // If we have already inserted a shift into this block, use it.
1514  BinaryOperator *&InsertedShift = InsertedShifts[UserBB];
1515 
1516  if (!InsertedShift) {
1517  BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1518  assert(InsertPt != UserBB->end());
1519 
1520  if (ShiftI->getOpcode() == Instruction::AShr)
1521  InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI,
1522  "", &*InsertPt);
1523  else
1524  InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,
1525  "", &*InsertPt);
1526  InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
1527 
1528  MadeChange = true;
1529  }
1530 
1531  // Replace a use of the shift with a use of the new shift.
1532  TheUse = InsertedShift;
1533  }
1534 
1535  // If we removed all uses, nuke the shift.
1536  if (ShiftI->use_empty()) {
1537  salvageDebugInfo(*ShiftI);
1538  ShiftI->eraseFromParent();
1539  }
1540 
1541  return MadeChange;
1542 }
1543 
1544 /// If counting leading or trailing zeros is an expensive operation and a zero
1545 /// input is defined, add a check for zero to avoid calling the intrinsic.
1546 ///
1547 /// We want to transform:
1548 /// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
1549 ///
1550 /// into:
1551 /// entry:
1552 /// %cmpz = icmp eq i64 %A, 0
1553 /// br i1 %cmpz, label %cond.end, label %cond.false
1554 /// cond.false:
1555 /// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true)
1556 /// br label %cond.end
1557 /// cond.end:
1558 /// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
1559 ///
1560 /// If the transform is performed, return true and set ModifiedDT to true.
1561 static bool despeculateCountZeros(IntrinsicInst *CountZeros,
1562  const TargetLowering *TLI,
1563  const DataLayout *DL,
1564  bool &ModifiedDT) {
1565  if (!TLI || !DL)
1566  return false;
1567 
1568  // If a zero input is undefined, it doesn't make sense to despeculate that.
1569  if (match(CountZeros->getOperand(1), m_One()))
1570  return false;
1571 
1572  // If it's cheap to speculate, there's nothing to do.
1573  auto IntrinsicID = CountZeros->getIntrinsicID();
1574  if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz()) ||
1575  (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz()))
1576  return false;
1577 
1578  // Only handle legal scalar cases. Anything else requires too much work.
1579  Type *Ty = CountZeros->getType();
1580  unsigned SizeInBits = Ty->getPrimitiveSizeInBits();
1581  if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits())
1582  return false;
1583 
1584  // The intrinsic will be sunk behind a compare against zero and branch.
1585  BasicBlock *StartBlock = CountZeros->getParent();
1586  BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false");
1587 
1588  // Create another block after the count zero intrinsic. A PHI will be added
1589  // in this block to select the result of the intrinsic or the bit-width
1590  // constant if the input to the intrinsic is zero.
1591  BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(CountZeros));
1592  BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end");
1593 
1594  // Set up a builder to create a compare, conditional branch, and PHI.
1595  IRBuilder<> Builder(CountZeros->getContext());
1596  Builder.SetInsertPoint(StartBlock->getTerminator());
1597  Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc());
1598 
1599  // Replace the unconditional branch that was created by the first split with
1600  // a compare against zero and a conditional branch.
1601  Value *Zero = Constant::getNullValue(Ty);
1602  Value *Cmp = Builder.CreateICmpEQ(CountZeros->getOperand(0), Zero, "cmpz");
1603  Builder.CreateCondBr(Cmp, EndBlock, CallBlock);
1604  StartBlock->getTerminator()->eraseFromParent();
1605 
1606  // Create a PHI in the end block to select either the output of the intrinsic
1607  // or the bit width of the operand.
1608  Builder.SetInsertPoint(&EndBlock->front());
1609  PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz");
1610  CountZeros->replaceAllUsesWith(PN);
1611  Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits));
1612  PN->addIncoming(BitWidth, StartBlock);
1613  PN->addIncoming(CountZeros, CallBlock);
1614 
1615  // We are explicitly handling the zero case, so we can set the intrinsic's
1616  // undefined zero argument to 'true'. This will also prevent reprocessing the
1617  // intrinsic; we only despeculate when a zero input is defined.
1618  CountZeros->setArgOperand(1, Builder.getTrue());
1619  ModifiedDT = true;
1620  return true;
1621 }
1622 
1623 bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
1624  BasicBlock *BB = CI->getParent();
1625 
1626  // Lower inline assembly if we can.
1627  // If we found an inline asm expression, and if the target knows how to
1628  // lower it to normal LLVM code, do so now.
1629  if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
1630  if (TLI->ExpandInlineAsm(CI)) {
1631  // Avoid invalidating the iterator.
1632  CurInstIterator = BB->begin();
1633  // Avoid processing instructions out of order, which could cause
1634  // reuse before a value is defined.
1635  SunkAddrs.clear();
1636  return true;
1637  }
1638  // Sink address computing for memory operands into the block.
1639  if (optimizeInlineAsmInst(CI))
1640  return true;
1641  }
1642 
1643  // Align the pointer arguments to this call if the target thinks it's a good
1644  // idea.
1645  unsigned MinSize, PrefAlign;
1646  if (TLI && TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
1647  for (auto &Arg : CI->arg_operands()) {
1648  // We want to align both objects whose address is used directly and
1649  // objects whose address is used in casts and GEPs, though it only makes
1650  // sense for GEPs if the offset is a multiple of the desired alignment and
1651  // if size - offset meets the size threshold.
1652  if (!Arg->getType()->isPointerTy())
1653  continue;
1654  APInt Offset(DL->getIndexSizeInBits(
1655  cast<PointerType>(Arg->getType())->getAddressSpace()),
1656  0);
1657  Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
1658  uint64_t Offset2 = Offset.getLimitedValue();
1659  if ((Offset2 & (PrefAlign-1)) != 0)
1660  continue;
1661  AllocaInst *AI;
1662  if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlignment() < PrefAlign &&
1663  DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
1664  AI->setAlignment(PrefAlign);
1665  // Global variables can only be aligned if they are defined in this
1666  // object (i.e. they are uniquely initialized in this object), and
1667  // over-aligning global variables that have an explicit section is
1668  // forbidden.
1669  GlobalVariable *GV;
1670  if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() &&
1671  GV->getPointerAlignment(*DL) < PrefAlign &&
1672  DL->getTypeAllocSize(GV->getValueType()) >=
1673  MinSize + Offset2)
1674  GV->setAlignment(PrefAlign);
1675  }
1676  // If this is a memcpy (or similar) then we may be able to improve the
1677  // alignment.
1678  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
1679  unsigned DestAlign = getKnownAlignment(MI->getDest(), *DL);
1680  if (DestAlign > MI->getDestAlignment())
1681  MI->setDestAlignment(DestAlign);
1682  if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
1683  unsigned SrcAlign = getKnownAlignment(MTI->getSource(), *DL);
1684  if (SrcAlign > MTI->getSourceAlignment())
1685  MTI->setSourceAlignment(SrcAlign);
1686  }
1687  }
1688  }
1689 
1690  // If we have a cold call site, try to sink addressing computation into the
1691  // cold block. This interacts with our handling for loads and stores to
1692  // ensure that we can fold all uses of a potential addressing computation
1693  // into their uses. TODO: generalize this to work over profiling data
1694  if (!OptSize && CI->hasFnAttr(Attribute::Cold))
1695  for (auto &Arg : CI->arg_operands()) {
1696  if (!Arg->getType()->isPointerTy())
1697  continue;
1698  unsigned AS = Arg->getType()->getPointerAddressSpace();
1699  return optimizeMemoryInst(CI, Arg, Arg->getType(), AS);
1700  }
1701 
1702  IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
1703  if (II) {
1704  switch (II->getIntrinsicID()) {
1705  default: break;
1706  case Intrinsic::objectsize: {
1707  // Lower all uses of llvm.objectsize.*
1708  ConstantInt *RetVal =
1709  lowerObjectSizeCall(II, *DL, TLInfo, /*MustSucceed=*/true);
1710 
1711  resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
1712  replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
1713  });
1714  return true;
1715  }
1716  case Intrinsic::is_constant: {
1717  // If is_constant hasn't folded away yet, lower it to false now.
1718  Constant *RetVal = ConstantInt::get(II->getType(), 0);
1719  resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
1720  replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
1721  });
1722  return true;
1723  }
1724  case Intrinsic::aarch64_stlxr:
1725  case Intrinsic::aarch64_stxr: {
1726  ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
1727  if (!ExtVal || !ExtVal->hasOneUse() ||
1728  ExtVal->getParent() == CI->getParent())
1729  return false;
1730  // Sink a zext feeding stlxr/stxr before it, so it can be folded into it.
1731  ExtVal->moveBefore(CI);
1732  // Mark this instruction as "inserted by CGP", so that other
1733  // optimizations don't touch it.
1734  InsertedInsts.insert(ExtVal);
1735  return true;
1736  }
1737  case Intrinsic::launder_invariant_group:
1738  case Intrinsic::strip_invariant_group: {
1739  Value *ArgVal = II->getArgOperand(0);
1740  auto it = LargeOffsetGEPMap.find(II);
1741  if (it != LargeOffsetGEPMap.end()) {
1742  // Merge entries in LargeOffsetGEPMap to reflect the RAUW.
1743  // Make sure not to have to deal with iterator invalidation
1744  // after possibly adding ArgVal to LargeOffsetGEPMap.
1745  auto GEPs = std::move(it->second);
1746  LargeOffsetGEPMap[ArgVal].append(GEPs.begin(), GEPs.end());
1747  LargeOffsetGEPMap.erase(II);
1748  }
1749 
1750  II->replaceAllUsesWith(ArgVal);
1751  II->eraseFromParent();
1752  return true;
1753  }
1754  case Intrinsic::cttz:
1755  case Intrinsic::ctlz:
1756  // If counting zeros is expensive, try to avoid it.
1757  return despeculateCountZeros(II, TLI, DL, ModifiedDT);
1758  }
1759 
1760  if (TLI) {
1761  SmallVector<Value*, 2> PtrOps;
1762  Type *AccessTy;
1763  if (TLI->getAddrModeArguments(II, PtrOps, AccessTy))
1764  while (!PtrOps.empty()) {
1765  Value *PtrVal = PtrOps.pop_back_val();
1766  unsigned AS = PtrVal->getType()->getPointerAddressSpace();
1767  if (optimizeMemoryInst(II, PtrVal, AccessTy, AS))
1768  return true;
1769  }
1770  }
1771  }
1772 
1773  // From here on out we're working with named functions.
1774  if (!CI->getCalledFunction()) return false;
1775 
1776  // Lower all default uses of _chk calls. This is very similar
1777  // to what InstCombineCalls does, but here we are only lowering calls
1778  // to fortified library functions (e.g. __memcpy_chk) that have the default
1779  // "don't know" as the objectsize. Anything else should be left alone.
1780  FortifiedLibCallSimplifier Simplifier(TLInfo, true);
1781  if (Value *V = Simplifier.optimizeCall(CI)) {
1782  CI->replaceAllUsesWith(V);
1783  CI->eraseFromParent();
1784  return true;
1785  }
1786 
1787  return false;
1788 }
1789 
1790 /// Look for opportunities to duplicate return instructions to the predecessor
1791 /// to enable tail call optimizations. The case it is currently looking for is:
1792 /// @code
1793 /// bb0:
1794 /// %tmp0 = tail call i32 @f0()
1795 /// br label %return
1796 /// bb1:
1797 /// %tmp1 = tail call i32 @f1()
1798 /// br label %return
1799 /// bb2:
1800 /// %tmp2 = tail call i32 @f2()
1801 /// br label %return
1802 /// return:
1803 /// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
1804 /// ret i32 %retval
1805 /// @endcode
1806 ///
1807 /// =>
1808 ///
1809 /// @code
1810 /// bb0:
1811 /// %tmp0 = tail call i32 @f0()
1812 /// ret i32 %tmp0
1813 /// bb1:
1814 /// %tmp1 = tail call i32 @f1()
1815 /// ret i32 %tmp1
1816 /// bb2:
1817 /// %tmp2 = tail call i32 @f2()
1818 /// ret i32 %tmp2
1819 /// @endcode
1820 bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) {
1821  if (!TLI)
1822  return false;
1823 
1824  ReturnInst *RetI = dyn_cast<ReturnInst>(BB->getTerminator());
1825  if (!RetI)
1826  return false;
1827 
1828  PHINode *PN = nullptr;
1829  BitCastInst *BCI = nullptr;
1830  Value *V = RetI->getReturnValue();
1831  if (V) {
1832  BCI = dyn_cast<BitCastInst>(V);
1833  if (BCI)
1834  V = BCI->getOperand(0);
1835 
1836  PN = dyn_cast<PHINode>(V);
1837  if (!PN)
1838  return false;
1839  }
1840 
1841  if (PN && PN->getParent() != BB)
1842  return false;
1843 
1844  // Make sure there are no instructions between the PHI and return, or that the
1845  // return is the first instruction in the block.
1846  if (PN) {
1847  BasicBlock::iterator BI = BB->begin();
1848  do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
1849  if (&*BI == BCI)
1850  // Also skip over the bitcast.
1851  ++BI;
1852  if (&*BI != RetI)
1853  return false;
1854  } else {
1855  BasicBlock::iterator BI = BB->begin();
1856  while (isa<DbgInfoIntrinsic>(BI)) ++BI;
1857  if (&*BI != RetI)
1858  return false;
1859  }
1860 
1861  /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
1862  /// call.
1863  const Function *F = BB->getParent();
1864  SmallVector<CallInst*, 4> TailCalls;
1865  if (PN) {
1866  for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
1867  CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I));
1868  // Make sure the phi value is indeed produced by the tail call.
1869  if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) &&
1870  TLI->mayBeEmittedAsTailCall(CI) &&
1871  attributesPermitTailCall(F, CI, RetI, *TLI))
1872  TailCalls.push_back(CI);
1873  }
1874  } else {
1875  SmallPtrSet<BasicBlock*, 4> VisitedBBs;
1876  for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
1877  if (!VisitedBBs.insert(*PI).second)
1878  continue;
1879 
1880  BasicBlock::InstListType &InstList = (*PI)->getInstList();
1881  BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin();
1882  BasicBlock::InstListType::reverse_iterator RE = InstList.rend();
1883  do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI));
1884  if (RI == RE)
1885  continue;
1886 
1887  CallInst *CI = dyn_cast<CallInst>(&*RI);
1888  if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&
1889  attributesPermitTailCall(F, CI, RetI, *TLI))
1890  TailCalls.push_back(CI);
1891  }
1892  }
1893 
1894  bool Changed = false;
1895  for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) {
1896  CallInst *CI = TailCalls[i];
1897  CallSite CS(CI);
1898 
1899  // Make sure the call instruction is followed by an unconditional branch to
1900  // the return block.
1901  BasicBlock *CallBB = CI->getParent();
1902  BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator());
1903  if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
1904  continue;
1905 
1906  // Duplicate the return into CallBB.
1907  (void)FoldReturnIntoUncondBranch(RetI, BB, CallBB);
1908  ModifiedDT = Changed = true;
1909  ++NumRetsDup;
1910  }
1911 
1912  // If we eliminated all predecessors of the block, delete the block now.
1913  if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
1914  BB->eraseFromParent();
1915 
1916  return Changed;
1917 }
1918 
1919 //===----------------------------------------------------------------------===//
1920 // Memory Optimization
1921 //===----------------------------------------------------------------------===//
1922 
1923 namespace {
1924 
1925 /// This is an extended version of TargetLowering::AddrMode
1926 /// which holds actual Value*'s for register values.
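///
/// For illustration, an address computed as %base + %idx * 8 + 16 could be
/// matched with BaseReg = %base, ScaledReg = %idx, Scale = 8 and
/// BaseOffs = 16 (a sketch; the exact split depends on which forms the
/// target reports as legal).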
1927 struct ExtAddrMode : public TargetLowering::AddrMode {
1928  Value *BaseReg = nullptr;
1929  Value *ScaledReg = nullptr;
1930  Value *OriginalValue = nullptr;
1931 
1932  enum FieldName {
1933  NoField = 0x00,
1934  BaseRegField = 0x01,
1935  BaseGVField = 0x02,
1936  BaseOffsField = 0x04,
1937  ScaledRegField = 0x08,
1938  ScaleField = 0x10,
1939  MultipleFields = 0xff
1940  };
1941 
1942  ExtAddrMode() = default;
1943 
1944  void print(raw_ostream &OS) const;
1945  void dump() const;
1946 
1947  FieldName compare(const ExtAddrMode &other) {
1948  // First check that the types are the same on each field, as differing types
1949  // are something we can't cope with later on.
1950  if (BaseReg && other.BaseReg &&
1951  BaseReg->getType() != other.BaseReg->getType())
1952  return MultipleFields;
1953  if (BaseGV && other.BaseGV &&
1954  BaseGV->getType() != other.BaseGV->getType())
1955  return MultipleFields;
1956  if (ScaledReg && other.ScaledReg &&
1957  ScaledReg->getType() != other.ScaledReg->getType())
1958  return MultipleFields;
1959 
1960  // Check each field to see if it differs.
1961  unsigned Result = NoField;
1962  if (BaseReg != other.BaseReg)
1963  Result |= BaseRegField;
1964  if (BaseGV != other.BaseGV)
1965  Result |= BaseGVField;
1966  if (BaseOffs != other.BaseOffs)
1967  Result |= BaseOffsField;
1968  if (ScaledReg != other.ScaledReg)
1969  Result |= ScaledRegField;
1970  // Don't count 0 as being a different scale, because that actually means
1971  // unscaled (which will already be counted by having no ScaledReg).
1972  if (Scale && other.Scale && Scale != other.Scale)
1973  Result |= ScaleField;
1974 
1975  if (countPopulation(Result) > 1)
1976  return MultipleFields;
1977  else
1978  return static_cast<FieldName>(Result);
1979  }
1980 
1981  // An AddrMode is trivial if it involves no calculation i.e. it is just a base
1982  // with no offset.
1983  bool isTrivial() {
1984  // An AddrMode is (BaseGV + BaseReg + BaseOffs + ScaleReg * Scale) so it is
1985  // trivial if at most one of these terms is nonzero, except that BaseGV and
1986  // BaseReg both being zero actually means a null pointer value, which we
1987  // consider to be 'non-zero' here.
1988  return !BaseOffs && !Scale && !(BaseGV && BaseReg);
1989  }
1990 
1991  Value *GetFieldAsValue(FieldName Field, Type *IntPtrTy) {
1992  switch (Field) {
1993  default:
1994  return nullptr;
1995  case BaseRegField:
1996  return BaseReg;
1997  case BaseGVField:
1998  return BaseGV;
1999  case ScaledRegField:
2000  return ScaledReg;
2001  case BaseOffsField:
2002  return ConstantInt::get(IntPtrTy, BaseOffs);
2003  }
2004  }
2005 
2006  void SetCombinedField(FieldName Field, Value *V,
2007  const SmallVectorImpl<ExtAddrMode> &AddrModes) {
2008  switch (Field) {
2009  default:
2010  llvm_unreachable("Unhandled fields are expected to be rejected earlier");
2011  break;
2012  case ExtAddrMode::BaseRegField:
2013  BaseReg = V;
2014  break;
2015  case ExtAddrMode::BaseGVField:
2016  // A combined BaseGV is an Instruction, not a GlobalValue, so it goes
2017  // in the BaseReg field.
2018  assert(BaseReg == nullptr);
2019  BaseReg = V;
2020  BaseGV = nullptr;
2021  break;
2022  case ExtAddrMode::ScaledRegField:
2023  ScaledReg = V;
2024  // If we have a mix of scaled and unscaled addrmodes then we want scale
2025  // to be the scale and not zero.
2026  if (!Scale)
2027  for (const ExtAddrMode &AM : AddrModes)
2028  if (AM.Scale) {
2029  Scale = AM.Scale;
2030  break;
2031  }
2032  break;
2033  case ExtAddrMode::BaseOffsField:
2034  // The offset is no longer a constant, so it goes in ScaledReg with a
2035  // scale of 1.
2036  assert(ScaledReg == nullptr);
2037  ScaledReg = V;
2038  Scale = 1;
2039  BaseOffs = 0;
2040  break;
2041  }
2042  }
2043 };
2044 
2045 } // end anonymous namespace
2046 
2047 #ifndef NDEBUG
2048 static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
2049  AM.print(OS);
2050  return OS;
2051 }
2052 #endif
2053 
2054 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2055 void ExtAddrMode::print(raw_ostream &OS) const {
2056  bool NeedPlus = false;
2057  OS << "[";
2058  if (BaseGV) {
2059  OS << (NeedPlus ? " + " : "")
2060  << "GV:";
2061  BaseGV->printAsOperand(OS, /*PrintType=*/false);
2062  NeedPlus = true;
2063  }
2064 
2065  if (BaseOffs) {
2066  OS << (NeedPlus ? " + " : "")
2067  << BaseOffs;
2068  NeedPlus = true;
2069  }
2070 
2071  if (BaseReg) {
2072  OS << (NeedPlus ? " + " : "")
2073  << "Base:";
2074  BaseReg->printAsOperand(OS, /*PrintType=*/false);
2075  NeedPlus = true;
2076  }
2077  if (Scale) {
2078  OS << (NeedPlus ? " + " : "")
2079  << Scale << "*";
2080  ScaledReg->printAsOperand(OS, /*PrintType=*/false);
2081  }
2082 
2083  OS << ']';
2084 }
2085 
2086 LLVM_DUMP_METHOD void ExtAddrMode::dump() const {
2087  print(dbgs());
2088  dbgs() << '\n';
2089 }
2090 #endif
2091 
2092 namespace {
2093 
2094 /// This class provides transaction-based operations on the IR.
2095 /// Every change made through this class is recorded in the internal state and
2096 /// can be undone (rollback) until commit is called.
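///
/// A minimal usage sketch (illustrative only; the variable names are
/// placeholders, the real callers live in the addressing-mode and type
/// promotion code below):
/// @code
/// TypePromotionTransaction TPT(RemovedInsts);
/// TypePromotionTransaction::ConstRestorationPt LastKnownGood =
///     TPT.getRestorationPoint();
/// TPT.mutateType(Inst, NewTy);            // recorded, can be undone
/// TPT.setOperand(Inst, 0, PromotedOpnd);  // recorded, can be undone
/// if (!Profitable)
///   TPT.rollback(LastKnownGood);          // undo both actions
/// else
///   TPT.commit();                         // make the changes permanent
/// @endcode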
2097 class TypePromotionTransaction {
2098  /// This represents the common interface of the individual transaction.
2099  /// Each class implements the logic for doing one specific modification on
2100  /// the IR via the TypePromotionTransaction.
2101  class TypePromotionAction {
2102  protected:
2103  /// The Instruction modified.
2104  Instruction *Inst;
2105 
2106  public:
2107  /// Constructor of the action.
2108  /// The constructor performs the related action on the IR.
2109  TypePromotionAction(Instruction *Inst) : Inst(Inst) {}
2110 
2111  virtual ~TypePromotionAction() = default;
2112 
2113  /// Undo the modification done by this action.
2114  /// When this method is called, the IR must be in the same state as it was
2115  /// before this action was applied.
2116  /// \pre Undoing the action works if and only if the IR is in the exact same
2117  /// state as it was directly after this action was applied.
2118  virtual void undo() = 0;
2119 
2120  /// Advocate every change made by this action.
2121  /// When the results on the IR of the action are to be kept, it is important
2122  /// to call this function, otherwise hidden information may be kept forever.
2123  virtual void commit() {
2124  // Nothing to be done, this action is not doing anything.
2125  }
2126  };
2127 
2128  /// Utility to remember the position of an instruction.
2129  class InsertionHandler {
2130  /// Position of an instruction.
2131  /// Either an instruction:
2132  /// - Is the first in a basic block: BB is used.
2133  /// - Has a previous instruction: PrevInst is used.
2134  union {
2135  Instruction *PrevInst;
2136  BasicBlock *BB;
2137  } Point;
2138 
2139  /// Remember whether or not the instruction had a previous instruction.
2140  bool HasPrevInstruction;
2141 
2142  public:
2143  /// Record the position of \p Inst.
2144  InsertionHandler(Instruction *Inst) {
2145  BasicBlock::iterator It = Inst->getIterator();
2146  HasPrevInstruction = (It != (Inst->getParent()->begin()));
2147  if (HasPrevInstruction)
2148  Point.PrevInst = &*--It;
2149  else
2150  Point.BB = Inst->getParent();
2151  }
2152 
2153  /// Insert \p Inst at the recorded position.
2154  void insert(Instruction *Inst) {
2155  if (HasPrevInstruction) {
2156  if (Inst->getParent())
2157  Inst->removeFromParent();
2158  Inst->insertAfter(Point.PrevInst);
2159  } else {
2160  Instruction *Position = &*Point.BB->getFirstInsertionPt();
2161  if (Inst->getParent())
2162  Inst->moveBefore(Position);
2163  else
2164  Inst->insertBefore(Position);
2165  }
2166  }
2167  };
2168 
2169  /// Move an instruction before another.
2170  class InstructionMoveBefore : public TypePromotionAction {
2171  /// Original position of the instruction.
2172  InsertionHandler Position;
2173 
2174  public:
2175  /// Move \p Inst before \p Before.
2176  InstructionMoveBefore(Instruction *Inst, Instruction *Before)
2177  : TypePromotionAction(Inst), Position(Inst) {
2178  LLVM_DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before
2179  << "\n");
2180  Inst->moveBefore(Before);
2181  }
2182 
2183  /// Move the instruction back to its original position.
2184  void undo() override {
2185  LLVM_DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n");
2186  Position.insert(Inst);
2187  }
2188  };
2189 
2190  /// Set the operand of an instruction with a new value.
2191  class OperandSetter : public TypePromotionAction {
2192  /// Original operand of the instruction.
2193  Value *Origin;
2194 
2195  /// Index of the modified instruction.
2196  unsigned Idx;
2197 
2198  public:
2199  /// Set \p Idx operand of \p Inst with \p NewVal.
2200  OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal)
2201  : TypePromotionAction(Inst), Idx(Idx) {
2202  LLVM_DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n"
2203  << "for:" << *Inst << "\n"
2204  << "with:" << *NewVal << "\n");
2205  Origin = Inst->getOperand(Idx);
2206  Inst->setOperand(Idx, NewVal);
2207  }
2208 
2209  /// Restore the original value of the instruction.
2210  void undo() override {
2211  LLVM_DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n"
2212  << "for: " << *Inst << "\n"
2213  << "with: " << *Origin << "\n");
2214  Inst->setOperand(Idx, Origin);
2215  }
2216  };
2217 
2218  /// Hide the operands of an instruction.
2219  /// Do as if this instruction was not using any of its operands.
2220  class OperandsHider : public TypePromotionAction {
2221  /// The list of original operands.
2222  SmallVector<Value *, 4> OriginalValues;
2223 
2224  public:
2225  /// Remove \p Inst from the uses of the operands of \p Inst.
2226  OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) {
2227  LLVM_DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n");
2228  unsigned NumOpnds = Inst->getNumOperands();
2229  OriginalValues.reserve(NumOpnds);
2230  for (unsigned It = 0; It < NumOpnds; ++It) {
2231  // Save the current operand.
2232  Value *Val = Inst->getOperand(It);
2233  OriginalValues.push_back(Val);
2234  // Set a dummy one.
2235  // We could use OperandSetter here, but that would imply an overhead
2236  // that we are not willing to pay.
2237  Inst->setOperand(It, UndefValue::get(Val->getType()));
2238  }
2239  }
2240 
2241  /// Restore the original list of uses.
2242  void undo() override {
2243  LLVM_DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n");
2244  for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It)
2245  Inst->setOperand(It, OriginalValues[It]);
2246  }
2247  };
2248 
2249  /// Build a truncate instruction.
2250  class TruncBuilder : public TypePromotionAction {
2251  Value *Val;
2252 
2253  public:
2254  /// Build a truncate instruction of \p Opnd producing a \p Ty
2255  /// result.
2256  /// trunc Opnd to Ty.
2257  TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
2258  IRBuilder<> Builder(Opnd);
2259  Val = Builder.CreateTrunc(Opnd, Ty, "promoted");
2260  LLVM_DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n");
2261  }
2262 
2263  /// Get the built value.
2264  Value *getBuiltValue() { return Val; }
2265 
2266  /// Remove the built instruction.
2267  void undo() override {
2268  LLVM_DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n");
2269  if (Instruction *IVal = dyn_cast<Instruction>(Val))
2270  IVal->eraseFromParent();
2271  }
2272  };
2273 
2274  /// Build a sign extension instruction.
2275  class SExtBuilder : public TypePromotionAction {
2276  Value *Val;
2277 
2278  public:
2279  /// Build a sign extension instruction of \p Opnd producing a \p Ty
2280  /// result.
2281  /// sext Opnd to Ty.
2282  SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
2283  : TypePromotionAction(InsertPt) {
2284  IRBuilder<> Builder(InsertPt);
2285  Val = Builder.CreateSExt(Opnd, Ty, "promoted");
2286  LLVM_DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n");
2287  }
2288 
2289  /// Get the built value.
2290  Value *getBuiltValue() { return Val; }
2291 
2292  /// Remove the built instruction.
2293  void undo() override {
2294  LLVM_DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n");
2295  if (Instruction *IVal = dyn_cast<Instruction>(Val))
2296  IVal->eraseFromParent();
2297  }
2298  };
2299 
2300  /// Build a zero extension instruction.
2301  class ZExtBuilder : public TypePromotionAction {
2302  Value *Val;
2303 
2304  public:
2305  /// Build a zero extension instruction of \p Opnd producing a \p Ty
2306  /// result.
2307  /// zext Opnd to Ty.
2308  ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
2309  : TypePromotionAction(InsertPt) {
2310  IRBuilder<> Builder(InsertPt);
2311  Val = Builder.CreateZExt(Opnd, Ty, "promoted");
2312  LLVM_DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n");
2313  }
2314 
2315  /// Get the built value.
2316  Value *getBuiltValue() { return Val; }
2317 
2318  /// Remove the built instruction.
2319  void undo() override {
2320  LLVM_DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n");
2321  if (Instruction *IVal = dyn_cast<Instruction>(Val))
2322  IVal->eraseFromParent();
2323  }
2324  };
2325 
2326  /// Mutate an instruction to another type.
2327  class TypeMutator : public TypePromotionAction {
2328  /// Record the original type.
2329  Type *OrigTy;
2330 
2331  public:
2332  /// Mutate the type of \p Inst into \p NewTy.
2333  TypeMutator(Instruction *Inst, Type *NewTy)
2334  : TypePromotionAction(Inst), OrigTy(Inst->getType()) {
2335  LLVM_DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy
2336  << "\n");
2337  Inst->mutateType(NewTy);
2338  }
2339 
2340  /// Mutate the instruction back to its original type.
2341  void undo() override {
2342  LLVM_DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy
2343  << "\n");
2344  Inst->mutateType(OrigTy);
2345  }
2346  };
2347 
2348  /// Replace the uses of an instruction by another instruction.
2349  class UsesReplacer : public TypePromotionAction {
2350  /// Helper structure to keep track of the replaced uses.
2351  struct InstructionAndIdx {
2352  /// The instruction using the instruction.
2353  Instruction *Inst;
2354 
2355  /// The operand index at which Inst uses the replaced instruction.
2356  unsigned Idx;
2357 
2358  InstructionAndIdx(Instruction *Inst, unsigned Idx)
2359  : Inst(Inst), Idx(Idx) {}
2360  };
2361 
2362  /// Keep track of the original uses (pair Instruction, Index).
2363  SmallVector<InstructionAndIdx, 4> OriginalUses;
2364  /// Keep track of the debug users.
2365  SmallVector<DbgValueInst *, 1> DbgValues;
2366 
2367  using use_iterator = SmallVectorImpl<InstructionAndIdx>::iterator;
2368 
2369  public:
2370  /// Replace all the uses of \p Inst with \p New.
2371  UsesReplacer(Instruction *Inst, Value *New) : TypePromotionAction(Inst) {
2372  LLVM_DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New
2373  << "\n");
2374  // Record the original uses.
2375  for (Use &U : Inst->uses()) {
2376  Instruction *UserI = cast<Instruction>(U.getUser());
2377  OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo()));
2378  }
2379  // Record the debug uses separately. They are not in the instruction's
2380  // use list, but they are replaced by RAUW.
2381  findDbgValues(DbgValues, Inst);
2382 
2383  // Now, we can replace the uses.
2384  Inst->replaceAllUsesWith(New);
2385  }
2386 
2387  /// Reassign the original uses of Inst to Inst.
2388  void undo() override {
2389  LLVM_DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
2390  for (use_iterator UseIt = OriginalUses.begin(),
2391  EndIt = OriginalUses.end();
2392  UseIt != EndIt; ++UseIt) {
2393  UseIt->Inst->setOperand(UseIt->Idx, Inst);
2394  }
2395  // RAUW has replaced all original uses with references to the new value,
2396  // including the debug uses. Since we are undoing the replacements,
2397  // the original debug uses must also be reinstated to maintain the
2398  // correctness and utility of debug value instructions.
2399  for (auto *DVI: DbgValues) {
2400  LLVMContext &Ctx = Inst->getType()->getContext();
2401  auto *MV = MetadataAsValue::get(Ctx, ValueAsMetadata::get(Inst));
2402  DVI->setOperand(0, MV);
2403  }
2404  }
2405  };
2406 
2407  /// Remove an instruction from the IR.
2408  class InstructionRemover : public TypePromotionAction {
2409  /// Original position of the instruction.
2410  InsertionHandler Inserter;
2411 
2412  /// Helper structure to hide all the links to the instruction. In other
2413  /// words, this helps to act as if the instruction were removed.
2414  OperandsHider Hider;
2415 
2416  /// Keep track of the uses replaced, if any.
2417  UsesReplacer *Replacer = nullptr;
2418 
2419  /// Keep track of instructions removed.
2420  SetOfInstrs &RemovedInsts;
2421 
2422  public:
2423  /// Remove all references to \p Inst and optionally replace all its
2424  /// uses with New.
2425  /// \p RemovedInsts Keep track of the instructions removed by this Action.
2426  /// \pre If !Inst->use_empty(), then New != nullptr
2427  InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts,
2428  Value *New = nullptr)
2429  : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
2430  RemovedInsts(RemovedInsts) {
2431  if (New)
2432  Replacer = new UsesReplacer(Inst, New);
2433  LLVM_DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
2434  RemovedInsts.insert(Inst);
2435  /// The instructions removed here will be freed after completing
2436  /// optimizeBlock() for all blocks as we need to keep track of the
2437  /// removed instructions during promotion.
2438  Inst->removeFromParent();
2439  }
2440 
2441  ~InstructionRemover() override { delete Replacer; }
2442 
2443  /// Resurrect the instruction and reassign it to the proper uses if a
2444  /// new value was provided when building this action.
2445  void undo() override {
2446  LLVM_DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
2447  Inserter.insert(Inst);
2448  if (Replacer)
2449  Replacer->undo();
2450  Hider.undo();
2451  RemovedInsts.erase(Inst);
2452  }
2453  };
2454 
2455 public:
2456  /// Restoration point.
2457  /// The restoration point is a pointer to an action instead of an iterator
2458  /// because the iterator may be invalidated but not the pointer.
2459  using ConstRestorationPt = const TypePromotionAction *;
2460 
2461  TypePromotionTransaction(SetOfInstrs &RemovedInsts)
2462  : RemovedInsts(RemovedInsts) {}
2463 
2464  /// Advocate every change made in this transaction.
2465  void commit();
2466 
2467  /// Undo all the changes made after the given point.
2468  void rollback(ConstRestorationPt Point);
2469 
2470  /// Get the current restoration point.
2471  ConstRestorationPt getRestorationPoint() const;
2472 
2473  /// \name API for IR modification with state keeping to support rollback.
2474  /// @{
2475  /// Same as Instruction::setOperand.
2476  void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
2477 
2478  /// Same as Instruction::eraseFromParent.
2479  void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
2480 
2481  /// Same as Value::replaceAllUsesWith.
2482  void replaceAllUsesWith(Instruction *Inst, Value *New);
2483 
2484  /// Same as Value::mutateType.
2485  void mutateType(Instruction *Inst, Type *NewTy);
2486 
2487  /// Same as IRBuilder::createTrunc.
2488  Value *createTrunc(Instruction *Opnd, Type *Ty);
2489 
2490  /// Same as IRBuilder::createSExt.
2491  Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
2492 
2493  /// Same as IRBuilder::createZExt.
2494  Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
2495 
2496  /// Same as Instruction::moveBefore.
2497  void moveBefore(Instruction *Inst, Instruction *Before);
2498  /// @}
2499 
2500 private:
2501  /// The ordered list of actions made so far.
2502  SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
2503 
2504  using CommitPt = SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator;
2505 
2506  SetOfInstrs &RemovedInsts;
2507 };
2508 
2509 } // end anonymous namespace
2510 
2511 void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
2512  Value *NewVal) {
2513  Actions.push_back(llvm::make_unique<TypePromotionTransaction::OperandSetter>(
2514  Inst, Idx, NewVal));
2515 }
2516 
2517 void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
2518  Value *NewVal) {
2519  Actions.push_back(
2520  llvm::make_unique<TypePromotionTransaction::InstructionRemover>(
2521  Inst, RemovedInsts, NewVal));
2522 }
2523 
2524 void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
2525  Value *New) {
2526  Actions.push_back(
2527  llvm::make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
2528 }
2529 
2530 void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
2531  Actions.push_back(
2532  llvm::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
2533 }
2534 
2535 Value *TypePromotionTransaction::createTrunc(Instruction *Opnd,
2536  Type *Ty) {
2537  std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
2538  Value *Val = Ptr->getBuiltValue();
2539  Actions.push_back(std::move(Ptr));
2540  return Val;
2541 }
2542 
2543 Value *TypePromotionTransaction::createSExt(Instruction *Inst,
2544  Value *Opnd, Type *Ty) {
2545  std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
2546  Value *Val = Ptr->getBuiltValue();
2547  Actions.push_back(std::move(Ptr));
2548  return Val;
2549 }
2550 
2551 Value *TypePromotionTransaction::createZExt(Instruction *Inst,
2552  Value *Opnd, Type *Ty) {
2553  std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty));
2554  Value *Val = Ptr->getBuiltValue();
2555  Actions.push_back(std::move(Ptr));
2556  return Val;
2557 }
2558 
2559 void TypePromotionTransaction::moveBefore(Instruction *Inst,
2560  Instruction *Before) {
2561  Actions.push_back(
2562  llvm::make_unique<TypePromotionTransaction::InstructionMoveBefore>(
2563  Inst, Before));
2564 }
2565 
2566 TypePromotionTransaction::ConstRestorationPt
2567 TypePromotionTransaction::getRestorationPoint() const {
2568  return !Actions.empty() ? Actions.back().get() : nullptr;
2569 }
2570 
2571 void TypePromotionTransaction::commit() {
2572  for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt;
2573  ++It)
2574  (*It)->commit();
2575  Actions.clear();
2576 }
2577 
2578 void TypePromotionTransaction::rollback(
2579  TypePromotionTransaction::ConstRestorationPt Point) {
2580  while (!Actions.empty() && Point != Actions.back().get()) {
2581  std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
2582  Curr->undo();
2583  }
2584 }
2585 
2586 namespace {
2587 
2588 /// A helper class for matching addressing modes.
2589 ///
2590 /// This encapsulates the logic for matching the target-legal addressing modes.
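///
/// For example, many targets can fold an address of the form
/// BaseReg + ScaledReg * Scale + BaseOffs directly into a load or store
/// operand (on x86 the scale may be 1, 2, 4 or 8), so the matcher greedily
/// tries to decompose the address computation into such a target-legal
/// ExtAddrMode.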
2591 class AddressingModeMatcher {
2592  SmallVectorImpl<Instruction*> &AddrModeInsts;
2593  const TargetLowering &TLI;
2594  const TargetRegisterInfo &TRI;
2595  const DataLayout &DL;
2596 
2597  /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
2598  /// the memory instruction that we're computing this address for.
2599  Type *AccessTy;
2600  unsigned AddrSpace;
2601  Instruction *MemoryInst;
2602 
2603  /// This is the addressing mode that we're building up. This is
2604  /// part of the return value of this addressing mode matching stuff.
2605  ExtAddrMode &AddrMode;
2606 
2607  /// The instructions inserted by other CodeGenPrepare optimizations.
2608  const SetOfInstrs &InsertedInsts;
2609 
2610  /// A map from the instructions to their type before promotion.
2611  InstrToOrigTy &PromotedInsts;
2612 
2613  /// The ongoing transaction where every action should be registered.
2614  TypePromotionTransaction &TPT;
2615 
2616  // A GEP whose offset is too large to be folded into the addressing mode.
2617  std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP;
2618 
2619  /// This is set to true when we should not do profitability checks.
2620  /// When true, IsProfitableToFoldIntoAddressingMode always returns true.
2621  bool IgnoreProfitability;
2622 
2623  AddressingModeMatcher(
2624  SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI,
2625  const TargetRegisterInfo &TRI, Type *AT, unsigned AS, Instruction *MI,
2626  ExtAddrMode &AM, const SetOfInstrs &InsertedInsts,
2627  InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
2628  std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP)
2629  : AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
2630  DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS),
2631  MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts),
2632  PromotedInsts(PromotedInsts), TPT(TPT), LargeOffsetGEP(LargeOffsetGEP) {
2633  IgnoreProfitability = false;
2634  }
2635 
2636 public:
2637  /// Find the maximal addressing mode that a load/store of V can fold,
2638  /// given an access type of AccessTy. This returns a list of involved
2639  /// instructions in AddrModeInsts.
2640  /// \p InsertedInsts The instructions inserted by other CodeGenPrepare
2641  /// optimizations.
2642  /// \p PromotedInsts maps the instructions to their type before promotion.
2643  /// \p TPT The ongoing transaction where every action should be registered.
2644  static ExtAddrMode
2645  Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst,
2646  SmallVectorImpl<Instruction *> &AddrModeInsts,
2647  const TargetLowering &TLI, const TargetRegisterInfo &TRI,
2648  const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
2649  TypePromotionTransaction &TPT,
2650  std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP) {
2651  ExtAddrMode Result;
2652 
2653  bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, AccessTy, AS,
2654  MemoryInst, Result, InsertedInsts,
2655  PromotedInsts, TPT, LargeOffsetGEP)
2656  .matchAddr(V, 0);
2657  (void)Success; assert(Success && "Couldn't select *anything*?");
2658  return Result;
2659  }
2660 
2661 private:
2662  bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
2663  bool matchAddr(Value *Addr, unsigned Depth);
2664  bool matchOperationAddr(User *AddrInst, unsigned Opcode, unsigned Depth,
2665  bool *MovedAway = nullptr);
2666  bool isProfitableToFoldIntoAddressingMode(Instruction *I,
2667  ExtAddrMode &AMBefore,
2668  ExtAddrMode &AMAfter);
2669  bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
2670  bool isPromotionProfitable(unsigned NewCost, unsigned OldCost,
2671  Value *PromotedOperand) const;
2672 };
2673 
2674 class PhiNodeSet;
2675 
2676 /// An iterator for PhiNodeSet.
2677 class PhiNodeSetIterator {
2678  PhiNodeSet * const Set;
2679  size_t CurrentIndex = 0;
2680 
2681 public:
2682  /// The constructor. Start should point to either a valid element, or be equal
2683  /// to the size of the underlying SmallVector of the PhiNodeSet.
2684  PhiNodeSetIterator(PhiNodeSet * const Set, size_t Start);
2685  PHINode * operator*() const;
2686  PhiNodeSetIterator& operator++();
2687  bool operator==(const PhiNodeSetIterator &RHS) const;
2688  bool operator!=(const PhiNodeSetIterator &RHS) const;
2689 };
2690 
2691 /// Keeps a set of PHINodes.
2692 ///
2693 /// This is a minimal set implementation for a specific use case:
2694 /// It is very fast when there are very few elements, but also provides good
2695 /// performance when there are many. It is similar to SmallPtrSet, but also
2696 /// provides iteration by insertion order, which is deterministic and stable
2697 /// across runs. It is also similar to SmallSetVector, but provides removal of
2698 /// elements in O(1) time. This is achieved by not actually removing the element
2699 /// from the underlying vector, so it comes at the cost of using more memory, but
2700 /// that is fine, since PhiNodeSets are used as short-lived objects.
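///
/// A small usage sketch (illustrative only; PN1 and PN2 stand for arbitrary
/// PHINode pointers and process() is a placeholder):
/// @code
/// PhiNodeSet Set;
/// Set.insert(PN1);
/// Set.insert(PN2);
/// Set.erase(PN1);              // O(1): PN1 is only marked as removed
/// for (PHINode *P : Set)       // visits remaining nodes in insertion order
///   process(P);
/// assert(Set.count(PN2) == 1);
/// @endcode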
2701 class PhiNodeSet {
2702  friend class PhiNodeSetIterator;
2703 
2704  using MapType = SmallDenseMap<PHINode *, size_t, 32>;
2705  using iterator = PhiNodeSetIterator;
2706 
2707  /// Keeps the elements in the order of their insertion in the underlying
2708  /// vector. To achieve constant time removal, it never deletes any element.
2709  SmallVector<PHINode *, 32> NodeList;
2710 
2711  /// Keeps the elements in the underlying set implementation. This (and not the
2712  /// NodeList defined above) is the source of truth on whether an element
2713  /// is actually in the collection.
2714  MapType NodeMap;
2715 
2716  /// Points to the first valid (not deleted) element when the set is not empty
2717  /// and the value is not zero. Equals the size of the underlying vector
2718  /// when the set is empty. When the value is 0, as in the beginning, the
2719  /// first element may or may not be valid.
2720  size_t FirstValidElement = 0;
2721 
2722 public:
2723  /// Inserts a new element to the collection.
2724  /// \returns true if the element is actually added, i.e. was not in the
2725  /// collection before the operation.
2726  bool insert(PHINode *Ptr) {
2727  if (NodeMap.insert(std::make_pair(Ptr, NodeList.size())).second) {
2728  NodeList.push_back(Ptr);
2729  return true;
2730  }
2731  return false;
2732  }
2733 
2734  /// Removes the element from the collection.
2735  /// \returns whether the element is actually removed, i.e. was in the
2736  /// collection before the operation.
2737  bool erase(PHINode *Ptr) {
2738  auto it = NodeMap.find(Ptr);
2739  if (it != NodeMap.end()) {
2740  NodeMap.erase(Ptr);
2741  SkipRemovedElements(FirstValidElement);
2742  return true;
2743  }
2744  return false;
2745  }
2746 
2747  /// Removes all elements and clears the collection.
2748  void clear() {
2749  NodeMap.clear();
2750  NodeList.clear();
2751  FirstValidElement = 0;
2752  }
2753 
2754  /// \returns an iterator that will iterate the elements in the order of
2755  /// insertion.
2756  iterator begin() {
2757  if (FirstValidElement == 0)
2758  SkipRemovedElements(FirstValidElement);
2759  return PhiNodeSetIterator(this, FirstValidElement);
2760  }
2761 
2762  /// \returns an iterator that points to the end of the collection.
2763  iterator end() { return PhiNodeSetIterator(this, NodeList.size()); }
2764 
2765  /// Returns the number of elements in the collection.
2766  size_t size() const {
2767  return NodeMap.size();
2768  }
2769 
2770  /// \returns 1 if the given element is in the collection, and 0 otherwise.
2771  size_t count(PHINode *Ptr) const {
2772  return NodeMap.count(Ptr);
2773  }
2774 
2775 private:
2776  /// Updates the CurrentIndex so that it will point to a valid element.
2777  ///
2778  /// If the element of NodeList at CurrentIndex is valid, it does not
2779  /// change it. If there are no more valid elements, it updates CurrentIndex
2780  /// to point to the end of the NodeList.
2781  void SkipRemovedElements(size_t &CurrentIndex) {
2782  while (CurrentIndex < NodeList.size()) {
2783  auto it = NodeMap.find(NodeList[CurrentIndex]);
2784  // If the element has been deleted and added again later, NodeMap will
2785  // point to a different index, so CurrentIndex will still be invalid.
2786  if (it != NodeMap.end() && it->second == CurrentIndex)
2787  break;
2788  ++CurrentIndex;
2789  }
2790  }
2791 };
2792 
2793 PhiNodeSetIterator::PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start)
2794  : Set(Set), CurrentIndex(Start) {}
2795 
2797  assert(CurrentIndex < Set->NodeList.size() &&
2798  "PhiNodeSet access out of range");
2799  return Set->NodeList[CurrentIndex];
2800 }
2801 
2802 PhiNodeSetIterator& PhiNodeSetIterator::operator++() {
2803  assert(CurrentIndex < Set->NodeList.size() &&
2804  "PhiNodeSet access out of range");
2805  ++CurrentIndex;
2806  Set->SkipRemovedElements(CurrentIndex);
2807  return *this;
2808 }
2809 
2810 bool PhiNodeSetIterator::operator==(const PhiNodeSetIterator &RHS) const {
2811  return CurrentIndex == RHS.CurrentIndex;
2812 }
2813 
2814 bool PhiNodeSetIterator::operator!=(const PhiNodeSetIterator &RHS) const {
2815  return !((*this) == RHS);
2816 }
2817 
2818 /// Keep track of simplification of Phi nodes.
2819 /// Accept the set of all phi nodes and erase a phi node from this set
2820 /// if it gets simplified.
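///
/// For example (a sketch with hypothetical values), after
/// @code
/// ST.Put(P1, V1);   // P1 was simplified to V1
/// ST.Put(V1, V2);   // V1 was later simplified to V2
/// @endcode
/// ST.Get(P1) follows the recorded chain of replacements and returns V2.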
2821 class SimplificationTracker {
2822  DenseMap<Value *, Value *> Storage;
2823  const SimplifyQuery &SQ;
2824  // Tracks newly created Phi nodes. The elements are iterated by insertion
2825  // order.
2826  PhiNodeSet AllPhiNodes;
2827  // Tracks newly created Select nodes.
2828  SmallPtrSet<SelectInst *, 32> AllSelectNodes;
2829 
2830 public:
2831  SimplificationTracker(const SimplifyQuery &sq)
2832  : SQ(sq) {}
2833 
2834  Value *Get(Value *V) {
2835  do {
2836  auto SV = Storage.find(V);
2837  if (SV == Storage.end())
2838  return V;
2839  V = SV->second;
2840  } while (true);
2841  }
2842 
2843  Value *Simplify(Value *Val) {
2844  SmallVector<Value *, 32> WorkList;
2845  SmallPtrSet<Value *, 32> Visited;
2846  WorkList.push_back(Val);
2847  while (!WorkList.empty()) {
2848  auto P = WorkList.pop_back_val();
2849  if (!Visited.insert(P).second)
2850  continue;
2851  if (auto *PI = dyn_cast<Instruction>(P))
2852  if (Value *V = SimplifyInstruction(cast<Instruction>(PI), SQ)) {
2853  for (auto *U : PI->users())
2854  WorkList.push_back(cast<Value>(U));
2855  Put(PI, V);
2856  PI->replaceAllUsesWith(V);
2857  if (auto *PHI = dyn_cast<PHINode>(PI))
2858  AllPhiNodes.erase(PHI);
2859  if (auto *Select = dyn_cast<SelectInst>(PI))
2860  AllSelectNodes.erase(Select);
2861  PI->eraseFromParent();
2862  }
2863  }
2864  return Get(Val);
2865  }
2866 
2867  void Put(Value *From, Value *To) {
2868  Storage.insert({ From, To });
2869  }
2870 
2871  void ReplacePhi(PHINode *From, PHINode *To) {
2872  Value* OldReplacement = Get(From);
2873  while (OldReplacement != From) {
2874  From = To;
2875  To = dyn_cast<PHINode>(OldReplacement);
2876  OldReplacement = Get(From);
2877  }
2878  assert(Get(To) == To && "Replacement PHI node is already replaced.");
2879  Put(From, To);
2880  From->replaceAllUsesWith(To);
2881  AllPhiNodes.erase(From);
2882  From->eraseFromParent();
2883  }
2884 
2885  PhiNodeSet& newPhiNodes() { return AllPhiNodes; }
2886 
2887  void insertNewPhi(PHINode *PN) { AllPhiNodes.insert(PN); }
2888 
2889  void insertNewSelect(SelectInst *SI) { AllSelectNodes.insert(SI); }
2890 
2891  unsigned countNewPhiNodes() const { return AllPhiNodes.size(); }
2892 
2893  unsigned countNewSelectNodes() const { return AllSelectNodes.size(); }
2894 
2895  void destroyNewNodes(Type *CommonType) {
2896  // For safe erasing, replace the uses with dummy value first.
2897  auto Dummy = UndefValue::get(CommonType);
2898  for (auto I : AllPhiNodes) {
2899  I->replaceAllUsesWith(Dummy);
2900  I->eraseFromParent();
2901  }
2902  AllPhiNodes.clear();
2903  for (auto I : AllSelectNodes) {
2904  I->replaceAllUsesWith(Dummy);
2905  I->eraseFromParent();
2906  }
2907  AllSelectNodes.clear();
2908  }
2909 };
2910 
2911 /// A helper class for combining addressing modes.
2912 class AddressingModeCombiner {
2913  typedef DenseMap<Value *, Value *> FoldAddrToValueMapping;
2914  typedef std::pair<PHINode *, PHINode *> PHIPair;
2915 
2916 private:
2917  /// The addressing modes we've collected.
2918  SmallVector<ExtAddrMode, 16> AddrModes;
2919 
2920  /// The field in which the AddrModes differ, when we have more than one.
2921  ExtAddrMode::FieldName DifferentField = ExtAddrMode::NoField;
2922 
2923  /// Are the AddrModes that we have all just equal to their original values?
2924  bool AllAddrModesTrivial = true;
2925 
2926  /// Common Type for all different fields in addressing modes.
2927  Type *CommonType;
2928 
2929  /// SimplifyQuery for simplifyInstruction utility.
2930  const SimplifyQuery &SQ;
2931 
2932  /// Original Address.
2933  Value *Original;
2934 
2935 public:
2936  AddressingModeCombiner(const SimplifyQuery &_SQ, Value *OriginalValue)
2937  : CommonType(nullptr), SQ(_SQ), Original(OriginalValue) {}
2938 
2939  /// Get the combined AddrMode
2940  const ExtAddrMode &getAddrMode() const {
2941  return AddrModes[0];
2942  }
2943 
2944  /// Add a new AddrMode if it's compatible with the AddrModes we already
2945  /// have.
2946  /// \return True iff we succeeded in doing so.
2947  bool addNewAddrMode(ExtAddrMode &NewAddrMode) {
2948  // Take note of whether we have any non-trivial AddrModes, as we need to
2949  // detect when all AddrModes are trivial, since then we would introduce a phi
2950  // or select which just duplicates what's already there.
2951  AllAddrModesTrivial = AllAddrModesTrivial && NewAddrMode.isTrivial();
2952 
2953  // If this is the first addrmode then everything is fine.
2954  if (AddrModes.empty()) {
2955  AddrModes.emplace_back(NewAddrMode);
2956  return true;
2957  }
2958 
2959  // Figure out how different this is from the other address modes, which we
2960  // can do just by comparing against the first one given that we only care
2961  // about the cumulative difference.
2962  ExtAddrMode::FieldName ThisDifferentField =
2963  AddrModes[0].compare(NewAddrMode);
2964  if (DifferentField == ExtAddrMode::NoField)
2965  DifferentField = ThisDifferentField;
2966  else if (DifferentField != ThisDifferentField)
2967  DifferentField = ExtAddrMode::MultipleFields;
2968 
2969  // If NewAddrMode differs in more than one dimension we cannot handle it.
2970  bool CanHandle = DifferentField != ExtAddrMode::MultipleFields;
2971 
2972  // If Scale Field is different then we reject.
2973  CanHandle = CanHandle && DifferentField != ExtAddrMode::ScaleField;
2974 
2975  // We must also reject the case when the base offset is different and the
2976  // scale reg is not null; we cannot handle this case because the merge of the
2977  // different offsets would have to be used as the ScaleReg.
2978  CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseOffsField ||
2979  !NewAddrMode.ScaledReg);
2980 
2981  // We must also reject the case when the GV is different and a BaseReg is
2982  // installed, because we want to use the base reg as a merge of the GV values.
2983  CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseGVField ||
2984  !NewAddrMode.HasBaseReg);
2985 
2986  // Even if NewAddrMode is the same we still need to collect it, because the
2987  // original value is different. Later we will need all the original values
2988  // as anchors when finding the common Phi node.
2989  if (CanHandle)
2990  AddrModes.emplace_back(NewAddrMode);
2991  else
2992  AddrModes.clear();
2993 
2994  return CanHandle;
2995  }
2996 
2997  /// Combine the addressing modes we've collected into a single
2998  /// addressing mode.
2999  /// \return True iff we successfully combined them or we only had one so
3000  /// didn't need to combine them anyway.
3001  bool combineAddrModes() {
3002  // If we have no AddrModes then they can't be combined.
3003  if (AddrModes.size() == 0)
3004  return false;
3005 
3006  // A single AddrMode can trivially be combined.
3007  if (AddrModes.size() == 1 || DifferentField == ExtAddrMode::NoField)
3008  return true;
3009 
3010  // If the AddrModes we collected are all just equal to the value they are
3011  // derived from then combining them wouldn't do anything useful.
3012  if (AllAddrModesTrivial)
3013  return false;
3014 
3015  if (!addrModeCombiningAllowed())
3016  return false;
3017 
3018  // Build a map from <original value, basic block where we saw it> to the
3019  // value of the base register.
3020  // Bail out if there is no common type.
3021  FoldAddrToValueMapping Map;
3022  if (!initializeMap(Map))
3023  return false;
3024 
3025  Value *CommonValue = findCommon(Map);
3026  if (CommonValue)
3027  AddrModes[0].SetCombinedField(DifferentField, CommonValue, AddrModes);
3028  return CommonValue != nullptr;
3029  }
3030 
3031 private:
3032  /// Initialize Map with anchor values. For each address seen,
3033  /// we set the value of the differing field seen in that address.
3034  /// At the same time we find a common type for the differing fields, which we
3035  /// will use to create new Phi/Select nodes; keep it in the CommonType field.
3036  /// Return false if no common type is found.
3037  bool initializeMap(FoldAddrToValueMapping &Map) {
3038  // Keep track of keys where the value is null. We will need to replace it
3039  // with constant null when we know the common type.
3040  SmallVector<Value *, 2> NullValue;
3041  Type *IntPtrTy = SQ.DL.getIntPtrType(AddrModes[0].OriginalValue->getType());
3042  for (auto &AM : AddrModes) {
3043  Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy);
3044  if (DV) {
3045  auto *Type = DV->getType();
3046  if (CommonType && CommonType != Type)
3047  return false;
3048  CommonType = Type;
3049  Map[AM.OriginalValue] = DV;
3050  } else {
3051  NullValue.push_back(AM.OriginalValue);
3052  }
3053  }
3054  assert(CommonType && "At least one non-null value must be!");
3055  for (auto *V : NullValue)
3056  Map[V] = Constant::getNullValue(CommonType);
3057  return true;
3058  }
3059 
3060  /// We have a mapping from a value A to another value B, where B was a field
3061  /// in the addressing mode represented by A. We also have an original value C
3062  /// representing the address we start with. Traversing from C through phis and
3063  /// selects, we ended up with the A's in the map. This utility function tries
3064  /// to find a value V which is a field in addressing mode C such that, by
3065  /// traversing through phi nodes and selects, we end up at the corresponding
3066  /// values B in the map. The utility will create new Phis/Selects if needed.
3067  // The simple example looks as follows:
3068  // BB1:
3069  // p1 = b1 + 40
3070  // br cond BB2, BB3
3071  // BB2:
3072  // p2 = b2 + 40
3073  // br BB3
3074  // BB3:
3075  // p = phi [p1, BB1], [p2, BB2]
3076  // v = load p
3077  // Map is
3078  // p1 -> b1
3079  // p2 -> b2
3080  // Request is
3081  // p -> ?
3082  // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3.
3083  Value *findCommon(FoldAddrToValueMapping &Map) {
3084  // Tracks the simplification of newly created phi nodes. The reason we use
3085  // this mapping is that we will add newly created Phi nodes to AddrToBase.
3086  // Simplification of Phi nodes is recursive, so some Phi nodes may
3087  // be simplified after we added them to AddrToBase. In practice this
3088  // simplification is possible only if the original phis/selects were not
3089  // simplified yet.
3090  // Using this mapping we can find the current value in AddrToBase.
3091  SimplificationTracker ST(SQ);
3092 
3093  // First step, DFS to create PHI nodes for all intermediate blocks.
3094  // Also fill traverse order for the second step.
3095  SmallVector<Value *, 32> TraverseOrder;
3096  InsertPlaceholders(Map, TraverseOrder, ST);
3097 
3098  // Second Step, fill new nodes by merged values and simplify if possible.
3099  FillPlaceholders(Map, TraverseOrder, ST);
3100 
3101  if (!AddrSinkNewSelects && ST.countNewSelectNodes() > 0) {
3102  ST.destroyNewNodes(CommonType);
3103  return nullptr;
3104  }
3105 
3106  // Now we'd like to match the new Phi nodes to existing ones.
3107  unsigned PhiNotMatchedCount = 0;
3108  if (!MatchPhiSet(ST, AddrSinkNewPhis, PhiNotMatchedCount)) {
3109  ST.destroyNewNodes(CommonType);
3110  return nullptr;
3111  }
3112 
3113  auto *Result = ST.Get(Map.find(Original)->second);
3114  if (Result) {
3115  NumMemoryInstsPhiCreated += ST.countNewPhiNodes() + PhiNotMatchedCount;
3116  NumMemoryInstsSelectCreated += ST.countNewSelectNodes();
3117  }
3118  return Result;
3119  }
3120 
3121  /// Try to match PHI node to Candidate.
3122  /// Matcher tracks the matched Phi nodes.
3123  bool MatchPhiNode(PHINode *PHI, PHINode *Candidate,
3124  SmallSetVector<PHIPair, 8> &Matcher,
3125  PhiNodeSet &PhiNodesToMatch) {
3126  SmallVector<PHIPair, 8> WorkList;
3127  Matcher.insert({ PHI, Candidate });
3128  WorkList.push_back({ PHI, Candidate });
3129  SmallSet<PHIPair, 8> Visited;
3130  while (!WorkList.empty()) {
3131  auto Item = WorkList.pop_back_val();
3132  if (!Visited.insert(Item).second)
3133  continue;
3134  // We iterate over all incoming values of the Phi to compare them.
3135  // If the values differ, both of them are Phi nodes, the first one is a
3136  // Phi we added (i.e. subject to matching) and both live in the same basic
3137  // block, then the pair can still match provided those values match. So we
3138  // tentatively state that they match and add them to the work list to verify.
3139  for (auto B : Item.first->blocks()) {
3140  Value *FirstValue = Item.first->getIncomingValueForBlock(B);
3141  Value *SecondValue = Item.second->getIncomingValueForBlock(B);
3142  if (FirstValue == SecondValue)
3143  continue;
3144 
3145  PHINode *FirstPhi = dyn_cast<PHINode>(FirstValue);
3146  PHINode *SecondPhi = dyn_cast<PHINode>(SecondValue);
3147 
3148  // If one of them is not a Phi, or
3149  // the first one is not a Phi node from the set we'd like to match, or
3150  // the Phi nodes come from different basic blocks, then
3151  // we will not be able to match.
3152  if (!FirstPhi || !SecondPhi || !PhiNodesToMatch.count(FirstPhi) ||
3153  FirstPhi->getParent() != SecondPhi->getParent())
3154  return false;
3155 
3156  // If we already matched them then continue.
3157  if (Matcher.count({ FirstPhi, SecondPhi }))
3158  continue;
3159  // So the values are different and do not match yet. We need them to
3160  // match.
3161  Matcher.insert({ FirstPhi, SecondPhi });
3162  // But we must check it.
3163  WorkList.push_back({ FirstPhi, SecondPhi });
3164  }
3165  }
3166  return true;
3167  }
3168 
3169  /// For the given set of PHI nodes (in the SimplificationTracker) try
3170  /// to find their equivalents.
3171  /// Returns false if this matching fails and creation of new Phi is disabled.
3172  bool MatchPhiSet(SimplificationTracker &ST, bool AllowNewPhiNodes,
3173  unsigned &PhiNotMatchedCount) {
3174  // Matched and PhiNodesToMatch iterate their elements in a deterministic
3175  // order, so the replacements (ReplacePhi) are also done in a deterministic
3176  // order.
3177  SmallSetVector<PHIPair, 8> Matched;
3178  SmallPtrSet<PHINode *, 8> WillNotMatch;
3179  PhiNodeSet &PhiNodesToMatch = ST.newPhiNodes();
3180  while (PhiNodesToMatch.size()) {
3181  PHINode *PHI = *PhiNodesToMatch.begin();
3182 
3183  // Add this Phi itself; if no Phi node in the basic block matches, we do not match.
3184  WillNotMatch.clear();
3185  WillNotMatch.insert(PHI);
3186 
3187  // Traverse all Phis until we find an equivalent one or fail to do so.
3188  bool IsMatched = false;
3189  for (auto &P : PHI->getParent()->phis()) {
3190  if (&P == PHI)
3191  continue;
3192  if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch)))
3193  break;
3194  // If it does not match, collect all Phi nodes from the matcher.
3195  // If we end up with no match, then all these Phi nodes will not match
3196  // later.
3197  for (auto M : Matched)
3198  WillNotMatch.insert(M.first);
3199  Matched.clear();
3200  }
3201  if (IsMatched) {
3202  // Replace all matched values and erase them.
3203  for (auto MV : Matched)
3204  ST.ReplacePhi(MV.first, MV.second);
3205  Matched.clear();
3206  continue;
3207  }
3208  // If we are not allowed to create new nodes then bail out.
3209  if (!AllowNewPhiNodes)
3210  return false;
3211  // Just remove all seen values in matcher. They will not match anything.
3212  PhiNotMatchedCount += WillNotMatch.size();
3213  for (auto *P : WillNotMatch)
3214  PhiNodesToMatch.erase(P);
3215  }
3216  return true;
3217  }
3218  /// Fill the placeholders with values from predecessors and simplify them.
3219  void FillPlaceholders(FoldAddrToValueMapping &Map,
3220  SmallVectorImpl<Value *> &TraverseOrder,
3221  SimplificationTracker &ST) {
3222  while (!TraverseOrder.empty()) {
3223  Value *Current = TraverseOrder.pop_back_val();
3224  assert(Map.find(Current) != Map.end() && "No node to fill!!!");
3225  Value *V = Map[Current];
3226 
3227  if (SelectInst *Select = dyn_cast<SelectInst>(V)) {
3228  // CurrentValue also must be Select.
3229  auto *CurrentSelect = cast<SelectInst>(Current);
3230  auto *TrueValue = CurrentSelect->getTrueValue();
3231  assert(Map.find(TrueValue) != Map.end() && "No True Value!");
3232  Select->setTrueValue(ST.Get(Map[TrueValue]));
3233  auto *FalseValue = CurrentSelect->getFalseValue();
3234  assert(Map.find(FalseValue) != Map.end() && "No False Value!");
3235  Select->setFalseValue(ST.Get(Map[FalseValue]));
3236  } else {
3237  // Must be a Phi node then.
3238  PHINode *PHI = cast<PHINode>(V);
3239  auto *CurrentPhi = dyn_cast<PHINode>(Current);
3240  // Fill the Phi node with values from predecessors.
3241  for (auto B : predecessors(PHI->getParent())) {
3242  Value *PV = CurrentPhi->getIncomingValueForBlock(B);
3243  assert(Map.find(PV) != Map.end() && "No predecessor Value!");
3244  PHI->addIncoming(ST.Get(Map[PV]), B);
3245  }
3246  }
3247  Map[Current] = ST.Simplify(V);
3248  }
3249  }
3250 
3251  /// Starting from the original value, recursively iterates over the def-use
3252  /// chain up to known ending values represented in the map. For each traversed
3253  /// phi/select it inserts a placeholder Phi or Select.
3254  /// Reports all newly created Phi/Select nodes by adding them to the set.
3255  /// Also reports the order in which the values have been traversed.
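 /// For instance (roughly), in the findCommon example above this creates an
 /// empty "sunk_phi" of CommonType as a placeholder for p, maps p to it, and
 /// pushes p onto TraverseOrder; the incoming values are filled in later by
 /// FillPlaceholders.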
3256  void InsertPlaceholders(FoldAddrToValueMapping &Map,
3257  SmallVectorImpl<Value *> &TraverseOrder,
3258  SimplificationTracker &ST) {
3259  SmallVector<Value *, 32> Worklist;
3260  assert((isa<PHINode>(Original) || isa<SelectInst>(Original)) &&
3261  "Address must be a Phi or Select node");
3262  auto *Dummy = UndefValue::get(CommonType);
3263  Worklist.push_back(Original);
3264  while (!Worklist.empty()) {
3265  Value *Current = Worklist.pop_back_val();
3266  // if it is already visited or it is an ending value then skip it.
3267  if (Map.find(Current) != Map.end())
3268  continue;
3269  TraverseOrder.push_back(Current);
3270 
3271  // CurrentValue must be a Phi node or select. All others must be covered
3272  // by anchors.
3273  if (SelectInst *CurrentSelect = dyn_cast<SelectInst>(Current)) {
3274  // Is it OK to get metadata from OrigSelect?!
3275  // Create a Select placeholder with dummy value.
3276  SelectInst *Select = SelectInst::Create(
3277  CurrentSelect->getCondition(), Dummy, Dummy,
3278  CurrentSelect->getName(), CurrentSelect, CurrentSelect);
3279  Map[Current] = Select;
3280  ST.insertNewSelect(Select);
3281  // We are interested in True and False values.
3282  Worklist.push_back(CurrentSelect->getTrueValue());
3283  Worklist.push_back(CurrentSelect->getFalseValue());
3284  } else {
3285  // It must be a Phi node then.
3286  PHINode *CurrentPhi = cast<PHINode>(Current);
3287  unsigned PredCount = CurrentPhi->getNumIncomingValues();
3288  PHINode *PHI =
3289  PHINode::Create(CommonType, PredCount, "sunk_phi", CurrentPhi);
3290  Map[Current] = PHI;
3291  ST.insertNewPhi(PHI);
3292  for (Value *P : CurrentPhi->incoming_values())
3293  Worklist.push_back(P);
3294  }
3295  }
3296  }
3297 
3298  bool addrModeCombiningAllowed() {
3299  if (DisableComplexAddrModes)
3300  return false;
3301  switch (DifferentField) {
3302  default:
3303  return false;
3304  case ExtAddrMode::BaseRegField:
3305  return AddrSinkCombineBaseReg;
3306  case ExtAddrMode::BaseGVField:
3307  return AddrSinkCombineBaseGV;
3308  case ExtAddrMode::BaseOffsField:
3309  return AddrSinkCombineBaseOffs;
3310  case ExtAddrMode::ScaledRegField:
3311  return AddrSinkCombineScaledReg;
3312  }
3313  }
3314 };
3315 } // end anonymous namespace
3316 
3317 /// Try adding ScaleReg*Scale to the current addressing mode.
3318 /// Return true and update AddrMode if this addr mode is legal for the target,
3319 /// false if not.
3320 bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
3321  unsigned Depth) {
3322  // If Scale is 1, then this is the same as adding ScaleReg to the addressing
3323  // mode. Just process that directly.
3324  if (Scale == 1)
3325  return matchAddr(ScaleReg, Depth);
3326 
3327  // If the scale is 0, it takes nothing to add this.
3328  if (Scale == 0)
3329  return true;
3330 
3331  // If we already have a scale of this value, we can add to it, otherwise, we
3332  // need an available scale field.
3333  if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
3334  return false;
3335 
3336  ExtAddrMode TestAddrMode = AddrMode;
3337 
3338  // Add scale to turn X*4+X*3 -> X*7. This could also do things like
3339  // [A+B + A*7] -> [B+A*8].
3340  TestAddrMode.Scale += Scale;
3341  TestAddrMode.ScaledReg = ScaleReg;
3342 
3343  // If the new address isn't legal, bail out.
3344  if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace))
3345  return false;
3346 
3347  // It was legal, so commit it.
3348  AddrMode = TestAddrMode;
3349 
3350  // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
3351  // to see if ScaleReg is actually X+C. If so, we can turn this into adding
3352  // X*Scale + C*Scale to addr mode.
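 // E.g. (illustrative): if ScaleReg is "%r = add i64 %x, 12" and the combined
 // scale is 4, ScaledReg becomes %x and BaseOffs grows by 12 * 4 = 48,
 // provided the target accepts the resulting addressing mode.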
3353  ConstantInt *CI = nullptr; Value *AddLHS = nullptr;
3354  if (isa<Instruction>(ScaleReg) && // not a constant expr.
3355  match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
3356  TestAddrMode.ScaledReg = AddLHS;
3357  TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
3358 
3359  // If this addressing mode is legal, commit it and remember that we folded
3360  // this instruction.
3361  if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) {
3362  AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
3363  AddrMode = TestAddrMode;
3364  return true;
3365  }
3366  }
3367 
3368  // Otherwise, not (x+c)*scale, just return what we have.
3369  return true;
3370 }
3371 
3372 /// This is a little filter, which returns true if an addressing computation
3373 /// involving I might be folded into a load/store accessing it.
3374 /// This doesn't need to be perfect, but needs to accept at least
3375 /// the set of instructions that MatchOperationAddr can.
3376 static bool MightBeFoldableInst(Instruction *I) {
3377  switch (I->getOpcode()) {
3378  case Instruction::BitCast:
3379  case Instruction::AddrSpaceCast:
3380  // Don't touch identity bitcasts.
3381  if (I->getType() == I->getOperand(0)->getType())
3382  return false;
3383  return I->getType()->isIntOrPtrTy();
3384  case Instruction::PtrToInt:
3385  // PtrToInt is always a noop, as we know that the int type is pointer sized.
3386  return true;
3387  case Instruction::IntToPtr:
3388  // We know the input is intptr_t, so this is foldable.
3389  return true;
3390  case Instruction::Add:
3391  return true;
3392  case Instruction::Mul:
3393  case Instruction::Shl:
3394  // Can only handle X*C and X << C.
3395  return isa<ConstantInt>(I->getOperand(1));
3396  case Instruction::GetElementPtr:
3397  return true;
3398  default:
3399  return false;
3400  }
3401 }
3402 
3403 /// Check whether or not \p Val is a legal instruction for \p TLI.
3404 /// \note \p Val is assumed to be the product of some type promotion.
3405 /// Therefore if \p Val has an undefined state in \p TLI, this is assumed
3406 /// to be legal, as the non-promoted value would have had the same state.
3407 static bool isPromotedInstructionLegal(const TargetLowering &TLI,
3408  const DataLayout &DL, Value *Val) {
3409  Instruction *PromotedInst = dyn_cast<Instruction>(Val);
3410  if (!PromotedInst)
3411  return false;
3412  int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
3413  // If the ISDOpcode is undefined, it was undefined before the promotion.
3414  if (!ISDOpcode)
3415  return true;
3416  // Otherwise, check if the promoted instruction is legal or not.
3417  return TLI.isOperationLegalOrCustom(
3418  ISDOpcode, TLI.getValueType(DL, PromotedInst->getType()));
3419 }
3420 
3421 namespace {
3422 
3423 /// Helper class to perform type promotion.
3424 class TypePromotionHelper {
3425  /// Utility function to add a promoted instruction \p ExtOpnd to
3426  /// \p PromotedInsts and record the type of extension we have seen.
3427  static void addPromotedInst(InstrToOrigTy &PromotedInsts,
3428  Instruction *ExtOpnd,
3429  bool IsSExt) {
3430  ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
3431  InstrToOrigTy::iterator It = PromotedInsts.find(ExtOpnd);
3432  if (It != PromotedInsts.end()) {
3433  // If the new extension is same as original, the information in
3434  // PromotedInsts[ExtOpnd] is still correct.
3435  if (It->second.getInt() == ExtTy)
3436  return;
3437 
3438  // The new extension is different from the old one, so we invalidate
3439  // the type information by setting the extension type to
3440  // BothExtension.
3441  ExtTy = BothExtension;
3442  }
3443  PromotedInsts[ExtOpnd] = TypeIsSExt(ExtOpnd->getType(), ExtTy);
3444  }
3445 
3446  /// Utility function to query the original type of instruction \p Opnd
3447  /// with a matched extension type. If the extension doesn't match, we
3448  /// cannot use the information we had on the original type.
3449  /// BothExtension doesn't match any extension type.
3450  static const Type *getOrigType(const InstrToOrigTy &PromotedInsts,
3451  Instruction *Opnd,
3452  bool IsSExt) {
3453  ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
3454  InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
3455  if (It != PromotedInsts.end() && It->second.getInt() == ExtTy)
3456  return It->second.getPointer();
3457  return nullptr;
3458  }
3459 
3460  /// Utility function to check whether or not a sign or zero extension
3461  /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by
3462  /// either using the operands of \p Inst or promoting \p Inst.
3463  /// The type of the extension is defined by \p IsSExt.
3464  /// In other words, check if:
3465  /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType.
3466  /// #1 Promotion applies:
3467  /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...).
3468  /// #2 Operand reuses:
3469  /// ext opnd1 to ConsideredExtType.
3470  /// \p PromotedInsts maps the instructions to their type before promotion.
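 /// For example (a rough sketch of case #1):
 ///   %t = add nsw i32 %a, %b
 ///   %e = sext i32 %t to i64
 /// can be promoted to
 ///   %ea = sext i32 %a to i64
 ///   %eb = sext i32 %b to i64
 ///   %e  = add nsw i64 %ea, %eb
 /// because the nsw flag guarantees the sign-extended result is unchanged.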
3471  static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType,
3472  const InstrToOrigTy &PromotedInsts, bool IsSExt);
3473 
3474  /// Utility function to determine if \p OpIdx should be promoted when
3475  /// promoting \p Inst.
3476  static bool shouldExtOperand(const Instruction *Inst, int OpIdx) {
3477  return !(isa<SelectInst>(Inst) && OpIdx == 0);
3478  }
3479 
3480  /// Utility function to promote the operand of \p Ext when this
3481  /// operand is a promotable trunc or sext or zext.
3482  /// \p PromotedInsts maps the instructions to their type before promotion.
3483  /// \p CreatedInstsCost[out] contains the cost of all instructions
3484  /// created to promote the operand of Ext.
3485  /// Newly added extensions are inserted in \p Exts.
3486  /// Newly added truncates are inserted in \p Truncs.
3487  /// Should never be called directly.
3488  /// \return The promoted value which is used instead of Ext.
3489  static Value *promoteOperandForTruncAndAnyExt(
3490  Instruction *Ext, TypePromotionTransaction &TPT,
3491  InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
3492  SmallVectorImpl<Instruction *> *Exts,
3493  SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI);
3494 
3495  /// Utility function to promote the operand of \p Ext when this
3496  /// operand is promotable and is not a supported trunc or sext.
3497  /// \p PromotedInsts maps the instructions to their type before promotion.
3498  /// \p CreatedInstsCost[out] contains the cost of all the instructions
3499  /// created to promote the operand of Ext.
3500  /// Newly added extensions are inserted in \p Exts.
3501  /// Newly added truncates are inserted in \p Truncs.
3502  /// Should never be called directly.
3503  /// \return The promoted value which is used instead of Ext.
3504  static Value *promoteOperandForOther(Instruction *Ext,
3505  TypePromotionTransaction &TPT,
3506  InstrToOrigTy &PromotedInsts,
3507  unsigned &CreatedInstsCost,
3508  SmallVectorImpl<Instruction *> *Exts,
3509  SmallVectorImpl<Instruction *> *Truncs,
3510  const TargetLowering &TLI, bool IsSExt);
3511 
3512  /// \see promoteOperandForOther.
3513  static Value *signExtendOperandForOther(
3514  Instruction *Ext, TypePromotionTransaction &TPT,
3515  InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
3516  SmallVectorImpl<Instruction *> *Exts,
3517  SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
3518  return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
3519  Exts, Truncs, TLI, true);
3520  }
3521 
3522  /// \see promoteOperandForOther.
3523  static Value *zeroExtendOperandForOther(
3524  Instruction *Ext, TypePromotionTransaction &TPT,
3525  InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
3526  SmallVectorImpl<Instruction *> *Exts,
3527  SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
3528  return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
3529  Exts, Truncs, TLI, false);
3530  }
3531 
3532 public:
3533  /// Type for the utility function that promotes the operand of Ext.
3534  using Action = Value *(*)(Instruction *Ext, TypePromotionTransaction &TPT,
3535  InstrToOrigTy &PromotedInsts,
3536  unsigned &CreatedInstsCost,
3537  SmallVectorImpl<Instruction *> *Exts,
3538  SmallVectorImpl<Instruction *> *Truncs,
3539  const TargetLowering &TLI);
3540 
3541  /// Given a sign/zero extend instruction \p Ext, return the appropriate
3542  /// action to promote the operand of \p Ext instead of using Ext.
3543  /// \return NULL if no promotable action is possible with the current
3544  /// sign extension.
3545  /// \p InsertedInsts keeps track of all the instructions inserted by the
3546  /// other CodeGenPrepare optimizations. This information is important
3547  /// because we do not want to promote these instructions as CodeGenPrepare
3548  /// will reinsert them later. Thus creating an infinite loop: create/remove.
3549  /// \p PromotedInsts maps the instructions to their type before promotion.
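 /// For example (illustrative only): for
 ///   sext i32 (trunc i64 (sext i16 %v to i64) to i32) to i64
 /// this returns promoteOperandForTruncAndAnyExt, whereas for
 ///   sext i32 (add nsw i32 %a, %b) to i64
 /// it returns signExtendOperandForOther, assuming the add has a single use
 /// and was not inserted by CodeGenPrepare itself.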
3550  static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedInsts,
3551  const TargetLowering &TLI,
3552  const InstrToOrigTy &PromotedInsts);
3553 };
3554 
3555 } // end anonymous namespace
3556 
3557 bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
3558  Type *ConsideredExtType,
3559  const InstrToOrigTy &PromotedInsts,
3560  bool IsSExt) {
3561  // The promotion helper does not know how to deal with vector types yet.
3562  // To be able to fix that, we would need to fix the places where we
3563  // statically extend, e.g., constants and such.
3564  if (Inst->getType()->isVectorTy())
3565  return false;
3566 
3567  // We can always get through zext.
3568  if (isa<ZExtInst>(Inst))
3569  return true;
3570 
3571  // sext(sext) is ok too.
3572  if (IsSExt && isa<SExtInst>(Inst))
3573  return true;
3574 
3575  // We can get through binary operator, if it is legal. In other words, the
3576  // binary operator must have a nuw or nsw flag.
3577  const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
3578  if (BinOp && isa<OverflowingBinaryOperator>(BinOp) &&
3579  ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
3580  (IsSExt && BinOp->hasNoSignedWrap())))
3581  return true;
3582 
3583  // ext(and(opnd, cst)) --> and(ext(opnd), ext(cst))
3584  if ((Inst->getOpcode() == Instruction::And ||
3585  Inst->getOpcode() == Instruction::Or))
3586  return true;
3587 
3588  // ext(xor(opnd, cst)) --> xor(ext(opnd), ext(cst))
3589  if (Inst->getOpcode() == Instruction::Xor) {
3590  const ConstantInt *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1));
3591  // Make sure it is not a NOT.
3592  if (Cst && !Cst->getValue().isAllOnesValue())
3593  return true;
3594  }
3595 
3596  // zext(shrl(opnd, cst)) --> shrl(zext(opnd), zext(cst))
3597  // It may change a poisoned value into a regular value, like
3598  // zext i32 (shrl i8 %val, 12) --> shrl i32 (zext i8 %val), 12
3599  // poisoned value regular value
3600  // It should be OK since undef covers valid value.
3601  if (Inst->getOpcode() == Instruction::LShr && !IsSExt)
3602  return true;
3603 
3604  // and(ext(shl(opnd, cst)), cst) --> and(shl(ext(opnd), ext(cst)), cst)
3605  // It may change a poisoned value into a regular value, like
3606  // zext i32 (shl i8 %val, 12) --> shl i32 (zext i8 %val), 12
3607  // poisoned value regular value
3608  // It should be OK since undef covers valid value.
3609  if (Inst->getOpcode() == Instruction::Shl && Inst->hasOneUse()) {
3610  const Instruction *ExtInst =
3611  dyn_cast<const Instruction>(*Inst->user_begin());
3612  if (ExtInst->hasOneUse()) {
3613  const Instruction *AndInst =
3614  dyn_cast<const Instruction>(*ExtInst->user_begin());
3615  if (AndInst && AndInst->getOpcode() == Instruction::And) {
3616  const ConstantInt *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1));
3617  if (Cst &&
3618  Cst->getValue().isIntN(Inst->getType()->getIntegerBitWidth()))
3619  return true;
3620  }
3621  }
3622  }
3623 
3624  // Check if we can do the following simplification.
3625  // ext(trunc(opnd)) --> ext(opnd)
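 // E.g. (rough sketch): with %o = sext i16 %v to i64 and %t = trunc i64 %o
 // to i32, the extension "sext i32 %t to i64" can simply reuse %o, because
 // the trunc only drops bits that were sign-extension bits in the first place.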
3626  if (!isa<TruncInst>(Inst))
3627  return false;
3628 
3629  Value *OpndVal = Inst->getOperand(0);
3630  // Check if we can use this operand in the extension.
3631  // If the type is larger than the result type of the extension, we cannot.
3632  if (!OpndVal->getType()->isIntegerTy() ||
3633  OpndVal->getType()->getIntegerBitWidth() >
3634  ConsideredExtType->getIntegerBitWidth())
3635  return false;
3636 
3637  // If the operand of the truncate is not an instruction, we will not have
3638  // any information on the dropped bits.
3639  // (Actually we could for constants, but it is not worth the extra logic.)
3640  Instruction *Opnd = dyn_cast<Instruction>(OpndVal);
3641  if (!Opnd)
3642  return false;
3643 
3644  // Check if the source type is narrow enough.
3645  // I.e., check that the trunc just drops extended bits of the same kind as
3646  // the extension.
3647  // #1 get the type of the operand and check the kind of the extended bits.
3648  const Type *OpndType = getOrigType(PromotedInsts, Opnd, IsSExt);
3649  if (OpndType)
3650  ;
3651  else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
3652  OpndType = Opnd->getOperand(0)->getType();
3653  else
3654  return false;
3655 
3656  // #2 check that the truncate just drops extended bits.
3657  return Inst->getType()->getIntegerBitWidth() >=
3658  OpndType->getIntegerBitWidth();
3659 }
3660 
3661 TypePromotionHelper::Action TypePromotionHelper::getAction(
3662  Instruction *Ext, const SetOfInstrs &InsertedInsts,
3663  const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
3664  assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
3665  "Unexpected instruction type");
3666  Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0));
3667  Type *ExtTy = Ext->getType();
3668  bool IsSExt = isa<SExtInst>(Ext);
3669  // If the operand of the extension is not an instruction, we cannot
3670  // get through.
3671  // If it is, check that we can get through.
3672  if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt))
3673  return nullptr;
3674 
3675  // Do not promote if the operand has been added by codegenprepare.
3676  // Otherwise, it means we are undoing an optimization that is likely to be
3677  // redone, thus causing potential infinite loop.
3678  if (isa<TruncInst>(ExtOpnd) && InsertedInsts.count(ExtOpnd))
3679  return nullptr;
3680 
3681  // SExt or Trunc instructions.
3682  // Return the related handler.
3683  if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) ||
3684  isa<ZExtInst>(ExtOpnd))
3685  return promoteOperandForTruncAndAnyExt;
3686 
3687  // Regular instruction.
3688  // Abort early if we will have to insert non-free instructions.
3689  if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType()))
3690  return nullptr;
3691  return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther;
3692 }
3693 
3694 Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
3695  Instruction *SExt, TypePromotionTransaction &TPT,
3696  InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
3697  SmallVectorImpl<Instruction *> *Exts,
3698  SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
3699  // By construction, the operand of SExt is an instruction. Otherwise we cannot
3700  // get through it and this method should not be called.
3701  Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
3702  Value *ExtVal = SExt;
3703  bool HasMergedNonFreeExt = false;
3704  if (isa<ZExtInst>(SExtOpnd)) {
3705  // Replace s|zext(zext(opnd))
3706  // => zext(opnd).
3707  HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd);
3708  Value *ZExt =
3709  TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
3710  TPT.replaceAllUsesWith(SExt, ZExt);
3711  TPT.eraseInstruction(SExt);
3712  ExtVal = ZExt;
3713  } else {
3714  // Replace z|sext(trunc(opnd)) or sext(sext(opnd))
3715  // => z|sext(opnd).
3716  TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
3717  }
3718  CreatedInstsCost = 0;
3719 
3720  // Remove dead code.
3721  if (SExtOpnd->use_empty())
3722  TPT.eraseInstruction(SExtOpnd);
3723 
3724  // Check if the extension is still needed.
3725  Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
3726  if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
3727  if (ExtInst) {
3728  if (Exts)
3729  Exts->push_back(ExtInst);
3730  CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt;
3731  }
3732  return ExtVal;
3733  }
3734 
3735  // At this point we have: ext ty opnd to ty.
3736  // Reassign the uses of ExtInst to the opnd and remove ExtInst.
3737  Value *NextVal = ExtInst->getOperand(0);
3738  TPT.eraseInstruction(ExtInst, NextVal);
3739  return NextVal;
3740 }
3741 
3742 Value *TypePromotionHelper::promoteOperandForOther(
3743  Instruction *Ext, TypePromotionTransaction &TPT,
3744  InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
3745  SmallVectorImpl<Instruction *> *Exts,
3746  SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI,
3747  bool IsSExt) {
3748  // By construction, the operand of Ext is an instruction. Otherwise we cannot
3749  // get through it and this method should not be called.
3750  Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
3751  CreatedInstsCost = 0;
3752  if (!ExtOpnd->hasOneUse()) {
3753  // ExtOpnd will be promoted.
3754  // All its uses, except Ext, will need to use a truncated value of the
3755  // promoted version.
3756  // Create the truncate now.
3757  Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType());
3758  if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
3759  // Insert it just after the definition.
3760  ITrunc->moveAfter(ExtOpnd);
3761  if (Truncs)
3762  Truncs->push_back(ITrunc);
3763  }
3764 
3765  TPT.replaceAllUsesWith(ExtOpnd, Trunc);
3766  // Restore the operand of Ext (which has been replaced by the previous call
3767  // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
3768  TPT.setOperand(Ext, 0, ExtOpnd);
3769  }
3770 
3771  // Get through the Instruction:
3772  // 1. Update its type.
3773  // 2. Replace the uses of Ext by Inst.
3774  // 3. Extend each operand that needs to be extended.
3775 
3776  // Remember the original type of the instruction before promotion.
3777  // This is useful to know that the high bits are sign extended bits.
3778  addPromotedInst(PromotedInsts, ExtOpnd, IsSExt);
3779  // Step #1.
3780  TPT.mutateType(ExtOpnd, Ext->getType());
3781  // Step #2.
3782  TPT.replaceAllUsesWith(Ext, ExtOpnd);
3783  // Step #3.
3784  Instruction *ExtForOpnd = Ext;
3785 
3786  LLVM_DEBUG(dbgs() << "Propagate Ext to operands\n");
3787  for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
3788  ++OpIdx) {
3789  LLVM_DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n');
3790  if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() ||
3791  !shouldExtOperand(ExtOpnd, OpIdx)) {
3792  LLVM_DEBUG(dbgs() << "No need to propagate\n");
3793  continue;
3794  }
3795  // Check if we can statically extend the operand.
3796  Value *Opnd = ExtOpnd->getOperand(OpIdx);
3797  if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
3798  LLVM_DEBUG(dbgs() << "Statically extend\n");
3799  unsigned BitWidth = Ext->getType()->getIntegerBitWidth();
3800  APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth)
3801  : Cst->getValue().zext(BitWidth);
3802  TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal));
3803  continue;
3804  }
3805  // UndefValues are typed, so we have to statically sign extend them.
3806  if (isa<UndefValue>(Opnd)) {
3807  LLVM_DEBUG(dbgs() << "Statically extend\n");
3808  TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType()));
3809  continue;
3810  }
3811 
3812  // Otherwise we have to explicitly sign extend the operand.
3813  // Check if Ext was reused to extend an operand.
3814  if (!ExtForOpnd) {
3815  // If yes, create a new one.
3816  LLVM_DEBUG(dbgs() << "More operands to ext\n");
3817  Value *ValForExtOpnd = IsSExt ? TPT.createSExt(Ext, Opnd, Ext->getType())
3818  : TPT.createZExt(Ext, Opnd, Ext->getType());
3819  if (!isa<Instruction>(ValForExtOpnd)) {
3820  TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
3821  continue;
3822  }
3823  ExtForOpnd = cast<Instruction>(ValForExtOpnd);
3824  }
3825  if (Exts)
3826  Exts->push_back(ExtForOpnd);
3827  TPT.setOperand(ExtForOpnd, 0, Opnd);
3828 
3829  // Move the sign extension before the insertion point.
3830  TPT.moveBefore(ExtForOpnd, ExtOpnd);
3831  TPT.setOperand(ExtOpnd, OpIdx, ExtForOpnd);
3832  CreatedInstsCost += !TLI.isExtFree(ExtForOpnd);
3833  // If more sext are required, new instructions will have to be created.
3834  ExtForOpnd = nullptr;
3835  }
3836  if (ExtForOpnd == Ext) {
3837  LLVM_DEBUG(dbgs() << "Extension is useless now\n");
3838  TPT.eraseInstruction(Ext);
3839  }
3840  return ExtOpnd;
3841 }
3842 
3843 /// Check whether or not promoting an instruction to a wider type is profitable.
3844 /// \p NewCost gives the cost of extension instructions created by the
3845 /// promotion.
3846 /// \p OldCost gives the cost of extension instructions before the promotion
3847 /// plus the number of instructions that have been
3848 /// matched in the addressing mode thanks to the promotion.
3849 /// \p PromotedOperand is the value that has been promoted.
3850 /// \return True if the promotion is profitable, false otherwise.
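 /// E.g. (illustrative numbers): if the promotion created one non-free
 /// extension (NewCost = 1) while the original extension cost 1 and one extra
 /// instruction was folded into the addressing mode (OldCost = 1 + 1 = 2),
 /// then NewCost < OldCost and the promotion pays off.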
3851 bool AddressingModeMatcher::isPromotionProfitable(
3852  unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
3853  LLVM_DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost
3854  << '\n');
3855  // The cost of the new extensions is greater than the cost of the
3856  // old extension plus what we folded.
3857  // This is not profitable.
3858  if (NewCost > OldCost)
3859  return false;
3860  if (NewCost < OldCost)
3861  return true;
3862  // The promotion is neutral but it may help folding the sign extension in
3863  // loads for instance.
3864  // Check that we did not create an illegal instruction.
3865  return isPromotedInstructionLegal(TLI, DL, PromotedOperand);
3866 }
3867 
3868 /// Given an instruction or constant expr, see if we can fold the operation
3869 /// into the addressing mode. If so, update the addressing mode and return
3870 /// true, otherwise return false without modifying AddrMode.
3871 /// If \p MovedAway is not NULL, it contains the information of whether or
3872 /// not AddrInst has to be folded into the addressing mode on success.
3873 /// If \p MovedAway == true, \p AddrInst will not be part of the addressing mode
3874 /// because it has been moved away.
3875 /// Thus AddrInst must not be added in the matched instructions.
3876 /// This state can happen when AddrInst is a sext, since it may be moved away.
3877 /// Therefore, AddrInst may not be valid when MovedAway is true and it must
3878 /// not be referenced anymore.
3879 bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
3880  unsigned Depth,
3881  bool *MovedAway) {
3882  // Avoid exponential behavior on extremely deep expression trees.
3883  if (Depth >= 5) return false;
3884 
3885  // By default, all matched instructions stay in place.
3886  if (MovedAway)
3887  *MovedAway = false;
3888 
3889  switch (Opcode) {
3890  case Instruction::PtrToInt:
3891  // PtrToInt is always a noop, as we know that the int type is pointer sized.
3892  return matchAddr(AddrInst->getOperand(0), Depth);
3893  case Instruction::IntToPtr: {
3894  auto AS = AddrInst->getType()->getPointerAddressSpace();
3895  auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
3896  // This inttoptr is a no-op if the integer type is pointer sized.
3897  if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy)
3898  return matchAddr(AddrInst->getOperand(0), Depth);
3899  return false;
3900  }
3901  case Instruction::BitCast:
3902  // BitCast is always a noop, and we can handle it as long as it is
3903  // int->int or pointer->pointer (we don't want int<->fp or something).
3904  if (AddrInst->getOperand(0)->getType()->isIntOrPtrTy() &&
3905  // Don't touch identity bitcasts. These were probably put here by LSR,
3906  // and we don't want to mess around with them. Assume it knows what it
3907  // is doing.
3908  AddrInst->getOperand(0)->getType() != AddrInst->getType())
3909  return matchAddr(AddrInst->getOperand(0), Depth);
3910  return false;
3911  case Instruction::AddrSpaceCast: {
3912  unsigned SrcAS
3913  = AddrInst->getOperand(0)->getType()->getPointerAddressSpace();
3914  unsigned DestAS = AddrInst->getType()->getPointerAddressSpace();
3915  if (TLI.isNoopAddrSpaceCast(SrcAS, DestAS))
3916  return matchAddr(AddrInst->getOperand(0), Depth);
3917  return false;
3918  }
3919  case Instruction::Add: {
3920  // Check to see if we can merge in the RHS then the LHS. If so, we win.
3921  ExtAddrMode BackupAddrMode = AddrMode;
3922  unsigned OldSize = AddrModeInsts.size();
3923  // Start a transaction at this point.
3924  // The LHS may match but not the RHS.
3925  // Therefore, we need a higher level restoration point to undo partially
3926  // matched operation.
3927  TypePromotionTransaction::ConstRestorationPt LastKnownGood =
3928  TPT.getRestorationPoint();
3929 
3930  if (matchAddr(AddrInst->getOperand(1), Depth+1) &&
3931  matchAddr(AddrInst->getOperand(0), Depth+1))
3932  return true;
3933 
3934  // Restore the old addr mode info.
3935  AddrMode = BackupAddrMode;
3936  AddrModeInsts.resize(OldSize);
3937  TPT.rollback(LastKnownGood);
3938 
3939  // Otherwise this was over-aggressive. Try merging in the LHS then the RHS.
3940  if (matchAddr(AddrInst->getOperand(0), Depth+1) &&
3941  matchAddr(AddrInst->getOperand(1), Depth+1))
3942  return true;
3943 
3944  // Otherwise we definitely can't merge the ADD in.
3945  AddrMode = BackupAddrMode;
3946  AddrModeInsts.resize(OldSize);
3947  TPT.rollback(LastKnownGood);
3948  break;
3949  }
3950  //case Instruction::Or:
3951  // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
3952  //break;
3953  case Instruction::Mul:
3954  case Instruction::Shl: {
3955  // Can only handle X*C and X << C.
3956  ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
3957  if (!RHS || RHS->getBitWidth() > 64)
3958  return false;
3959  int64_t Scale = RHS->getSExtValue();
3960  if (Opcode == Instruction::Shl)
3961  Scale = 1LL << Scale;
3962 
3963  return matchScaledValue(AddrInst->getOperand(0), Scale, Depth);
3964  }
3965  case Instruction::GetElementPtr: {
3966  // Scan the GEP. We check whether it contains constant offsets and at most
3967  // one variable offset.
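 // E.g. (illustrative): for "getelementptr %struct.S, %struct.S* %p, i64 %i,
 // i32 2", the struct field index 2 contributes a constant offset from the
 // struct layout, while %i is the single variable index, scaled by the alloc
 // size of %struct.S.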
3968  int VariableOperand = -1;
3969  unsigned VariableScale = 0;
3970 
3971  int64_t ConstantOffset = 0;
3972  gep_type_iterator GTI = gep_type_begin(AddrInst);
3973  for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
3974  if (StructType *STy = GTI.getStructTypeOrNull()) {
3975  const StructLayout *SL = DL.getStructLayout(STy);
3976  unsigned Idx =
3977  cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
3978  ConstantOffset += SL->getElementOffset(Idx);
3979  } else {
3980  uint64_t TypeSize = DL.getTypeAllocSize(GTI.getIndexedType());
3981  if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
3982  const APInt &CVal = CI->getValue();
3983  if (CVal.getMinSignedBits() <= 64) {
3984  ConstantOffset += CVal.getSExtValue() * TypeSize;
3985  continue;
3986  }
3987  }
3988  if (TypeSize) { // Scales of zero don't do anything.
3989  // We only allow one variable index at the moment.
3990  if (VariableOperand != -1)
3991  return false;
3992 
3993  // Remember the variable index.
3994  VariableOperand = i;
3995  VariableScale = TypeSize;
3996  }
3997  }
3998  }
3999 
4000  // A common case is for the GEP to only do a constant offset. In this case,
4001  // just add it to the disp field and check validity.
4002  if (VariableOperand == -1) {
4003  AddrMode.BaseOffs += ConstantOffset;
4004  if (ConstantOffset == 0 ||
4005  TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) {
4006  // Check to see if we can fold the base pointer in too.
4007  if (matchAddr(AddrInst->getOperand(0), Depth+1))
4008  return true;
4009  } else if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
4010  TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
4011  ConstantOffset > 0) {
4012  // Record GEPs with non-zero offsets as candidates for splitting in the
4013  // event that the offset cannot fit into the r+i addressing mode.
4014  // Simple and common case that only one GEP is used in calculating the
4015  // address for the memory access.
4016  Value *Base = AddrInst->getOperand(0);
4017  auto *BaseI = dyn_cast<Instruction>(Base);
4018  auto *GEP = cast<GetElementPtrInst>(AddrInst);
4019  if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
4020  (BaseI && !isa<CastInst>(BaseI) &&
4021  !isa<GetElementPtrInst>(BaseI))) {
4022  // If the base is an instruction, make sure the GEP is not in the same
4023  // basic block as the base. If the base is an argument or global
4024  // value, make sure the GEP is not in the entry block. Otherwise,
4025  // instruction selection can undo the split. Also make sure the
4026  // parent block allows inserting non-PHI instructions before the
4027  // terminator.
4028  BasicBlock *Parent =
4029  BaseI ? BaseI->getParent() : &GEP->getFunction()->getEntryBlock();
4030  if (GEP->getParent() != Parent && !Parent->getTerminator()->isEHPad())
4031  LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
4032  }
4033  }
4034  AddrMode.BaseOffs -= ConstantOffset;
4035  return false;
4036  }
4037 
4038  // Save the valid addressing mode in case we can't match.
4039  ExtAddrMode BackupAddrMode = AddrMode;
4040  unsigned OldSize = AddrModeInsts.size();
4041 
4042  // See if the scale and offset amount is valid for this target.
4043  AddrMode.BaseOffs += ConstantOffset;
4044 
4045  // Match the base operand of the GEP.
4046  if (!matchAddr(AddrInst->getOperand(0), Depth+1)) {
4047  // If it couldn't be matched, just stuff the value in a register.
4048  if (AddrMode.HasBaseReg) {
4049  AddrMode = BackupAddrMode;
4050  AddrModeInsts.resize(OldSize);
4051  return false;
4052  }
4053  AddrMode.HasBaseReg = true;
4054  AddrMode.BaseReg = AddrInst->getOperand(0);
4055  }
4056 
4057  // Match the remaining variable portion of the GEP.
4058  if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
4059  Depth)) {
4060  // If it couldn't be matched, try stuffing the base into a register
4061  // instead of matching it, and retrying the match of the scale.
4062  AddrMode = BackupAddrMode;
4063  AddrModeInsts.resize(OldSize);
4064  if (AddrMode.HasBaseReg)
4065  return false;
4066  AddrMode.HasBaseReg = true;
4067  AddrMode.BaseReg = AddrInst->getOperand(0);
4068  AddrMode.BaseOffs += ConstantOffset;
4069  if (!matchScaledValue(AddrInst->getOperand(VariableOperand),
4070  VariableScale, Depth)) {
4071  // If even that didn't work, bail.
4072  AddrMode = BackupAddrMode;
4073  AddrModeInsts.resize(OldSize);
4074  return false;
4075  }
4076  }
4077 
4078  return true;
4079  }
4080  case Instruction::SExt:
4081  case Instruction::ZExt: {
4082  Instruction *Ext = dyn_cast<Instruction>(AddrInst);
4083  if (!Ext)
4084  return false;
4085 
4086  // Try to move this ext out of the way of the addressing mode.
4087  // Ask for a method for doing so.
4088  TypePromotionHelper::Action TPH =
4089  TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts);
4090  if (!TPH)
4091  return false;
4092 
4093  TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4094  TPT.getRestorationPoint();
4095  unsigned CreatedInstsCost = 0;
4096  unsigned ExtCost = !TLI.isExtFree(Ext);
4097  Value *PromotedOperand =
4098  TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI);
4099  // SExt has been moved away.
4100  // Thus either it will be rematched later in the recursive calls or it is
4101  // gone. Anyway, we must not fold it into the addressing mode at this point.
4102  // E.g.,
4103  // op = add opnd, 1
4104  // idx = ext op
4105  // addr = gep base, idx
4106  // is now:
4107  // promotedOpnd = ext opnd <- no match here
4108  // op = promoted_add promotedOpnd, 1 <- match (later in recursive calls)
4109  // addr = gep base, op <- match
4110  if (MovedAway)
4111  *MovedAway = true;
4112 
4113  assert(PromotedOperand &&
4114  "TypePromotionHelper should have filtered out those cases");
4115 
4116  ExtAddrMode BackupAddrMode = AddrMode;
4117  unsigned OldSize = AddrModeInsts.size();
4118 
4119  if (!matchAddr(PromotedOperand, Depth) ||
4120  // The total of the new cost is equal to the cost of the created
4121  // instructions.
4122  // The total of the old cost is equal to the cost of the extension plus
4123  // what we have saved in the addressing mode.
4124  !isPromotionProfitable(CreatedInstsCost,
4125  ExtCost + (AddrModeInsts.size() - OldSize),
4126  PromotedOperand)) {
4127  AddrMode = BackupAddrMode;
4128  AddrModeInsts.resize(OldSize);
4129  LLVM_DEBUG(dbgs() << "Sign extension does not pay off: rollback\n");
4130  TPT.rollback(LastKnownGood);
4131  return false;
4132  }
4133  return true;
4134  }
4135  }
4136  return false;
4137 }
4138 
4139 /// If we can, try to add the value of 'Addr' into the current addressing mode.
4140 /// If Addr can't be added to AddrMode this returns false and leaves AddrMode
4141 /// unmodified. This assumes that Addr is either a pointer type or intptr_t
4142 /// for the target.
4143 ///
4144 bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) {
4145  // Start a transaction at this point that we will rollback if the matching
4146  // fails.
4147  TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4148  TPT.getRestorationPoint();
4149  if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
4150  // Fold in immediates if legal for the target.
4151  AddrMode.BaseOffs += CI->getSExtValue();
4152  if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
4153  return true;
4154  AddrMode.BaseOffs -= CI->getSExtValue();
4155  } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
4156  // If this is a global variable, try to fold it into the addressing mode.
4157  if (!AddrMode.BaseGV) {
4158  AddrMode.BaseGV = GV;
4159  if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
4160  return true;
4161  AddrMode.BaseGV = nullptr;
4162  }
4163  } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
4164  ExtAddrMode BackupAddrMode = AddrMode;
4165  unsigned OldSize = AddrModeInsts.size();
4166 
4167  // Check to see if it is possible to fold this operation.
4168  bool MovedAway = false;
4169  if (matchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
4170  // This instruction may have been moved away. If so, there is nothing
4171  // to check here.
4172  if (MovedAway)
4173  return true;
4174  // Okay, it's possible to fold this. Check to see if it is actually
4175  // *profitable* to do so. We use a simple cost model to avoid increasing
4176  // register pressure too much.
4177  if (I->hasOneUse() ||
4178  isProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
4179  AddrModeInsts.push_back(I);
4180  return true;
4181  }
4182 
4183  // It isn't profitable to do this, roll back.
4184  //cerr << "NOT FOLDING: " << *I;
4185  AddrMode = BackupAddrMode;
4186  AddrModeInsts.resize(OldSize);
4187  TPT.rollback(LastKnownGood);
4188  }
4189  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
4190  if (matchOperationAddr(CE, CE->getOpcode(), Depth))
4191  return true;
4192  TPT.rollback(LastKnownGood);
4193  } else if (isa<ConstantPointerNull>(Addr)) {
4194  // Null pointer gets folded without affecting the addressing mode.
4195  return true;
4196  }
4197 
4198  // Worst case, the target should support [reg] addressing modes. :)
4199  if (!AddrMode.HasBaseReg) {
4200  AddrMode.HasBaseReg = true;
4201  AddrMode.BaseReg = Addr;
4202  // Still check for legality in case the target supports [imm] but not [i+r].
4203  if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
4204  return true;
4205  AddrMode.HasBaseReg = false;
4206  AddrMode.BaseReg = nullptr;
4207  }
4208 
4209  // If the base register is already taken, see if we can do [r+r].
4210  if (AddrMode.Scale == 0) {
4211  AddrMode.Scale = 1;
4212  AddrMode.ScaledReg = Addr;
4213  if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
4214  return true;
4215  AddrMode.Scale = 0;
4216  AddrMode.ScaledReg = nullptr;
4217  }
4218  // Couldn't match.
4219  TPT.rollback(LastKnownGood);
4220  return false;
4221 }
4222 
4223 /// Check to see if all uses of OpVal by the specified inline asm call are due
4224 /// to memory operands. If so, return true, otherwise return false.
4225 static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
4226  const TargetLowering &TLI,
4227  const TargetRegisterInfo &TRI) {
4228  const Function *F = CI->getFunction();
4229  TargetLowering::AsmOperandInfoVector TargetConstraints =
4230  TLI.ParseConstraints(F->getParent()->getDataLayout(), &TRI,
4231  ImmutableCallSite(CI));
4232 
4233  for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
4234  TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
4235 
4236  // Compute the constraint code and ConstraintType to use.
4237  TLI.ComputeConstraintToUse(OpInfo, SDValue());
4238 
4239  // If this asm operand is our Value*, and if it isn't an indirect memory
4240  // operand, we can't fold it!
4241  if (OpInfo.CallOperandVal == OpVal &&
4242  (OpInfo.ConstraintType != TargetLowering::C_Memory ||
4243  !OpInfo.isIndirect))
4244  return false;
4245  }
4246 
4247  return true;
4248 }
4249 
4250 // Max number of memory uses to look at before aborting the search to conserve
4251 // compile time.
4252 static constexpr int MaxMemoryUsesToScan = 20;
4253 
4254 /// Recursively walk all the uses of I until we find a memory use.
4255 /// If we find an obviously non-foldable instruction, return true.
4256 /// Add the ultimately found memory instructions to MemoryUses.
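 /// E.g. (sketch): a load or store whose address is computed from I gets
 /// recorded in MemoryUses, whereas a plain (non inline-asm, non-cold) call
 /// that uses I makes the walk give up and return true.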
4257 static bool FindAllMemoryUses(
4258  Instruction *I,
4259  SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses,
4260  SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,
4261  const TargetRegisterInfo &TRI, int SeenInsts = 0) {
4262  // If we already considered this instruction, we're done.
4263  if (!ConsideredInsts.insert(I).second)
4264  return false;
4265 
4266  // If this is an obviously unfoldable instruction, bail out.
4267  if (!MightBeFoldableInst(I))
4268  return true;
4269 
4270  const bool OptSize = I->getFunction()->optForSize();
4271 
4272  // Loop over all the uses, recursively processing them.
4273  for (Use &U : I->uses()) {
4274  // Conservatively return true if we're seeing a large number or a deep chain
4275  // of users. This avoids excessive compilation times in pathological cases.
4276  if (SeenInsts++ >= MaxMemoryUsesToScan)
4277  return true;
4278 
4279  Instruction *UserI = cast<Instruction>(U.getUser());
4280  if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
4281  MemoryUses.push_back(std::make_pair(LI, U.getOperandNo()));
4282  continue;
4283  }
4284 
4285  if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
4286  unsigned opNo = U.getOperandNo();
4287  if (opNo != StoreInst::getPointerOperandIndex())
4288  return true; // Storing addr, not into addr.
4289  MemoryUses.push_back(std::make_pair(SI, opNo));
4290  continue;
4291  }
4292 
4293  if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UserI)) {
4294  unsigned opNo = U.getOperandNo();
4295  if (opNo != AtomicRMWInst::getPointerOperandIndex())
4296  return true; // Storing addr, not into addr.
4297  MemoryUses.push_back(std::make_pair(RMW, opNo));
4298  continue;
4299  }
4300 
4301  if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(UserI)) {
4302  unsigned opNo = U.getOperandNo();
4303  if (opNo != AtomicCmpXchgInst::getPointerOperandIndex())
4304  return true; // Storing addr, not into addr.
4305  MemoryUses.push_back(std::make_pair(CmpX, opNo));
4306  continue;
4307  }
4308 
4309  if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
4310  // If this is a cold call, we can sink the addressing calculation into
4311  // the cold path. See optimizeCallInst
4312  if (!OptSize && CI->hasFnAttr(Attribute::Cold))
4313  continue;
4314 
4315  InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
4316  if (!IA) return true;
4317 
4318  // If this is a memory operand, we're cool, otherwise bail out.
4319  if (!IsOperandAMemoryOperand(CI, IA, I, TLI, TRI))
4320  return true;
4321  continue;
4322  }
4323 
4324  if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI,
4325  SeenInsts))
4326  return true;
4327  }
4328 
4329  return false;
4330 }
4331 
4332 /// Return true if Val is already known to be live at the use site that we're
4333 /// folding it into. If so, there is no cost to include it in the addressing
4334 /// mode. KnownLive1 and KnownLive2 are two values that we know are live at the
4335 /// instruction already.
4336 bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
4337  Value *KnownLive2) {
4338  // If Val is either of the known-live values, we know it is live!
4339  if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2)
4340  return true;
4341 
4342  // All values other than instructions and arguments (e.g. constants) are live.
4343  if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;
4344 
4345  // If Val is a constant sized alloca in the entry block, it is live; this is
4346  // true because it is just a reference to the stack/frame pointer, which is
4347  // live for the whole function.
4348  if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
4349  if (AI->isStaticAlloca())
4350  return true;
4351 
4352  // Check to see if this value is already used in the memory instruction's
4353  // block. If so, it's already live into the block at the very least, so we
4354  // can reasonably fold it.
4355  return Val->isUsedInBasicBlock(MemoryInst->getParent());
4356 }
4357 
4358 /// It is possible for the addressing mode of the machine to fold the specified
4359 /// instruction into a load or store that ultimately uses it.
4360 /// However, the specified instruction has multiple uses.
4361 /// Given this, it may actually increase register pressure to fold it
4362 /// into the load. For example, consider this code:
4363 ///
4364 /// X = ...
4365 /// Y = X+1
4366 /// use(Y) -> nonload/store
4367 /// Z = Y+1
4368 /// load Z
4369 ///
4370 /// In this case, Y has multiple uses, and can be folded into the load of Z
4371 /// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
4372 /// be live at the use(Y) line. If we don't fold Y into load Z, we use one
4373 /// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
4374 /// number of computations either.
4375 ///
4376 /// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
4377 /// X was live across 'load Z' for other reasons, we actually *would* want to
4378 /// fold the addressing mode in the Z case. This would make Y die earlier.
4379 bool AddressingModeMatcher::
4380 isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
4381  ExtAddrMode &AMAfter) {
4382  if (IgnoreProfitability) return true;
4383 
4384  // AMBefore is the addressing mode before this instruction was folded into it,
4385  // and AMAfter is the addressing mode after the instruction was folded. Get
4386  // the set of registers referenced by AMAfter and subtract out those
4387  // referenced by AMBefore: this is the set of values which folding in this
4388  // address extends the lifetime of.
4389  //
4390  // Note that there are only two potential values being referenced here,
4391  // BaseReg and ScaleReg (global addresses are always available, as are any
4392  // folded immediates).
4393  Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
4394 
4395  // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
4396  // lifetime wasn't extended by adding this instruction.
4397  if (valueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
4398  BaseReg = nullptr;
4399  if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
4400  ScaledReg = nullptr;
4401 
4402  // If folding this instruction (and its subexprs) didn't extend any live
4403  // ranges, we're ok with it.
4404  if (!BaseReg && !ScaledReg)
4405  return true;
4406 
4407  // If all uses of this instruction can have the address mode sunk into them,
4408  // we can remove the addressing mode and effectively trade one live register
4409  // for another (at worst.) In this context, folding an addressing mode into
4410  // the use is just a particularly nice way of sinking it.
4411  SmallVector<std::pair<Instruction *, unsigned>, 16> MemoryUses;
4412  SmallPtrSet<Instruction*, 16> ConsideredInsts;
4413  if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI))
4414  return false; // Has a non-memory, non-foldable use!
4415 
4416  // Now that we know that all uses of this instruction are part of a chain of
4417  // computation involving only operations that could theoretically be folded
4418  // into a memory use, loop over each of these memory operation uses and see
4419  // if they could *actually* fold the instruction. The assumption is that
4420  // addressing modes are cheap and that duplicating the computation involved
4421  // many times is worthwhile, even on a fastpath. For sinking candidates
4422  // (i.e. cold call sites), this serves as a way to prevent excessive code
4423  // growth since most architectures have some reasonably small and fast way to
4424  // compute an effective address (e.g., LEA on x86).
4425  SmallVector<Instruction*, 32> MatchedAddrModeInsts;
4426  for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
4427  Instruction *User = MemoryUses[i].first;
4428  unsigned OpNo = MemoryUses[i].second;
4429 
4430  // Get the access type of this use. If the use isn't a pointer, we don't
4431  // know what it accesses.
4432  Value *Address = User->getOperand(OpNo);
4433  PointerType *AddrTy = dyn_cast<PointerType>(Address->getType());
4434  if (!AddrTy)
4435  return false;
4436  Type *AddressAccessTy = AddrTy->getElementType();
4437  unsigned AS = AddrTy->getAddressSpace();
4438 
4439  // Do a match against the root of this address, ignoring profitability. This
4440  // will tell us if the addressing mode for the memory operation will
4441  // *actually* cover the shared instruction.
4442  ExtAddrMode Result;
4443  std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
4444  0);
4445  TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4446  TPT.getRestorationPoint();
4447  AddressingModeMatcher Matcher(
4448  MatchedAddrModeInsts, TLI, TRI, AddressAccessTy, AS, MemoryInst, Result,
4449  InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP);
4450  Matcher.IgnoreProfitability = true;
4451  bool Success = Matcher.matchAddr(Address, 0);
4452  (void)Success; assert(Success && "Couldn't select *anything*?");
4453 
4454  // The match was to check the profitability, the changes made are not
4455  // part of the original matcher. Therefore, they should be dropped
4456  // otherwise the original matcher will not present the right state.
4457  TPT.rollback(LastKnownGood);
4458 
4459  // If the match didn't cover I, then it won't be shared by it.
4460  if (!is_contained(MatchedAddrModeInsts, I))
4461  return false;
4462 
4463  MatchedAddrModeInsts.clear();
4464  }
4465 
4466  return true;
4467 }
4468 
4469 /// Return true if the specified values are defined in a
4470 /// different basic block than BB.
4471 static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
4472  if (Instruction *I = dyn_cast<Instruction>(V))
4473  return I->getParent() != BB;
4474  return false;
4475 }
4476 
4477 /// Sink addressing mode computation immediately before MemoryInst if doing so
4478 /// can be done without increasing register pressure. The need for the
4479 /// register pressure constraint means this can end up being an all or nothing
4480 /// decision for all uses of the same addressing computation.
4481 ///
4482 /// Load and Store Instructions often have addressing modes that can do
4483 /// significant amounts of computation. As such, instruction selection will try
4484 /// to get the load or store to do as much computation as possible for the
4485 /// program. The problem is that isel can only see within a single block. As
4486 /// such, we sink as much legal addressing mode work into the block as possible.
4487 ///
4488 /// This method is used to optimize both load/store and inline asms with memory
4489 /// operands. It's also used to sink addressing computations feeding into cold
4490 /// call sites into their (cold) basic block.
4491 ///
4492 /// The motivation for handling sinking into cold blocks is that doing so can
4493 /// both enable other address mode sinking (by satisfying the register pressure
4494 /// constraint above), and reduce register pressure globally (by removing the
4495 /// addressing mode computation from the fast path entirely).
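///
/// As an illustrative sketch only (hypothetical IR, not taken from a test
/// case), an address computation feeding a cold block such as
/// \code
///   entry:
///     %addr = getelementptr i8, i8* %base, i64 40
///     br i1 %c, label %cold, label %exit
///   cold:
///     %v = load i8, i8* %addr
/// \endcode
/// can have the "gep %base, 40" computation rematerialized as a "sunkaddr"
/// sequence immediately before the load in %cold, so that no part of the
/// address computation stays live across the branch on the fast path.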
4496 bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
4497  Type *AccessTy, unsigned AddrSpace) {
4498  Value *Repl = Addr;
4499 
4500  // Try to collapse single-value PHI nodes. This is necessary to undo
4501  // unprofitable PRE transformations.
4502  SmallVector<Value*, 8> worklist;
4503  SmallPtrSet<Value*, 16> Visited;
4504  worklist.push_back(Addr);
4505 
4506  // Use a worklist to iteratively look through PHI and select nodes, and
4507  // ensure that the addressing modes obtained from the non-PHI/select roots of
4508  // the graph are compatible.
4509  bool PhiOrSelectSeen = false;
4510  SmallVector<Instruction*, 16> AddrModeInsts;
4511  const SimplifyQuery SQ(*DL, TLInfo);
4512  AddressingModeCombiner AddrModes(SQ, Addr);
4513  TypePromotionTransaction TPT(RemovedInsts);
4514  TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4515  TPT.getRestorationPoint();
4516  while (!worklist.empty()) {
4517  Value *V = worklist.back();
4518  worklist.pop_back();
4519 
4520  // We allow traversing cyclic Phi nodes.
4521  // In case of success after this loop, we ensure that traversing through
4522  // Phi nodes ends up with all cases computing an address of the form
4523  // BaseGV + Base + Scale * Index + Offset
4524  // where Scale and Offset are constants and BaseGV, Base and Index
4525  // are exactly the same Values in all cases.
4526  // It means that BaseGV, Scale and Offset dominate our memory instruction
4527  // and have the same value as they had in the address computation represented
4528  // as the Phi, so we can safely sink the address computation to the memory instruction.
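  // As a hypothetical sketch (names are illustrative only): if both incoming
  // values of a Phi used as an address are
  //   %a0 = getelementptr i8, i8* %base, i64 8   ; in one predecessor
  //   %a1 = getelementptr i8, i8* %base, i64 8   ; in the other predecessor
  // then every non-Phi root computes %base + 8, so a single addressing mode
  // (Base = %base, Offset = 8) is valid at the memory instruction itself.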
4529  if (!Visited.insert(V).second)
4530  continue;
4531 
4532  // For a PHI node, push all of its incoming values.
4533  if (PHINode *P = dyn_cast<PHINode>(V)) {
4534  for (Value *IncValue : P->incoming_values())
4535  worklist.push_back(IncValue);
4536  PhiOrSelectSeen = true;
4537  continue;
4538  }
4539  // Similar for select.
4540  if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
4541  worklist.push_back(SI->getFalseValue());
4542  worklist.push_back(SI->getTrueValue());
4543  PhiOrSelectSeen = true;
4544  continue;
4545  }
4546 
4547  // For non-PHIs, determine the addressing mode being computed. Note that
4548  // the result may differ depending on what other uses our candidate
4549  // addressing instructions might have.
4550  AddrModeInsts.clear();
4551  std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
4552  0);
4553  ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
4554  V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *TRI,
4555  InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP);
4556 
4557  GetElementPtrInst *GEP = LargeOffsetGEP.first;
4558  if (GEP && GEP->getParent() != MemoryInst->getParent() &&
4559  !NewGEPBases.count(GEP)) {
4560  // If splitting the underlying data structure can reduce the offset of a
4561  // GEP, collect the GEP. Skip the GEPs that are the new bases of
4562  // previously split data structures.
4563  LargeOffsetGEPMap[GEP->getPointerOperand()].push_back(LargeOffsetGEP);
4564  if (LargeOffsetGEPID.find(GEP) == LargeOffsetGEPID.end())
4565  LargeOffsetGEPID[GEP] = LargeOffsetGEPID.size();
4566  }
4567 
4568  NewAddrMode.OriginalValue = V;
4569  if (!AddrModes.addNewAddrMode(NewAddrMode))
4570  break;
4571  }
4572 
4573  // Try to combine the AddrModes we've collected. If we couldn't collect any,
4574  // or we have multiple but either couldn't combine them or combining them
4575  // wouldn't do anything useful, bail out now.
4576  if (!AddrModes.combineAddrModes()) {
4577  TPT.rollback(LastKnownGood);
4578  return false;
4579  }
4580  TPT.commit();
4581 
4582  // Get the combined AddrMode (or the only AddrMode, if we only had one).
4583  ExtAddrMode AddrMode = AddrModes.getAddrMode();
4584 
4585  // If all the instructions matched are already in this BB, don't do anything.
4586  // If we saw a Phi node then it is definitely not local, and if we saw a select
4587  // then we want to push the address calculation past it even if it's already
4588  // in this BB.
4589  if (!PhiOrSelectSeen && none_of(AddrModeInsts, [&](Value *V) {
4590  return IsNonLocalValue(V, MemoryInst->getParent());
4591  })) {
4592  LLVM_DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode
4593  << "\n");
4594  return false;
4595  }
4596 
4597  // Insert this computation right after this user. Since our caller is
4598  // scanning from the top of the BB to the bottom, reuses of the expr are
4599  // guaranteed to happen later.
4600  IRBuilder<> Builder(MemoryInst);
4601 
4602  // Now that we've determined the addressing expression we want to use and know
4603  // that we have to sink it into this block, check to see if we have already
4604  // done this for some other load/store instr in this block. If so, reuse
4605  // the computation. Before attempting reuse, check if the address is valid
4606  // as it may have been erased.
4607 
4608  WeakTrackingVH SunkAddrVH = SunkAddrs[Addr];
4609 
4610  Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
4611  if (SunkAddr) {
4612  LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
4613  << " for " << *MemoryInst << "\n");
4614  if (SunkAddr->getType() != Addr->getType())
4615  SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
4616  } else if (AddrSinkUsingGEPs ||
4617  (!AddrSinkUsingGEPs.getNumOccurrences() && TM && TTI->useAA())) {
4618  // By default, we use the GEP-based method when AA is used later. This
4619  // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
4620  LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
4621  << " for " << *MemoryInst << "\n");
4622  Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
4623  Value *ResultPtr = nullptr, *ResultIndex = nullptr;
4624 
4625  // First, find the pointer.
4626  if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) {
4627  ResultPtr = AddrMode.BaseReg;
4628  AddrMode.BaseReg = nullptr;
4629  }
4630 
4631  if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) {
4632  // We can't add more than one pointer together, nor can we scale a
4633  // pointer (both of which seem meaningless).
4634  if (ResultPtr || AddrMode.Scale != 1)
4635  return false;
4636 
4637  ResultPtr = AddrMode.ScaledReg;
4638  AddrMode.Scale = 0;
4639  }
4640 
4641  // It is only safe to sign extend the BaseReg if we know that the math
4642  // required to create it did not overflow before we extend it. Since
4643  // the original IR value was tossed in favor of a constant back when
4644  // the AddrMode was created we need to bail out gracefully if widths
4645  // do not match instead of extending it.
4646  //
4647  // (See below for code to add the scale.)
4648  if (AddrMode.Scale) {
4649  Type *ScaledRegTy = AddrMode.ScaledReg->getType();
4650  if (cast<IntegerType>(IntPtrTy)->getBitWidth() >
4651  cast<IntegerType>(ScaledRegTy)->getBitWidth())
4652  return false;
4653  }
4654 
4655  if (AddrMode.BaseGV) {
4656  if (ResultPtr)
4657  return false;
4658 
4659  ResultPtr = AddrMode.BaseGV;
4660  }
4661 
4662  // If the real base value actually came from an inttoptr, then the matcher
4663  // will look through it and provide only the integer value. In that case,
4664  // use it here.
4665  if (!DL->isNonIntegralPointerType(Addr->getType())) {
4666  const auto getResultPtr = [MemoryInst, Addr,
4667  &Builder](Value *Reg) -> Value * {
4668  BasicBlock *BB = MemoryInst->getParent();
4669  for (User *U : Reg->users())
4670  if (auto *I2P = dyn_cast<IntToPtrInst>(U))
4671  if (I2P->getType() == Addr->getType() && I2P->getParent() == BB) {
4672  auto *RegInst = dyn_cast<Instruction>(Reg);
4673  if (RegInst && RegInst->getParent() == BB &&
4674  !isa<PHINode>(RegInst) && !RegInst->isEHPad())
4675  I2P->moveAfter(RegInst);
4676  else
4677  I2P->moveBefore(*BB, BB->getFirstInsertionPt());
4678  return I2P;
4679  }
4680  return Builder.CreateIntToPtr(Reg, Addr->getType(), "sunkaddr");
4681  };
4682  if (!ResultPtr && AddrMode.BaseReg) {
4683  ResultPtr = getResultPtr(AddrMode.BaseReg);
4684  AddrMode.BaseReg = nullptr;
4685  } else if (!ResultPtr && AddrMode.Scale == 1) {
4686  ResultPtr = getResultPtr(AddrMode.ScaledReg);
4687  AddrMode.Scale = 0;
4688  }
4689  }
4690 
4691  if (!ResultPtr &&
4692  !AddrMode.BaseReg && !AddrMode.Scale && !AddrMode.BaseOffs) {
4693  SunkAddr = Constant::getNullValue(Addr->getType());
4694  } else if (!ResultPtr) {
4695  return false;
4696  } else {
4697  Type *I8PtrTy =
4698  Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace());
4699  Type *I8Ty = Builder.getInt8Ty();
4700 
4701  // Start with the base register. Do this first so that subsequent address
4702  // matching finds it last, which will prevent it from trying to match it
4703  // as the scaled value in case it happens to be a mul. That would be
4704  // problematic if we've sunk a different mul for the scale, because then
4705  // we'd end up sinking both muls.
4706  if (AddrMode.BaseReg) {
4707  Value *V = AddrMode.BaseReg;
4708  if (V->getType() != IntPtrTy)
4709  V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
4710 
4711  ResultIndex = V;
4712  }
4713 
4714  // Add the scale value.
4715  if (AddrMode.Scale) {
4716  Value *V = AddrMode.ScaledReg;
4717  if (V->getType() == IntPtrTy) {
4718  // done.
4719  } else {
4720  assert(cast<IntegerType>(IntPtrTy)->getBitWidth() <
4721  cast<IntegerType>(V->getType())->getBitWidth() &&
4722  "We can't transform if ScaledReg is too narrow");
4723  V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
4724  }
4725 
4726  if (AddrMode.Scale != 1)
4727  V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
4728  "sunkaddr");
4729  if (ResultIndex)
4730  ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr");
4731  else
4732  ResultIndex = V;
4733  }
4734 
4735  // Add in the Base Offset if present.
4736  if (AddrMode.BaseOffs) {
4737  Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
4738  if (ResultIndex) {
4739  // We need to add this separately from the scale above to help with
4740  // SDAG consecutive load/store merging.
4741  if (ResultPtr->getType() != I8PtrTy)
4742  ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
4743  ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
4744  }
4745 
4746  ResultIndex = V;
4747  }
4748 
4749  if (!ResultIndex) {
4750  SunkAddr = ResultPtr;
4751  } else {
4752  if (ResultPtr->getType() != I8PtrTy)
4753  ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
4754  SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
4755  }
4756 
4757  if (SunkAddr->getType() != Addr->getType())
4758  SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
4759  }
4760  } else {
4761  // We'd require a ptrtoint/inttoptr down the line, which we can't do for
4762  // non-integral pointers, so in that case bail out now.
4763  Type *BaseTy = AddrMode.BaseReg ? AddrMode.BaseReg->getType() : nullptr;
4764  Type *ScaleTy = AddrMode.Scale ? AddrMode.ScaledReg->getType() : nullptr;
4765  PointerType *BasePtrTy = dyn_cast_or_null<PointerType>(BaseTy);
4766  PointerType *ScalePtrTy = dyn_cast_or_null<PointerType>(ScaleTy);
4767  if (DL->isNonIntegralPointerType(Addr->getType()) ||
4768  (BasePtrTy && DL->isNonIntegralPointerType(BasePtrTy)) ||
4769  (ScalePtrTy && DL->isNonIntegralPointerType(ScalePtrTy)) ||
4770  (AddrMode.BaseGV &&
4771  DL->isNonIntegralPointerType(AddrMode.BaseGV->getType())))
4772  return false;
4773 
4774  LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
4775  << " for " << *MemoryInst << "\n");
4776  Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
4777  Value *Result = nullptr;
4778 
4779  // Start with the base register. Do this first so that subsequent address
4780  // matching finds it last, which will prevent it from trying to match it
4781  // as the scaled value in case it happens to be a mul. That would be
4782  // problematic if we've sunk a different mul for the scale, because then
4783  // we'd end up sinking both muls.
4784  if (AddrMode.BaseReg) {
4785  Value *V = AddrMode.BaseReg;
4786  if (V->getType()->isPointerTy())
4787  V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
4788  if (V->getType() != IntPtrTy)
4789  V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
4790  Result = V;
4791  }
4792 
4793  // Add the scale value.
4794  if (AddrMode.Scale) {
4795  Value *V = AddrMode.ScaledReg;
4796  if (V->getType() == IntPtrTy) {
4797  // done.
4798  } else if (V->getType()->isPointerTy()) {
4799  V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
4800  } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
4801  cast<IntegerType>(V->getType())->getBitWidth()) {
4802  V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
4803  } else {
4804  // It is only safe to sign extend the BaseReg if we know that the math
4805  // required to create it did not overflow before we extend it. Since
4806  // the original IR value was tossed in favor of a constant back when
4807  // the AddrMode was created we need to bail out gracefully if widths
4808  // do not match instead of extending it.
4809  Instruction *I = dyn_cast_or_null<Instruction>(Result);
4810  if (I && (Result != AddrMode.BaseReg))
4811  I->eraseFromParent();
4812  return false;
4813  }
4814  if (AddrMode.Scale != 1)
4815  V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
4816  "sunkaddr");
4817  if (Result)
4818  Result = Builder.CreateAdd(Result, V, "sunkaddr");
4819  else
4820  Result = V;
4821  }
4822 
4823  // Add in the BaseGV if present.
4824  if (AddrMode.BaseGV) {
4825  Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr");
4826  if (Result)
4827  Result = Builder.CreateAdd(Result, V, "sunkaddr");
4828  else
4829  Result = V;
4830  }
4831 
4832  // Add in the Base Offset if present.
4833  if (AddrMode.BaseOffs) {
4834  Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
4835  if (Result)
4836  Result = Builder.CreateAdd(Result, V, "sunkaddr");
4837  else
4838  Result = V;
4839  }
4840 
4841  if (!Result)
4842  SunkAddr = Constant::getNullValue(Addr->getType());
4843  else
4844  SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
4845  }
4846 
4847  MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
4848  // Store the newly computed address into the cache. In the case we reused a
4849  // value, this should be idempotent.
4850  SunkAddrs[Addr] = WeakTrackingVH(SunkAddr);
4851 
4852  // If we have no uses, recursively delete the value and all dead instructions
4853  // using it.
4854  if (Repl->use_empty()) {
4855  // This can cause recursive deletion, which can invalidate our iterator.
4856  // Use a WeakTrackingVH to hold onto it in case this happens.
4857  Value *CurValue = &*CurInstIterator;
4858  WeakTrackingVH IterHandle(CurValue);
4859  BasicBlock *BB = CurInstIterator->getParent();
4860 
4861  RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo);
4862 
4863  if (IterHandle != CurValue) {
4864  // If the iterator instruction was recursively deleted, start over at the
4865  // start of the block.
4866  CurInstIterator = BB->begin();
4867  SunkAddrs.clear();
4868  }
4869  }
4870  ++NumMemoryInsts;
4871  return true;
4872 }
4873 
4874 /// If there are any memory operands, use OptimizeMemoryInst to sink their
4875 /// address computing into the block when possible / profitable.
4876 bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
4877  bool MadeChange = false;
4878 
4879  const TargetRegisterInfo *TRI =
4880  TM->getSubtargetImpl(*CS->getFunction())->getRegisterInfo();
4881  TargetLowering::AsmOperandInfoVector TargetConstraints =
4882  TLI->ParseConstraints(*DL, TRI, CS);
4883  unsigned ArgNo = 0;
4884  for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
4885  TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
4886 
4887  // Compute the constraint code and ConstraintType to use.
4888  TLI->ComputeConstraintToUse(OpInfo, SDValue());
4889 
4890  if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
4891  OpInfo.isIndirect) {
4892  Value *OpVal = CS->getArgOperand(ArgNo++);
4893  MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u);
4894  } else if (OpInfo.Type == InlineAsm::isInput)
4895  ArgNo++;
4896  }
4897 
4898  return MadeChange;
4899 }
4900 
4901 /// Check if all the uses of \p Val are equivalent (or free) zero or
4902 /// sign extensions.
4903 static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) {
4904  assert(!Val->use_empty() && "Input must have at least one use");
4905  const Instruction *FirstUser = cast<Instruction>(*Val->user_begin());
4906  bool IsSExt = isa<SExtInst>(FirstUser);
4907  Type *ExtTy = FirstUser->getType();
4908  for (const User *U : Val->users()) {
4909  const Instruction *UI = cast<Instruction>(U);
4910  if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
4911  return false;
4912  Type *CurTy = UI->getType();
4913  // Same input and output types: Same instruction after CSE.
4914  if (CurTy == ExtTy)
4915  continue;
4916 
4917  // If IsSExt is true, we are in this situation:
4918  // a = Val
4919  // b = sext ty1 a to ty2
4920  // c = sext ty1 a to ty3
4921  // Assuming ty2 is shorter than ty3, this could be turned into:
4922  // a = Val
4923  // b = sext ty1 a to ty2
4924  // c = sext ty2 b to ty3
4925  // However, the last sext is not free.
4926  if (IsSExt)
4927  return false;
4928 
4929  // This is a ZExt, maybe this is free to extend from one type to another.
4930  // In that case, we would not account for a different use.
4931  Type *NarrowTy;
4932  Type *LargeTy;
4933  if (ExtTy->getScalarType()->getIntegerBitWidth() >
4934  CurTy->getScalarType()->getIntegerBitWidth()) {
4935  NarrowTy = CurTy;
4936  LargeTy = ExtTy;
4937  } else {
4938  NarrowTy = ExtTy;
4939  LargeTy = CurTy;
4940  }
4941 
4942  if (!TLI.isZExtFree(NarrowTy, LargeTy))
4943  return false;
4944  }
4945  // All uses are the same or can be derived from one another for free.
4946  return true;
4947 }
4948 
4949 /// Try to speculatively promote extensions in \p Exts and continue
4950 /// promoting through newly promoted operands recursively as far as doing so is
4951 /// profitable. Save extensions profitably moved up, in \p ProfitablyMovedExts.
4952 /// When some promotion happened, \p TPT contains the proper state to revert
4953 /// them.
4954 ///
4955 /// \return true if some promotion happened, false otherwise.
4956 bool CodeGenPrepare::tryToPromoteExts(
4957  TypePromotionTransaction &TPT, const SmallVectorImpl<Instruction *> &Exts,
4958  SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
4959  unsigned CreatedInstsCost) {
4960  bool Promoted = false;
4961 
4962  // Iterate over all the extensions to try to promote them.
4963  for (auto I : Exts) {
4964  // Early check if we directly have ext(load).
4965  if (isa<LoadInst>(I->getOperand(0))) {
4966  ProfitablyMovedExts.push_back(I);
4967  continue;
4968  }
4969 
4970  // Check whether or not we want to do any promotion. The reason we have
4971  // this check inside the for loop is to catch the case where an extension
4972  // is directly fed by a load because in such case the extension can be moved
4973  // up without any promotion on its operands.
4974  if (!TLI || !TLI->enableExtLdPromotion() || DisableExtLdPromotion)
4975  return false;
4976 
4977  // Get the action to perform the promotion.
4978  TypePromotionHelper::Action TPH =
4979  TypePromotionHelper::getAction(I, InsertedInsts, *TLI, PromotedInsts);
4980  // Check if we can promote.
4981  if (!TPH) {
4982  // Save the current extension as we cannot move up through its operand.
4983  ProfitablyMovedExts.push_back(I);
4984  continue;
4985  }
4986 
4987  // Save the current state.
4988  TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4989  TPT.getRestorationPoint();
4990  SmallVector<Instruction *, 4> NewExts;
4991  unsigned NewCreatedInstsCost = 0;
4992  unsigned ExtCost = !TLI->isExtFree(I);
4993  // Promote.
4994  Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost,
4995  &NewExts, nullptr, *TLI);
4996  assert(PromotedVal &&
4997  "TypePromotionHelper should have filtered out those cases");
4998 
4999  // We would be able to merge only one extension in a load.
5000  // Therefore, if we have more than 1 new extension we heuristically
5001  // cut this search path, because it means we degrade the code quality.
5002  // With exactly 2, the transformation is neutral, because we will merge
5003  // one extension but leave one. However, we optimistically keep going,
5004  // because the new extension may be removed too.
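  // As a worked example under these cost rules (hypothetical numbers):
  // starting from CreatedInstsCost == 0, if promotion creates 2 new extensions
  // and the promoted ext was not free (ExtCost == 1), the total below becomes
  // 2 - 1 == 1, which is not > 1, so the search continues; with 3 new
  // extensions it becomes 2 and this path is cut.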
5005  long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
5006  // FIXME: It would be possible to propagate a negative value instead of
5007  // conservatively ceiling it to 0.
5008  TotalCreatedInstsCost =
5009  std::max((long long)0, (TotalCreatedInstsCost - ExtCost));
5010  if (!StressExtLdPromotion &&
5011  (TotalCreatedInstsCost > 1 ||
5012  !isPromotedInstructionLegal(*TLI, *DL, PromotedVal))) {
5013  // This promotion is not profitable, rollback to the previous state, and
5014  // save the current extension in ProfitablyMovedExts as the latest
5015  // speculative promotion turned out to be unprofitable.
5016  TPT.rollback(LastKnownGood);
5017  ProfitablyMovedExts.push_back(I);
5018  continue;
5019  }
5020  // Continue promoting NewExts as far as doing so is profitable.
5021  SmallVector<Instruction *, 2> NewlyMovedExts;
5022  (void)tryToPromoteExts(TPT, NewExts, NewlyMovedExts, TotalCreatedInstsCost);
5023  bool NewPromoted = false;
5024  for (auto ExtInst : NewlyMovedExts) {
5025  Instruction *MovedExt = cast<Instruction>(ExtInst);
5026  Value *ExtOperand = MovedExt->getOperand(0);
5027  // If we have reached a load, we need this extra profitability check
5028  // as it could potentially be merged into an ext(load).
5029  if (isa<LoadInst>(ExtOperand) &&
5030  !(StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
5031  (ExtOperand->hasOneUse() || hasSameExtUse(ExtOperand, *TLI))))
5032  continue;
5033 
5034  ProfitablyMovedExts.push_back(MovedExt);
5035  NewPromoted = true;
5036  }
5037 
5038  // If none of speculative promotions for NewExts is profitable, rollback
5039  // and save the current extension (I) as the last profitable extension.
5040  if (!NewPromoted) {
5041  TPT.rollback(LastKnownGood);
5042  ProfitablyMovedExts.push_back(I);
5043  continue;
5044  }
5045  // The promotion is profitable.
5046  Promoted = true;
5047  }
5048  return Promoted;
5049 }
5050 
5051 /// Merge redundant sexts when one dominates the other.
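/// As a minimal sketch (hypothetical IR, not from a test case): if promotion
/// leaves behind
/// \code
///   %s1 = sext i32 %x to i64   ; dominates %s2
///   ...
///   %s2 = sext i32 %x to i64
/// \endcode
/// then the uses of %s2 are rewritten to %s1 and %s2 is removed. If neither
/// sext dominates the other, the pair is left alone.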
5052 bool CodeGenPrepare::mergeSExts(Function &F) {
5053  DominatorTree DT(F);
5054  bool Changed = false;
5055  for (auto &Entry : ValToSExtendedUses) {
5056  SExts &Insts = Entry.second;
5057  SExts CurPts;
5058  for (Instruction *Inst : Insts) {
5059  if (RemovedInsts.count(Inst) || !isa<SExtInst>(Inst) ||
5060  Inst->getOperand(0) != Entry.first)
5061  continue;
5062  bool inserted = false;
5063  for (auto &Pt : CurPts) {
5064  if (DT.dominates(Inst, Pt)) {
5065  Pt->replaceAllUsesWith(Inst);
5066  RemovedInsts.insert(Pt);
5067  Pt->removeFromParent();
5068  Pt = Inst;
5069  inserted = true;
5070  Changed = true;
5071  break;
5072  }
5073  if (!DT.dominates(Pt, Inst))
5074  // Give up if we need to merge in a common dominator as the
5075  // experiments show it is not profitable.
5076  continue;
5077  Inst->replaceAllUsesWith(Pt);
5078  RemovedInsts.insert(Inst);
5079  Inst->removeFromParent();
5080  inserted = true;
5081  Changed = true;
5082  break;
5083  }
5084  if (!inserted)
5085  CurPts.push_back(Inst);
5086  }
5087  }
5088  return Changed;
5089 }
5090 
5091 // Split large data structures so that the GEPs accessing them can have
5092 // smaller offsets so that they can be sunk to the same blocks as their users.
5093 // For example, a large struct starting from %base is split into two parts
5094 // where the second part starts from %new_base.
5095 //
5096 // Before:
5097 // BB0:
5098 // %base =
5099 //
5100 // BB1:
5101 // %gep0 = gep %base, off0
5102 // %gep1 = gep %base, off1
5103 // %gep2 = gep %base, off2
5104 //
5105 // BB2:
5106 // %load1 = load %gep0
5107 // %load2 = load %gep1
5108 // %load3 = load %gep2
5109 //
5110 // After:
5111 // BB0:
5112 // %base =
5113 // %new_base = gep %base, off0
5114 //
5115 // BB1:
5116 // %new_gep0 = %new_base
5117 // %new_gep1 = gep %new_base, off1 - off0
5118 // %new_gep2 = gep %new_base, off2 - off0
5119 //
5120 // BB2:
5121 // %load1 = load i32, i32* %new_gep0
5122 // %load2 = load i32, i32* %new_gep1
5123 // %load3 = load i32, i32* %new_gep2
5124 //
5125 // %new_gep1 and %new_gep2 can be sunk to BB2 now after the splitting because
5126 // their offsets are small enough to fit into the addressing mode.
5127 bool CodeGenPrepare::splitLargeGEPOffsets() {
5128  bool Changed = false;
5129  for (auto &Entry : LargeOffsetGEPMap) {
5130  Value *OldBase = Entry.first;
5131  SmallVectorImpl<std::pair<AssertingVH<GetElementPtrInst>, int64_t>>
5132  &LargeOffsetGEPs = Entry.second;
5133  auto compareGEPOffset =
5134  [&](const std::pair<GetElementPtrInst *, int64_t> &LHS,
5135  const std::pair<GetElementPtrInst *, int64_t> &RHS) {
5136  if (LHS.first == RHS.first)
5137  return false;
5138  if (LHS.second != RHS.second)
5139  return LHS.second < RHS.second;
5140  return LargeOffsetGEPID[LHS.first] < LargeOffsetGEPID[RHS.first];
5141  };
5142  // Sort all the GEPs of the same data structure based on their offsets.
5143  llvm::sort(LargeOffsetGEPs, compareGEPOffset);
5144  LargeOffsetGEPs.erase(
5145  std::unique(LargeOffsetGEPs.begin(), LargeOffsetGEPs.end()),
5146  LargeOffsetGEPs.end());
5147  // Skip if all the GEPs have the same offsets.
5148  if (LargeOffsetGEPs.front().second == LargeOffsetGEPs.back().second)
5149  continue;
5150  GetElementPtrInst *BaseGEP = LargeOffsetGEPs.begin()->first;
5151  int64_t BaseOffset = LargeOffsetGEPs.begin()->second;
5152  Value *NewBaseGEP = nullptr;
5153 
5154  auto LargeOffsetGEP = LargeOffsetGEPs.begin();
5155  while (LargeOffsetGEP != LargeOffsetGEPs.end()) {
5156  GetElementPtrInst *GEP = LargeOffsetGEP->first;
5157  int64_t Offset = LargeOffsetGEP->second;
5158  if (Offset != BaseOffset) {
5159  TargetLowering::AddrMode AddrMode;
5160  AddrMode.BaseOffs = Offset - BaseOffset;
5161  // The result type of the GEP might not be the type of the memory
5162  // access.
5163  if (!TLI->isLegalAddressingMode(*DL, AddrMode,
5164  GEP->getResultElementType(),
5165  GEP->getAddressSpace())) {
5166  // We need to create a new base if the offset to the current base is
5167  // too large to fit into the addressing mode. So, a very large struct
5168  // may be split into several parts.
5169  BaseGEP = GEP;
5170  BaseOffset = Offset;
5171  NewBaseGEP = nullptr;
5172  }
5173  }
5174 
5175  // Generate a new GEP to replace the current one.
5176  LLVMContext &Ctx = GEP->getContext();
5177  Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
5178  Type *I8PtrTy =
5179  Type::getInt8PtrTy(Ctx, GEP->getType()->getPointerAddressSpace());
5180  Type *I8Ty = Type::getInt8Ty(Ctx);
5181 
5182  if (!NewBaseGEP) {
5183  // Create a new base if we don't have one yet. Find the insertion
5184  // point for the new base first.
5185  BasicBlock::iterator NewBaseInsertPt;
5186  BasicBlock *NewBaseInsertBB;
5187  if (auto *BaseI = dyn_cast<Instruction>(OldBase)) {
5188  // If the base of the struct is an instruction, the new base will be
5189  // inserted close to it.
5190  NewBaseInsertBB = BaseI->getParent();
5191  if (isa<PHINode>(BaseI))
5192  NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
5193  else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
5194  NewBaseInsertBB =
5195  SplitEdge(NewBaseInsertBB, Invoke->getNormalDest());
5196  NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
5197  } else
5198  NewBaseInsertPt = std::next(BaseI->getIterator());
5199  } else {
5200  // If the current base is an argument or global value, the new base
5201  // will be inserted into the entry block.
5202  NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock();
5203  NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
5204  }
5205  IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
5206  // Create a new base.
5207  Value *BaseIndex = ConstantInt::get(IntPtrTy, BaseOffset);
5208  NewBaseGEP = OldBase;
5209  if (NewBaseGEP->getType() != I8PtrTy)
5210  NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
5211  NewBaseGEP =
5212  NewBaseBuilder.CreateGEP(I8Ty, NewBaseGEP, BaseIndex, "splitgep");
5213  NewGEPBases.insert(NewBaseGEP);
5214  }
5215 
5216  IRBuilder<> Builder(GEP);
5217  Value *NewGEP = NewBaseGEP;
5218  if (Offset == BaseOffset) {
5219  if (GEP->getType() != I8PtrTy)
5220  NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType());
5221  } else {
5222  // Calculate the new offset for the new GEP.
5223  Value *Index = ConstantInt::get(IntPtrTy, Offset - BaseOffset);
5224  NewGEP = Builder.CreateGEP(I8Ty, NewBaseGEP, Index);
5225 
5226  if (GEP->getType() != I8PtrTy)
5227  NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType());
5228  }
5229  GEP->replaceAllUsesWith(NewGEP);
5230  LargeOffsetGEPID.erase(GEP);
5231  LargeOffsetGEP = LargeOffsetGEPs.erase(LargeOffsetGEP);
5232  GEP->eraseFromParent();
5233  Changed = true;
5234  }
5235  }
5236  return Changed;
5237 }
5238 
5239 /// Return true if an ext(load) can be formed from an extension in
5240 /// \p MovedExts.
5241 bool CodeGenPrepare::canFormExtLd(
5242  const SmallVectorImpl<Instruction *> &MovedExts, LoadInst *&LI,
5243  Instruction *&Inst, bool HasPromoted) {
5244  for (auto *MovedExtInst : MovedExts) {
5245  if (isa<LoadInst>(MovedExtInst->getOperand(0))) {
5246  LI = cast<LoadInst>(MovedExtInst->getOperand(0));
5247  Inst = MovedExtInst;
5248  break;
5249  }
5250  }
5251  if (!LI)
5252  return false;
5253 
5254  // If they're already in the same block, there's nothing to do.
5255  // Make the cheap checks first if we did not promote.
5256  // If we promoted, we need to check if it is indeed profitable.
5257  if (!HasPromoted && LI->getParent() == Inst->getParent())
5258  return false;
5259 
5260  return TLI->isExtLoad(LI, Inst, *DL);
5261 }
5262 
5263 /// Move a zext or sext fed by a load into the same basic block as the load,
5264 /// unless conditions are unfavorable. This allows SelectionDAG to fold the
5265 /// extend into the load.
5266 ///
5267 /// E.g.,
5268 /// \code
5269 /// %ld = load i32* %addr
5270 /// %add = add nuw i32 %ld, 4
5271 /// %zext = zext i32 %add to i64
5272 /// \endcode
5273 /// =>
5274 /// \code
5275 /// %ld = load i32* %addr
5276 /// %zext = zext i32 %ld to i64
5277 /// %add = add nuw i64 %zext, 4
5278 /// \endcode
5279 /// Note that the promotion of %add to i64 is done in tryToPromoteExts(), which
5280 /// allows us to match zext(load i32*) to i64.
5281 ///
5282 /// Also, try to promote the computations used to obtain a sign extended
5283 /// value used in memory accesses.
5284 /// E.g.,
5285 /// \code
5286 /// a = add nsw i32 b, 3
5287 /// d = sext i32 a to i64
5288 /// e = getelementptr ..., i64 d
5289 /// \endcode
5290 /// =>
5291 /// \code
5292 /// f = sext i32 b to i64
5293 /// a = add nsw i64 f, 3
5294 /// e = getelementptr ..., i64 a
5295 /// \endcode
5296 ///
5297 /// \p Inst[in/out] the extension may be modified during the process if some
5298 /// promotions apply.
5299 bool CodeGenPrepare::optimizeExt(Instruction *&Inst) {
5300  // ExtLoad formation and address type promotion infrastructure requires TLI to
5301  // be effective.
5302  if (!TLI)
5303  return false;
5304 
5305  bool AllowPromotionWithoutCommonHeader = false;
5306  /// See if it is an interesting sext operation for the address type
5307  /// promotion before trying to promote it, e.g., the ones with the right
5308  /// type and used in memory accesses.
5309  bool ATPConsiderable = TTI->shouldConsiderAddressTypePromotion(
5310  *Inst, AllowPromotionWithoutCommonHeader);
5311  TypePromotionTransaction TPT(RemovedInsts);
5312  TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5313  TPT.getRestorationPoint();
5314  SmallVector<Instruction *, 1> Exts;
5315  SmallVector<Instruction *, 2> SpeculativelyMovedExts;
5316  Exts.push_back(Inst);
5317 
5318  bool HasPromoted = tryToPromoteExts(TPT, Exts, SpeculativelyMovedExts);
5319 
5320  // Look for a load being extended.
5321  LoadInst *LI = nullptr;
5322  Instruction *ExtFedByLoad;
5323 
5324  // Try to promote a chain of computation if it allows to form an extended
5325  // load.
5326  if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) {
5327  assert(LI && ExtFedByLoad && "Expect a valid load and extension");
5328  TPT.commit();
5329  // Move the extend into the same block as the load
5330  ExtFedByLoad->moveAfter(LI);
5331  // CGP does not check if the zext would be speculatively executed when moved
5332  // to the same basic block as the load. Preserving its original location
5333  // would pessimize the debugging experience, as well as negatively impact
5334  // the quality of sample pgo. We don't want to use "line 0" as that has a
5335  // size cost in the line-table section and logically the zext can be seen as
5336  // part of the load. Therefore we conservatively reuse the same debug
5337  // location for the load and the zext.
5338  ExtFedByLoad->setDebugLoc(LI->getDebugLoc());
5339  ++NumExtsMoved;
5340  Inst = ExtFedByLoad;
5341  return true;
5342  }
5343 
5344  // Continue promoting SExts if the target reported them as worth considering
  // for address type promotion.
5345  if (ATPConsiderable &&
5346  performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader,
5347  HasPromoted, TPT, SpeculativelyMovedExts))
5348  return true;
5349 
5350  TPT.rollback(LastKnownGood);
5351  return false;
5352 }
5353 
5354 // Perform address type promotion if doing so is profitable.
5355 // If AllowPromotionWithoutCommonHeader == false, we should find other sext
5356 // instructions that sign extended the same initial value. However, if
5357 // AllowPromotionWithoutCommonHeader == true, we expect promoting the
5358 // extension to be profitable on its own.
5359 bool CodeGenPrepare::performAddressTypePromotion(
5360  Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
5361  bool HasPromoted, TypePromotionTransaction &TPT,
5362  SmallVectorImpl<Instruction *> &SpeculativelyMovedExts) {
5363  bool Promoted = false;
5364  SmallPtrSet<Instruction *, 1> UnhandledExts;
5365  bool AllSeenFirst = true;
5366  for (auto I : SpeculativelyMovedExts) {
5367  Value *HeadOfChain = I->getOperand(0);
5368  DenseMap<Value *, Instruction *>::iterator AlreadySeen =
5369  SeenChainsForSExt.find(HeadOfChain);
5370  // If there is an unhandled SExt which has the same header, try to promote
5371  // it as well.
5372  if (AlreadySeen != SeenChainsForSExt.end()) {
5373  if (AlreadySeen->second != nullptr)
5374  UnhandledExts.insert(AlreadySeen->second);
5375  AllSeenFirst = false;
5376  }
5377  }
5378 
5379  if (!AllSeenFirst || (AllowPromotionWithoutCommonHeader &&
5380  SpeculativelyMovedExts.size() == 1)) {
5381  TPT.commit();
5382  if (HasPromoted)
5383  Promoted = true;
5384  for (auto I : SpeculativelyMovedExts) {
5385  Value *HeadOfChain = I->getOperand(0);
5386  SeenChainsForSExt[HeadOfChain] = nullptr;
5387  ValToSExtendedUses[HeadOfChain].push_back(I);
5388  }
5389  // Update Inst as promotion happened.
5390  Inst = SpeculativelyMovedExts.pop_back_val();
5391  } else {
5392  // This is the first chain visited from the header, keep the current chain
5393  // as unhandled. Defer promoting it until we encounter another SExt
5394  // chain derived from the same header.
5395  for (auto I : SpeculativelyMovedExts) {
5396  Value *HeadOfChain = I->getOperand(0);
5397  SeenChainsForSExt[HeadOfChain] = Inst;
5398  }
5399  return false;
5400  }
5401 
5402  if (!AllSeenFirst && !UnhandledExts.empty())
5403  for (auto VisitedSExt : UnhandledExts) {
5404  if (RemovedInsts.count(VisitedSExt))
5405  continue;
5406  TypePromotionTransaction TPT(RemovedInsts);
5407  SmallVector<Instruction *, 1> Exts;
5408  SmallVector<Instruction *, 2> Chains;
5409  Exts.push_back(VisitedSExt);
5410  bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains);
5411  TPT.commit();
5412  if (HasPromoted)
5413  Promoted = true;
5414  for (auto I : Chains) {
5415  Value *HeadOfChain = I->getOperand(0);
5416  // Mark this as handled.
5417  SeenChainsForSExt[HeadOfChain] = nullptr;
5418  ValToSExtendedUses[HeadOfChain].push_back(I);
5419  }
5420  }
5421  return Promoted;
5422 }
5423 
5424 bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
5425  BasicBlock *DefBB = I->getParent();
5426 
5427  // If the result of a {s|z}ext and its source are both live out, rewrite all
5428  // other uses of the source with the result of the extension.
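  // For example (a hypothetical sketch): given
  //   %z = zext i16 %src to i32
  // where both %z and %src are used in another block, the out-of-block uses of
  // %src are rewritten to use a trunc of %z inserted in that block, so only %z
  // needs to be kept live out of this block.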
5429  Value *Src = I->getOperand(0);
5430  if (Src->hasOneUse())
5431  return false;
5432 
5433  // Only do this xform if truncating is free.
5434  if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
5435  return false;
5436 
5437  // Only safe to perform the optimization if the source is also defined in
5438  // this block.
5439  if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
5440  return false;
5441 
5442  bool DefIsLiveOut = false;
5443  for (User *U : I->users()) {
5444  Instruction *UI = cast<Instruction>(U);
5445 
5446  // Figure out which BB this ext is used in.
5447  BasicBlock *UserBB = UI->getParent();
5448  if (UserBB == DefBB) continue;
5449  DefIsLiveOut = true;
5450  break;
5451  }
5452  if (!DefIsLiveOut)
5453  return false;
5454 
5455  // Make sure none of the uses are PHI nodes.
5456  for (User *U : Src->users()) {
5457  Instruction *UI = cast<Instruction>(U);
5458  BasicBlock *UserBB = UI->getParent();
5459  if (UserBB == DefBB) continue;
5460  // Be conservative. We don't want this xform to end up introducing
5461  // reloads just before load / store instructions.
5462  if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI))
5463  return false;
5464  }
5465 
5466  // InsertedTruncs - Insert only one trunc in each block.
5467  DenseMap<BasicBlock*, Instruction*> InsertedTruncs;
5468 
5469  bool MadeChange = false;
5470  for (Use &U : Src->uses()) {
5471  Instruction *User = cast<Instruction>(U.getUser());
5472 
5473  // Figure out which BB this ext is used in.
5474  BasicBlock *UserBB = User->getParent();
5475  if (UserBB == DefBB) continue;
5476 
5477  // Both src and def are live in this block. Rewrite the use.
5478  Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
5479 
5480  if (!InsertedTrunc) {
5481  BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
5482  assert(InsertPt != UserBB->end());
5483  InsertedTrunc = new TruncInst(I, Src->getType(), "", &*InsertPt);
5484  InsertedInsts.insert(InsertedTrunc);
5485  }
5486 
5487  // Replace a use of the {s|z}ext source with a use of the result.
5488  U = InsertedTrunc;
5489  ++NumExtUses;
5490  MadeChange = true;
5491  }
5492 
5493  return MadeChange;
5494 }
5495 
5496 // Find loads whose uses only use some of the loaded value's bits. Add an "and"
5497 // just after the load if the target can fold this into one extload instruction,
5498 // with the hope of eliminating some of the other later "and" instructions using
5499 // the loaded value. "and"s that are made trivially redundant by the insertion
5500 // of the new "and" are removed by this function, while others (e.g. those whose
5501 // path from the load goes through a phi) are left for isel to potentially
5502 // remove.
5503 //
5504 // For example:
5505 //
5506 // b0:
5507 // x = load i32
5508 // ...
5509 // b1:
5510 // y = and x, 0xff
5511 // z = use y
5512 //
5513 // becomes:
5514 //
5515 // b0:
5516 // x = load i32
5517 // x' = and x, 0xff
5518 // ...
5519 // b1:
5520 // z = use x'
5521 //
5522 // whereas:
5523 //
5524 // b0:
5525 // x1 = load i32
5526 // ...
5527 // b1:
5528 // x2 = load i32
5529 // ...
5530 // b2:
5531 // x = phi x1, x2
5532 // y = and x, 0xff
5533 //
5534 // becomes (after a call to optimizeLoadExt for each load):
5535 //
5536 // b0:
5537 // x1 = load i32
5538 // x1' = and x1, 0xff
5539 // ...
5540 // b1:
5541 // x2 = load i32
5542 // x2' = and x2, 0xff
5543 // ...
5544 // b2:
5545 // x = phi x1', x2'
5546 // y = and x, 0xff
5547 bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
5548  if (!Load->isSimple() || !Load->getType()->isIntOrPtrTy())
5549  return false;
5550 
5551  // Skip loads we've already transformed.
5552  if (Load->hasOneUse() &&
5553  InsertedInsts.count(cast<Instruction>(*Load->user_begin())))
5554  return false;
5555 
5556  // Look at all uses of Load, looking through phis, to determine how many bits
5557  // of the loaded value are needed.
5558  SmallVector<Instruction *, 8> WorkList;
5559  SmallPtrSet<Instruction *, 16> Visited;
5560  SmallVector<Instruction *, 8> AndsToMaybeRemove;
5561  for (auto *U : Load->users())
5562  WorkList.push_back(cast<Instruction>(U));
5563 
5564  EVT LoadResultVT = TLI->getValueType(*DL, Load->getType());
5565  unsigned BitWidth = LoadResultVT.getSizeInBits();
5566  APInt DemandBits(BitWidth, 0);
5567  APInt WidestAndBits(BitWidth, 0);
5568 
5569  while (!WorkList.empty()) {
5570  Instruction *I = WorkList.back();
5571  WorkList.pop_back();
5572 
5573  // Break use-def graph loops.
5574  if (!Visited.insert(I).second)
5575  continue;
5576 
5577  // For a PHI node, push all of its users.
5578  if (auto *Phi = dyn_cast<PHINode>(I)) {
5579  for (auto *U : Phi->users())
5580  WorkList.push_back(cast<Instruction>(U));
5581  continue;
5582  }
5583 
5584  switch (I->getOpcode()) {
5585  case Instruction::And: {
5586  auto *AndC = dyn_cast<ConstantInt>(I->getOperand(1));
5587  if (!AndC)
5588  return false;
5589  APInt AndBits = AndC->getValue();
5590  DemandBits |= AndBits;
5591  // Keep track of the widest and mask we see.
5592  if (AndBits.ugt(WidestAndBits))
5593  WidestAndBits = AndBits;
5594  if (AndBits == WidestAndBits && I->getOperand(0) == Load)
5595  AndsToMaybeRemove.push_back(I);
5596  break;
5597  }
5598 
5599  case Instruction::Shl: {
5600  auto *ShlC = dyn_cast<ConstantInt>(I->getOperand(1));
5601  if (!ShlC)
5602  return false;
5603  uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1);
5604  DemandBits.setLowBits(BitWidth - ShiftAmt);
5605  break;
5606  }
5607 
5608  case Instruction::Trunc: {
5609  EVT TruncVT = TLI->getValueType(*DL, I->getType());
5610  unsigned TruncBitWidth = TruncVT.getSizeInBits();
5611  DemandBits.setLowBits(TruncBitWidth);
5612  break;
5613  }
5614 
5615  default:
5616  return false;
5617  }
5618  }
5619 
5620  uint32_t ActiveBits = DemandBits.getActiveBits();
5621  // Avoid hoisting (and (load x) 1) since it is unlikely to be folded by the
5622  // target even if isLoadExtLegal says an i1 EXTLOAD is valid. For example,
5623  // for the AArch64 target isLoadExtLegal(ZEXTLOAD, i32, i1) returns true, but
5624  // (and (load x) 1) is not matched as a single instruction, rather as a LDR
5625  // followed by an AND.
5626  // TODO: Look into removing this restriction by fixing backends to either
5627  // return false for isLoadExtLegal for i1 or have them select this pattern to
5628  // a single instruction.
5629  //
5630  // Also avoid hoisting if we didn't see any ands with the exact DemandBits
5631  // mask, since these are the only ands that will be removed by isel.
5632  if (ActiveBits <= 1 || !DemandBits.isMask(ActiveBits) ||
5633  WidestAndBits != DemandBits)
5634  return false;
5635 
5636  LLVMContext &Ctx = Load->getType()->getContext();
5637  Type *TruncTy = Type::getIntNTy(Ctx, ActiveBits);
5638  EVT TruncVT = TLI->getValueType(*DL, TruncTy);
5639 
5640  // Reject cases that won't be matched as extloads.
5641  if (!LoadResultVT.bitsGT(TruncVT) || !TruncVT.isRound() ||
5642  !TLI->isLoadExtLegal(ISD::ZEXTLOAD, LoadResultVT, TruncVT))
5643  return false;
5644 
5645  IRBuilder<> Builder(Load->getNextNode());
5646  auto *NewAnd = dyn_cast<Instruction>(
5647  Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits)));
5648  // Mark this instruction as "inserted by CGP", so that other
5649  // optimizations don't touch it.
5650  InsertedInsts.insert(NewAnd);
5651 
5652  // Replace all uses of load with new and (except for the use of load in the
5653  // new and itself).
5654  Load->replaceAllUsesWith(NewAnd);
5655  NewAnd->setOperand(0, Load);
5656 
5657  // Remove any and instructions that are now redundant.
5658  for (auto *And : AndsToMaybeRemove)
5659  // Check that the and mask is the same as the one we decided to put on the
5660  // new and.
5661  if (cast<ConstantInt>(And->getOperand(1))->getValue() == DemandBits) {
5662  And->replaceAllUsesWith(NewAnd);
5663  if (&*CurInstIterator == And)
5664  CurInstIterator = std::next(And->getIterator());
5665  And->eraseFromParent();
5666  ++NumAndUses;
5667  }
5668 
5669  ++NumAndsAdded;
5670  return true;
5671 }
5672 
5673 /// Check if V (an operand of a select instruction) is an expensive instruction
5674 /// that is only used once.
5675 static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) {
5676  auto *I = dyn_cast<Instruction>(V);
5677  // If it's safe to speculatively execute, then it should not have side
5678  // effects; therefore, it's safe to sink and possibly *not* execute.
5679  return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) &&
5680  TTI->getUserCost(I) == TargetTransformInfo::TCC_Expensive;
5681 }
5682 
5683 /// Returns true if a SelectInst should be turned into an explicit branch.
5684 static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI,
5685  const TargetLowering *TLI,
5686  SelectInst *SI) {
5687  // If even a predictable select is cheap, then a branch can't be cheaper.
5688  if (!TLI->isPredictableSelectExpensive())
5689  return false;
5690 
5691  // FIXME: This should use the same heuristics as IfConversion to determine
5692  // whether a select is better represented as a branch.
5693 
5694  // If metadata tells us that the select condition is obviously predictable,
5695  // then we want to replace the select with a branch.
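  // As a rough numeric sketch (hypothetical weights): with branch_weights of
  // 2000 and 1, Max/Sum is roughly 0.9995, which exceeds the commonly-used
  // predictable-branch threshold, so the select below would become a branch.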
5696  uint64_t TrueWeight, FalseWeight;
5697  if (SI->extractProfMetadata(TrueWeight, FalseWeight)) {
5698  uint64_t Max = std::max(TrueWeight, FalseWeight);
5699  uint64_t Sum = TrueWeight + FalseWeight;
5700  if (Sum != 0) {
5701  auto Probability = BranchProbability::getBranchProbability(Max, Sum);
5702  if (Probability > TLI->getPredictableBranchThreshold())
5703  return true;
5704  }
5705  }
5706 
5707  CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
5708 
5709  // If a branch is predictable, an out-of-order CPU can avoid blocking on its
5710  // comparison condition. If the compare has more than one use, there's
5711  // probably another cmov or setcc around, so it's not worth emitting a branch.
5712  if (!Cmp || !Cmp->hasOneUse())
5713  return false;
5714 
5715  // If either operand of the select is expensive and only needed on one side
5716  // of the select, we should form a branch.
5717  if (sinkSelectOperand(TTI, SI->getTrueValue()) ||
5718  sinkSelectOperand(TTI, SI->getFalseValue()))
5719  return true;
5720 
5721  return false;
5722 }
5723 
5724 /// If \p isTrue is true, return the true value of \p SI, otherwise return
5725 /// the false value of \p SI. If the true/false value of \p SI is defined by any
5726 /// select instructions in \p Selects, look through the defining select
5727 /// instruction until the true/false value is not defined in \p Selects.
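/// For example (a hypothetical sketch): with
/// \code
///   %sel1 = select i1 %cmp, i32 %a, i32 %b
///   %sel2 = select i1 %cmp, i32 %sel1, i32 %c
/// \endcode
/// and Selects = {%sel1, %sel2}, the true value of %sel2 resolves through
/// %sel1 to %a, while its false value is %c.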
5728 static Value *getTrueOrFalseValue(
5729  SelectInst *SI, bool isTrue,
5730  const SmallPtrSet<const Instruction *, 2> &Selects) {
5731  Value *V;
5732 
5733  for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI);
5734  DefSI = dyn_cast<SelectInst>(V)) {
5735  assert(DefSI->getCondition() == SI->getCondition() &&
5736  "The condition of DefSI does not match with SI");
5737  V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
5738  }
5739  return V;
5740 }
5741 
5742 /// If we have a SelectInst that will likely profit from branch prediction,
5743 /// turn it into a branch.
5744 bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
5745  // If branch conversion isn't desirable, exit early.
5746  if (DisableSelectToBranch || OptSize || !TLI)
5747  return false;
5748 
5749  // Find all consecutive select instructions that share the same condition.
5750  SmallVector<SelectInst *, 2> ASI;
5751  ASI.push_back(SI);
5752  for (BasicBlock::iterator It = ++BasicBlock::iterator(SI);
5753  It != SI->getParent()->end(); ++It) {
5754  SelectInst *I = dyn_cast<SelectInst>(&*It);
5755  if (I && SI->getCondition() == I->getCondition()) {
5756  ASI.push_back(I);
5757  } else {
5758  break;
5759  }
5760  }
5761 
5762  SelectInst *LastSI = ASI.back();
5763  // Increment the current iterator to skip all the rest of the select instructions
5764  // because they will be either "not lowered" or "all lowered" to branch.
5765  CurInstIterator = std::next(LastSI->getIterator());
5766 
5767  bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
5768 
5769  // Can we convert the 'select' to CF?
5770  if (VectorCond || SI->getMetadata(LLVMContext::MD_unpredictable))
5771  return false;
5772 
5773  TargetLowering::SelectSupportKind SelectKind;
5774  if (VectorCond)
5775  SelectKind = TargetLowering::VectorMaskSelect;
5776  else if (SI->getType()->isVectorTy())
5777  SelectKind = TargetLowering::ScalarCondVectorVal;
5778  else
5779  SelectKind = TargetLowering::ScalarValSelect;
5780 
5781  if (TLI->isSelectSupported(SelectKind) &&
5782  !isFormingBranchFromSelectProfitable(TTI, TLI, SI))
5783  return false;
5784 
5785  ModifiedDT = true;
5786 
5787  // Transform a sequence like this:
5788  // start:
5789  // %cmp = cmp uge i32 %a, %b
5790  // %sel = select i1 %cmp, i32 %c, i32 %d
5791  //
5792  // Into:
5793  // start:
5794  // %cmp = cmp uge i32 %a, %b
5795  // br i1 %cmp, label %select.true, label %select.false
5796  // select.true:
5797  // br label %select.end
5798  // select.false:
5799  // br label %select.end
5800  // select.end:
5801  // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
5802  //
5803  // In addition, we may sink instructions that produce %c or %d from
5804  // the entry block into the destination(s) of the new branch.
5805  // If the true or false blocks do not contain a sunken instruction, that
5806  // block and its branch may be optimized away. In that case, one side of the
5807  // first branch will point directly to select.end, and the corresponding PHI
5808  // predecessor block will be the start block.
5809 
5810  // First, we split the block containing the select into 2 blocks.
5811  BasicBlock *StartBlock = SI->getParent();
5812  BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI));
5813  BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
5814 
5815  // Delete the unconditional branch that was just created by the split.
5816  StartBlock->getTerminator()->eraseFromParent();
5817 
5818  // These are the new basic blocks for the conditional branch.
5819  // At least one will become an actual new basic block.
5820  BasicBlock *TrueBlock = nullptr;
5821  BasicBlock *FalseBlock = nullptr;
5822  BranchInst *TrueBranch = nullptr;
5823  BranchInst *FalseBranch = nullptr;
5824 
5825  // Sink expensive instructions into the conditional blocks to avoid executing
5826  // them speculatively.
5827  for (SelectInst *SI : ASI) {
5828  if (sinkSelectOperand(TTI, SI->getTrueValue())) {