LLVM 15.0.0git
SelectOptimize.cpp
1 //===--- SelectOptimize.cpp - Convert select to branches if profitable ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass converts selects to conditional jumps when profitable.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/ADT/Optional.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/Statistic.h"
16 #include "llvm/Analysis/BlockFrequencyInfo.h"
17 #include "llvm/Analysis/BranchProbabilityInfo.h"
18 #include "llvm/Analysis/LoopInfo.h"
19 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
20 #include "llvm/Analysis/ProfileSummaryInfo.h"
21 #include "llvm/Analysis/TargetTransformInfo.h"
22 #include "llvm/CodeGen/Passes.h"
23 #include "llvm/CodeGen/TargetLowering.h"
24 #include "llvm/CodeGen/TargetPassConfig.h"
25 #include "llvm/CodeGen/TargetSchedule.h"
26 #include "llvm/CodeGen/TargetSubtargetInfo.h"
27 #include "llvm/IR/BasicBlock.h"
28 #include "llvm/IR/Dominators.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/IR/IRBuilder.h"
31 #include "llvm/IR/Instruction.h"
32 #include "llvm/InitializePasses.h"
33 #include "llvm/Pass.h"
34 #include "llvm/Support/ScaledNumber.h"
35 #include "llvm/Target/TargetMachine.h"
36 #include "llvm/Transforms/Utils/SizeOpts.h"
37 #include <algorithm>
38 #include <memory>
39 #include <queue>
40 #include <stack>
41 #include <string>
42 
43 using namespace llvm;
44 
45 #define DEBUG_TYPE "select-optimize"
46 
47 STATISTIC(NumSelectOptAnalyzed,
48  "Number of select groups considered for conversion to branch");
49 STATISTIC(NumSelectConvertedExpColdOperand,
50  "Number of select groups converted due to expensive cold operand");
51 STATISTIC(NumSelectConvertedHighPred,
52  "Number of select groups converted due to high-predictability");
53 STATISTIC(NumSelectUnPred,
54  "Number of select groups not converted due to unpredictability");
55 STATISTIC(NumSelectColdBB,
56  "Number of select groups not converted due to cold basic block");
57 STATISTIC(NumSelectConvertedLoop,
58  "Number of select groups converted due to loop-level analysis");
59 STATISTIC(NumSelectsConverted, "Number of selects converted");
60 
61 static cl::opt<unsigned> ColdOperandThreshold(
62     "cold-operand-threshold",
63  cl::desc("Maximum frequency of path for an operand to be considered cold."),
64  cl::init(20), cl::Hidden);
65 
66 static cl::opt<unsigned> ColdOperandMaxCostMultiplier(
67     "cold-operand-max-cost-multiplier",
68  cl::desc("Maximum cost multiplier of TCC_expensive for the dependence "
69  "slice of a cold operand to be considered inexpensive."),
70  cl::init(1), cl::Hidden);
71 
72 static cl::opt<unsigned>
73  GainGradientThreshold("select-opti-loop-gradient-gain-threshold",
74  cl::desc("Gradient gain threshold (%)."),
75  cl::init(25), cl::Hidden);
76 
77 static cl::opt<unsigned>
78  GainCycleThreshold("select-opti-loop-cycle-gain-threshold",
79  cl::desc("Minimum gain per loop (in cycles) threshold."),
80  cl::init(4), cl::Hidden);
81 
82 static cl::opt<unsigned> GainRelativeThreshold(
83     "select-opti-loop-relative-gain-threshold",
84  cl::desc(
85  "Minimum relative gain per loop threshold (1/X). Defaults to 12.5%"),
86  cl::init(8), cl::Hidden);
87 
88 static cl::opt<unsigned> MispredictDefaultRate(
89     "mispredict-default-rate", cl::Hidden, cl::init(25),
90  cl::desc("Default mispredict rate (initialized to 25%)."));
91 
92 static cl::opt<bool>
93  DisableLoopLevelHeuristics("disable-loop-level-heuristics", cl::Hidden,
94  cl::init(false),
95  cl::desc("Disable loop-level heuristics."));
96 
97 namespace {
98 
99 class SelectOptimize : public FunctionPass {
100  const TargetMachine *TM = nullptr;
101  const TargetSubtargetInfo *TSI;
102  const TargetLowering *TLI = nullptr;
103  const TargetTransformInfo *TTI = nullptr;
104  const LoopInfo *LI;
105  DominatorTree *DT;
106  std::unique_ptr<BlockFrequencyInfo> BFI;
107  std::unique_ptr<BranchProbabilityInfo> BPI;
108  ProfileSummaryInfo *PSI;
109  OptimizationRemarkEmitter *ORE;
110  TargetSchedModel TSchedModel;
111 
112 public:
113  static char ID;
114 
115  SelectOptimize() : FunctionPass(ID) {
116    initializeSelectOptimizePass(*PassRegistry::getPassRegistry());
117  }
118 
119  bool runOnFunction(Function &F) override;
120 
121  void getAnalysisUsage(AnalysisUsage &AU) const override {
122    AU.addRequired<ProfileSummaryInfoWrapperPass>();
123    AU.addRequired<TargetPassConfig>();
124    AU.addRequired<TargetTransformInfoWrapperPass>();
125    AU.addRequired<DominatorTreeWrapperPass>();
126    AU.addRequired<LoopInfoWrapperPass>();
127    AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
128  }
129 
130 private:
131  // Select groups consist of consecutive select instructions with the same
132  // condition.
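  // For example, the following two selects share the same condition %cmp and
  // would form one group:
  //   %r1 = select i1 %cmp, i32 %a, i32 %b
  //   %r2 = select i1 %cmp, i32 %c, i32 %d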
133  using SelectGroup = SmallVector<SelectInst *, 2>;
134  using SelectGroups = SmallVector<SelectGroup, 2>;
135 
136  using Scaled64 = ScaledNumber<uint64_t>;
137 
138  struct CostInfo {
139  /// Predicated cost (with selects as conditional moves).
140  Scaled64 PredCost;
141  /// Non-predicated cost (with selects converted to branches).
142  Scaled64 NonPredCost;
143  };
144 
145  // Converts select instructions of a function to conditional jumps when deemed
146  // profitable. Returns true if at least one select was converted.
147  bool optimizeSelects(Function &F);
148 
149  // Heuristics for determining which select instructions can be profitably
150  // converted to branches. Separate heuristics for selects in inner-most loops
151  // and the rest of code regions (base heuristics for non-inner-most loop
152  // regions).
153  void optimizeSelectsBase(Function &F, SelectGroups &ProfSIGroups);
154  void optimizeSelectsInnerLoops(Function &F, SelectGroups &ProfSIGroups);
155 
156  // Converts to branches the select groups that were deemed
157  // profitable-to-convert.
158  void convertProfitableSIGroups(SelectGroups &ProfSIGroups);
159 
160  // Splits selects of a given basic block into select groups.
161  void collectSelectGroups(BasicBlock &BB, SelectGroups &SIGroups);
162 
163  // Determines for which select groups it is profitable to convert to branches
164  // (base and inner-most-loop heuristics).
165  void findProfitableSIGroupsBase(SelectGroups &SIGroups,
166  SelectGroups &ProfSIGroups);
167  void findProfitableSIGroupsInnerLoops(const Loop *L, SelectGroups &SIGroups,
168  SelectGroups &ProfSIGroups);
169 
170  // Determines if a select group should be converted to a branch (base
171  // heuristics).
172  bool isConvertToBranchProfitableBase(const SmallVector<SelectInst *, 2> &ASI);
173 
174  // Returns true if there are expensive instructions in the cold value
175  // operand's (if any) dependence slice of any of the selects of the given
176  // group.
177  bool hasExpensiveColdOperand(const SmallVector<SelectInst *, 2> &ASI);
178 
179  // For a given source instruction, collect its backwards dependence slice
180  // consisting of instructions exclusively computed for producing the operands
181  // of the source instruction.
182  void getExclBackwardsSlice(Instruction *I, std::stack<Instruction *> &Slice,
183  bool ForSinking = false);
184 
185  // Returns true if the condition of the select is highly predictable.
186  bool isSelectHighlyPredictable(const SelectInst *SI);
187 
188  // Loop-level checks to determine if a non-predicated version (with branches)
189  // of the given loop is more profitable than its predicated version.
190  bool checkLoopHeuristics(const Loop *L, const CostInfo LoopDepth[2]);
191 
192  // Computes instruction and loop-critical-path costs for both the predicated
193  // and non-predicated version of the given loop.
194  bool computeLoopCosts(const Loop *L, const SelectGroups &SIGroups,
195                         DenseMap<const Instruction *, CostInfo> &InstCostMap,
196                         CostInfo *LoopCost);
197 
198  // Returns a set of all the select instructions in the given select groups.
199  SmallPtrSet<const Instruction *, 2> getSIset(const SelectGroups &SIGroups);
200 
201  // Returns the latency cost of a given instruction.
202  Optional<uint64_t> computeInstCost(const Instruction *I);
203 
204  // Returns the misprediction cost of a given select when converted to branch.
205  Scaled64 getMispredictionCost(const SelectInst *SI, const Scaled64 CondCost);
206 
207  // Returns the cost of a branch when the prediction is correct.
208  Scaled64 getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
209  const SelectInst *SI);
210 
211  // Returns true if the target architecture supports lowering a given select.
212  bool isSelectKindSupported(SelectInst *SI);
213 };
214 } // namespace
215 
216 char SelectOptimize::ID = 0;
217 
218 INITIALIZE_PASS_BEGIN(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
219                       false)
220 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
221 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
222 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
223 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
224 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
225 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
226 INITIALIZE_PASS_END(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
227  false)
228 
229 FunctionPass *llvm::createSelectOptimizePass() { return new SelectOptimize(); }
230 
231 bool SelectOptimize::runOnFunction(Function &F) {
232  TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
233  TSI = TM->getSubtargetImpl(F);
234  TLI = TSI->getTargetLowering();
235 
236  // If none of the select types is supported then skip this pass.
237  // This is an optimization pass. Legality issues will be handled by
238  // instruction selection.
239  if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) &&
240  !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) &&
241  !TLI->isSelectSupported(TargetLowering::VectorMaskSelect))
242  return false;
243 
244  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
245  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
246  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
247  BPI.reset(new BranchProbabilityInfo(F, *LI));
248  BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
249  PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
250  ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
251  TSchedModel.init(TSI);
252 
253  // When optimizing for size, selects are preferable over branches.
254  if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI.get()))
255  return false;
256 
257  return optimizeSelects(F);
258 }
259 
260 bool SelectOptimize::optimizeSelects(Function &F) {
261  // Determine for which select groups it is profitable to convert to branches.
262  SelectGroups ProfSIGroups;
263  // Base heuristics apply only to non-loops and outer loops.
264  optimizeSelectsBase(F, ProfSIGroups);
265  // Separate heuristics for inner-most loops.
266  optimizeSelectsInnerLoops(F, ProfSIGroups);
267 
268  // Convert to branches the select groups that were deemed
269  // profitable-to-convert.
270  convertProfitableSIGroups(ProfSIGroups);
271 
272  // Code modified if at least one select group was converted.
273  return !ProfSIGroups.empty();
274 }
275 
276 void SelectOptimize::optimizeSelectsBase(Function &F,
277  SelectGroups &ProfSIGroups) {
278  // Collect all the select groups.
279  SelectGroups SIGroups;
280  for (BasicBlock &BB : F) {
281  // Base heuristics apply only to non-loops and outer loops.
282  Loop *L = LI->getLoopFor(&BB);
283  if (L && L->isInnermost())
284  continue;
285  collectSelectGroups(BB, SIGroups);
286  }
287 
288  // Determine for which select groups it is profitable to convert to branches.
289  findProfitableSIGroupsBase(SIGroups, ProfSIGroups);
290 }
291 
292 void SelectOptimize::optimizeSelectsInnerLoops(Function &F,
293  SelectGroups &ProfSIGroups) {
294  SmallVector<Loop *, 4> Loops(LI->begin(), LI->end());
295  // Need to check size on each iteration as we accumulate child loops.
296  for (unsigned long i = 0; i < Loops.size(); ++i)
297  for (Loop *ChildL : Loops[i]->getSubLoops())
298  Loops.push_back(ChildL);
299 
300  for (Loop *L : Loops) {
301  if (!L->isInnermost())
302  continue;
303 
304  SelectGroups SIGroups;
305  for (BasicBlock *BB : L->getBlocks())
306  collectSelectGroups(*BB, SIGroups);
307 
308  findProfitableSIGroupsInnerLoops(L, SIGroups, ProfSIGroups);
309  }
310 }
311 
312 /// If \p isTrue is true, return the true value of \p SI, otherwise return
313 /// false value of \p SI. If the true/false value of \p SI is defined by any
314 /// select instructions in \p Selects, look through the defining select
315 /// instruction until the true/false value is not defined in \p Selects.
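/// For example, given
///   %s1 = select i1 %c, i32 %a, i32 %b
///   %s2 = select i1 %c, i32 %s1, i32 %d
/// with both selects in \p Selects, the true value of %s2 is %a (looking
/// through %s1) and its false value is %d.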
316 static Value *
317 getTrueOrFalseValue(SelectInst *SI, bool isTrue,
318                     const SmallPtrSet<const Instruction *, 2> &Selects) {
319  Value *V = nullptr;
320  for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI);
321  DefSI = dyn_cast<SelectInst>(V)) {
322  assert(DefSI->getCondition() == SI->getCondition() &&
323  "The condition of DefSI does not match with SI");
324  V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
325  }
326  assert(V && "Failed to get select true/false value");
327  return V;
328 }
329 
330 void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
331  for (SelectGroup &ASI : ProfSIGroups) {
332  // The code transformation here is a modified version of the sinking
333  // transformation in CodeGenPrepare::optimizeSelectInst with a more
334  // aggressive strategy of which instructions to sink.
335  //
336  // TODO: eliminate the redundancy of logic transforming selects to branches
337  // by removing CodeGenPrepare::optimizeSelectInst and optimizing here
338  // selects for all cases (with and without profile information).
339 
340  // Transform a sequence like this:
341  // start:
342  // %cmp = cmp uge i32 %a, %b
343  // %sel = select i1 %cmp, i32 %c, i32 %d
344  //
345  // Into:
346  // start:
347  // %cmp = cmp uge i32 %a, %b
348  // %cmp.frozen = freeze %cmp
349  // br i1 %cmp.frozen, label %select.true, label %select.false
350  // select.true:
351  // br label %select.end
352  // select.false:
353  // br label %select.end
354  // select.end:
355  // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
356  //
357  // %cmp should be frozen, otherwise it may introduce undefined behavior.
358  // In addition, we may sink instructions that produce %c or %d into the
359  // destination(s) of the new branch.
360  // If the true or false blocks do not contain a sunken instruction, that
361  // block and its branch may be optimized away. In that case, one side of the
362  // first branch will point directly to select.end, and the corresponding PHI
363  // predecessor block will be the start block.
364 
365  // Find all the instructions that can be soundly sunk to the true/false
366  // blocks. These are instructions that are computed solely for producing the
367  // operands of the select instructions in the group and can be sunk without
368  // breaking the semantics of the LLVM IR (e.g., cannot sink instructions
369  // with side effects).
370  SmallVector<std::stack<Instruction *>, 2> TrueSlices, FalseSlices;
371  typedef std::stack<Instruction *>::size_type StackSizeType;
372  StackSizeType maxTrueSliceLen = 0, maxFalseSliceLen = 0;
373  for (SelectInst *SI : ASI) {
374  // For each select, compute the sinkable dependence chains of the true and
375  // false operands.
376  if (auto *TI = dyn_cast<Instruction>(SI->getTrueValue())) {
377  std::stack<Instruction *> TrueSlice;
378  getExclBackwardsSlice(TI, TrueSlice, true);
379  maxTrueSliceLen = std::max(maxTrueSliceLen, TrueSlice.size());
380  TrueSlices.push_back(TrueSlice);
381  }
382  if (auto *FI = dyn_cast<Instruction>(SI->getFalseValue())) {
383  std::stack<Instruction *> FalseSlice;
384  getExclBackwardsSlice(FI, FalseSlice, true);
385  maxFalseSliceLen = std::max(maxFalseSliceLen, FalseSlice.size());
386  FalseSlices.push_back(FalseSlice);
387  }
388  }
389  // In the case of multiple select instructions in the same group, the order
390  // of non-dependent instructions (instructions of different dependence
391  // slices) in the true/false blocks appears to affect performance.
392  // Interleaving the slices experimentally seems to be the optimal approach.
393  // This interleaved scheduling allows for more ILP (with the natural downside
394  // of slightly increasing register pressure) compared to a simple ordering of
395  // one whole chain after another. One would expect that this ordering would
396  // not matter since the scheduling in the backend of the compiler would
397  // take care of it, but apparently the scheduler fails to deliver optimal
398  // ILP with a naive ordering here.
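    // For example, given the true-value slices {a1, a2} and {b1, b2} (each in
    // dependence order), the sunk instructions are emitted as a1, b1, a2, b2
    // rather than a1, a2, b1, b2.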
399  SmallVector<Instruction *, 2> TrueSlicesInterleaved, FalseSlicesInterleaved;
400  for (StackSizeType IS = 0; IS < maxTrueSliceLen; ++IS) {
401  for (auto &S : TrueSlices) {
402  if (!S.empty()) {
403  TrueSlicesInterleaved.push_back(S.top());
404  S.pop();
405  }
406  }
407  }
408  for (StackSizeType IS = 0; IS < maxFalseSliceLen; ++IS) {
409  for (auto &S : FalseSlices) {
410  if (!S.empty()) {
411  FalseSlicesInterleaved.push_back(S.top());
412  S.pop();
413  }
414  }
415  }
416 
417  // We split the block containing the select(s) into two blocks.
418  SelectInst *SI = ASI.front();
419  SelectInst *LastSI = ASI.back();
420  BasicBlock *StartBlock = SI->getParent();
421  BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI));
422  BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
423  BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock).getFrequency());
424  // Delete the unconditional branch that was just created by the split.
425  StartBlock->getTerminator()->eraseFromParent();
426 
427  // Move any debug/pseudo instructions that were in-between the select
428  // group to the newly-created end block.
429  SmallVector<Instruction *, 2> DebugPseudoINS;
430  auto DIt = SI->getIterator();
431  while (&*DIt != LastSI) {
432  if (DIt->isDebugOrPseudoInst())
433  DebugPseudoINS.push_back(&*DIt);
434  DIt++;
435  }
436  for (auto DI : DebugPseudoINS) {
437  DI->moveBefore(&*EndBlock->getFirstInsertionPt());
438  }
439 
440  // These are the new basic blocks for the conditional branch.
441  // At least one will become an actual new basic block.
442  BasicBlock *TrueBlock = nullptr, *FalseBlock = nullptr;
443  BranchInst *TrueBranch = nullptr, *FalseBranch = nullptr;
444  if (!TrueSlicesInterleaved.empty()) {
445  TrueBlock = BasicBlock::Create(LastSI->getContext(), "select.true.sink",
446  EndBlock->getParent(), EndBlock);
447  TrueBranch = BranchInst::Create(EndBlock, TrueBlock);
448  TrueBranch->setDebugLoc(LastSI->getDebugLoc());
449  for (Instruction *TrueInst : TrueSlicesInterleaved)
450  TrueInst->moveBefore(TrueBranch);
451  }
452  if (!FalseSlicesInterleaved.empty()) {
453  FalseBlock = BasicBlock::Create(LastSI->getContext(), "select.false.sink",
454  EndBlock->getParent(), EndBlock);
455  FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
456  FalseBranch->setDebugLoc(LastSI->getDebugLoc());
457  for (Instruction *FalseInst : FalseSlicesInterleaved)
458  FalseInst->moveBefore(FalseBranch);
459  }
460  // If there was nothing to sink, then arbitrarily choose the 'false' side
461  // for a new input value to the PHI.
462  if (TrueBlock == FalseBlock) {
463  assert(TrueBlock == nullptr &&
464  "Unexpected basic block transform while optimizing select");
465 
466  FalseBlock = BasicBlock::Create(SI->getContext(), "select.false",
467  EndBlock->getParent(), EndBlock);
468  auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
469  FalseBranch->setDebugLoc(SI->getDebugLoc());
470  }
471 
472  // Insert the real conditional branch based on the original condition.
473  // If we did not create a new block for one of the 'true' or 'false' paths
474  // of the condition, it means that side of the branch goes to the end block
475  // directly and the path originates from the start block from the point of
476  // view of the new PHI.
477  BasicBlock *TT, *FT;
478  if (TrueBlock == nullptr) {
479  TT = EndBlock;
480  FT = FalseBlock;
481  TrueBlock = StartBlock;
482  } else if (FalseBlock == nullptr) {
483  TT = TrueBlock;
484  FT = EndBlock;
485  FalseBlock = StartBlock;
486  } else {
487  TT = TrueBlock;
488  FT = FalseBlock;
489  }
490  IRBuilder<> IB(SI);
491  auto *CondFr =
492  IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen");
493  IB.CreateCondBr(CondFr, TT, FT, SI);
494 
495    SmallPtrSet<const Instruction *, 2> INS;
496    INS.insert(ASI.begin(), ASI.end());
497  // Use reverse iterator because later select may use the value of the
498  // earlier select, and we need to propagate value through earlier select
499  // to get the PHI operand.
500  for (auto It = ASI.rbegin(); It != ASI.rend(); ++It) {
501  SelectInst *SI = *It;
502  // The select itself is replaced with a PHI Node.
503  PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front());
504  PN->takeName(SI);
505  PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
506  PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
507  PN->setDebugLoc(SI->getDebugLoc());
508 
509  SI->replaceAllUsesWith(PN);
510  SI->eraseFromParent();
511  INS.erase(SI);
512  ++NumSelectsConverted;
513  }
514  }
515 }
516 
517 void SelectOptimize::collectSelectGroups(BasicBlock &BB,
518  SelectGroups &SIGroups) {
519  BasicBlock::iterator BBIt = BB.begin();
520  while (BBIt != BB.end()) {
521  Instruction *I = &*BBIt++;
522  if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
523  SelectGroup SIGroup;
524  SIGroup.push_back(SI);
525  while (BBIt != BB.end()) {
526  Instruction *NI = &*BBIt;
527  SelectInst *NSI = dyn_cast<SelectInst>(NI);
528  if (NSI && SI->getCondition() == NSI->getCondition()) {
529  SIGroup.push_back(NSI);
530  } else if (!NI->isDebugOrPseudoInst()) {
531  // Debug/pseudo instructions should be skipped and not prevent the
532  // formation of a select group.
533  break;
534  }
535  ++BBIt;
536  }
537 
538  // If the select type is not supported, no point optimizing it.
539  // Instruction selection will take care of it.
540  if (!isSelectKindSupported(SI))
541  continue;
542 
543  SIGroups.push_back(SIGroup);
544  }
545  }
546 }
547 
548 void SelectOptimize::findProfitableSIGroupsBase(SelectGroups &SIGroups,
549  SelectGroups &ProfSIGroups) {
550  for (SelectGroup &ASI : SIGroups) {
551  ++NumSelectOptAnalyzed;
552  if (isConvertToBranchProfitableBase(ASI))
553  ProfSIGroups.push_back(ASI);
554  }
555 }
556 
557 void SelectOptimize::findProfitableSIGroupsInnerLoops(
558  const Loop *L, SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
559  NumSelectOptAnalyzed += SIGroups.size();
560  // For each select group in an inner-most loop,
561  // a branch is more preferable than a select/conditional-move if:
562  // i) conversion to branches for all the select groups of the loop satisfies
563  // loop-level heuristics including reducing the loop's critical path by
564  // some threshold (see SelectOptimize::checkLoopHeuristics); and
565  // ii) the total cost of the select group is cheaper with a branch compared
566  // to its predicated version. The cost is in terms of latency and the cost
567  // of a select group is the cost of its most expensive select instruction
568  // (assuming infinite resources and thus fully leveraging available ILP).
569 
570  DenseMap<const Instruction *, CostInfo> InstCostMap;
571  CostInfo LoopCost[2] = {{Scaled64::getZero(), Scaled64::getZero()},
572  {Scaled64::getZero(), Scaled64::getZero()}};
573  if (!computeLoopCosts(L, SIGroups, InstCostMap, LoopCost) ||
574  !checkLoopHeuristics(L, LoopCost)) {
575  return;
576  }
577 
578  for (SelectGroup &ASI : SIGroups) {
579  // Assuming infinite resources, the cost of a group of instructions is the
580  // cost of the most expensive instruction of the group.
581  Scaled64 SelectCost = Scaled64::getZero(), BranchCost = Scaled64::getZero();
582  for (SelectInst *SI : ASI) {
583  SelectCost = std::max(SelectCost, InstCostMap[SI].PredCost);
584  BranchCost = std::max(BranchCost, InstCostMap[SI].NonPredCost);
585  }
586  if (BranchCost < SelectCost) {
587  OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", ASI.front());
588  OR << "Profitable to convert to branch (loop analysis). BranchCost="
589  << BranchCost.toString() << ", SelectCost=" << SelectCost.toString()
590  << ". ";
591  ORE->emit(OR);
592  ++NumSelectConvertedLoop;
593  ProfSIGroups.push_back(ASI);
594  } else {
595  OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", ASI.front());
596  ORmiss << "Select is more profitable (loop analysis). BranchCost="
597  << BranchCost.toString()
598  << ", SelectCost=" << SelectCost.toString() << ". ";
599  ORE->emit(ORmiss);
600  }
601  }
602 }
603 
604 bool SelectOptimize::isConvertToBranchProfitableBase(
605  const SmallVector<SelectInst *, 2> &ASI) {
606  SelectInst *SI = ASI.front();
607  OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", SI);
608  OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", SI);
609 
610  // Skip cold basic blocks. Better to optimize for size for cold blocks.
611  if (PSI->isColdBlock(SI->getParent(), BFI.get())) {
612  ++NumSelectColdBB;
613  ORmiss << "Not converted to branch because of cold basic block. ";
614  ORE->emit(ORmiss);
615  return false;
616  }
617 
618  // If unpredictable, branch form is less profitable.
619  if (SI->getMetadata(LLVMContext::MD_unpredictable)) {
620  ++NumSelectUnPred;
621  ORmiss << "Not converted to branch because of unpredictable branch. ";
622  ORE->emit(ORmiss);
623  return false;
624  }
625 
626  // If highly predictable, branch form is more profitable, unless a
627  // predictable select is inexpensive in the target architecture.
628  if (isSelectHighlyPredictable(SI) && TLI->isPredictableSelectExpensive()) {
629  ++NumSelectConvertedHighPred;
630  OR << "Converted to branch because of highly predictable branch. ";
631  ORE->emit(OR);
632  return true;
633  }
634 
635  // Look for expensive instructions in the cold operand's (if any) dependence
636  // slice of any of the selects in the group.
637  if (hasExpensiveColdOperand(ASI)) {
638  ++NumSelectConvertedExpColdOperand;
639  OR << "Converted to branch because of expensive cold operand.";
640  ORE->emit(OR);
641  return true;
642  }
643 
644  ORmiss << "Not profitable to convert to branch (base heuristic).";
645  ORE->emit(ORmiss);
646  return false;
647 }
648 
649 static InstructionCost divideNearest(const InstructionCost &Numerator,
650                                      uint64_t Denominator) {
651  return (Numerator + (Denominator / 2)) / Denominator;
652 }
653 
654 bool SelectOptimize::hasExpensiveColdOperand(
655  const SmallVector<SelectInst *, 2> &ASI) {
656  bool ColdOperand = false;
657  uint64_t TrueWeight, FalseWeight, TotalWeight;
658  if (ASI.front()->extractProfMetadata(TrueWeight, FalseWeight)) {
659  uint64_t MinWeight = std::min(TrueWeight, FalseWeight);
660  TotalWeight = TrueWeight + FalseWeight;
661  // Is there a path with frequency < ColdOperandThreshold% (default: 20%)?
662  ColdOperand = TotalWeight * ColdOperandThreshold > 100 * MinWeight;
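    // For example, with branch weights 10:90 the rarely-taken path has a
    // frequency of about 10%, below the default 20% threshold, so its value
    // operand is considered cold.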
663  } else if (PSI->hasProfileSummary()) {
664  OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", ASI.front());
665  ORmiss << "Profile data available but missing branch-weights metadata for "
666  "select instruction. ";
667  ORE->emit(ORmiss);
668  }
669  if (!ColdOperand)
670  return false;
671  // Check if the cold path's dependence slice is expensive for any of the
672  // selects of the group.
673  for (SelectInst *SI : ASI) {
674  Instruction *ColdI = nullptr;
675  uint64_t HotWeight;
676  if (TrueWeight < FalseWeight) {
677  ColdI = dyn_cast<Instruction>(SI->getTrueValue());
678  HotWeight = FalseWeight;
679  } else {
680  ColdI = dyn_cast<Instruction>(SI->getFalseValue());
681  HotWeight = TrueWeight;
682  }
683  if (ColdI) {
684  std::stack<Instruction *> ColdSlice;
685  getExclBackwardsSlice(ColdI, ColdSlice);
686  InstructionCost SliceCost = 0;
687  while (!ColdSlice.empty()) {
688  SliceCost += TTI->getInstructionCost(ColdSlice.top(),
689                                           TargetTransformInfo::TCK_Latency);
690  ColdSlice.pop();
691  }
692  // The colder the cold value operand of the select is, the more expensive
693  // the cmov becomes for computing that value every time. Thus, the colder
694  // the cold operand is, the more its cost counts.
695  // Get nearest integer cost adjusted for coldness.
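      // For example, with SliceCost = 5 and weights 10:90 (HotWeight = 90,
      // TotalWeight = 100), AdjSliceCost = (5 * 90 + 50) / 100 = 5, which
      // reaches the default threshold of 1 * TCC_Expensive (4), so the slice
      // counts as expensive.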
696  InstructionCost AdjSliceCost =
697  divideNearest(SliceCost * HotWeight, TotalWeight);
698  if (AdjSliceCost >=
699         ColdOperandMaxCostMultiplier * TargetTransformInfo::TCC_Expensive)
700  return true;
701  }
702  }
703  return false;
704 }
705 
706 // For a given source instruction, collect its backwards dependence slice
707 // consisting of instructions exclusively computed for the purpose of producing
708 // the operands of the source instruction. As an approximation
709 // (sufficiently-accurate in practice), we populate this set with the
710 // instructions of the backwards dependence slice that have only one use and
711 // form a one-use chain that leads to the source instruction.
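// For example, in
//   %t0 = load i32, ptr %p      ; single use (by %t1)
//   %t1 = add i32 %t0, 1        ; single use (by the select)
//   %r  = select i1 %c, i32 %t1, i32 %v
// the exclusive backwards slice of %t1 is {%t0, %t1}.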
712 void SelectOptimize::getExclBackwardsSlice(Instruction *I,
713  std::stack<Instruction *> &Slice,
714  bool ForSinking) {
715  SmallPtrSet<Instruction *, 2> Visited;
716  std::queue<Instruction *> Worklist;
717  Worklist.push(I);
718  while (!Worklist.empty()) {
719  Instruction *II = Worklist.front();
720  Worklist.pop();
721 
722  // Avoid cycles.
723  if (!Visited.insert(II).second)
724  continue;
725 
726  if (!II->hasOneUse())
727  continue;
728 
729  // Cannot soundly sink instructions with side-effects.
730  // Terminator or phi instructions cannot be sunk.
731  // Avoid sinking other select instructions (should be handled separately).
732  if (ForSinking && (II->isTerminator() || II->mayHaveSideEffects() ||
733  isa<SelectInst>(II) || isa<PHINode>(II)))
734  continue;
735 
736  // Avoid considering instructions with lower frequency than the source
737  // instruction (i.e., avoid colder code regions of the dependence slice).
738  if (BFI->getBlockFreq(II->getParent()) < BFI->getBlockFreq(I->getParent()))
739  continue;
740 
741  // Eligible one-use instruction added to the dependence slice.
742  Slice.push(II);
743 
744  // Explore all the operands of the current instruction to expand the slice.
745  for (unsigned k = 0; k < II->getNumOperands(); ++k)
746  if (auto *OpI = dyn_cast<Instruction>(II->getOperand(k)))
747  Worklist.push(OpI);
748  }
749 }
750 
751 bool SelectOptimize::isSelectHighlyPredictable(const SelectInst *SI) {
752  uint64_t TrueWeight, FalseWeight;
753  if (SI->extractProfMetadata(TrueWeight, FalseWeight)) {
754  uint64_t Max = std::max(TrueWeight, FalseWeight);
755  uint64_t Sum = TrueWeight + FalseWeight;
756  if (Sum != 0) {
757  auto Probability = BranchProbability::getBranchProbability(Max, Sum);
758  if (Probability > TTI->getPredictableBranchThreshold())
759  return true;
760  }
761  }
762  return false;
763 }
764 
765 bool SelectOptimize::checkLoopHeuristics(const Loop *L,
766  const CostInfo LoopCost[2]) {
767  // Loop-level checks to determine if a non-predicated version (with branches)
768  // of the loop is more profitable than its predicated version.
769 
770  if (DisableLoopLevelHeuristics)
771    return true;
772 
773  OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti",
774  L->getHeader()->getFirstNonPHI());
775 
776  if (LoopCost[0].NonPredCost > LoopCost[0].PredCost ||
777  LoopCost[1].NonPredCost >= LoopCost[1].PredCost) {
778  ORmissL << "No select conversion in the loop due to no reduction of loop's "
779  "critical path. ";
780  ORE->emit(ORmissL);
781  return false;
782  }
783 
784  Scaled64 Gain[2] = {LoopCost[0].PredCost - LoopCost[0].NonPredCost,
785  LoopCost[1].PredCost - LoopCost[1].NonPredCost};
786 
787  // Profitably converting to branches needs to reduce the loop's critical path
788  // by at least some threshold (absolute gain of GainCycleThreshold cycles and
789  // relative gain of 12.5%).
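  // For example, with the default thresholds and a predicated critical path
  // of 40 cycles, the branch form must save at least max(4, 40 / 8 = 5)
  // cycles; a gain of only 4 cycles (10%) is rejected.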
790  if (Gain[1] < Scaled64::get(GainCycleThreshold) ||
791  Gain[1] * Scaled64::get(GainRelativeThreshold) < LoopCost[1].PredCost) {
792  Scaled64 RelativeGain = Scaled64::get(100) * Gain[1] / LoopCost[1].PredCost;
793  ORmissL << "No select conversion in the loop due to small reduction of "
794  "loop's critical path. Gain="
795  << Gain[1].toString()
796  << ", RelativeGain=" << RelativeGain.toString() << "%. ";
797  ORE->emit(ORmissL);
798  return false;
799  }
800 
801  // If the loop's critical path involves loop-carried dependences, the gradient
802  // of the gain needs to be at least GainGradientThreshold% (defaults to 25%).
803  // This check ensures that the latency reduction for the loop's critical path
804  // keeps decreasing with sufficient rate beyond the two analyzed loop
805  // iterations.
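  // For example, if the gain grows from 6 to 10 cycles while the predicated
  // critical path grows from 50 to 90 cycles, the gradient is
  // 100 * (10 - 6) / (90 - 50) = 10%, below the default 25% threshold.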
806  if (Gain[1] > Gain[0]) {
807  Scaled64 GradientGain = Scaled64::get(100) * (Gain[1] - Gain[0]) /
808  (LoopCost[1].PredCost - LoopCost[0].PredCost);
809  if (GradientGain < Scaled64::get(GainGradientThreshold)) {
810  ORmissL << "No select conversion in the loop due to small gradient gain. "
811  "GradientGain="
812  << GradientGain.toString() << "%. ";
813  ORE->emit(ORmissL);
814  return false;
815  }
816  }
817  // If the gain decreases it is not profitable to convert.
818  else if (Gain[1] < Gain[0]) {
819  ORmissL
820  << "No select conversion in the loop due to negative gradient gain. ";
821  ORE->emit(ORmissL);
822  return false;
823  }
824 
825  // Non-predicated version of the loop is more profitable than its
826  // predicated version.
827  return true;
828 }
829 
830 // Computes instruction and loop-critical-path costs for both the predicated
831 // and non-predicated version of the given loop.
832 // Returns false if unable to compute these costs due to invalid cost of loop
833 // instruction(s).
834 bool SelectOptimize::computeLoopCosts(
835  const Loop *L, const SelectGroups &SIGroups,
836  DenseMap<const Instruction *, CostInfo> &InstCostMap, CostInfo *LoopCost) {
837  const auto &SIset = getSIset(SIGroups);
838  // Compute instruction and loop-critical-path costs across two iterations for
839  // both predicated and non-predicated version.
840  const unsigned Iterations = 2;
841  for (unsigned Iter = 0; Iter < Iterations; ++Iter) {
842  // Cost of the loop's critical path.
843  CostInfo &MaxCost = LoopCost[Iter];
844  for (BasicBlock *BB : L->getBlocks()) {
845  for (const Instruction &I : *BB) {
846  if (I.isDebugOrPseudoInst())
847  continue;
848  // Compute the predicated and non-predicated cost of the instruction.
849  Scaled64 IPredCost = Scaled64::getZero(),
850  INonPredCost = Scaled64::getZero();
851 
852  // Assume infinite resources that allow to fully exploit the available
853  // instruction-level parallelism.
854  // InstCost = InstLatency + max(Op1Cost, Op2Cost, … OpNCost)
855  for (const Use &U : I.operands()) {
856  auto UI = dyn_cast<Instruction>(U.get());
857  if (!UI)
858  continue;
859  if (InstCostMap.count(UI)) {
860  IPredCost = std::max(IPredCost, InstCostMap[UI].PredCost);
861  INonPredCost = std::max(INonPredCost, InstCostMap[UI].NonPredCost);
862  }
863  }
864  auto ILatency = computeInstCost(&I);
865  if (!ILatency) {
866  OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti", &I);
867  ORmissL << "Invalid instruction cost preventing analysis and "
868  "optimization of the inner-most loop containing this "
869  "instruction. ";
870  ORE->emit(ORmissL);
871  return false;
872  }
873  IPredCost += Scaled64::get(ILatency.getValue());
874  INonPredCost += Scaled64::get(ILatency.getValue());
875 
876  // For a select that can be converted to branch,
877  // compute its cost as a branch (non-predicated cost).
878  //
879  // BranchCost = PredictedPathCost + MispredictCost
880  // PredictedPathCost = TrueOpCost * TrueProb + FalseOpCost * FalseProb
881  // MispredictCost = max(MispredictPenalty, CondCost) * MispredictRate
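        // For example, with TrueOpCost = 2, FalseOpCost = 6, branch weights
        // 3:1, MispredictPenalty = 14, CondCost = 2 and the default 25% rate:
        //   PredictedPathCost = (2 * 3 + 6 * 1) / 4   = 3
        //   MispredictCost    = max(14, 2) * 25 / 100 = 3.5
        //   BranchCost        = 3 + 3.5               = 6.5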
882  if (SIset.contains(&I)) {
883  auto SI = dyn_cast<SelectInst>(&I);
884 
885  Scaled64 TrueOpCost = Scaled64::getZero(),
886  FalseOpCost = Scaled64::getZero();
887  if (auto *TI = dyn_cast<Instruction>(SI->getTrueValue()))
888  if (InstCostMap.count(TI))
889  TrueOpCost = InstCostMap[TI].NonPredCost;
890  if (auto *FI = dyn_cast<Instruction>(SI->getFalseValue()))
891  if (InstCostMap.count(FI))
892  FalseOpCost = InstCostMap[FI].NonPredCost;
893  Scaled64 PredictedPathCost =
894  getPredictedPathCost(TrueOpCost, FalseOpCost, SI);
895 
896  Scaled64 CondCost = Scaled64::getZero();
897  if (auto *CI = dyn_cast<Instruction>(SI->getCondition()))
898  if (InstCostMap.count(CI))
899  CondCost = InstCostMap[CI].NonPredCost;
900  Scaled64 MispredictCost = getMispredictionCost(SI, CondCost);
901 
902  INonPredCost = PredictedPathCost + MispredictCost;
903  }
904 
905  InstCostMap[&I] = {IPredCost, INonPredCost};
906  MaxCost.PredCost = std::max(MaxCost.PredCost, IPredCost);
907  MaxCost.NonPredCost = std::max(MaxCost.NonPredCost, INonPredCost);
908  }
909  }
910  }
911  return true;
912 }
913 
914 SmallPtrSet<const Instruction *, 2>
915 SelectOptimize::getSIset(const SelectGroups &SIGroups) {
916  SmallPtrSet<const Instruction *, 2> SIset;
917  for (const SelectGroup &ASI : SIGroups)
918  for (const SelectInst *SI : ASI)
919  SIset.insert(SI);
920  return SIset;
921 }
922 
923 Optional<uint64_t> SelectOptimize::computeInstCost(const Instruction *I) {
924  InstructionCost ICost =
925      TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency);
926  if (auto OC = ICost.getValue())
927  return Optional<uint64_t>(*OC);
928  return Optional<uint64_t>(None);
929 }
930 
931 SelectOptimize::Scaled64
932 SelectOptimize::getMispredictionCost(const SelectInst *SI,
933  const Scaled64 CondCost) {
934  uint64_t MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty;
935 
936  // Account for the misprediction rate when using a branch
937  // (conservatively set to 25% by default).
938  uint64_t MispredictRate = MispredictDefaultRate;
939  // If the select condition is obviously predictable, then the misprediction
940  // rate is zero.
941  if (isSelectHighlyPredictable(SI))
942  MispredictRate = 0;
943 
944  // CondCost is included to account for cases where the computation of the
945  // condition is part of a long dependence chain (potentially loop-carried)
946  // that would delay detection of a misprediction and increase its cost.
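  // For example, with a 14-cycle penalty but a condition at the end of a
  // 30-cycle dependence chain, MispredictCost = max(14, 30) * 25 / 100 = 7.5
  // cycles under the default rate.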
947  Scaled64 MispredictCost =
948  std::max(Scaled64::get(MispredictPenalty), CondCost) *
949  Scaled64::get(MispredictRate);
950  MispredictCost /= Scaled64::get(100);
951 
952  return MispredictCost;
953 }
954 
955 // Returns the cost of a branch when the prediction is correct.
956 // TrueCost * TrueProbability + FalseCost * FalseProbability.
957 SelectOptimize::Scaled64
958 SelectOptimize::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
959  const SelectInst *SI) {
960  Scaled64 PredPathCost;
961  uint64_t TrueWeight, FalseWeight;
962  if (SI->extractProfMetadata(TrueWeight, FalseWeight)) {
963  uint64_t SumWeight = TrueWeight + FalseWeight;
964  if (SumWeight != 0) {
965  PredPathCost = TrueCost * Scaled64::get(TrueWeight) +
966  FalseCost * Scaled64::get(FalseWeight);
967  PredPathCost /= Scaled64::get(SumWeight);
968  return PredPathCost;
969  }
970  }
971  // Without branch weight metadata, we assume 75% for one path and 25% for
972  // the other, and pick the result with the larger cost.
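  // For example, with TrueCost = 8 and FalseCost = 2 this yields
  // max((8 * 3 + 2) / 4, (2 * 3 + 8) / 4) = max(6.5, 3.5) = 6.5.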
973  PredPathCost = std::max(TrueCost * Scaled64::get(3) + FalseCost,
974  FalseCost * Scaled64::get(3) + TrueCost);
975  PredPathCost /= Scaled64::get(4);
976  return PredPathCost;
977 }
978 
979 bool SelectOptimize::isSelectKindSupported(SelectInst *SI) {
980  bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
981  if (VectorCond)
982  return false;
983  TargetLowering::SelectSupportKind SelectKind;
984  if (SI->getType()->isVectorTy())
985    SelectKind = TargetLowering::ScalarCondVectorVal;
986  else
987  SelectKind = TargetLowering::ScalarValSelect;
988  return TLI->isSelectSupported(SelectKind);
989 }