LLVM 18.0.0git
SelectOptimize.cpp
Go to the documentation of this file.
1//===--- SelectOptimize.cpp - Convert select to branches if profitable ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass converts selects to conditional jumps when profitable.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/Statistic.h"
21#include "llvm/CodeGen/Passes.h"
26#include "llvm/IR/BasicBlock.h"
27#include "llvm/IR/Dominators.h"
28#include "llvm/IR/Function.h"
29#include "llvm/IR/IRBuilder.h"
30#include "llvm/IR/Instruction.h"
34#include "llvm/Pass.h"
38#include <algorithm>
39#include <memory>
40#include <queue>
41#include <stack>
42#include <string>
43
44using namespace llvm;
45
46#define DEBUG_TYPE "select-optimize"
47
48STATISTIC(NumSelectOptAnalyzed,
49 "Number of select groups considered for conversion to branch");
50STATISTIC(NumSelectConvertedExpColdOperand,
51 "Number of select groups converted due to expensive cold operand");
52STATISTIC(NumSelectConvertedHighPred,
53 "Number of select groups converted due to high-predictability");
54STATISTIC(NumSelectUnPred,
55 "Number of select groups not converted due to unpredictability");
56STATISTIC(NumSelectColdBB,
57 "Number of select groups not converted due to cold basic block");
58STATISTIC(NumSelectConvertedLoop,
59 "Number of select groups converted due to loop-level analysis");
60STATISTIC(NumSelectsConverted, "Number of selects converted");
61
63 "cold-operand-threshold",
64 cl::desc("Maximum frequency of path for an operand to be considered cold."),
65 cl::init(20), cl::Hidden);
66
68 "cold-operand-max-cost-multiplier",
69 cl::desc("Maximum cost multiplier of TCC_expensive for the dependence "
70 "slice of a cold operand to be considered inexpensive."),
72
74 GainGradientThreshold("select-opti-loop-gradient-gain-threshold",
75 cl::desc("Gradient gain threshold (%)."),
76 cl::init(25), cl::Hidden);
77
79 GainCycleThreshold("select-opti-loop-cycle-gain-threshold",
80 cl::desc("Minimum gain per loop (in cycles) threshold."),
82
84 "select-opti-loop-relative-gain-threshold",
86 "Minimum relative gain per loop threshold (1/X). Defaults to 12.5%"),
88
90 "mispredict-default-rate", cl::Hidden, cl::init(25),
91 cl::desc("Default mispredict rate (initialized to 25%)."));
92
93static cl::opt<bool>
94 DisableLoopLevelHeuristics("disable-loop-level-heuristics", cl::Hidden,
95 cl::init(false),
96 cl::desc("Disable loop-level heuristics."));
97
98namespace {
99
100class SelectOptimize : public FunctionPass {
101 const TargetMachine *TM = nullptr;
102 const TargetSubtargetInfo *TSI = nullptr;
103 const TargetLowering *TLI = nullptr;
104 const TargetTransformInfo *TTI = nullptr;
105 const LoopInfo *LI = nullptr;
106 DominatorTree *DT = nullptr;
107 std::unique_ptr<BlockFrequencyInfo> BFI;
108 std::unique_ptr<BranchProbabilityInfo> BPI;
109 ProfileSummaryInfo *PSI = nullptr;
110 OptimizationRemarkEmitter *ORE = nullptr;
111 TargetSchedModel TSchedModel;
112
113public:
114 static char ID;
115
116 SelectOptimize() : FunctionPass(ID) {
118 }
119
120 bool runOnFunction(Function &F) override;
121
122 void getAnalysisUsage(AnalysisUsage &AU) const override {
129 }
130
131private:
132 // Select groups consist of consecutive select instructions with the same
133 // condition.
134 using SelectGroup = SmallVector<SelectInst *, 2>;
135 using SelectGroups = SmallVector<SelectGroup, 2>;
136
138
139 struct CostInfo {
140 /// Predicated cost (with selects as conditional moves).
141 Scaled64 PredCost;
142 /// Non-predicated cost (with selects converted to branches).
143 Scaled64 NonPredCost;
144 };
145
146 // Converts select instructions of a function to conditional jumps when deemed
147 // profitable. Returns true if at least one select was converted.
148 bool optimizeSelects(Function &F);
149
150 // Heuristics for determining which select instructions can be profitably
151 // conveted to branches. Separate heuristics for selects in inner-most loops
152 // and the rest of code regions (base heuristics for non-inner-most loop
153 // regions).
154 void optimizeSelectsBase(Function &F, SelectGroups &ProfSIGroups);
155 void optimizeSelectsInnerLoops(Function &F, SelectGroups &ProfSIGroups);
156
157 // Converts to branches the select groups that were deemed
158 // profitable-to-convert.
159 void convertProfitableSIGroups(SelectGroups &ProfSIGroups);
160
161 // Splits selects of a given basic block into select groups.
162 void collectSelectGroups(BasicBlock &BB, SelectGroups &SIGroups);
163
164 // Determines for which select groups it is profitable converting to branches
165 // (base and inner-most-loop heuristics).
166 void findProfitableSIGroupsBase(SelectGroups &SIGroups,
167 SelectGroups &ProfSIGroups);
168 void findProfitableSIGroupsInnerLoops(const Loop *L, SelectGroups &SIGroups,
169 SelectGroups &ProfSIGroups);
170
171 // Determines if a select group should be converted to a branch (base
172 // heuristics).
173 bool isConvertToBranchProfitableBase(const SmallVector<SelectInst *, 2> &ASI);
174
175 // Returns true if there are expensive instructions in the cold value
176 // operand's (if any) dependence slice of any of the selects of the given
177 // group.
178 bool hasExpensiveColdOperand(const SmallVector<SelectInst *, 2> &ASI);
179
180 // For a given source instruction, collect its backwards dependence slice
181 // consisting of instructions exclusively computed for producing the operands
182 // of the source instruction.
183 void getExclBackwardsSlice(Instruction *I, std::stack<Instruction *> &Slice,
184 Instruction *SI, bool ForSinking = false);
185
186 // Returns true if the condition of the select is highly predictable.
187 bool isSelectHighlyPredictable(const SelectInst *SI);
188
189 // Loop-level checks to determine if a non-predicated version (with branches)
190 // of the given loop is more profitable than its predicated version.
191 bool checkLoopHeuristics(const Loop *L, const CostInfo LoopDepth[2]);
192
193 // Computes instruction and loop-critical-path costs for both the predicated
194 // and non-predicated version of the given loop.
195 bool computeLoopCosts(const Loop *L, const SelectGroups &SIGroups,
197 CostInfo *LoopCost);
198
199 // Returns a set of all the select instructions in the given select groups.
200 SmallPtrSet<const Instruction *, 2> getSIset(const SelectGroups &SIGroups);
201
202 // Returns the latency cost of a given instruction.
203 std::optional<uint64_t> computeInstCost(const Instruction *I);
204
205 // Returns the misprediction cost of a given select when converted to branch.
206 Scaled64 getMispredictionCost(const SelectInst *SI, const Scaled64 CondCost);
207
208 // Returns the cost of a branch when the prediction is correct.
209 Scaled64 getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
210 const SelectInst *SI);
211
212 // Returns true if the target architecture supports lowering a given select.
213 bool isSelectKindSupported(SelectInst *SI);
214};
215} // namespace
216
217char SelectOptimize::ID = 0;
218
219INITIALIZE_PASS_BEGIN(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
220 false)
227INITIALIZE_PASS_END(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
228 false)
229
230FunctionPass *llvm::createSelectOptimizePass() { return new SelectOptimize(); }
231
232bool SelectOptimize::runOnFunction(Function &F) {
233 TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
234 TSI = TM->getSubtargetImpl(F);
235 TLI = TSI->getTargetLowering();
236
237 // If none of the select types is supported then skip this pass.
238 // This is an optimization pass. Legality issues will be handled by
239 // instruction selection.
240 if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) &&
241 !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) &&
242 !TLI->isSelectSupported(TargetLowering::VectorMaskSelect))
243 return false;
244
245 TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
246
248 return false;
249
250 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
251 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
252 BPI.reset(new BranchProbabilityInfo(F, *LI));
253 BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
254 PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
255 ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
256 TSchedModel.init(TSI);
257
258 // When optimizing for size, selects are preferable over branches.
259 if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI.get()))
260 return false;
261
262 return optimizeSelects(F);
263}
264
265bool SelectOptimize::optimizeSelects(Function &F) {
266 // Determine for which select groups it is profitable converting to branches.
267 SelectGroups ProfSIGroups;
268 // Base heuristics apply only to non-loops and outer loops.
269 optimizeSelectsBase(F, ProfSIGroups);
270 // Separate heuristics for inner-most loops.
271 optimizeSelectsInnerLoops(F, ProfSIGroups);
272
273 // Convert to branches the select groups that were deemed
274 // profitable-to-convert.
275 convertProfitableSIGroups(ProfSIGroups);
276
277 // Code modified if at least one select group was converted.
278 return !ProfSIGroups.empty();
279}
280
281void SelectOptimize::optimizeSelectsBase(Function &F,
282 SelectGroups &ProfSIGroups) {
283 // Collect all the select groups.
284 SelectGroups SIGroups;
285 for (BasicBlock &BB : F) {
286 // Base heuristics apply only to non-loops and outer loops.
287 Loop *L = LI->getLoopFor(&BB);
288 if (L && L->isInnermost())
289 continue;
290 collectSelectGroups(BB, SIGroups);
291 }
292
293 // Determine for which select groups it is profitable converting to branches.
294 findProfitableSIGroupsBase(SIGroups, ProfSIGroups);
295}
296
297void SelectOptimize::optimizeSelectsInnerLoops(Function &F,
298 SelectGroups &ProfSIGroups) {
299 SmallVector<Loop *, 4> Loops(LI->begin(), LI->end());
300 // Need to check size on each iteration as we accumulate child loops.
301 for (unsigned long i = 0; i < Loops.size(); ++i)
302 for (Loop *ChildL : Loops[i]->getSubLoops())
303 Loops.push_back(ChildL);
304
305 for (Loop *L : Loops) {
306 if (!L->isInnermost())
307 continue;
308
309 SelectGroups SIGroups;
310 for (BasicBlock *BB : L->getBlocks())
311 collectSelectGroups(*BB, SIGroups);
312
313 findProfitableSIGroupsInnerLoops(L, SIGroups, ProfSIGroups);
314 }
315}
316
317/// If \p isTrue is true, return the true value of \p SI, otherwise return
318/// false value of \p SI. If the true/false value of \p SI is defined by any
319/// select instructions in \p Selects, look through the defining select
320/// instruction until the true/false value is not defined in \p Selects.
321static Value *
324 Value *V = nullptr;
325 for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI);
326 DefSI = dyn_cast<SelectInst>(V)) {
327 assert(DefSI->getCondition() == SI->getCondition() &&
328 "The condition of DefSI does not match with SI");
329 V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
330 }
331 assert(V && "Failed to get select true/false value");
332 return V;
333}
334
335void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
336 for (SelectGroup &ASI : ProfSIGroups) {
337 // The code transformation here is a modified version of the sinking
338 // transformation in CodeGenPrepare::optimizeSelectInst with a more
339 // aggressive strategy of which instructions to sink.
340 //
341 // TODO: eliminate the redundancy of logic transforming selects to branches
342 // by removing CodeGenPrepare::optimizeSelectInst and optimizing here
343 // selects for all cases (with and without profile information).
344
345 // Transform a sequence like this:
346 // start:
347 // %cmp = cmp uge i32 %a, %b
348 // %sel = select i1 %cmp, i32 %c, i32 %d
349 //
350 // Into:
351 // start:
352 // %cmp = cmp uge i32 %a, %b
353 // %cmp.frozen = freeze %cmp
354 // br i1 %cmp.frozen, label %select.true, label %select.false
355 // select.true:
356 // br label %select.end
357 // select.false:
358 // br label %select.end
359 // select.end:
360 // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
361 //
362 // %cmp should be frozen, otherwise it may introduce undefined behavior.
363 // In addition, we may sink instructions that produce %c or %d into the
364 // destination(s) of the new branch.
365 // If the true or false blocks do not contain a sunken instruction, that
366 // block and its branch may be optimized away. In that case, one side of the
367 // first branch will point directly to select.end, and the corresponding PHI
368 // predecessor block will be the start block.
369
370 // Find all the instructions that can be soundly sunk to the true/false
371 // blocks. These are instructions that are computed solely for producing the
372 // operands of the select instructions in the group and can be sunk without
373 // breaking the semantics of the LLVM IR (e.g., cannot sink instructions
374 // with side effects).
375 SmallVector<std::stack<Instruction *>, 2> TrueSlices, FalseSlices;
376 typedef std::stack<Instruction *>::size_type StackSizeType;
377 StackSizeType maxTrueSliceLen = 0, maxFalseSliceLen = 0;
378 for (SelectInst *SI : ASI) {
379 // For each select, compute the sinkable dependence chains of the true and
380 // false operands.
381 if (auto *TI = dyn_cast<Instruction>(SI->getTrueValue())) {
382 std::stack<Instruction *> TrueSlice;
383 getExclBackwardsSlice(TI, TrueSlice, SI, true);
384 maxTrueSliceLen = std::max(maxTrueSliceLen, TrueSlice.size());
385 TrueSlices.push_back(TrueSlice);
386 }
387 if (auto *FI = dyn_cast<Instruction>(SI->getFalseValue())) {
388 std::stack<Instruction *> FalseSlice;
389 getExclBackwardsSlice(FI, FalseSlice, SI, true);
390 maxFalseSliceLen = std::max(maxFalseSliceLen, FalseSlice.size());
391 FalseSlices.push_back(FalseSlice);
392 }
393 }
394 // In the case of multiple select instructions in the same group, the order
395 // of non-dependent instructions (instructions of different dependence
396 // slices) in the true/false blocks appears to affect performance.
397 // Interleaving the slices seems to experimentally be the optimal approach.
398 // This interleaving scheduling allows for more ILP (with a natural downside
399 // of increasing a bit register pressure) compared to a simple ordering of
400 // one whole chain after another. One would expect that this ordering would
401 // not matter since the scheduling in the backend of the compiler would
402 // take care of it, but apparently the scheduler fails to deliver optimal
403 // ILP with a naive ordering here.
404 SmallVector<Instruction *, 2> TrueSlicesInterleaved, FalseSlicesInterleaved;
405 for (StackSizeType IS = 0; IS < maxTrueSliceLen; ++IS) {
406 for (auto &S : TrueSlices) {
407 if (!S.empty()) {
408 TrueSlicesInterleaved.push_back(S.top());
409 S.pop();
410 }
411 }
412 }
413 for (StackSizeType IS = 0; IS < maxFalseSliceLen; ++IS) {
414 for (auto &S : FalseSlices) {
415 if (!S.empty()) {
416 FalseSlicesInterleaved.push_back(S.top());
417 S.pop();
418 }
419 }
420 }
421
422 // We split the block containing the select(s) into two blocks.
423 SelectInst *SI = ASI.front();
424 SelectInst *LastSI = ASI.back();
425 BasicBlock *StartBlock = SI->getParent();
426 BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI));
427 BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
428 BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock).getFrequency());
429 // Delete the unconditional branch that was just created by the split.
430 StartBlock->getTerminator()->eraseFromParent();
431
432 // Move any debug/pseudo instructions that were in-between the select
433 // group to the newly-created end block.
434 SmallVector<Instruction *, 2> DebugPseudoINS;
435 auto DIt = SI->getIterator();
436 while (&*DIt != LastSI) {
437 if (DIt->isDebugOrPseudoInst())
438 DebugPseudoINS.push_back(&*DIt);
439 DIt++;
440 }
441 for (auto *DI : DebugPseudoINS) {
442 DI->moveBeforePreserving(&*EndBlock->getFirstInsertionPt());
443 }
444
445 // These are the new basic blocks for the conditional branch.
446 // At least one will become an actual new basic block.
447 BasicBlock *TrueBlock = nullptr, *FalseBlock = nullptr;
448 BranchInst *TrueBranch = nullptr, *FalseBranch = nullptr;
449 if (!TrueSlicesInterleaved.empty()) {
450 TrueBlock = BasicBlock::Create(LastSI->getContext(), "select.true.sink",
451 EndBlock->getParent(), EndBlock);
452 TrueBranch = BranchInst::Create(EndBlock, TrueBlock);
453 TrueBranch->setDebugLoc(LastSI->getDebugLoc());
454 for (Instruction *TrueInst : TrueSlicesInterleaved)
455 TrueInst->moveBefore(TrueBranch);
456 }
457 if (!FalseSlicesInterleaved.empty()) {
458 FalseBlock = BasicBlock::Create(LastSI->getContext(), "select.false.sink",
459 EndBlock->getParent(), EndBlock);
460 FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
461 FalseBranch->setDebugLoc(LastSI->getDebugLoc());
462 for (Instruction *FalseInst : FalseSlicesInterleaved)
463 FalseInst->moveBefore(FalseBranch);
464 }
465 // If there was nothing to sink, then arbitrarily choose the 'false' side
466 // for a new input value to the PHI.
467 if (TrueBlock == FalseBlock) {
468 assert(TrueBlock == nullptr &&
469 "Unexpected basic block transform while optimizing select");
470
471 FalseBlock = BasicBlock::Create(SI->getContext(), "select.false",
472 EndBlock->getParent(), EndBlock);
473 auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
474 FalseBranch->setDebugLoc(SI->getDebugLoc());
475 }
476
477 // Insert the real conditional branch based on the original condition.
478 // If we did not create a new block for one of the 'true' or 'false' paths
479 // of the condition, it means that side of the branch goes to the end block
480 // directly and the path originates from the start block from the point of
481 // view of the new PHI.
482 BasicBlock *TT, *FT;
483 if (TrueBlock == nullptr) {
484 TT = EndBlock;
485 FT = FalseBlock;
486 TrueBlock = StartBlock;
487 } else if (FalseBlock == nullptr) {
488 TT = TrueBlock;
489 FT = EndBlock;
490 FalseBlock = StartBlock;
491 } else {
492 TT = TrueBlock;
493 FT = FalseBlock;
494 }
495 IRBuilder<> IB(SI);
496 auto *CondFr =
497 IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen");
498 IB.CreateCondBr(CondFr, TT, FT, SI);
499
501 INS.insert(ASI.begin(), ASI.end());
502 // Use reverse iterator because later select may use the value of the
503 // earlier select, and we need to propagate value through earlier select
504 // to get the PHI operand.
505 for (auto It = ASI.rbegin(); It != ASI.rend(); ++It) {
506 SelectInst *SI = *It;
507 // The select itself is replaced with a PHI Node.
508 PHINode *PN = PHINode::Create(SI->getType(), 2, "");
509 PN->insertBefore(EndBlock->begin());
510 PN->takeName(SI);
511 PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
512 PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
513 PN->setDebugLoc(SI->getDebugLoc());
514
515 SI->replaceAllUsesWith(PN);
516 SI->eraseFromParent();
517 INS.erase(SI);
518 ++NumSelectsConverted;
519 }
520 }
521}
522
523static bool isSpecialSelect(SelectInst *SI) {
524 using namespace llvm::PatternMatch;
525
526 // If the select is a logical-and/logical-or then it is better treated as a
527 // and/or by the backend.
530 return true;
531
532 return false;
533}
534
535void SelectOptimize::collectSelectGroups(BasicBlock &BB,
536 SelectGroups &SIGroups) {
537 BasicBlock::iterator BBIt = BB.begin();
538 while (BBIt != BB.end()) {
539 Instruction *I = &*BBIt++;
540 if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
541 if (isSpecialSelect(SI))
542 continue;
543
544 SelectGroup SIGroup;
545 SIGroup.push_back(SI);
546 while (BBIt != BB.end()) {
547 Instruction *NI = &*BBIt;
548 SelectInst *NSI = dyn_cast<SelectInst>(NI);
549 if (NSI && SI->getCondition() == NSI->getCondition()) {
550 SIGroup.push_back(NSI);
551 } else if (!NI->isDebugOrPseudoInst()) {
552 // Debug/pseudo instructions should be skipped and not prevent the
553 // formation of a select group.
554 break;
555 }
556 ++BBIt;
557 }
558
559 // If the select type is not supported, no point optimizing it.
560 // Instruction selection will take care of it.
561 if (!isSelectKindSupported(SI))
562 continue;
563
564 SIGroups.push_back(SIGroup);
565 }
566 }
567}
568
569void SelectOptimize::findProfitableSIGroupsBase(SelectGroups &SIGroups,
570 SelectGroups &ProfSIGroups) {
571 for (SelectGroup &ASI : SIGroups) {
572 ++NumSelectOptAnalyzed;
573 if (isConvertToBranchProfitableBase(ASI))
574 ProfSIGroups.push_back(ASI);
575 }
576}
577
580 LLVM_DEBUG(dbgs() << Rem.getMsg() << "\n");
581 ORE->emit(Rem);
582}
583
584void SelectOptimize::findProfitableSIGroupsInnerLoops(
585 const Loop *L, SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
586 NumSelectOptAnalyzed += SIGroups.size();
587 // For each select group in an inner-most loop,
588 // a branch is more preferable than a select/conditional-move if:
589 // i) conversion to branches for all the select groups of the loop satisfies
590 // loop-level heuristics including reducing the loop's critical path by
591 // some threshold (see SelectOptimize::checkLoopHeuristics); and
592 // ii) the total cost of the select group is cheaper with a branch compared
593 // to its predicated version. The cost is in terms of latency and the cost
594 // of a select group is the cost of its most expensive select instruction
595 // (assuming infinite resources and thus fully leveraging available ILP).
596
598 CostInfo LoopCost[2] = {{Scaled64::getZero(), Scaled64::getZero()},
600 if (!computeLoopCosts(L, SIGroups, InstCostMap, LoopCost) ||
601 !checkLoopHeuristics(L, LoopCost)) {
602 return;
603 }
604
605 for (SelectGroup &ASI : SIGroups) {
606 // Assuming infinite resources, the cost of a group of instructions is the
607 // cost of the most expensive instruction of the group.
608 Scaled64 SelectCost = Scaled64::getZero(), BranchCost = Scaled64::getZero();
609 for (SelectInst *SI : ASI) {
610 SelectCost = std::max(SelectCost, InstCostMap[SI].PredCost);
611 BranchCost = std::max(BranchCost, InstCostMap[SI].NonPredCost);
612 }
613 if (BranchCost < SelectCost) {
614 OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", ASI.front());
615 OR << "Profitable to convert to branch (loop analysis). BranchCost="
616 << BranchCost.toString() << ", SelectCost=" << SelectCost.toString()
617 << ". ";
618 EmitAndPrintRemark(ORE, OR);
619 ++NumSelectConvertedLoop;
620 ProfSIGroups.push_back(ASI);
621 } else {
622 OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", ASI.front());
623 ORmiss << "Select is more profitable (loop analysis). BranchCost="
624 << BranchCost.toString()
625 << ", SelectCost=" << SelectCost.toString() << ". ";
626 EmitAndPrintRemark(ORE, ORmiss);
627 }
628 }
629}
630
631bool SelectOptimize::isConvertToBranchProfitableBase(
632 const SmallVector<SelectInst *, 2> &ASI) {
633 SelectInst *SI = ASI.front();
634 LLVM_DEBUG(dbgs() << "Analyzing select group containing " << *SI << "\n");
635 OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", SI);
636 OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", SI);
637
638 // Skip cold basic blocks. Better to optimize for size for cold blocks.
639 if (PSI->isColdBlock(SI->getParent(), BFI.get())) {
640 ++NumSelectColdBB;
641 ORmiss << "Not converted to branch because of cold basic block. ";
642 EmitAndPrintRemark(ORE, ORmiss);
643 return false;
644 }
645
646 // If unpredictable, branch form is less profitable.
647 if (SI->getMetadata(LLVMContext::MD_unpredictable)) {
648 ++NumSelectUnPred;
649 ORmiss << "Not converted to branch because of unpredictable branch. ";
650 EmitAndPrintRemark(ORE, ORmiss);
651 return false;
652 }
653
654 // If highly predictable, branch form is more profitable, unless a
655 // predictable select is inexpensive in the target architecture.
656 if (isSelectHighlyPredictable(SI) && TLI->isPredictableSelectExpensive()) {
657 ++NumSelectConvertedHighPred;
658 OR << "Converted to branch because of highly predictable branch. ";
659 EmitAndPrintRemark(ORE, OR);
660 return true;
661 }
662
663 // Look for expensive instructions in the cold operand's (if any) dependence
664 // slice of any of the selects in the group.
665 if (hasExpensiveColdOperand(ASI)) {
666 ++NumSelectConvertedExpColdOperand;
667 OR << "Converted to branch because of expensive cold operand.";
668 EmitAndPrintRemark(ORE, OR);
669 return true;
670 }
671
672 ORmiss << "Not profitable to convert to branch (base heuristic).";
673 EmitAndPrintRemark(ORE, ORmiss);
674 return false;
675}
676
678 uint64_t Denominator) {
679 return (Numerator + (Denominator / 2)) / Denominator;
680}
681
682bool SelectOptimize::hasExpensiveColdOperand(
683 const SmallVector<SelectInst *, 2> &ASI) {
684 bool ColdOperand = false;
685 uint64_t TrueWeight, FalseWeight, TotalWeight;
686 if (extractBranchWeights(*ASI.front(), TrueWeight, FalseWeight)) {
687 uint64_t MinWeight = std::min(TrueWeight, FalseWeight);
688 TotalWeight = TrueWeight + FalseWeight;
689 // Is there a path with frequency <ColdOperandThreshold% (default:20%) ?
690 ColdOperand = TotalWeight * ColdOperandThreshold > 100 * MinWeight;
691 } else if (PSI->hasProfileSummary()) {
692 OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", ASI.front());
693 ORmiss << "Profile data available but missing branch-weights metadata for "
694 "select instruction. ";
695 EmitAndPrintRemark(ORE, ORmiss);
696 }
697 if (!ColdOperand)
698 return false;
699 // Check if the cold path's dependence slice is expensive for any of the
700 // selects of the group.
701 for (SelectInst *SI : ASI) {
702 Instruction *ColdI = nullptr;
703 uint64_t HotWeight;
704 if (TrueWeight < FalseWeight) {
705 ColdI = dyn_cast<Instruction>(SI->getTrueValue());
706 HotWeight = FalseWeight;
707 } else {
708 ColdI = dyn_cast<Instruction>(SI->getFalseValue());
709 HotWeight = TrueWeight;
710 }
711 if (ColdI) {
712 std::stack<Instruction *> ColdSlice;
713 getExclBackwardsSlice(ColdI, ColdSlice, SI);
714 InstructionCost SliceCost = 0;
715 while (!ColdSlice.empty()) {
716 SliceCost += TTI->getInstructionCost(ColdSlice.top(),
718 ColdSlice.pop();
719 }
720 // The colder the cold value operand of the select is the more expensive
721 // the cmov becomes for computing the cold value operand every time. Thus,
722 // the colder the cold operand is the more its cost counts.
723 // Get nearest integer cost adjusted for coldness.
724 InstructionCost AdjSliceCost =
725 divideNearest(SliceCost * HotWeight, TotalWeight);
726 if (AdjSliceCost >=
728 return true;
729 }
730 }
731 return false;
732}
733
734// Check if it is safe to move LoadI next to the SI.
735// Conservatively assume it is safe only if there is no instruction
736// modifying memory in-between the load and the select instruction.
737static bool isSafeToSinkLoad(Instruction *LoadI, Instruction *SI) {
738 // Assume loads from different basic blocks are unsafe to move.
739 if (LoadI->getParent() != SI->getParent())
740 return false;
741 auto It = LoadI->getIterator();
742 while (&*It != SI) {
743 if (It->mayWriteToMemory())
744 return false;
745 It++;
746 }
747 return true;
748}
749
750// For a given source instruction, collect its backwards dependence slice
751// consisting of instructions exclusively computed for the purpose of producing
752// the operands of the source instruction. As an approximation
753// (sufficiently-accurate in practice), we populate this set with the
754// instructions of the backwards dependence slice that only have one-use and
755// form an one-use chain that leads to the source instruction.
756void SelectOptimize::getExclBackwardsSlice(Instruction *I,
757 std::stack<Instruction *> &Slice,
758 Instruction *SI, bool ForSinking) {
760 std::queue<Instruction *> Worklist;
761 Worklist.push(I);
762 while (!Worklist.empty()) {
763 Instruction *II = Worklist.front();
764 Worklist.pop();
765
766 // Avoid cycles.
767 if (!Visited.insert(II).second)
768 continue;
769
770 if (!II->hasOneUse())
771 continue;
772
773 // Cannot soundly sink instructions with side-effects.
774 // Terminator or phi instructions cannot be sunk.
775 // Avoid sinking other select instructions (should be handled separetely).
776 if (ForSinking && (II->isTerminator() || II->mayHaveSideEffects() ||
777 isa<SelectInst>(II) || isa<PHINode>(II)))
778 continue;
779
780 // Avoid sinking loads in order not to skip state-modifying instructions,
781 // that may alias with the loaded address.
782 // Only allow sinking of loads within the same basic block that are
783 // conservatively proven to be safe.
784 if (ForSinking && II->mayReadFromMemory() && !isSafeToSinkLoad(II, SI))
785 continue;
786
787 // Avoid considering instructions with less frequency than the source
788 // instruction (i.e., avoid colder code regions of the dependence slice).
789 if (BFI->getBlockFreq(II->getParent()) < BFI->getBlockFreq(I->getParent()))
790 continue;
791
792 // Eligible one-use instruction added to the dependence slice.
793 Slice.push(II);
794
795 // Explore all the operands of the current instruction to expand the slice.
796 for (unsigned k = 0; k < II->getNumOperands(); ++k)
797 if (auto *OpI = dyn_cast<Instruction>(II->getOperand(k)))
798 Worklist.push(OpI);
799 }
800}
801
802bool SelectOptimize::isSelectHighlyPredictable(const SelectInst *SI) {
803 uint64_t TrueWeight, FalseWeight;
804 if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
805 uint64_t Max = std::max(TrueWeight, FalseWeight);
806 uint64_t Sum = TrueWeight + FalseWeight;
807 if (Sum != 0) {
808 auto Probability = BranchProbability::getBranchProbability(Max, Sum);
809 if (Probability > TTI->getPredictableBranchThreshold())
810 return true;
811 }
812 }
813 return false;
814}
815
816bool SelectOptimize::checkLoopHeuristics(const Loop *L,
817 const CostInfo LoopCost[2]) {
818 // Loop-level checks to determine if a non-predicated version (with branches)
819 // of the loop is more profitable than its predicated version.
820
822 return true;
823
824 OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti",
825 L->getHeader()->getFirstNonPHI());
826
827 if (LoopCost[0].NonPredCost > LoopCost[0].PredCost ||
828 LoopCost[1].NonPredCost >= LoopCost[1].PredCost) {
829 ORmissL << "No select conversion in the loop due to no reduction of loop's "
830 "critical path. ";
831 EmitAndPrintRemark(ORE, ORmissL);
832 return false;
833 }
834
835 Scaled64 Gain[2] = {LoopCost[0].PredCost - LoopCost[0].NonPredCost,
836 LoopCost[1].PredCost - LoopCost[1].NonPredCost};
837
838 // Profitably converting to branches need to reduce the loop's critical path
839 // by at least some threshold (absolute gain of GainCycleThreshold cycles and
840 // relative gain of 12.5%).
841 if (Gain[1] < Scaled64::get(GainCycleThreshold) ||
842 Gain[1] * Scaled64::get(GainRelativeThreshold) < LoopCost[1].PredCost) {
843 Scaled64 RelativeGain = Scaled64::get(100) * Gain[1] / LoopCost[1].PredCost;
844 ORmissL << "No select conversion in the loop due to small reduction of "
845 "loop's critical path. Gain="
846 << Gain[1].toString()
847 << ", RelativeGain=" << RelativeGain.toString() << "%. ";
848 EmitAndPrintRemark(ORE, ORmissL);
849 return false;
850 }
851
852 // If the loop's critical path involves loop-carried dependences, the gradient
853 // of the gain needs to be at least GainGradientThreshold% (defaults to 25%).
854 // This check ensures that the latency reduction for the loop's critical path
855 // keeps decreasing with sufficient rate beyond the two analyzed loop
856 // iterations.
857 if (Gain[1] > Gain[0]) {
858 Scaled64 GradientGain = Scaled64::get(100) * (Gain[1] - Gain[0]) /
859 (LoopCost[1].PredCost - LoopCost[0].PredCost);
860 if (GradientGain < Scaled64::get(GainGradientThreshold)) {
861 ORmissL << "No select conversion in the loop due to small gradient gain. "
862 "GradientGain="
863 << GradientGain.toString() << "%. ";
864 EmitAndPrintRemark(ORE, ORmissL);
865 return false;
866 }
867 }
868 // If the gain decreases it is not profitable to convert.
869 else if (Gain[1] < Gain[0]) {
870 ORmissL
871 << "No select conversion in the loop due to negative gradient gain. ";
872 EmitAndPrintRemark(ORE, ORmissL);
873 return false;
874 }
875
876 // Non-predicated version of the loop is more profitable than its
877 // predicated version.
878 return true;
879}
880
881// Computes instruction and loop-critical-path costs for both the predicated
882// and non-predicated version of the given loop.
883// Returns false if unable to compute these costs due to invalid cost of loop
884// instruction(s).
885bool SelectOptimize::computeLoopCosts(
886 const Loop *L, const SelectGroups &SIGroups,
887 DenseMap<const Instruction *, CostInfo> &InstCostMap, CostInfo *LoopCost) {
888 LLVM_DEBUG(dbgs() << "Calculating Latency / IPredCost / INonPredCost of loop "
889 << L->getHeader()->getName() << "\n");
890 const auto &SIset = getSIset(SIGroups);
891 // Compute instruction and loop-critical-path costs across two iterations for
892 // both predicated and non-predicated version.
893 const unsigned Iterations = 2;
894 for (unsigned Iter = 0; Iter < Iterations; ++Iter) {
895 // Cost of the loop's critical path.
896 CostInfo &MaxCost = LoopCost[Iter];
897 for (BasicBlock *BB : L->getBlocks()) {
898 for (const Instruction &I : *BB) {
899 if (I.isDebugOrPseudoInst())
900 continue;
901 // Compute the predicated and non-predicated cost of the instruction.
902 Scaled64 IPredCost = Scaled64::getZero(),
903 INonPredCost = Scaled64::getZero();
904
905 // Assume infinite resources that allow to fully exploit the available
906 // instruction-level parallelism.
907 // InstCost = InstLatency + max(Op1Cost, Op2Cost, … OpNCost)
908 for (const Use &U : I.operands()) {
909 auto UI = dyn_cast<Instruction>(U.get());
910 if (!UI)
911 continue;
912 if (InstCostMap.count(UI)) {
913 IPredCost = std::max(IPredCost, InstCostMap[UI].PredCost);
914 INonPredCost = std::max(INonPredCost, InstCostMap[UI].NonPredCost);
915 }
916 }
917 auto ILatency = computeInstCost(&I);
918 if (!ILatency) {
919 OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti", &I);
920 ORmissL << "Invalid instruction cost preventing analysis and "
921 "optimization of the inner-most loop containing this "
922 "instruction. ";
923 EmitAndPrintRemark(ORE, ORmissL);
924 return false;
925 }
926 IPredCost += Scaled64::get(*ILatency);
927 INonPredCost += Scaled64::get(*ILatency);
928
929 // For a select that can be converted to branch,
930 // compute its cost as a branch (non-predicated cost).
931 //
932 // BranchCost = PredictedPathCost + MispredictCost
933 // PredictedPathCost = TrueOpCost * TrueProb + FalseOpCost * FalseProb
934 // MispredictCost = max(MispredictPenalty, CondCost) * MispredictRate
935 if (SIset.contains(&I)) {
936 auto SI = cast<SelectInst>(&I);
937
938 Scaled64 TrueOpCost = Scaled64::getZero(),
939 FalseOpCost = Scaled64::getZero();
940 if (auto *TI = dyn_cast<Instruction>(SI->getTrueValue()))
941 if (InstCostMap.count(TI))
942 TrueOpCost = InstCostMap[TI].NonPredCost;
943 if (auto *FI = dyn_cast<Instruction>(SI->getFalseValue()))
944 if (InstCostMap.count(FI))
945 FalseOpCost = InstCostMap[FI].NonPredCost;
946 Scaled64 PredictedPathCost =
947 getPredictedPathCost(TrueOpCost, FalseOpCost, SI);
948
949 Scaled64 CondCost = Scaled64::getZero();
950 if (auto *CI = dyn_cast<Instruction>(SI->getCondition()))
951 if (InstCostMap.count(CI))
952 CondCost = InstCostMap[CI].NonPredCost;
953 Scaled64 MispredictCost = getMispredictionCost(SI, CondCost);
954
955 INonPredCost = PredictedPathCost + MispredictCost;
956 }
957 LLVM_DEBUG(dbgs() << " " << ILatency << "/" << IPredCost << "/"
958 << INonPredCost << " for " << I << "\n");
959
960 InstCostMap[&I] = {IPredCost, INonPredCost};
961 MaxCost.PredCost = std::max(MaxCost.PredCost, IPredCost);
962 MaxCost.NonPredCost = std::max(MaxCost.NonPredCost, INonPredCost);
963 }
964 }
965 LLVM_DEBUG(dbgs() << "Iteration " << Iter + 1
966 << " MaxCost = " << MaxCost.PredCost << " "
967 << MaxCost.NonPredCost << "\n");
968 }
969 return true;
970}
971
973SelectOptimize::getSIset(const SelectGroups &SIGroups) {
975 for (const SelectGroup &ASI : SIGroups)
976 for (const SelectInst *SI : ASI)
977 SIset.insert(SI);
978 return SIset;
979}
980
981std::optional<uint64_t> SelectOptimize::computeInstCost(const Instruction *I) {
982 InstructionCost ICost =
984 if (auto OC = ICost.getValue())
985 return std::optional<uint64_t>(*OC);
986 return std::nullopt;
987}
988
990SelectOptimize::getMispredictionCost(const SelectInst *SI,
991 const Scaled64 CondCost) {
992 uint64_t MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty;
993
994 // Account for the default misprediction rate when using a branch
995 // (conservatively set to 25% by default).
996 uint64_t MispredictRate = MispredictDefaultRate;
997 // If the select condition is obviously predictable, then the misprediction
998 // rate is zero.
999 if (isSelectHighlyPredictable(SI))
1000 MispredictRate = 0;
1001
1002 // CondCost is included to account for cases where the computation of the
1003 // condition is part of a long dependence chain (potentially loop-carried)
1004 // that would delay detection of a misprediction and increase its cost.
1005 Scaled64 MispredictCost =
1006 std::max(Scaled64::get(MispredictPenalty), CondCost) *
1007 Scaled64::get(MispredictRate);
1008 MispredictCost /= Scaled64::get(100);
1009
1010 return MispredictCost;
1011}
1012
1013// Returns the cost of a branch when the prediction is correct.
1014// TrueCost * TrueProbability + FalseCost * FalseProbability.
1016SelectOptimize::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
1017 const SelectInst *SI) {
1018 Scaled64 PredPathCost;
1019 uint64_t TrueWeight, FalseWeight;
1020 if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
1021 uint64_t SumWeight = TrueWeight + FalseWeight;
1022 if (SumWeight != 0) {
1023 PredPathCost = TrueCost * Scaled64::get(TrueWeight) +
1024 FalseCost * Scaled64::get(FalseWeight);
1025 PredPathCost /= Scaled64::get(SumWeight);
1026 return PredPathCost;
1027 }
1028 }
1029 // Without branch weight metadata, we assume 75% for the one path and 25% for
1030 // the other, and pick the result with the biggest cost.
1031 PredPathCost = std::max(TrueCost * Scaled64::get(3) + FalseCost,
1032 FalseCost * Scaled64::get(3) + TrueCost);
1033 PredPathCost /= Scaled64::get(4);
1034 return PredPathCost;
1035}
1036
1037bool SelectOptimize::isSelectKindSupported(SelectInst *SI) {
1038 bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
1039 if (VectorCond)
1040 return false;
1042 if (SI->getType()->isVectorTy())
1043 SelectKind = TargetLowering::ScalarCondVectorVal;
1044 else
1045 SelectKind = TargetLowering::ScalarValSelect;
1046 return TLI->isSelectSupported(SelectKind);
1047}
#define LLVM_DEBUG(X)
Definition: Debug.h:101
Hexagon Hardware Loops
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
This file contains the declarations for profiling metadata utility functions.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSafeToSinkLoad(Instruction *LoadI, Instruction *SI)
static InstructionCost divideNearest(InstructionCost Numerator, uint64_t Denominator)
Optimize selects
static Value * getTrueOrFalseValue(SelectInst *SI, bool isTrue, const SmallPtrSet< const Instruction *, 2 > &Selects)
If isTrue is true, return the true value of SI, otherwise return false value of SI.
static cl::opt< unsigned > ColdOperandMaxCostMultiplier("cold-operand-max-cost-multiplier", cl::desc("Maximum cost multiplier of TCC_expensive for the dependence " "slice of a cold operand to be considered inexpensive."), cl::init(1), cl::Hidden)
static cl::opt< unsigned > ColdOperandThreshold("cold-operand-threshold", cl::desc("Maximum frequency of path for an operand to be considered cold."), cl::init(20), cl::Hidden)
static cl::opt< bool > DisableLoopLevelHeuristics("disable-loop-level-heuristics", cl::Hidden, cl::init(false), cl::desc("Disable loop-level heuristics."))
static bool isSpecialSelect(SelectInst *SI)
static cl::opt< unsigned > GainCycleThreshold("select-opti-loop-cycle-gain-threshold", cl::desc("Minimum gain per loop (in cycles) threshold."), cl::init(4), cl::Hidden)
#define DEBUG_TYPE
static cl::opt< unsigned > MispredictDefaultRate("mispredict-default-rate", cl::Hidden, cl::init(25), cl::desc("Default mispredict rate (initialized to 25%)."))
static void EmitAndPrintRemark(OptimizationRemarkEmitter *ORE, DiagnosticInfoOptimizationBase &Rem)
static cl::opt< unsigned > GainGradientThreshold("select-opti-loop-gradient-gain-threshold", cl::desc("Gradient gain threshold (%)."), cl::init(25), cl::Hidden)
static cl::opt< unsigned > GainRelativeThreshold("select-opti-loop-relative-gain-threshold", cl::desc("Minimum relative gain per loop threshold (1/X). Defaults to 12.5%"), cl::init(8), cl::Hidden)
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
This pass exposes codegen information to IR-level passes.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
iterator end()
Definition: BasicBlock.h:337
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:335
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:257
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:105
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
Definition: BasicBlock.cpp:414
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:112
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:127
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
Analysis providing branch probability information.
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
Common features for diagnostics dealing with optimization remarks that are used by both IR and MIR pa...
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:314
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2628
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
bool isDebugOrPseudoInst() const LLVM_READONLY
Return true if the instruction is a DbgInfoIntrinsic or PseudoProbeInst.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:89
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:392
const BasicBlock * getParent() const
Definition: Instruction.h:90
bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool isTerminator() const
Definition: Instruction.h:198
bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:83
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:389
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:594
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:47
OptimizationRemarkEmitter legacy analysis pass.
The optimization diagnostic interface.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:98
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
std::string toString(unsigned Precision=DefaultPrecision)
Convert to a decimal representation in a string.
Definition: ScaledNumber.h:596
static ScaledNumber get(uint64_t N)
Definition: ScaledNumber.h:526
static ScaledNumber getZero()
Definition: ScaledNumber.h:521
This class represents the LLVM 'select' instruction.
const Value * getCondition() const
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:380
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:384
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:366
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:451
bool empty() const
Definition: SmallVector.h:94
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
SelectSupportKind
Enum that describes what type of support for selects the target has.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:78
Target-Independent Code Generator Pass Configuration Options.
Provide an instruction scheduling machine model to CodeGen passes.
TargetSubtargetInfo - Generic base class for all target subtargets.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
@ TCK_Latency
The latency of instruction.
bool enableSelectOptimize() const
Should the Select Optimization pass be enabled and ran.
BranchProbability getPredictableBranchThreshold() const
If a branch or a select condition is skewed in one direction by more than this factor,...
@ TCC_Expensive
The cost of a 'div' instruction on x86.
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1069
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:384
self_iterator getIterator()
Definition: ilist_node.h:82
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:218
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
FunctionPass * createSelectOptimizePass()
This pass converts conditional moves to conditional jumps when profitable.
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
void initializeSelectOptimizePass(PassRegistry &)