LLVM 17.0.0git
SelectOptimize.cpp
Go to the documentation of this file.
1//===--- SelectOptimize.cpp - Convert select to branches if profitable ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass converts selects to conditional jumps when profitable.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/Statistic.h"
21#include "llvm/CodeGen/Passes.h"
26#include "llvm/IR/BasicBlock.h"
27#include "llvm/IR/Dominators.h"
28#include "llvm/IR/Function.h"
29#include "llvm/IR/IRBuilder.h"
30#include "llvm/IR/Instruction.h"
33#include "llvm/Pass.h"
37#include <algorithm>
38#include <memory>
39#include <queue>
40#include <stack>
41#include <string>
42
43using namespace llvm;
44
45#define DEBUG_TYPE "select-optimize"
46
47STATISTIC(NumSelectOptAnalyzed,
48 "Number of select groups considered for conversion to branch");
49STATISTIC(NumSelectConvertedExpColdOperand,
50 "Number of select groups converted due to expensive cold operand");
51STATISTIC(NumSelectConvertedHighPred,
52 "Number of select groups converted due to high-predictability");
53STATISTIC(NumSelectUnPred,
54 "Number of select groups not converted due to unpredictability");
55STATISTIC(NumSelectColdBB,
56 "Number of select groups not converted due to cold basic block");
57STATISTIC(NumSelectConvertedLoop,
58 "Number of select groups converted due to loop-level analysis");
59STATISTIC(NumSelectsConverted, "Number of selects converted");
60
62 "cold-operand-threshold",
63 cl::desc("Maximum frequency of path for an operand to be considered cold."),
64 cl::init(20), cl::Hidden);
65
67 "cold-operand-max-cost-multiplier",
68 cl::desc("Maximum cost multiplier of TCC_expensive for the dependence "
69 "slice of a cold operand to be considered inexpensive."),
71
73 GainGradientThreshold("select-opti-loop-gradient-gain-threshold",
74 cl::desc("Gradient gain threshold (%)."),
75 cl::init(25), cl::Hidden);
76
78 GainCycleThreshold("select-opti-loop-cycle-gain-threshold",
79 cl::desc("Minimum gain per loop (in cycles) threshold."),
81
83 "select-opti-loop-relative-gain-threshold",
85 "Minimum relative gain per loop threshold (1/X). Defaults to 12.5%"),
87
89 "mispredict-default-rate", cl::Hidden, cl::init(25),
90 cl::desc("Default mispredict rate (initialized to 25%)."));
91
92static cl::opt<bool>
93 DisableLoopLevelHeuristics("disable-loop-level-heuristics", cl::Hidden,
94 cl::init(false),
95 cl::desc("Disable loop-level heuristics."));
96
97namespace {
98
99class SelectOptimize : public FunctionPass {
100 const TargetMachine *TM = nullptr;
101 const TargetSubtargetInfo *TSI;
102 const TargetLowering *TLI = nullptr;
103 const TargetTransformInfo *TTI = nullptr;
104 const LoopInfo *LI;
105 DominatorTree *DT;
106 std::unique_ptr<BlockFrequencyInfo> BFI;
107 std::unique_ptr<BranchProbabilityInfo> BPI;
110 TargetSchedModel TSchedModel;
111
112public:
113 static char ID;
114
115 SelectOptimize() : FunctionPass(ID) {
117 }
118
119 bool runOnFunction(Function &F) override;
120
121 void getAnalysisUsage(AnalysisUsage &AU) const override {
128 }
129
130private:
131 // Select groups consist of consecutive select instructions with the same
132 // condition.
133 using SelectGroup = SmallVector<SelectInst *, 2>;
134 using SelectGroups = SmallVector<SelectGroup, 2>;
135
137
138 struct CostInfo {
139 /// Predicated cost (with selects as conditional moves).
140 Scaled64 PredCost;
141 /// Non-predicated cost (with selects converted to branches).
142 Scaled64 NonPredCost;
143 };
144
145 // Converts select instructions of a function to conditional jumps when deemed
146 // profitable. Returns true if at least one select was converted.
147 bool optimizeSelects(Function &F);
148
149 // Heuristics for determining which select instructions can be profitably
150 // conveted to branches. Separate heuristics for selects in inner-most loops
151 // and the rest of code regions (base heuristics for non-inner-most loop
152 // regions).
153 void optimizeSelectsBase(Function &F, SelectGroups &ProfSIGroups);
154 void optimizeSelectsInnerLoops(Function &F, SelectGroups &ProfSIGroups);
155
156 // Converts to branches the select groups that were deemed
157 // profitable-to-convert.
158 void convertProfitableSIGroups(SelectGroups &ProfSIGroups);
159
160 // Splits selects of a given basic block into select groups.
161 void collectSelectGroups(BasicBlock &BB, SelectGroups &SIGroups);
162
163 // Determines for which select groups it is profitable converting to branches
164 // (base and inner-most-loop heuristics).
165 void findProfitableSIGroupsBase(SelectGroups &SIGroups,
166 SelectGroups &ProfSIGroups);
167 void findProfitableSIGroupsInnerLoops(const Loop *L, SelectGroups &SIGroups,
168 SelectGroups &ProfSIGroups);
169
170 // Determines if a select group should be converted to a branch (base
171 // heuristics).
172 bool isConvertToBranchProfitableBase(const SmallVector<SelectInst *, 2> &ASI);
173
174 // Returns true if there are expensive instructions in the cold value
175 // operand's (if any) dependence slice of any of the selects of the given
176 // group.
177 bool hasExpensiveColdOperand(const SmallVector<SelectInst *, 2> &ASI);
178
179 // For a given source instruction, collect its backwards dependence slice
180 // consisting of instructions exclusively computed for producing the operands
181 // of the source instruction.
182 void getExclBackwardsSlice(Instruction *I, std::stack<Instruction *> &Slice,
183 Instruction *SI, bool ForSinking = false);
184
185 // Returns true if the condition of the select is highly predictable.
186 bool isSelectHighlyPredictable(const SelectInst *SI);
187
188 // Loop-level checks to determine if a non-predicated version (with branches)
189 // of the given loop is more profitable than its predicated version.
190 bool checkLoopHeuristics(const Loop *L, const CostInfo LoopDepth[2]);
191
192 // Computes instruction and loop-critical-path costs for both the predicated
193 // and non-predicated version of the given loop.
194 bool computeLoopCosts(const Loop *L, const SelectGroups &SIGroups,
196 CostInfo *LoopCost);
197
198 // Returns a set of all the select instructions in the given select groups.
199 SmallPtrSet<const Instruction *, 2> getSIset(const SelectGroups &SIGroups);
200
201 // Returns the latency cost of a given instruction.
202 std::optional<uint64_t> computeInstCost(const Instruction *I);
203
204 // Returns the misprediction cost of a given select when converted to branch.
205 Scaled64 getMispredictionCost(const SelectInst *SI, const Scaled64 CondCost);
206
207 // Returns the cost of a branch when the prediction is correct.
208 Scaled64 getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
209 const SelectInst *SI);
210
211 // Returns true if the target architecture supports lowering a given select.
212 bool isSelectKindSupported(SelectInst *SI);
213};
214} // namespace
215
216char SelectOptimize::ID = 0;
217
218INITIALIZE_PASS_BEGIN(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
219 false)
226INITIALIZE_PASS_END(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
227 false)
228
229FunctionPass *llvm::createSelectOptimizePass() { return new SelectOptimize(); }
230
231bool SelectOptimize::runOnFunction(Function &F) {
232 TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
233 TSI = TM->getSubtargetImpl(F);
234 TLI = TSI->getTargetLowering();
235
236 // If none of the select types is supported then skip this pass.
237 // This is an optimization pass. Legality issues will be handled by
238 // instruction selection.
239 if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) &&
240 !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) &&
241 !TLI->isSelectSupported(TargetLowering::VectorMaskSelect))
242 return false;
243
244 TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
245
247 return false;
248
249 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
250 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
251 BPI.reset(new BranchProbabilityInfo(F, *LI));
252 BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
253 PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
254 ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
255 TSchedModel.init(TSI);
256
257 // When optimizing for size, selects are preferable over branches.
258 if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI.get()))
259 return false;
260
261 return optimizeSelects(F);
262}
263
264bool SelectOptimize::optimizeSelects(Function &F) {
265 // Determine for which select groups it is profitable converting to branches.
266 SelectGroups ProfSIGroups;
267 // Base heuristics apply only to non-loops and outer loops.
268 optimizeSelectsBase(F, ProfSIGroups);
269 // Separate heuristics for inner-most loops.
270 optimizeSelectsInnerLoops(F, ProfSIGroups);
271
272 // Convert to branches the select groups that were deemed
273 // profitable-to-convert.
274 convertProfitableSIGroups(ProfSIGroups);
275
276 // Code modified if at least one select group was converted.
277 return !ProfSIGroups.empty();
278}
279
280void SelectOptimize::optimizeSelectsBase(Function &F,
281 SelectGroups &ProfSIGroups) {
282 // Collect all the select groups.
283 SelectGroups SIGroups;
284 for (BasicBlock &BB : F) {
285 // Base heuristics apply only to non-loops and outer loops.
286 Loop *L = LI->getLoopFor(&BB);
287 if (L && L->isInnermost())
288 continue;
289 collectSelectGroups(BB, SIGroups);
290 }
291
292 // Determine for which select groups it is profitable converting to branches.
293 findProfitableSIGroupsBase(SIGroups, ProfSIGroups);
294}
295
296void SelectOptimize::optimizeSelectsInnerLoops(Function &F,
297 SelectGroups &ProfSIGroups) {
298 SmallVector<Loop *, 4> Loops(LI->begin(), LI->end());
299 // Need to check size on each iteration as we accumulate child loops.
300 for (unsigned long i = 0; i < Loops.size(); ++i)
301 for (Loop *ChildL : Loops[i]->getSubLoops())
302 Loops.push_back(ChildL);
303
304 for (Loop *L : Loops) {
305 if (!L->isInnermost())
306 continue;
307
308 SelectGroups SIGroups;
309 for (BasicBlock *BB : L->getBlocks())
310 collectSelectGroups(*BB, SIGroups);
311
312 findProfitableSIGroupsInnerLoops(L, SIGroups, ProfSIGroups);
313 }
314}
315
316/// If \p isTrue is true, return the true value of \p SI, otherwise return
317/// false value of \p SI. If the true/false value of \p SI is defined by any
318/// select instructions in \p Selects, look through the defining select
319/// instruction until the true/false value is not defined in \p Selects.
320static Value *
323 Value *V = nullptr;
324 for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI);
325 DefSI = dyn_cast<SelectInst>(V)) {
326 assert(DefSI->getCondition() == SI->getCondition() &&
327 "The condition of DefSI does not match with SI");
328 V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
329 }
330 assert(V && "Failed to get select true/false value");
331 return V;
332}
333
334void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
335 for (SelectGroup &ASI : ProfSIGroups) {
336 // The code transformation here is a modified version of the sinking
337 // transformation in CodeGenPrepare::optimizeSelectInst with a more
338 // aggressive strategy of which instructions to sink.
339 //
340 // TODO: eliminate the redundancy of logic transforming selects to branches
341 // by removing CodeGenPrepare::optimizeSelectInst and optimizing here
342 // selects for all cases (with and without profile information).
343
344 // Transform a sequence like this:
345 // start:
346 // %cmp = cmp uge i32 %a, %b
347 // %sel = select i1 %cmp, i32 %c, i32 %d
348 //
349 // Into:
350 // start:
351 // %cmp = cmp uge i32 %a, %b
352 // %cmp.frozen = freeze %cmp
353 // br i1 %cmp.frozen, label %select.true, label %select.false
354 // select.true:
355 // br label %select.end
356 // select.false:
357 // br label %select.end
358 // select.end:
359 // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
360 //
361 // %cmp should be frozen, otherwise it may introduce undefined behavior.
362 // In addition, we may sink instructions that produce %c or %d into the
363 // destination(s) of the new branch.
364 // If the true or false blocks do not contain a sunken instruction, that
365 // block and its branch may be optimized away. In that case, one side of the
366 // first branch will point directly to select.end, and the corresponding PHI
367 // predecessor block will be the start block.
368
369 // Find all the instructions that can be soundly sunk to the true/false
370 // blocks. These are instructions that are computed solely for producing the
371 // operands of the select instructions in the group and can be sunk without
372 // breaking the semantics of the LLVM IR (e.g., cannot sink instructions
373 // with side effects).
374 SmallVector<std::stack<Instruction *>, 2> TrueSlices, FalseSlices;
375 typedef std::stack<Instruction *>::size_type StackSizeType;
376 StackSizeType maxTrueSliceLen = 0, maxFalseSliceLen = 0;
377 for (SelectInst *SI : ASI) {
378 // For each select, compute the sinkable dependence chains of the true and
379 // false operands.
380 if (auto *TI = dyn_cast<Instruction>(SI->getTrueValue())) {
381 std::stack<Instruction *> TrueSlice;
382 getExclBackwardsSlice(TI, TrueSlice, SI, true);
383 maxTrueSliceLen = std::max(maxTrueSliceLen, TrueSlice.size());
384 TrueSlices.push_back(TrueSlice);
385 }
386 if (auto *FI = dyn_cast<Instruction>(SI->getFalseValue())) {
387 std::stack<Instruction *> FalseSlice;
388 getExclBackwardsSlice(FI, FalseSlice, SI, true);
389 maxFalseSliceLen = std::max(maxFalseSliceLen, FalseSlice.size());
390 FalseSlices.push_back(FalseSlice);
391 }
392 }
393 // In the case of multiple select instructions in the same group, the order
394 // of non-dependent instructions (instructions of different dependence
395 // slices) in the true/false blocks appears to affect performance.
396 // Interleaving the slices seems to experimentally be the optimal approach.
397 // This interleaving scheduling allows for more ILP (with a natural downside
398 // of increasing a bit register pressure) compared to a simple ordering of
399 // one whole chain after another. One would expect that this ordering would
400 // not matter since the scheduling in the backend of the compiler would
401 // take care of it, but apparently the scheduler fails to deliver optimal
402 // ILP with a naive ordering here.
403 SmallVector<Instruction *, 2> TrueSlicesInterleaved, FalseSlicesInterleaved;
404 for (StackSizeType IS = 0; IS < maxTrueSliceLen; ++IS) {
405 for (auto &S : TrueSlices) {
406 if (!S.empty()) {
407 TrueSlicesInterleaved.push_back(S.top());
408 S.pop();
409 }
410 }
411 }
412 for (StackSizeType IS = 0; IS < maxFalseSliceLen; ++IS) {
413 for (auto &S : FalseSlices) {
414 if (!S.empty()) {
415 FalseSlicesInterleaved.push_back(S.top());
416 S.pop();
417 }
418 }
419 }
420
421 // We split the block containing the select(s) into two blocks.
422 SelectInst *SI = ASI.front();
423 SelectInst *LastSI = ASI.back();
424 BasicBlock *StartBlock = SI->getParent();
425 BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI));
426 BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
427 BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock).getFrequency());
428 // Delete the unconditional branch that was just created by the split.
429 StartBlock->getTerminator()->eraseFromParent();
430
431 // Move any debug/pseudo instructions that were in-between the select
432 // group to the newly-created end block.
433 SmallVector<Instruction *, 2> DebugPseudoINS;
434 auto DIt = SI->getIterator();
435 while (&*DIt != LastSI) {
436 if (DIt->isDebugOrPseudoInst())
437 DebugPseudoINS.push_back(&*DIt);
438 DIt++;
439 }
440 for (auto *DI : DebugPseudoINS) {
441 DI->moveBefore(&*EndBlock->getFirstInsertionPt());
442 }
443
444 // These are the new basic blocks for the conditional branch.
445 // At least one will become an actual new basic block.
446 BasicBlock *TrueBlock = nullptr, *FalseBlock = nullptr;
447 BranchInst *TrueBranch = nullptr, *FalseBranch = nullptr;
448 if (!TrueSlicesInterleaved.empty()) {
449 TrueBlock = BasicBlock::Create(LastSI->getContext(), "select.true.sink",
450 EndBlock->getParent(), EndBlock);
451 TrueBranch = BranchInst::Create(EndBlock, TrueBlock);
452 TrueBranch->setDebugLoc(LastSI->getDebugLoc());
453 for (Instruction *TrueInst : TrueSlicesInterleaved)
454 TrueInst->moveBefore(TrueBranch);
455 }
456 if (!FalseSlicesInterleaved.empty()) {
457 FalseBlock = BasicBlock::Create(LastSI->getContext(), "select.false.sink",
458 EndBlock->getParent(), EndBlock);
459 FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
460 FalseBranch->setDebugLoc(LastSI->getDebugLoc());
461 for (Instruction *FalseInst : FalseSlicesInterleaved)
462 FalseInst->moveBefore(FalseBranch);
463 }
464 // If there was nothing to sink, then arbitrarily choose the 'false' side
465 // for a new input value to the PHI.
466 if (TrueBlock == FalseBlock) {
467 assert(TrueBlock == nullptr &&
468 "Unexpected basic block transform while optimizing select");
469
470 FalseBlock = BasicBlock::Create(SI->getContext(), "select.false",
471 EndBlock->getParent(), EndBlock);
472 auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
473 FalseBranch->setDebugLoc(SI->getDebugLoc());
474 }
475
476 // Insert the real conditional branch based on the original condition.
477 // If we did not create a new block for one of the 'true' or 'false' paths
478 // of the condition, it means that side of the branch goes to the end block
479 // directly and the path originates from the start block from the point of
480 // view of the new PHI.
481 BasicBlock *TT, *FT;
482 if (TrueBlock == nullptr) {
483 TT = EndBlock;
484 FT = FalseBlock;
485 TrueBlock = StartBlock;
486 } else if (FalseBlock == nullptr) {
487 TT = TrueBlock;
488 FT = EndBlock;
489 FalseBlock = StartBlock;
490 } else {
491 TT = TrueBlock;
492 FT = FalseBlock;
493 }
494 IRBuilder<> IB(SI);
495 auto *CondFr =
496 IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen");
497 IB.CreateCondBr(CondFr, TT, FT, SI);
498
500 INS.insert(ASI.begin(), ASI.end());
501 // Use reverse iterator because later select may use the value of the
502 // earlier select, and we need to propagate value through earlier select
503 // to get the PHI operand.
504 for (auto It = ASI.rbegin(); It != ASI.rend(); ++It) {
505 SelectInst *SI = *It;
506 // The select itself is replaced with a PHI Node.
507 PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front());
508 PN->takeName(SI);
509 PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
510 PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
511 PN->setDebugLoc(SI->getDebugLoc());
512
513 SI->replaceAllUsesWith(PN);
514 SI->eraseFromParent();
515 INS.erase(SI);
516 ++NumSelectsConverted;
517 }
518 }
519}
520
521static bool isSpecialSelect(SelectInst *SI) {
522 using namespace llvm::PatternMatch;
523
524 // If the select is a logical-and/logical-or then it is better treated as a
525 // and/or by the backend.
528 return true;
529
530 return false;
531}
532
533void SelectOptimize::collectSelectGroups(BasicBlock &BB,
534 SelectGroups &SIGroups) {
535 BasicBlock::iterator BBIt = BB.begin();
536 while (BBIt != BB.end()) {
537 Instruction *I = &*BBIt++;
538 if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
539 if (isSpecialSelect(SI))
540 continue;
541
542 SelectGroup SIGroup;
543 SIGroup.push_back(SI);
544 while (BBIt != BB.end()) {
545 Instruction *NI = &*BBIt;
546 SelectInst *NSI = dyn_cast<SelectInst>(NI);
547 if (NSI && SI->getCondition() == NSI->getCondition()) {
548 SIGroup.push_back(NSI);
549 } else if (!NI->isDebugOrPseudoInst()) {
550 // Debug/pseudo instructions should be skipped and not prevent the
551 // formation of a select group.
552 break;
553 }
554 ++BBIt;
555 }
556
557 // If the select type is not supported, no point optimizing it.
558 // Instruction selection will take care of it.
559 if (!isSelectKindSupported(SI))
560 continue;
561
562 SIGroups.push_back(SIGroup);
563 }
564 }
565}
566
567void SelectOptimize::findProfitableSIGroupsBase(SelectGroups &SIGroups,
568 SelectGroups &ProfSIGroups) {
569 for (SelectGroup &ASI : SIGroups) {
570 ++NumSelectOptAnalyzed;
571 if (isConvertToBranchProfitableBase(ASI))
572 ProfSIGroups.push_back(ASI);
573 }
574}
575
578 LLVM_DEBUG(dbgs() << Rem.getMsg() << "\n");
579 ORE->emit(Rem);
580}
581
582void SelectOptimize::findProfitableSIGroupsInnerLoops(
583 const Loop *L, SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
584 NumSelectOptAnalyzed += SIGroups.size();
585 // For each select group in an inner-most loop,
586 // a branch is more preferable than a select/conditional-move if:
587 // i) conversion to branches for all the select groups of the loop satisfies
588 // loop-level heuristics including reducing the loop's critical path by
589 // some threshold (see SelectOptimize::checkLoopHeuristics); and
590 // ii) the total cost of the select group is cheaper with a branch compared
591 // to its predicated version. The cost is in terms of latency and the cost
592 // of a select group is the cost of its most expensive select instruction
593 // (assuming infinite resources and thus fully leveraging available ILP).
594
596 CostInfo LoopCost[2] = {{Scaled64::getZero(), Scaled64::getZero()},
598 if (!computeLoopCosts(L, SIGroups, InstCostMap, LoopCost) ||
599 !checkLoopHeuristics(L, LoopCost)) {
600 return;
601 }
602
603 for (SelectGroup &ASI : SIGroups) {
604 // Assuming infinite resources, the cost of a group of instructions is the
605 // cost of the most expensive instruction of the group.
606 Scaled64 SelectCost = Scaled64::getZero(), BranchCost = Scaled64::getZero();
607 for (SelectInst *SI : ASI) {
608 SelectCost = std::max(SelectCost, InstCostMap[SI].PredCost);
609 BranchCost = std::max(BranchCost, InstCostMap[SI].NonPredCost);
610 }
611 if (BranchCost < SelectCost) {
612 OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", ASI.front());
613 OR << "Profitable to convert to branch (loop analysis). BranchCost="
614 << BranchCost.toString() << ", SelectCost=" << SelectCost.toString()
615 << ". ";
616 EmitAndPrintRemark(ORE, OR);
617 ++NumSelectConvertedLoop;
618 ProfSIGroups.push_back(ASI);
619 } else {
620 OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", ASI.front());
621 ORmiss << "Select is more profitable (loop analysis). BranchCost="
622 << BranchCost.toString()
623 << ", SelectCost=" << SelectCost.toString() << ". ";
624 EmitAndPrintRemark(ORE, ORmiss);
625 }
626 }
627}
628
629bool SelectOptimize::isConvertToBranchProfitableBase(
630 const SmallVector<SelectInst *, 2> &ASI) {
631 SelectInst *SI = ASI.front();
632 LLVM_DEBUG(dbgs() << "Analyzing select group containing " << *SI << "\n");
633 OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", SI);
634 OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", SI);
635
636 // Skip cold basic blocks. Better to optimize for size for cold blocks.
637 if (PSI->isColdBlock(SI->getParent(), BFI.get())) {
638 ++NumSelectColdBB;
639 ORmiss << "Not converted to branch because of cold basic block. ";
640 EmitAndPrintRemark(ORE, ORmiss);
641 return false;
642 }
643
644 // If unpredictable, branch form is less profitable.
645 if (SI->getMetadata(LLVMContext::MD_unpredictable)) {
646 ++NumSelectUnPred;
647 ORmiss << "Not converted to branch because of unpredictable branch. ";
648 EmitAndPrintRemark(ORE, ORmiss);
649 return false;
650 }
651
652 // If highly predictable, branch form is more profitable, unless a
653 // predictable select is inexpensive in the target architecture.
654 if (isSelectHighlyPredictable(SI) && TLI->isPredictableSelectExpensive()) {
655 ++NumSelectConvertedHighPred;
656 OR << "Converted to branch because of highly predictable branch. ";
657 EmitAndPrintRemark(ORE, OR);
658 return true;
659 }
660
661 // Look for expensive instructions in the cold operand's (if any) dependence
662 // slice of any of the selects in the group.
663 if (hasExpensiveColdOperand(ASI)) {
664 ++NumSelectConvertedExpColdOperand;
665 OR << "Converted to branch because of expensive cold operand.";
666 EmitAndPrintRemark(ORE, OR);
667 return true;
668 }
669
670 ORmiss << "Not profitable to convert to branch (base heuristic).";
671 EmitAndPrintRemark(ORE, ORmiss);
672 return false;
673}
674
676 uint64_t Denominator) {
677 return (Numerator + (Denominator / 2)) / Denominator;
678}
679
680bool SelectOptimize::hasExpensiveColdOperand(
681 const SmallVector<SelectInst *, 2> &ASI) {
682 bool ColdOperand = false;
683 uint64_t TrueWeight, FalseWeight, TotalWeight;
684 if (extractBranchWeights(*ASI.front(), TrueWeight, FalseWeight)) {
685 uint64_t MinWeight = std::min(TrueWeight, FalseWeight);
686 TotalWeight = TrueWeight + FalseWeight;
687 // Is there a path with frequency <ColdOperandThreshold% (default:20%) ?
688 ColdOperand = TotalWeight * ColdOperandThreshold > 100 * MinWeight;
689 } else if (PSI->hasProfileSummary()) {
690 OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", ASI.front());
691 ORmiss << "Profile data available but missing branch-weights metadata for "
692 "select instruction. ";
693 EmitAndPrintRemark(ORE, ORmiss);
694 }
695 if (!ColdOperand)
696 return false;
697 // Check if the cold path's dependence slice is expensive for any of the
698 // selects of the group.
699 for (SelectInst *SI : ASI) {
700 Instruction *ColdI = nullptr;
701 uint64_t HotWeight;
702 if (TrueWeight < FalseWeight) {
703 ColdI = dyn_cast<Instruction>(SI->getTrueValue());
704 HotWeight = FalseWeight;
705 } else {
706 ColdI = dyn_cast<Instruction>(SI->getFalseValue());
707 HotWeight = TrueWeight;
708 }
709 if (ColdI) {
710 std::stack<Instruction *> ColdSlice;
711 getExclBackwardsSlice(ColdI, ColdSlice, SI);
712 InstructionCost SliceCost = 0;
713 while (!ColdSlice.empty()) {
714 SliceCost += TTI->getInstructionCost(ColdSlice.top(),
716 ColdSlice.pop();
717 }
718 // The colder the cold value operand of the select is the more expensive
719 // the cmov becomes for computing the cold value operand every time. Thus,
720 // the colder the cold operand is the more its cost counts.
721 // Get nearest integer cost adjusted for coldness.
722 InstructionCost AdjSliceCost =
723 divideNearest(SliceCost * HotWeight, TotalWeight);
724 if (AdjSliceCost >=
726 return true;
727 }
728 }
729 return false;
730}
731
732// Check if it is safe to move LoadI next to the SI.
733// Conservatively assume it is safe only if there is no instruction
734// modifying memory in-between the load and the select instruction.
735static bool isSafeToSinkLoad(Instruction *LoadI, Instruction *SI) {
736 // Assume loads from different basic blocks are unsafe to move.
737 if (LoadI->getParent() != SI->getParent())
738 return false;
739 auto It = LoadI->getIterator();
740 while (&*It != SI) {
741 if (It->mayWriteToMemory())
742 return false;
743 It++;
744 }
745 return true;
746}
747
748// For a given source instruction, collect its backwards dependence slice
749// consisting of instructions exclusively computed for the purpose of producing
750// the operands of the source instruction. As an approximation
751// (sufficiently-accurate in practice), we populate this set with the
752// instructions of the backwards dependence slice that only have one-use and
753// form an one-use chain that leads to the source instruction.
754void SelectOptimize::getExclBackwardsSlice(Instruction *I,
755 std::stack<Instruction *> &Slice,
756 Instruction *SI, bool ForSinking) {
758 std::queue<Instruction *> Worklist;
759 Worklist.push(I);
760 while (!Worklist.empty()) {
761 Instruction *II = Worklist.front();
762 Worklist.pop();
763
764 // Avoid cycles.
765 if (!Visited.insert(II).second)
766 continue;
767
768 if (!II->hasOneUse())
769 continue;
770
771 // Cannot soundly sink instructions with side-effects.
772 // Terminator or phi instructions cannot be sunk.
773 // Avoid sinking other select instructions (should be handled separetely).
774 if (ForSinking && (II->isTerminator() || II->mayHaveSideEffects() ||
775 isa<SelectInst>(II) || isa<PHINode>(II)))
776 continue;
777
778 // Avoid sinking loads in order not to skip state-modifying instructions,
779 // that may alias with the loaded address.
780 // Only allow sinking of loads within the same basic block that are
781 // conservatively proven to be safe.
782 if (ForSinking && II->mayReadFromMemory() && !isSafeToSinkLoad(II, SI))
783 continue;
784
785 // Avoid considering instructions with less frequency than the source
786 // instruction (i.e., avoid colder code regions of the dependence slice).
787 if (BFI->getBlockFreq(II->getParent()) < BFI->getBlockFreq(I->getParent()))
788 continue;
789
790 // Eligible one-use instruction added to the dependence slice.
791 Slice.push(II);
792
793 // Explore all the operands of the current instruction to expand the slice.
794 for (unsigned k = 0; k < II->getNumOperands(); ++k)
795 if (auto *OpI = dyn_cast<Instruction>(II->getOperand(k)))
796 Worklist.push(OpI);
797 }
798}
799
800bool SelectOptimize::isSelectHighlyPredictable(const SelectInst *SI) {
801 uint64_t TrueWeight, FalseWeight;
802 if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
803 uint64_t Max = std::max(TrueWeight, FalseWeight);
804 uint64_t Sum = TrueWeight + FalseWeight;
805 if (Sum != 0) {
806 auto Probability = BranchProbability::getBranchProbability(Max, Sum);
807 if (Probability > TTI->getPredictableBranchThreshold())
808 return true;
809 }
810 }
811 return false;
812}
813
814bool SelectOptimize::checkLoopHeuristics(const Loop *L,
815 const CostInfo LoopCost[2]) {
816 // Loop-level checks to determine if a non-predicated version (with branches)
817 // of the loop is more profitable than its predicated version.
818
820 return true;
821
822 OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti",
823 L->getHeader()->getFirstNonPHI());
824
825 if (LoopCost[0].NonPredCost > LoopCost[0].PredCost ||
826 LoopCost[1].NonPredCost >= LoopCost[1].PredCost) {
827 ORmissL << "No select conversion in the loop due to no reduction of loop's "
828 "critical path. ";
829 EmitAndPrintRemark(ORE, ORmissL);
830 return false;
831 }
832
833 Scaled64 Gain[2] = {LoopCost[0].PredCost - LoopCost[0].NonPredCost,
834 LoopCost[1].PredCost - LoopCost[1].NonPredCost};
835
836 // Profitably converting to branches need to reduce the loop's critical path
837 // by at least some threshold (absolute gain of GainCycleThreshold cycles and
838 // relative gain of 12.5%).
839 if (Gain[1] < Scaled64::get(GainCycleThreshold) ||
840 Gain[1] * Scaled64::get(GainRelativeThreshold) < LoopCost[1].PredCost) {
841 Scaled64 RelativeGain = Scaled64::get(100) * Gain[1] / LoopCost[1].PredCost;
842 ORmissL << "No select conversion in the loop due to small reduction of "
843 "loop's critical path. Gain="
844 << Gain[1].toString()
845 << ", RelativeGain=" << RelativeGain.toString() << "%. ";
846 EmitAndPrintRemark(ORE, ORmissL);
847 return false;
848 }
849
850 // If the loop's critical path involves loop-carried dependences, the gradient
851 // of the gain needs to be at least GainGradientThreshold% (defaults to 25%).
852 // This check ensures that the latency reduction for the loop's critical path
853 // keeps decreasing with sufficient rate beyond the two analyzed loop
854 // iterations.
855 if (Gain[1] > Gain[0]) {
856 Scaled64 GradientGain = Scaled64::get(100) * (Gain[1] - Gain[0]) /
857 (LoopCost[1].PredCost - LoopCost[0].PredCost);
858 if (GradientGain < Scaled64::get(GainGradientThreshold)) {
859 ORmissL << "No select conversion in the loop due to small gradient gain. "
860 "GradientGain="
861 << GradientGain.toString() << "%. ";
862 EmitAndPrintRemark(ORE, ORmissL);
863 return false;
864 }
865 }
866 // If the gain decreases it is not profitable to convert.
867 else if (Gain[1] < Gain[0]) {
868 ORmissL
869 << "No select conversion in the loop due to negative gradient gain. ";
870 EmitAndPrintRemark(ORE, ORmissL);
871 return false;
872 }
873
874 // Non-predicated version of the loop is more profitable than its
875 // predicated version.
876 return true;
877}
878
879// Computes instruction and loop-critical-path costs for both the predicated
880// and non-predicated version of the given loop.
881// Returns false if unable to compute these costs due to invalid cost of loop
882// instruction(s).
883bool SelectOptimize::computeLoopCosts(
884 const Loop *L, const SelectGroups &SIGroups,
885 DenseMap<const Instruction *, CostInfo> &InstCostMap, CostInfo *LoopCost) {
886 LLVM_DEBUG(dbgs() << "Calculating Latency / IPredCost / INonPredCost of loop "
887 << L->getHeader()->getName() << "\n");
888 const auto &SIset = getSIset(SIGroups);
889 // Compute instruction and loop-critical-path costs across two iterations for
890 // both predicated and non-predicated version.
891 const unsigned Iterations = 2;
892 for (unsigned Iter = 0; Iter < Iterations; ++Iter) {
893 // Cost of the loop's critical path.
894 CostInfo &MaxCost = LoopCost[Iter];
895 for (BasicBlock *BB : L->getBlocks()) {
896 for (const Instruction &I : *BB) {
897 if (I.isDebugOrPseudoInst())
898 continue;
899 // Compute the predicated and non-predicated cost of the instruction.
900 Scaled64 IPredCost = Scaled64::getZero(),
901 INonPredCost = Scaled64::getZero();
902
903 // Assume infinite resources that allow to fully exploit the available
904 // instruction-level parallelism.
905 // InstCost = InstLatency + max(Op1Cost, Op2Cost, … OpNCost)
906 for (const Use &U : I.operands()) {
907 auto UI = dyn_cast<Instruction>(U.get());
908 if (!UI)
909 continue;
910 if (InstCostMap.count(UI)) {
911 IPredCost = std::max(IPredCost, InstCostMap[UI].PredCost);
912 INonPredCost = std::max(INonPredCost, InstCostMap[UI].NonPredCost);
913 }
914 }
915 auto ILatency = computeInstCost(&I);
916 if (!ILatency) {
917 OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti", &I);
918 ORmissL << "Invalid instruction cost preventing analysis and "
919 "optimization of the inner-most loop containing this "
920 "instruction. ";
921 EmitAndPrintRemark(ORE, ORmissL);
922 return false;
923 }
924 IPredCost += Scaled64::get(*ILatency);
925 INonPredCost += Scaled64::get(*ILatency);
926
927 // For a select that can be converted to branch,
928 // compute its cost as a branch (non-predicated cost).
929 //
930 // BranchCost = PredictedPathCost + MispredictCost
931 // PredictedPathCost = TrueOpCost * TrueProb + FalseOpCost * FalseProb
932 // MispredictCost = max(MispredictPenalty, CondCost) * MispredictRate
933 if (SIset.contains(&I)) {
934 auto SI = cast<SelectInst>(&I);
935
936 Scaled64 TrueOpCost = Scaled64::getZero(),
937 FalseOpCost = Scaled64::getZero();
938 if (auto *TI = dyn_cast<Instruction>(SI->getTrueValue()))
939 if (InstCostMap.count(TI))
940 TrueOpCost = InstCostMap[TI].NonPredCost;
941 if (auto *FI = dyn_cast<Instruction>(SI->getFalseValue()))
942 if (InstCostMap.count(FI))
943 FalseOpCost = InstCostMap[FI].NonPredCost;
944 Scaled64 PredictedPathCost =
945 getPredictedPathCost(TrueOpCost, FalseOpCost, SI);
946
947 Scaled64 CondCost = Scaled64::getZero();
948 if (auto *CI = dyn_cast<Instruction>(SI->getCondition()))
949 if (InstCostMap.count(CI))
950 CondCost = InstCostMap[CI].NonPredCost;
951 Scaled64 MispredictCost = getMispredictionCost(SI, CondCost);
952
953 INonPredCost = PredictedPathCost + MispredictCost;
954 }
955 LLVM_DEBUG(dbgs() << " " << ILatency << "/" << IPredCost << "/"
956 << INonPredCost << " for " << I << "\n");
957
958 InstCostMap[&I] = {IPredCost, INonPredCost};
959 MaxCost.PredCost = std::max(MaxCost.PredCost, IPredCost);
960 MaxCost.NonPredCost = std::max(MaxCost.NonPredCost, INonPredCost);
961 }
962 }
963 LLVM_DEBUG(dbgs() << "Iteration " << Iter + 1
964 << " MaxCost = " << MaxCost.PredCost << " "
965 << MaxCost.NonPredCost << "\n");
966 }
967 return true;
968}
969
971SelectOptimize::getSIset(const SelectGroups &SIGroups) {
973 for (const SelectGroup &ASI : SIGroups)
974 for (const SelectInst *SI : ASI)
975 SIset.insert(SI);
976 return SIset;
977}
978
979std::optional<uint64_t> SelectOptimize::computeInstCost(const Instruction *I) {
980 InstructionCost ICost =
982 if (auto OC = ICost.getValue())
983 return std::optional<uint64_t>(*OC);
984 return std::nullopt;
985}
986
988SelectOptimize::getMispredictionCost(const SelectInst *SI,
989 const Scaled64 CondCost) {
990 uint64_t MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty;
991
992 // Account for the default misprediction rate when using a branch
993 // (conservatively set to 25% by default).
994 uint64_t MispredictRate = MispredictDefaultRate;
995 // If the select condition is obviously predictable, then the misprediction
996 // rate is zero.
997 if (isSelectHighlyPredictable(SI))
998 MispredictRate = 0;
999
1000 // CondCost is included to account for cases where the computation of the
1001 // condition is part of a long dependence chain (potentially loop-carried)
1002 // that would delay detection of a misprediction and increase its cost.
1003 Scaled64 MispredictCost =
1004 std::max(Scaled64::get(MispredictPenalty), CondCost) *
1005 Scaled64::get(MispredictRate);
1006 MispredictCost /= Scaled64::get(100);
1007
1008 return MispredictCost;
1009}
1010
1011// Returns the cost of a branch when the prediction is correct.
1012// TrueCost * TrueProbability + FalseCost * FalseProbability.
1014SelectOptimize::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
1015 const SelectInst *SI) {
1016 Scaled64 PredPathCost;
1017 uint64_t TrueWeight, FalseWeight;
1018 if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
1019 uint64_t SumWeight = TrueWeight + FalseWeight;
1020 if (SumWeight != 0) {
1021 PredPathCost = TrueCost * Scaled64::get(TrueWeight) +
1022 FalseCost * Scaled64::get(FalseWeight);
1023 PredPathCost /= Scaled64::get(SumWeight);
1024 return PredPathCost;
1025 }
1026 }
1027 // Without branch weight metadata, we assume 75% for the one path and 25% for
1028 // the other, and pick the result with the biggest cost.
1029 PredPathCost = std::max(TrueCost * Scaled64::get(3) + FalseCost,
1030 FalseCost * Scaled64::get(3) + TrueCost);
1031 PredPathCost /= Scaled64::get(4);
1032 return PredPathCost;
1033}
1034
1035bool SelectOptimize::isSelectKindSupported(SelectInst *SI) {
1036 bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
1037 if (VectorCond)
1038 return false;
1040 if (SI->getType()->isVectorTy())
1041 SelectKind = TargetLowering::ScalarCondVectorVal;
1042 else
1043 SelectKind = TargetLowering::ScalarValSelect;
1044 return TLI->isSelectSupported(SelectKind);
1045}
#define LLVM_DEBUG(X)
Definition: Debug.h:101
Hexagon Hardware Loops
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
This file contains the declarations for profiling metadata utility functions.
@ SI
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSafeToSinkLoad(Instruction *LoadI, Instruction *SI)
static InstructionCost divideNearest(InstructionCost Numerator, uint64_t Denominator)
Optimize selects
static Value * getTrueOrFalseValue(SelectInst *SI, bool isTrue, const SmallPtrSet< const Instruction *, 2 > &Selects)
If isTrue is true, return the true value of SI, otherwise return false value of SI.
static cl::opt< unsigned > ColdOperandMaxCostMultiplier("cold-operand-max-cost-multiplier", cl::desc("Maximum cost multiplier of TCC_expensive for the dependence " "slice of a cold operand to be considered inexpensive."), cl::init(1), cl::Hidden)
static cl::opt< unsigned > ColdOperandThreshold("cold-operand-threshold", cl::desc("Maximum frequency of path for an operand to be considered cold."), cl::init(20), cl::Hidden)
static cl::opt< bool > DisableLoopLevelHeuristics("disable-loop-level-heuristics", cl::Hidden, cl::init(false), cl::desc("Disable loop-level heuristics."))
static bool isSpecialSelect(SelectInst *SI)
static cl::opt< unsigned > GainCycleThreshold("select-opti-loop-cycle-gain-threshold", cl::desc("Minimum gain per loop (in cycles) threshold."), cl::init(4), cl::Hidden)
#define DEBUG_TYPE
static cl::opt< unsigned > MispredictDefaultRate("mispredict-default-rate", cl::Hidden, cl::init(25), cl::desc("Default mispredict rate (initialized to 25%)."))
static void EmitAndPrintRemark(OptimizationRemarkEmitter *ORE, DiagnosticInfoOptimizationBase &Rem)
static cl::opt< unsigned > GainGradientThreshold("select-opti-loop-gradient-gain-threshold", cl::desc("Gradient gain threshold (%)."), cl::init(25), cl::Hidden)
static cl::opt< unsigned > GainRelativeThreshold("select-opti-loop-relative-gain-threshold", cl::desc("Minimum relative gain per loop threshold (1/X). Defaults to 12.5%"), cl::init(8), cl::Hidden)
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
This pass exposes codegen information to IR-level passes.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
iterator end()
Definition: BasicBlock.h:325
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:323
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:254
const Instruction & front() const
Definition: BasicBlock.h:335
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:105
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
Definition: BasicBlock.cpp:410
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:112
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:127
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
Analysis providing branch probability information.
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
Common features for diagnostics dealing with optimization remarks that are used by both IR and MIR pa...
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:314
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2564
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
bool isDebugOrPseudoInst() const LLVM_READONLY
Return true if the instruction is a DbgInfoIntrinsic or PseudoProbeInst.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:358
const BasicBlock * getParent() const
Definition: Instruction.h:90
bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool isTerminator() const
Definition: Instruction.h:171
bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:82
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:355
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1293
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
OptimizationRemarkEmitter legacy analysis pass.
The optimization diagnostic interface.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:98
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
std::string toString(unsigned Precision=DefaultPrecision)
Convert to a decimal representation in a string.
Definition: ScaledNumber.h:596
static ScaledNumber get(uint64_t N)
Definition: ScaledNumber.h:526
static ScaledNumber getZero()
Definition: ScaledNumber.h:521
This class represents the LLVM 'select' instruction.
const Value * getCondition() const
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:379
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
bool empty() const
Definition: SmallVector.h:94
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
SelectSupportKind
Enum that describes what type of support for selects the target has.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:78
Target-Independent Code Generator Pass Configuration Options.
Provide an instruction scheduling machine model to CodeGen passes.
TargetSubtargetInfo - Generic base class for all target subtargets.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
@ TCK_Latency
The latency of instruction.
bool enableSelectOptimize() const
Should the Select Optimization pass be enabled and ran.
BranchProbability getPredictableBranchThreshold() const
If a branch or a select condition is skewed in one direction by more than this factor,...
@ TCC_Expensive
The cost of a 'div' instruction on x86.
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:994
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:381
self_iterator getIterator()
Definition: ilist_node.h:82
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:218
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
FunctionPass * createSelectOptimizePass()
This pass converts conditional moves to conditional jumps when profitable.
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
void initializeSelectOptimizePass(PassRegistry &)