1//===--- SelectOptimize.cpp - Convert select to branches if profitable ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass converts selects to conditional jumps when profitable.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/SetVector.h"
16#include "llvm/ADT/Statistic.h"
23#include "llvm/CodeGen/Passes.h"
28#include "llvm/IR/BasicBlock.h"
29#include "llvm/IR/Dominators.h"
30#include "llvm/IR/Function.h"
31#include "llvm/IR/IRBuilder.h"
32#include "llvm/IR/Instruction.h"
36#include "llvm/Pass.h"
40#include <algorithm>
41#include <queue>
42#include <stack>
43
44using namespace llvm;
45using namespace llvm::PatternMatch;
46
47#define DEBUG_TYPE "select-optimize"
48
49STATISTIC(NumSelectOptAnalyzed,
50 "Number of select groups considered for conversion to branch");
51STATISTIC(NumSelectConvertedExpColdOperand,
52 "Number of select groups converted due to expensive cold operand");
53STATISTIC(NumSelectConvertedHighPred,
54 "Number of select groups converted due to high-predictability");
55STATISTIC(NumSelectUnPred,
56 "Number of select groups not converted due to unpredictability");
57STATISTIC(NumSelectColdBB,
58 "Number of select groups not converted due to cold basic block");
59STATISTIC(NumSelectConvertedLoop,
60 "Number of select groups converted due to loop-level analysis");
61STATISTIC(NumSelectsConverted, "Number of selects converted");
62
64 "cold-operand-threshold",
65 cl::desc("Maximum frequency of path for an operand to be considered cold."),
66 cl::init(20), cl::Hidden);
67
69 "cold-operand-max-cost-multiplier",
70 cl::desc("Maximum cost multiplier of TCC_expensive for the dependence "
71 "slice of a cold operand to be considered inexpensive."),
73
75 GainGradientThreshold("select-opti-loop-gradient-gain-threshold",
76 cl::desc("Gradient gain threshold (%)."),
77 cl::init(25), cl::Hidden);
78
80 GainCycleThreshold("select-opti-loop-cycle-gain-threshold",
81 cl::desc("Minimum gain per loop (in cycles) threshold."),
83
85 "select-opti-loop-relative-gain-threshold",
87 "Minimum relative gain per loop threshold (1/X). Defaults to 12.5%"),
89
91 "mispredict-default-rate", cl::Hidden, cl::init(25),
92 cl::desc("Default mispredict rate (initialized to 25%)."));
93
94static cl::opt<bool>
95 DisableLoopLevelHeuristics("disable-loop-level-heuristics", cl::Hidden,
96 cl::init(false),
97 cl::desc("Disable loop-level heuristics."));
98
99namespace {
100
101class SelectOptimizeImpl {
102 const TargetMachine *TM = nullptr;
103 const TargetSubtargetInfo *TSI = nullptr;
104 const TargetLowering *TLI = nullptr;
105 const TargetTransformInfo *TTI = nullptr;
106 const LoopInfo *LI = nullptr;
107  BlockFrequencyInfo *BFI;
108  ProfileSummaryInfo *PSI = nullptr;
109 OptimizationRemarkEmitter *ORE = nullptr;
110 TargetSchedModel TSchedModel;
111
112public:
113 SelectOptimizeImpl() = default;
114 SelectOptimizeImpl(const TargetMachine *TM) : TM(TM){};
115 PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
116 bool runOnFunction(Function &F, Pass &P);
117
118 using Scaled64 = ScaledNumber<uint64_t>;
119
120 struct CostInfo {
121 /// Predicated cost (with selects as conditional moves).
122 Scaled64 PredCost;
123 /// Non-predicated cost (with selects converted to branches).
124 Scaled64 NonPredCost;
125 };
126
127 /// SelectLike is an abstraction over SelectInst and other operations that can
128 /// act like selects. For example Or(Zext(icmp), X) can be treated like
129 /// select(icmp, X|1, X).
130 class SelectLike {
131 /// The select (/or) instruction.
132 Instruction *I;
133 /// Whether this select is inverted, "not(cond), FalseVal, TrueVal", as
134 /// opposed to the original condition.
135 bool Inverted = false;
136
137    /// The index of the operand that depends on the condition. Only for
138    /// select-like instructions such as Or/Add.
139 unsigned CondIdx;
140
141 public:
142 SelectLike(Instruction *I, bool Inverted = false, unsigned CondIdx = 0)
143 : I(I), Inverted(Inverted), CondIdx(CondIdx) {}
144
145 Instruction *getI() { return I; }
146 const Instruction *getI() const { return I; }
147
148 Type *getType() const { return I->getType(); }
149
150 unsigned getConditionOpIndex() { return CondIdx; };
151
152 /// Return the true value for the SelectLike instruction. Note this may not
153 /// exist for all SelectLike instructions. For example, for `or(zext(c), x)`
154 /// the true value would be `or(x,1)`. As this value does not exist, nullptr
155 /// is returned.
156 Value *getTrueValue(bool HonorInverts = true) const {
157 if (Inverted && HonorInverts)
158 return getFalseValue(/*HonorInverts=*/false);
159 if (auto *Sel = dyn_cast<SelectInst>(I))
160 return Sel->getTrueValue();
161 // Or(zext) case - The true value is Or(X), so return nullptr as the value
162 // does not yet exist.
163 if (isa<BinaryOperator>(I))
164 return nullptr;
165
166 llvm_unreachable("Unhandled case in getTrueValue");
167 }
168
169 /// Return the false value for the SelectLike instruction. For example the
170 /// getFalseValue of a select or `x` in `or(zext(c), x)` (which is
171 /// `select(c, x|1, x)`)
172 Value *getFalseValue(bool HonorInverts = true) const {
173 if (Inverted && HonorInverts)
174 return getTrueValue(/*HonorInverts=*/false);
175 if (auto *Sel = dyn_cast<SelectInst>(I))
176 return Sel->getFalseValue();
177 // We are on the branch where the condition is zero, which means BinOp
178 // does not perform any computation, and we can simply return the operand
179 // that is not related to the condition
180 if (auto *BO = dyn_cast<BinaryOperator>(I))
181 return BO->getOperand(1 - CondIdx);
182
183 llvm_unreachable("Unhandled case in getFalseValue");
184 }
185
186 /// Return the NonPredCost cost of the op on \p isTrue branch, given the
187 /// costs in \p InstCostMap. This may need to be generated for select-like
188 /// instructions.
189 Scaled64 getOpCostOnBranch(
190 bool IsTrue, const DenseMap<const Instruction *, CostInfo> &InstCostMap,
191 const TargetTransformInfo *TTI) {
192 auto *V = IsTrue ? getTrueValue() : getFalseValue();
193 if (V) {
194 if (auto *IV = dyn_cast<Instruction>(V)) {
195 auto It = InstCostMap.find(IV);
196        return It != InstCostMap.end() ? It->second.NonPredCost
197                                       : Scaled64::getZero();
198 }
199 return Scaled64::getZero();
200 }
201      // If getTrue(False)Value() returns nullptr, it means we are dealing with
202      // select-like instructions on the branch where the actual computation is
203      // happening. In that case the cost is equal to the cost of the computation
204      // plus the cost of the operand that does not depend on the condition.
205      InstructionCost Cost = TTI->getArithmeticInstrCost(
206          getI()->getOpcode(), I->getType(), TargetTransformInfo::TCK_Latency,
207 {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
208 {TTI::OK_UniformConstantValue, TTI::OP_PowerOf2});
209 auto TotalCost = Scaled64::get(Cost.getValue());
210 if (auto *OpI = dyn_cast<Instruction>(I->getOperand(1 - CondIdx))) {
211 auto It = InstCostMap.find(OpI);
212 if (It != InstCostMap.end())
213 TotalCost += It->second.NonPredCost;
214 }
215 return TotalCost;
216 }
217 };
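  // Illustrative example (not from the upstream source): for a select-like
  // operation such as
  //   %z = zext i1 %c to i32
  //   %o = or i32 %x, %z       ; behaves like select i1 %c, i32 (%x | 1), i32 %x
  // getTrueValue() returns nullptr because the value `%x | 1` does not exist
  // yet (it is materialised later by cloning the `or` with the condition
  // operand replaced by the constant 1), while getFalseValue() returns %x,
  // the operand that does not depend on the condition.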
218
219private:
220 // Select groups consist of consecutive select-like instructions with the same
221  // condition. Between the select-likes there can be any number of auxiliary
222  // instructions related to the condition, such as not, zext, or ashr/lshr.
223  struct SelectGroup {
224    Value *Condition;
225    SmallVector<SelectLike, 2> Selects;
226  };
227 using SelectGroups = SmallVector<SelectGroup, 2>;
228
229 // Converts select instructions of a function to conditional jumps when deemed
230 // profitable. Returns true if at least one select was converted.
231 bool optimizeSelects(Function &F);
232
233 // Heuristics for determining which select instructions can be profitably
234  // converted to branches. Separate heuristics for selects in inner-most loops
235 // and the rest of code regions (base heuristics for non-inner-most loop
236 // regions).
237 void optimizeSelectsBase(Function &F, SelectGroups &ProfSIGroups);
238 void optimizeSelectsInnerLoops(Function &F, SelectGroups &ProfSIGroups);
239
240 // Converts to branches the select groups that were deemed
241 // profitable-to-convert.
242 void convertProfitableSIGroups(SelectGroups &ProfSIGroups);
243
244 // Splits selects of a given basic block into select groups.
245 void collectSelectGroups(BasicBlock &BB, SelectGroups &SIGroups);
246
247 // Determines for which select groups it is profitable converting to branches
248 // (base and inner-most-loop heuristics).
249 void findProfitableSIGroupsBase(SelectGroups &SIGroups,
250 SelectGroups &ProfSIGroups);
251 void findProfitableSIGroupsInnerLoops(const Loop *L, SelectGroups &SIGroups,
252 SelectGroups &ProfSIGroups);
253
254 // Determines if a select group should be converted to a branch (base
255 // heuristics).
256 bool isConvertToBranchProfitableBase(const SelectGroup &ASI);
257
258 // Returns true if there are expensive instructions in the cold value
259 // operand's (if any) dependence slice of any of the selects of the given
260 // group.
261 bool hasExpensiveColdOperand(const SelectGroup &ASI);
262
263 // For a given source instruction, collect its backwards dependence slice
264 // consisting of instructions exclusively computed for producing the operands
265 // of the source instruction.
266 void getExclBackwardsSlice(Instruction *I, std::stack<Instruction *> &Slice,
267 Instruction *SI, bool ForSinking = false);
268
269 // Returns true if the condition of the select is highly predictable.
270 bool isSelectHighlyPredictable(const SelectLike SI);
271
272 // Loop-level checks to determine if a non-predicated version (with branches)
273 // of the given loop is more profitable than its predicated version.
274 bool checkLoopHeuristics(const Loop *L, const CostInfo LoopDepth[2]);
275
276 // Computes instruction and loop-critical-path costs for both the predicated
277 // and non-predicated version of the given loop.
278 bool computeLoopCosts(const Loop *L, const SelectGroups &SIGroups,
279 DenseMap<const Instruction *, CostInfo> &InstCostMap,
280 CostInfo *LoopCost);
281
282  // Returns a map from each select-like instruction in the given select groups to its SelectLike.
283 SmallDenseMap<const Instruction *, SelectLike, 2>
284 getSImap(const SelectGroups &SIGroups);
285
286 // Returns a map from select-like instructions to the corresponding select
287 // group.
288 SmallDenseMap<const Instruction *, const SelectGroup *, 2>
289 getSGmap(const SelectGroups &SIGroups);
290
291 // Returns the latency cost of a given instruction.
292 std::optional<uint64_t> computeInstCost(const Instruction *I);
293
294 // Returns the misprediction cost of a given select when converted to branch.
295 Scaled64 getMispredictionCost(const SelectLike SI, const Scaled64 CondCost);
296
297 // Returns the cost of a branch when the prediction is correct.
298 Scaled64 getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
299 const SelectLike SI);
300
301 // Returns true if the target architecture supports lowering a given select.
302 bool isSelectKindSupported(const SelectLike SI);
303};
304
305class SelectOptimize : public FunctionPass {
306 SelectOptimizeImpl Impl;
307
308public:
309 static char ID;
310
311  SelectOptimize() : FunctionPass(ID) {
312    initializeSelectOptimizePass(*PassRegistry::getPassRegistry());
313  }
314
315 bool runOnFunction(Function &F) override {
316 return Impl.runOnFunction(F, *this);
317 }
318
319 void getAnalysisUsage(AnalysisUsage &AU) const override {
320 AU.addRequired<ProfileSummaryInfoWrapperPass>();
321 AU.addRequired<TargetPassConfig>();
322 AU.addRequired<TargetTransformInfoWrapperPass>();
323 AU.addRequired<LoopInfoWrapperPass>();
324 AU.addRequired<BlockFrequencyInfoWrapperPass>();
325 AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
326 }
327};
328
329} // namespace
330
331PreservedAnalyses SelectOptimizePass::run(Function &F,
332                                          FunctionAnalysisManager &FAM) {
333  SelectOptimizeImpl Impl(TM);
334 return Impl.run(F, FAM);
335}
336
337char SelectOptimize::ID = 0;
338
339INITIALIZE_PASS_BEGIN(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
340                      false)
341INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
342INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
343INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
344INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
345INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
346INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
347INITIALIZE_PASS_END(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
348                    false)
349
350FunctionPass *llvm::createSelectOptimizePass() { return new SelectOptimize(); }
351
352PreservedAnalyses SelectOptimizeImpl::run(Function &F,
353                                          FunctionAnalysisManager &FAM) {
354 TSI = TM->getSubtargetImpl(F);
355 TLI = TSI->getTargetLowering();
356
357 // If none of the select types are supported then skip this pass.
358 // This is an optimization pass. Legality issues will be handled by
359 // instruction selection.
360  if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) &&
361      !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) &&
362      !TLI->isSelectSupported(TargetLowering::VectorMaskSelect))
363    return PreservedAnalyses::all();
364
365 TTI = &FAM.getResult<TargetIRAnalysis>(F);
366 if (!TTI->enableSelectOptimize())
367 return PreservedAnalyses::all();
368
369  PSI = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F)
370            .getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
371 assert(PSI && "This pass requires module analysis pass `profile-summary`!");
372 BFI = &FAM.getResult<BlockFrequencyAnalysis>(F);
373
374 // When optimizing for size, selects are preferable over branches.
375 if (llvm::shouldOptimizeForSize(&F, PSI, BFI))
376 return PreservedAnalyses::all();
377
378 LI = &FAM.getResult<LoopAnalysis>(F);
379 ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
380 TSchedModel.init(TSI);
381
382 bool Changed = optimizeSelects(F);
383  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
384}
385
386bool SelectOptimizeImpl::runOnFunction(Function &F, Pass &P) {
387 TM = &P.getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
388 TSI = TM->getSubtargetImpl(F);
389 TLI = TSI->getTargetLowering();
390
391 // If none of the select types are supported then skip this pass.
392 // This is an optimization pass. Legality issues will be handled by
393 // instruction selection.
394  if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) &&
395      !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) &&
396      !TLI->isSelectSupported(TargetLowering::VectorMaskSelect))
397    return false;
398
399 TTI = &P.getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
400
401 if (!TTI->enableSelectOptimize())
402 return false;
403
404 LI = &P.getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
405 BFI = &P.getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
406 PSI = &P.getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
407 ORE = &P.getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
408 TSchedModel.init(TSI);
409
410 // When optimizing for size, selects are preferable over branches.
411 if (llvm::shouldOptimizeForSize(&F, PSI, BFI))
412 return false;
413
414 return optimizeSelects(F);
415}
416
417bool SelectOptimizeImpl::optimizeSelects(Function &F) {
418 // Determine for which select groups it is profitable converting to branches.
419 SelectGroups ProfSIGroups;
420 // Base heuristics apply only to non-loops and outer loops.
421 optimizeSelectsBase(F, ProfSIGroups);
422 // Separate heuristics for inner-most loops.
423 optimizeSelectsInnerLoops(F, ProfSIGroups);
424
425 // Convert to branches the select groups that were deemed
426 // profitable-to-convert.
427 convertProfitableSIGroups(ProfSIGroups);
428
429 // Code modified if at least one select group was converted.
430 return !ProfSIGroups.empty();
431}
432
433void SelectOptimizeImpl::optimizeSelectsBase(Function &F,
434 SelectGroups &ProfSIGroups) {
435 // Collect all the select groups.
436 SelectGroups SIGroups;
437 for (BasicBlock &BB : F) {
438 // Base heuristics apply only to non-loops and outer loops.
439 Loop *L = LI->getLoopFor(&BB);
440 if (L && L->isInnermost())
441 continue;
442 collectSelectGroups(BB, SIGroups);
443 }
444
445 // Determine for which select groups it is profitable converting to branches.
446 findProfitableSIGroupsBase(SIGroups, ProfSIGroups);
447}
448
449void SelectOptimizeImpl::optimizeSelectsInnerLoops(Function &F,
450 SelectGroups &ProfSIGroups) {
451  SmallVector<Loop *, 4> Loops(LI->begin(), LI->end());
452  // Need to check size on each iteration as we accumulate child loops.
453 for (unsigned long i = 0; i < Loops.size(); ++i)
454 llvm::append_range(Loops, Loops[i]->getSubLoops());
455
456 for (Loop *L : Loops) {
457 if (!L->isInnermost())
458 continue;
459
460 SelectGroups SIGroups;
461 for (BasicBlock *BB : L->getBlocks())
462 collectSelectGroups(*BB, SIGroups);
463
464 findProfitableSIGroupsInnerLoops(L, SIGroups, ProfSIGroups);
465 }
466}
467
468/// Returns optimised value on \p IsTrue branch. For SelectInst that would be
469/// either True or False value. For (BinaryOperator) instructions, where the
470/// condition may be skipped, the operation will use a non-conditional operand.
471/// For example, for `or(V,zext(cond))` this function would return V.
472/// However, if the conditional operand on \p IsTrue branch matters, we create a
473/// clone of instruction at the end of that branch \p B and replace the
474/// condition operand with a constant.
475///
476/// Also \p OptSelects contains previously optimised select-like instructions.
477/// If the current value uses one of the optimised values, we can optimise it
478/// further by replacing it with the corresponding value on the given branch.
479static Value *getTrueOrFalseValue(
480    SelectOptimizeImpl::SelectLike &SI, bool isTrue,
481 SmallDenseMap<Instruction *, std::pair<Value *, Value *>, 2> &OptSelects,
482 BasicBlock *B) {
483 Value *V = isTrue ? SI.getTrueValue() : SI.getFalseValue();
484 if (V) {
485 if (auto *IV = dyn_cast<Instruction>(V))
486 if (auto It = OptSelects.find(IV); It != OptSelects.end())
487 return isTrue ? It->second.first : It->second.second;
488 return V;
489 }
490
491 auto *BO = cast<BinaryOperator>(SI.getI());
492 assert((BO->getOpcode() == Instruction::Add ||
493 BO->getOpcode() == Instruction::Or ||
494 BO->getOpcode() == Instruction::Sub) &&
495 "Only currently handling Add, Or and Sub binary operators.");
496
497 auto *CBO = BO->clone();
498 auto CondIdx = SI.getConditionOpIndex();
499 auto *AuxI = cast<Instruction>(CBO->getOperand(CondIdx));
500 if (isa<ZExtInst>(AuxI) || isa<LShrOperator>(AuxI)) {
501 CBO->setOperand(CondIdx, ConstantInt::get(CBO->getType(), 1));
502 } else {
503 assert((isa<AShrOperator>(AuxI) || isa<SExtInst>(AuxI)) &&
504 "Unexpected opcode");
505 CBO->setOperand(CondIdx, ConstantInt::get(CBO->getType(), -1));
506 }
507
508 unsigned OtherIdx = 1 - CondIdx;
509 if (auto *IV = dyn_cast<Instruction>(CBO->getOperand(OtherIdx))) {
510 if (auto It = OptSelects.find(IV); It != OptSelects.end())
511 CBO->setOperand(OtherIdx, isTrue ? It->second.first : It->second.second);
512 }
513 CBO->insertBefore(B->getTerminator()->getIterator());
514 return CBO;
515}
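// Illustrative sketch (not upstream code): for a sign-extending select-like
// such as
//   %s = sext i1 %c to i32
//   %a = add i32 %x, %s       ; behaves like select i1 %c, i32 (%x - 1), i32 %x
// the value on the branch where the condition matters is produced by cloning
// the add and replacing the sext operand with the constant -1, i.e.
// `add i32 %x, -1`, inserted before the terminator of that branch's block.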
516
517void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
518 for (SelectGroup &ASI : ProfSIGroups) {
519 // The code transformation here is a modified version of the sinking
520 // transformation in CodeGenPrepare::optimizeSelectInst with a more
521 // aggressive strategy of which instructions to sink.
522 //
523 // TODO: eliminate the redundancy of logic transforming selects to branches
524 // by removing CodeGenPrepare::optimizeSelectInst and optimizing here
525 // selects for all cases (with and without profile information).
526
527 // Transform a sequence like this:
528 // start:
529 // %cmp = cmp uge i32 %a, %b
530 // %sel = select i1 %cmp, i32 %c, i32 %d
531 //
532 // Into:
533 // start:
534 // %cmp = cmp uge i32 %a, %b
535 // %cmp.frozen = freeze %cmp
536 // br i1 %cmp.frozen, label %select.true, label %select.false
537 // select.true:
538 // br label %select.end
539 // select.false:
540 // br label %select.end
541 // select.end:
542 // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
543 //
544 // %cmp should be frozen, otherwise it may introduce undefined behavior.
545 // In addition, we may sink instructions that produce %c or %d into the
546 // destination(s) of the new branch.
547 // If the true or false blocks do not contain a sunken instruction, that
548 // block and its branch may be optimized away. In that case, one side of the
549 // first branch will point directly to select.end, and the corresponding PHI
550 // predecessor block will be the start block.
551
552 // Find all the instructions that can be soundly sunk to the true/false
553 // blocks. These are instructions that are computed solely for producing the
554 // operands of the select instructions in the group and can be sunk without
555 // breaking the semantics of the LLVM IR (e.g., cannot sink instructions
556 // with side effects).
557 SmallVector<std::stack<Instruction *>, 2> TrueSlices, FalseSlices;
558 typedef std::stack<Instruction *>::size_type StackSizeType;
559 StackSizeType maxTrueSliceLen = 0, maxFalseSliceLen = 0;
560 for (SelectLike &SI : ASI.Selects) {
561 if (!isa<SelectInst>(SI.getI()))
562 continue;
563 // For each select, compute the sinkable dependence chains of the true and
564 // false operands.
565 if (auto *TI = dyn_cast_or_null<Instruction>(SI.getTrueValue())) {
566 std::stack<Instruction *> TrueSlice;
567 getExclBackwardsSlice(TI, TrueSlice, SI.getI(), true);
568 maxTrueSliceLen = std::max(maxTrueSliceLen, TrueSlice.size());
569 TrueSlices.push_back(TrueSlice);
570 }
571 if (auto *FI = dyn_cast_or_null<Instruction>(SI.getFalseValue())) {
572 if (isa<SelectInst>(SI.getI()) || !FI->hasOneUse()) {
573 std::stack<Instruction *> FalseSlice;
574 getExclBackwardsSlice(FI, FalseSlice, SI.getI(), true);
575 maxFalseSliceLen = std::max(maxFalseSliceLen, FalseSlice.size());
576 FalseSlices.push_back(FalseSlice);
577 }
578 }
579 }
580 // In the case of multiple select instructions in the same group, the order
581 // of non-dependent instructions (instructions of different dependence
582 // slices) in the true/false blocks appears to affect performance.
583 // Interleaving the slices seems to experimentally be the optimal approach.
584 // This interleaving scheduling allows for more ILP (with a natural downside
585 // of increasing a bit register pressure) compared to a simple ordering of
586 // one whole chain after another. One would expect that this ordering would
587 // not matter since the scheduling in the backend of the compiler would
588 // take care of it, but apparently the scheduler fails to deliver optimal
589 // ILP with a naive ordering here.
590 SmallVector<Instruction *, 2> TrueSlicesInterleaved, FalseSlicesInterleaved;
591 for (StackSizeType IS = 0; IS < maxTrueSliceLen; ++IS) {
592 for (auto &S : TrueSlices) {
593 if (!S.empty()) {
594 TrueSlicesInterleaved.push_back(S.top());
595 S.pop();
596 }
597 }
598 }
599 for (StackSizeType IS = 0; IS < maxFalseSliceLen; ++IS) {
600 for (auto &S : FalseSlices) {
601 if (!S.empty()) {
602 FalseSlicesInterleaved.push_back(S.top());
603 S.pop();
604 }
605 }
606 }
607
608 // We split the block containing the select(s) into two blocks.
609 SelectLike &SI = ASI.Selects.front();
610 SelectLike &LastSI = ASI.Selects.back();
611 BasicBlock *StartBlock = SI.getI()->getParent();
612 BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI.getI()));
613 // With RemoveDIs turned off, SplitPt can be a dbg.* intrinsic. With
614 // RemoveDIs turned on, SplitPt would instead point to the next
615 // instruction. To match existing dbg.* intrinsic behaviour with RemoveDIs,
616 // tell splitBasicBlock that we want to include any DbgVariableRecords
617 // attached to SplitPt in the splice.
618 SplitPt.setHeadBit(true);
619 BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
620 BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock));
621 // Delete the unconditional branch that was just created by the split.
622 StartBlock->getTerminator()->eraseFromParent();
623
624 // Move any debug/pseudo and auxiliary instructions that were in-between the
625 // select group to the newly-created end block.
626    SmallVector<Instruction *, 2> SinkInstrs;
627    auto DIt = SI.getI()->getIterator();
628 auto NIt = ASI.Selects.begin();
629 while (&*DIt != LastSI.getI()) {
630 if (NIt != ASI.Selects.end() && &*DIt == NIt->getI())
631 ++NIt;
632 else
633 SinkInstrs.push_back(&*DIt);
634 DIt++;
635 }
636 auto InsertionPoint = EndBlock->getFirstInsertionPt();
637 for (auto *DI : SinkInstrs)
638 DI->moveBeforePreserving(InsertionPoint);
639
640 // Duplicate implementation for DbgRecords, the non-instruction debug-info
641 // format. Helper lambda for moving DbgRecords to the end block.
642 auto TransferDbgRecords = [&](Instruction &I) {
643 for (auto &DbgRecord :
644 llvm::make_early_inc_range(I.getDbgRecordRange())) {
645        DbgRecord.removeFromParent();
646        EndBlock->insertDbgRecordBefore(&DbgRecord,
647                                        EndBlock->getFirstInsertionPt());
648 }
649 };
650
651 // Iterate over all instructions in between SI and LastSI, not including
652 // SI itself. These are all the variable assignments that happen "in the
653 // middle" of the select group.
654 auto R = make_range(std::next(SI.getI()->getIterator()),
655 std::next(LastSI.getI()->getIterator()));
656 llvm::for_each(R, TransferDbgRecords);
657
658 // These are the new basic blocks for the conditional branch.
659 // At least one will become an actual new basic block.
660 BasicBlock *TrueBlock = nullptr, *FalseBlock = nullptr;
661 BranchInst *TrueBranch = nullptr, *FalseBranch = nullptr;
662 // Checks if select-like instruction would materialise on the given branch
663 auto HasSelectLike = [](SelectGroup &SG, bool IsTrue) {
664 for (auto &SL : SG.Selects) {
665 if ((IsTrue ? SL.getTrueValue() : SL.getFalseValue()) == nullptr)
666 return true;
667 }
668 return false;
669 };
670 if (!TrueSlicesInterleaved.empty() || HasSelectLike(ASI, true)) {
671 TrueBlock = BasicBlock::Create(EndBlock->getContext(), "select.true.sink",
672 EndBlock->getParent(), EndBlock);
673 TrueBranch = BranchInst::Create(EndBlock, TrueBlock);
674 TrueBranch->setDebugLoc(LastSI.getI()->getDebugLoc());
675 for (Instruction *TrueInst : TrueSlicesInterleaved)
676 TrueInst->moveBefore(TrueBranch->getIterator());
677 }
678 if (!FalseSlicesInterleaved.empty() || HasSelectLike(ASI, false)) {
679 FalseBlock =
680 BasicBlock::Create(EndBlock->getContext(), "select.false.sink",
681 EndBlock->getParent(), EndBlock);
682 FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
683 FalseBranch->setDebugLoc(LastSI.getI()->getDebugLoc());
684 for (Instruction *FalseInst : FalseSlicesInterleaved)
685 FalseInst->moveBefore(FalseBranch->getIterator());
686 }
687 // If there was nothing to sink, then arbitrarily choose the 'false' side
688 // for a new input value to the PHI.
689 if (TrueBlock == FalseBlock) {
690 assert(TrueBlock == nullptr &&
691 "Unexpected basic block transform while optimizing select");
692
693 FalseBlock = BasicBlock::Create(StartBlock->getContext(), "select.false",
694 EndBlock->getParent(), EndBlock);
695 auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
696 FalseBranch->setDebugLoc(SI.getI()->getDebugLoc());
697 }
698
699 // Insert the real conditional branch based on the original condition.
700 // If we did not create a new block for one of the 'true' or 'false' paths
701 // of the condition, it means that side of the branch goes to the end block
702 // directly and the path originates from the start block from the point of
703 // view of the new PHI.
704 BasicBlock *TT, *FT;
705 if (TrueBlock == nullptr) {
706 TT = EndBlock;
707 FT = FalseBlock;
708 TrueBlock = StartBlock;
709 } else if (FalseBlock == nullptr) {
710 TT = TrueBlock;
711 FT = EndBlock;
712 FalseBlock = StartBlock;
713 } else {
714 TT = TrueBlock;
715 FT = FalseBlock;
716 }
717 IRBuilder<> IB(SI.getI());
718 auto *CondFr =
719 IB.CreateFreeze(ASI.Condition, ASI.Condition->getName() + ".frozen");
720
721    SmallDenseMap<Instruction *, std::pair<Value *, Value *>, 2> INS;
722
723 // Use reverse iterator because later select may use the value of the
724 // earlier select, and we need to propagate value through earlier select
725 // to get the PHI operand.
726 InsertionPoint = EndBlock->begin();
727 for (SelectLike &SI : ASI.Selects) {
728 // The select itself is replaced with a PHI Node.
729 PHINode *PN = PHINode::Create(SI.getType(), 2, "");
730 PN->insertBefore(InsertionPoint);
731 PN->takeName(SI.getI());
732 // Current instruction might be a condition of some other group, so we
733 // need to replace it there to avoid dangling pointer
734 if (PN->getType()->isIntegerTy(1)) {
735 for (auto &SG : ProfSIGroups) {
736 if (SG.Condition == SI.getI())
737 SG.Condition = PN;
738 }
739 }
740 SI.getI()->replaceAllUsesWith(PN);
741 auto *TV = getTrueOrFalseValue(SI, true, INS, TrueBlock);
742 auto *FV = getTrueOrFalseValue(SI, false, INS, FalseBlock);
743 INS[PN] = {TV, FV};
744 PN->addIncoming(TV, TrueBlock);
745 PN->addIncoming(FV, FalseBlock);
746 PN->setDebugLoc(SI.getI()->getDebugLoc());
747 ++NumSelectsConverted;
748 }
749 IB.CreateCondBr(CondFr, TT, FT, SI.getI());
750
751    // Remove the old select instructions, now that they are no longer used.
752 for (SelectLike &SI : ASI.Selects)
753 SI.getI()->eraseFromParent();
754 }
755}
756
757void SelectOptimizeImpl::collectSelectGroups(BasicBlock &BB,
758 SelectGroups &SIGroups) {
759  // Represents something that can be considered as a select instruction.
760  // Auxiliary instructions are instructions that depend on a condition and have
761  // zero or some constant value on the True/False branch, such as:
762 // * ZExt(1bit)
763 // * SExt(1bit)
764 // * Not(1bit)
765 // * A(L)Shr(Val), ValBitSize - 1, where there is a condition like `Val <= 0`
766  //   earlier in the BB. For conditions that check the sign of Val, the
767  //   compiler may generate shifts instead of ZExt/SExt.
768 struct SelectLikeInfo {
769 Value *Cond;
770 bool IsAuxiliary;
771 bool IsInverted;
772 unsigned ConditionIdx;
773 };
774
775  DenseMap<Value *, SelectLikeInfo> SelectInfo;
776  // Keeps visited comparisons to help identify AShr/LShr variants of auxiliary
777  // instructions.
778  SmallPtrSet<CmpInst *, 2> SeenCmp;
779
780 // Check if the instruction is SelectLike or might be part of SelectLike
781 // expression, put information into SelectInfo and return the iterator to the
782 // inserted position.
783 auto ProcessSelectInfo = [&SelectInfo, &SeenCmp](Instruction *I) {
784 if (auto *Cmp = dyn_cast<CmpInst>(I)) {
785 SeenCmp.insert(Cmp);
786 return SelectInfo.end();
787 }
788
789 Value *Cond;
790    if (match(I, m_OneUse(m_ZExtOrSExt(m_Value(Cond)))) &&
791        Cond->getType()->isIntegerTy(1)) {
792 bool Inverted = match(Cond, m_Not(m_Value(Cond)));
793 return SelectInfo.insert({I, {Cond, true, Inverted, 0}}).first;
794 }
795
796 if (match(I, m_Not(m_Value(Cond)))) {
797 return SelectInfo.insert({I, {Cond, true, true, 0}}).first;
798 }
799
800    // Select instructions are what we are usually looking for.
801 if (match(I, m_Select(m_Value(Cond), m_Value(), m_Value()))) {
802 bool Inverted = match(Cond, m_Not(m_Value(Cond)));
803 return SelectInfo.insert({I, {Cond, false, Inverted, 0}}).first;
804 }
805 Value *Val;
806 ConstantInt *Shift;
807 if (match(I, m_Shr(m_Value(Val), m_ConstantInt(Shift))) &&
808 I->getType()->getIntegerBitWidth() == Shift->getZExtValue() + 1) {
809 for (auto *CmpI : SeenCmp) {
810 auto Pred = CmpI->getPredicate();
811 if (Val != CmpI->getOperand(0))
812 continue;
813 if ((Pred == CmpInst::ICMP_SGT &&
814 match(CmpI->getOperand(1), m_ConstantInt<-1>())) ||
815 (Pred == CmpInst::ICMP_SGE &&
816 match(CmpI->getOperand(1), m_Zero())) ||
817 (Pred == CmpInst::ICMP_SLT &&
818 match(CmpI->getOperand(1), m_Zero())) ||
819 (Pred == CmpInst::ICMP_SLE &&
820 match(CmpI->getOperand(1), m_ConstantInt<-1>()))) {
821 bool Inverted =
822 Pred == CmpInst::ICMP_SGT || Pred == CmpInst::ICMP_SGE;
823 return SelectInfo.insert({I, {CmpI, true, Inverted, 0}}).first;
824 }
825 }
826 return SelectInfo.end();
827 }
828
829    // A BinOp(Aux(X), Y) can also be treated like a select, with condition X
830    // and values Y|1 and Y.
831    // `Aux` can be either `ZExt(1bit)`, `SExt(1bit)` or `XShr(Val),
832    // ValBitSize - 1`. `BinOp` can be Add, Sub or Or.
833 Value *X;
834    auto MatchZExtOrSExtPattern =
835        m_c_BinOp(m_ZExtOrSExt(m_Value(X)), m_Value());
836    auto MatchShiftPattern =
837        m_c_BinOp(m_Shr(m_Value(X), m_ConstantInt(Shift)), m_Value());
838
839 // This check is unnecessary, but it prevents costly access to the
840 // SelectInfo map.
841 if ((match(I, MatchZExtOrSExtPattern) && X->getType()->isIntegerTy(1)) ||
842 (match(I, MatchShiftPattern) &&
843 X->getType()->getIntegerBitWidth() == Shift->getZExtValue() + 1)) {
844 if (I->getOpcode() != Instruction::Add &&
845 I->getOpcode() != Instruction::Sub &&
846 I->getOpcode() != Instruction::Or)
847 return SelectInfo.end();
848
849 if (I->getOpcode() == Instruction::Or && I->getType()->isIntegerTy(1))
850 return SelectInfo.end();
851
852      // Iterate through the operands and find one that depends on a recognised
853      // sign-extending auxiliary select-like instruction. The operand index does
854 // not matter for Add and Or. However, for Sub, we can only safely
855 // transform when the operand is second.
856 unsigned Idx = I->getOpcode() == Instruction::Sub ? 1 : 0;
857 for (; Idx < 2; Idx++) {
858 auto *Op = I->getOperand(Idx);
859 auto It = SelectInfo.find(Op);
860 if (It != SelectInfo.end() && It->second.IsAuxiliary) {
861 Cond = It->second.Cond;
862 bool Inverted = It->second.IsInverted;
863 return SelectInfo.insert({I, {Cond, false, Inverted, Idx}}).first;
864 }
865 }
866 }
867 return SelectInfo.end();
868 };
869
870 bool AlreadyProcessed = false;
871 BasicBlock::iterator BBIt = BB.begin();
872  DenseMap<Value *, SelectLikeInfo>::iterator It;
873  while (BBIt != BB.end()) {
874 Instruction *I = &*BBIt++;
875 if (I->isDebugOrPseudoInst())
876 continue;
877
878 if (!AlreadyProcessed)
879 It = ProcessSelectInfo(I);
880 else
881 AlreadyProcessed = false;
882
883 if (It == SelectInfo.end() || It->second.IsAuxiliary)
884 continue;
885
886 if (!TTI->shouldTreatInstructionLikeSelect(I))
887 continue;
888
889 Value *Cond = It->second.Cond;
890 // Vector conditions are not supported.
891 if (!Cond->getType()->isIntegerTy(1))
892 continue;
893
894 SelectGroup SIGroup = {Cond, {}};
895 SIGroup.Selects.emplace_back(I, It->second.IsInverted,
896 It->second.ConditionIdx);
897
898 // If the select type is not supported, no point optimizing it.
899 // Instruction selection will take care of it.
900 if (!isSelectKindSupported(SIGroup.Selects.front()))
901 continue;
902
903 while (BBIt != BB.end()) {
904 Instruction *NI = &*BBIt;
905 // Debug/pseudo instructions should be skipped and not prevent the
906 // formation of a select group.
907 if (NI->isDebugOrPseudoInst()) {
908 ++BBIt;
909 continue;
910 }
911
912 It = ProcessSelectInfo(NI);
913 if (It == SelectInfo.end()) {
914 AlreadyProcessed = true;
915 break;
916 }
917
918 // Auxiliary with same condition
919 auto [CurrCond, IsAux, IsRev, CondIdx] = It->second;
920 if (Cond != CurrCond) {
921 AlreadyProcessed = true;
922 break;
923 }
924
925 if (!IsAux)
926 SIGroup.Selects.emplace_back(NI, IsRev, CondIdx);
927 ++BBIt;
928 }
929 LLVM_DEBUG({
930 dbgs() << "New Select group (" << SIGroup.Selects.size() << ") with\n";
931 for (auto &SI : SIGroup.Selects)
932 dbgs() << " " << *SI.getI() << "\n";
933 });
934
935 SIGroups.push_back(SIGroup);
936 }
937}
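// Illustrative example (hypothetical IR, not from the upstream source): with a
// common condition %c, the two selects below form a single group; the zext is
// recorded as an auxiliary instruction and the `or` can join the same group as
// a select-like member, subject to the target's
// shouldTreatInstructionLikeSelect() hook:
//   %s1 = select i1 %c, i32 %a, i32 %b
//   %s2 = select i1 %c, i32 %d, i32 %e
//   %z  = zext i1 %c to i32
//   %o  = or i32 %x, %z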
938
939void SelectOptimizeImpl::findProfitableSIGroupsBase(
940 SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
941 for (SelectGroup &ASI : SIGroups) {
942 ++NumSelectOptAnalyzed;
943 if (isConvertToBranchProfitableBase(ASI))
944 ProfSIGroups.push_back(ASI);
945 }
946}
947
948static void EmitAndPrintRemark(OptimizationRemarkEmitter *ORE,
949                               DiagnosticInfoOptimizationBase &Rem) {
950  LLVM_DEBUG(dbgs() << Rem.getMsg() << "\n");
951 ORE->emit(Rem);
952}
953
954void SelectOptimizeImpl::findProfitableSIGroupsInnerLoops(
955 const Loop *L, SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
956 NumSelectOptAnalyzed += SIGroups.size();
957 // For each select group in an inner-most loop,
958  // a branch is preferable to a select/conditional-move if:
959 // i) conversion to branches for all the select groups of the loop satisfies
960 // loop-level heuristics including reducing the loop's critical path by
961 // some threshold (see SelectOptimizeImpl::checkLoopHeuristics); and
962 // ii) the total cost of the select group is cheaper with a branch compared
963 // to its predicated version. The cost is in terms of latency and the cost
964 // of a select group is the cost of its most expensive select instruction
965 // (assuming infinite resources and thus fully leveraging available ILP).
966
967  DenseMap<const Instruction *, CostInfo> InstCostMap;
968  CostInfo LoopCost[2] = {{Scaled64::getZero(), Scaled64::getZero()},
969 {Scaled64::getZero(), Scaled64::getZero()}};
970 if (!computeLoopCosts(L, SIGroups, InstCostMap, LoopCost) ||
971 !checkLoopHeuristics(L, LoopCost)) {
972 return;
973 }
974
975 for (SelectGroup &ASI : SIGroups) {
976 // Assuming infinite resources, the cost of a group of instructions is the
977 // cost of the most expensive instruction of the group.
978 Scaled64 SelectCost = Scaled64::getZero(), BranchCost = Scaled64::getZero();
979 for (SelectLike &SI : ASI.Selects) {
980 const auto &ICM = InstCostMap[SI.getI()];
981 SelectCost = std::max(SelectCost, ICM.PredCost);
982 BranchCost = std::max(BranchCost, ICM.NonPredCost);
983 }
984 if (BranchCost < SelectCost) {
985 OptimizationRemark OR(DEBUG_TYPE, "SelectOpti",
986 ASI.Selects.front().getI());
987 OR << "Profitable to convert to branch (loop analysis). BranchCost="
988 << BranchCost.toString() << ", SelectCost=" << SelectCost.toString()
989 << ". ";
990 EmitAndPrintRemark(ORE, OR);
991 ++NumSelectConvertedLoop;
992 ProfSIGroups.push_back(ASI);
993 } else {
994 OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti",
995 ASI.Selects.front().getI());
996 ORmiss << "Select is more profitable (loop analysis). BranchCost="
997 << BranchCost.toString()
998 << ", SelectCost=" << SelectCost.toString() << ". ";
999 EmitAndPrintRemark(ORE, ORmiss);
1000 }
1001 }
1002}
1003
1004bool SelectOptimizeImpl::isConvertToBranchProfitableBase(
1005 const SelectGroup &ASI) {
1006 const SelectLike &SI = ASI.Selects.front();
1007 LLVM_DEBUG(dbgs() << "Analyzing select group containing " << *SI.getI()
1008 << "\n");
1009 OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", SI.getI());
1010 OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", SI.getI());
1011
1012 // Skip cold basic blocks. Better to optimize for size for cold blocks.
1013 if (PSI->isColdBlock(SI.getI()->getParent(), BFI)) {
1014 ++NumSelectColdBB;
1015 ORmiss << "Not converted to branch because of cold basic block. ";
1016 EmitAndPrintRemark(ORE, ORmiss);
1017 return false;
1018 }
1019
1020 // If unpredictable, branch form is less profitable.
1021 if (SI.getI()->getMetadata(LLVMContext::MD_unpredictable)) {
1022 ++NumSelectUnPred;
1023 ORmiss << "Not converted to branch because of unpredictable branch. ";
1024 EmitAndPrintRemark(ORE, ORmiss);
1025 return false;
1026 }
1027
1028 // If highly predictable, branch form is more profitable, unless a
1029 // predictable select is inexpensive in the target architecture.
1030 if (isSelectHighlyPredictable(SI) && TLI->isPredictableSelectExpensive()) {
1031 ++NumSelectConvertedHighPred;
1032 OR << "Converted to branch because of highly predictable branch. ";
1033 EmitAndPrintRemark(ORE, OR);
1034 return true;
1035 }
1036
1037 // Look for expensive instructions in the cold operand's (if any) dependence
1038 // slice of any of the selects in the group.
1039 if (hasExpensiveColdOperand(ASI)) {
1040 ++NumSelectConvertedExpColdOperand;
1041 OR << "Converted to branch because of expensive cold operand.";
1042 EmitAndPrintRemark(ORE, OR);
1043 return true;
1044 }
1045
1046 // If latch has a select group with several elements, it is usually profitable
1047 // to convert it to branches. We let `optimizeSelectsInnerLoops` decide if
1048 // conversion is profitable for innermost loops.
1049 auto *BB = SI.getI()->getParent();
1050 auto *L = LI->getLoopFor(BB);
1051 if (L && !L->isInnermost() && L->getLoopLatch() == BB &&
1052 ASI.Selects.size() >= 3) {
1053 OR << "Converted to branch because select group in the latch block is big.";
1054 EmitAndPrintRemark(ORE, OR);
1055 return true;
1056 }
1057
1058 ORmiss << "Not profitable to convert to branch (base heuristic).";
1059 EmitAndPrintRemark(ORE, ORmiss);
1060 return false;
1061}
1062
1063static InstructionCost divideNearest(const InstructionCost &Numerator,
1064                                     uint64_t Denominator) {
1065 return (Numerator + (Denominator / 2)) / Denominator;
1066}
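// Worked example (illustrative): divideNearest(7, 4) evaluates to
// (7 + 4/2) / 4 = 9 / 4 = 2, i.e. 1.75 rounded to the nearest integer.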
1067
1068static bool extractBranchWeights(const SelectOptimizeImpl::SelectLike SI,
1069 uint64_t &TrueVal, uint64_t &FalseVal) {
1070 if (isa<SelectInst>(SI.getI()))
1071 return extractBranchWeights(*SI.getI(), TrueVal, FalseVal);
1072 return false;
1073}
1074
1075bool SelectOptimizeImpl::hasExpensiveColdOperand(const SelectGroup &ASI) {
1076 bool ColdOperand = false;
1077 uint64_t TrueWeight, FalseWeight, TotalWeight;
1078 if (extractBranchWeights(ASI.Selects.front(), TrueWeight, FalseWeight)) {
1079 uint64_t MinWeight = std::min(TrueWeight, FalseWeight);
1080 TotalWeight = TrueWeight + FalseWeight;
1081 // Is there a path with frequency <ColdOperandThreshold% (default:20%) ?
1082 ColdOperand = TotalWeight * ColdOperandThreshold > 100 * MinWeight;
1083 } else if (PSI->hasProfileSummary()) {
1084 OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti",
1085 ASI.Selects.front().getI());
1086 ORmiss << "Profile data available but missing branch-weights metadata for "
1087 "select instruction. ";
1088 EmitAndPrintRemark(ORE, ORmiss);
1089 }
1090 if (!ColdOperand)
1091 return false;
1092 // Check if the cold path's dependence slice is expensive for any of the
1093 // selects of the group.
1094 for (SelectLike SI : ASI.Selects) {
1095 Instruction *ColdI = nullptr;
1096 uint64_t HotWeight;
1097 if (TrueWeight < FalseWeight) {
1098 ColdI = dyn_cast_or_null<Instruction>(SI.getTrueValue());
1099 HotWeight = FalseWeight;
1100 } else {
1101 ColdI = dyn_cast_or_null<Instruction>(SI.getFalseValue());
1102 HotWeight = TrueWeight;
1103 }
1104 if (ColdI) {
1105 std::stack<Instruction *> ColdSlice;
1106 getExclBackwardsSlice(ColdI, ColdSlice, SI.getI());
1107 InstructionCost SliceCost = 0;
1108 while (!ColdSlice.empty()) {
1109        SliceCost += TTI->getInstructionCost(ColdSlice.top(),
1110                                             TargetTransformInfo::TCK_Latency);
1111 ColdSlice.pop();
1112 }
1113 // The colder the cold value operand of the select is the more expensive
1114 // the cmov becomes for computing the cold value operand every time. Thus,
1115 // the colder the cold operand is the more its cost counts.
1116 // Get nearest integer cost adjusted for coldness.
1117 InstructionCost AdjSliceCost =
1118 divideNearest(SliceCost * HotWeight, TotalWeight);
1119      if (AdjSliceCost >=
1120          ColdOperandMaxCostMultiplier * TargetTransformInfo::TCC_Expensive)
1121        return true;
1122 }
1123 }
1124 return false;
1125}
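// Worked example (hypothetical weights and costs, not from the upstream
// source): branch weights TrueWeight=5, FalseWeight=95 give TotalWeight=100
// and MinWeight=5, so 100 * ColdOperandThreshold(20) = 2000 > 100 * 5 and the
// rarely-taken true operand is considered cold. If its dependence slice has a
// latency cost of 10, the coldness-adjusted cost is
// divideNearest(10 * 95, 100) = 10, which is at least
// ColdOperandMaxCostMultiplier(1) * TCC_Expensive(4), so the group is deemed
// profitable to convert.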
1126
1127// Check if it is safe to move LoadI next to the SI.
1128// Conservatively assume it is safe only if there is no instruction
1129// modifying memory in-between the load and the select instruction.
1130static bool isSafeToSinkLoad(Instruction *LoadI, Instruction *SI) {
1131  // Assume loads from different basic blocks are unsafe to move.
1132 if (LoadI->getParent() != SI->getParent())
1133 return false;
1134 auto It = LoadI->getIterator();
1135 while (&*It != SI) {
1136 if (It->mayWriteToMemory())
1137 return false;
1138 It++;
1139 }
1140 return true;
1141}
1142
1143// For a given source instruction, collect its backwards dependence slice
1144// consisting of instructions exclusively computed for the purpose of producing
1145// the operands of the source instruction. As an approximation
1146// (sufficiently-accurate in practice), we populate this set with the
1147// instructions of the backwards dependence slice that only have one-use and
1148// form a one-use chain that leads to the source instruction.
1149void SelectOptimizeImpl::getExclBackwardsSlice(Instruction *I,
1150 std::stack<Instruction *> &Slice,
1151 Instruction *SI,
1152 bool ForSinking) {
1153  SmallPtrSet<Instruction *, 2> Visited;
1154  std::queue<Instruction *> Worklist;
1155 Worklist.push(I);
1156 while (!Worklist.empty()) {
1157 Instruction *II = Worklist.front();
1158 Worklist.pop();
1159
1160 // Avoid cycles.
1161 if (!Visited.insert(II).second)
1162 continue;
1163
1164 if (!II->hasOneUse())
1165 continue;
1166
1167 // Cannot soundly sink instructions with side-effects.
1168 // Terminator or phi instructions cannot be sunk.
1169    // Avoid sinking other select instructions (should be handled separately).
1170    if (ForSinking && (II->isTerminator() || II->mayHaveSideEffects() ||
1171                       isa<SelectInst>(II) || isa<PHINode>(II)))
1172      continue;
1173
1174 // Avoid sinking loads in order not to skip state-modifying instructions,
1175 // that may alias with the loaded address.
1176 // Only allow sinking of loads within the same basic block that are
1177 // conservatively proven to be safe.
1178 if (ForSinking && II->mayReadFromMemory() && !isSafeToSinkLoad(II, SI))
1179 continue;
1180
1181 // Avoid considering instructions with less frequency than the source
1182 // instruction (i.e., avoid colder code regions of the dependence slice).
1183 if (BFI->getBlockFreq(II->getParent()) < BFI->getBlockFreq(I->getParent()))
1184 continue;
1185
1186 // Eligible one-use instruction added to the dependence slice.
1187 Slice.push(II);
1188
1189 // Explore all the operands of the current instruction to expand the slice.
1190 for (Value *Op : II->operand_values())
1191 if (auto *OpI = dyn_cast<Instruction>(Op))
1192 Worklist.push(OpI);
1193 }
1194}
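// Illustrative example (hypothetical IR): for
//   %m = mul i32 %p, %q              ; single use (%a)
//   %a = add i32 %m, 1               ; single use (the select's true operand)
//   %s = select i1 %c, i32 %a, i32 %x
// the exclusive backwards slice of %a is {%a, %m}: both are one-use
// instructions computed solely to produce the select's true value, so they are
// candidates for sinking into the true block (subject to the side-effect,
// load-safety and frequency checks above).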
1195
1196bool SelectOptimizeImpl::isSelectHighlyPredictable(const SelectLike SI) {
1197 uint64_t TrueWeight, FalseWeight;
1198 if (extractBranchWeights(SI, TrueWeight, FalseWeight)) {
1199 uint64_t Max = std::max(TrueWeight, FalseWeight);
1200 uint64_t Sum = TrueWeight + FalseWeight;
1201 if (Sum != 0) {
1202 auto Probability = BranchProbability::getBranchProbability(Max, Sum);
1203 if (Probability > TTI->getPredictableBranchThreshold())
1204 return true;
1205 }
1206 }
1207 return false;
1208}
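// Illustrative example (hypothetical weights): branch weights of 1000 vs. 1
// give a probability of 1000/1001 (~99.9%); if that exceeds the target's
// getPredictableBranchThreshold(), the select is treated as highly
// predictable.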
1209
1210bool SelectOptimizeImpl::checkLoopHeuristics(const Loop *L,
1211 const CostInfo LoopCost[2]) {
1212 // Loop-level checks to determine if a non-predicated version (with branches)
1213 // of the loop is more profitable than its predicated version.
1214
1216 return true;
1217
1218 OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti",
1219 &*L->getHeader()->getFirstNonPHIIt());
1220
1221 if (LoopCost[0].NonPredCost > LoopCost[0].PredCost ||
1222 LoopCost[1].NonPredCost >= LoopCost[1].PredCost) {
1223 ORmissL << "No select conversion in the loop due to no reduction of loop's "
1224 "critical path. ";
1225 EmitAndPrintRemark(ORE, ORmissL);
1226 return false;
1227 }
1228
1229 Scaled64 Gain[2] = {LoopCost[0].PredCost - LoopCost[0].NonPredCost,
1230 LoopCost[1].PredCost - LoopCost[1].NonPredCost};
1231
1232  // Converting to branches is profitable only if it reduces the loop's critical path
1233 // by at least some threshold (absolute gain of GainCycleThreshold cycles and
1234 // relative gain of 12.5%).
1235 if (Gain[1] < Scaled64::get(GainCycleThreshold) ||
1236 Gain[1] * Scaled64::get(GainRelativeThreshold) < LoopCost[1].PredCost) {
1237 Scaled64 RelativeGain = Scaled64::get(100) * Gain[1] / LoopCost[1].PredCost;
1238 ORmissL << "No select conversion in the loop due to small reduction of "
1239 "loop's critical path. Gain="
1240 << Gain[1].toString()
1241 << ", RelativeGain=" << RelativeGain.toString() << "%. ";
1242 EmitAndPrintRemark(ORE, ORmissL);
1243 return false;
1244 }
1245
1246 // If the loop's critical path involves loop-carried dependences, the gradient
1247 // of the gain needs to be at least GainGradientThreshold% (defaults to 25%).
1248 // This check ensures that the latency reduction for the loop's critical path
1249 // keeps decreasing with sufficient rate beyond the two analyzed loop
1250 // iterations.
1251 if (Gain[1] > Gain[0]) {
1252 Scaled64 GradientGain = Scaled64::get(100) * (Gain[1] - Gain[0]) /
1253 (LoopCost[1].PredCost - LoopCost[0].PredCost);
1254 if (GradientGain < Scaled64::get(GainGradientThreshold)) {
1255 ORmissL << "No select conversion in the loop due to small gradient gain. "
1256 "GradientGain="
1257 << GradientGain.toString() << "%. ";
1258 EmitAndPrintRemark(ORE, ORmissL);
1259 return false;
1260 }
1261 }
1262 // If the gain decreases it is not profitable to convert.
1263 else if (Gain[1] < Gain[0]) {
1264 ORmissL
1265 << "No select conversion in the loop due to negative gradient gain. ";
1266 EmitAndPrintRemark(ORE, ORmissL);
1267 return false;
1268 }
1269
1270 // Non-predicated version of the loop is more profitable than its
1271 // predicated version.
1272 return true;
1273}
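// Worked example (hypothetical costs, not from the upstream source): with
// per-iteration critical paths PredCost = {20, 40} and NonPredCost = {18, 32}
// cycles, the gains are Gain = {2, 8}. Gain[1] = 8 meets GainCycleThreshold
// (default 4), the relative-gain check passes because
// 8 * GainRelativeThreshold(8) = 64 >= 40 (i.e. 20% >= 12.5%), and the
// gradient 100 * (8 - 2) / (40 - 20) = 30% meets GainGradientThreshold
// (default 25%), so the branch form is considered profitable.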
1274
1275// Computes instruction and loop-critical-path costs for both the predicated
1276// and non-predicated version of the given loop.
1277// Returns false if unable to compute these costs due to invalid cost of loop
1278// instruction(s).
1279bool SelectOptimizeImpl::computeLoopCosts(
1280 const Loop *L, const SelectGroups &SIGroups,
1281 DenseMap<const Instruction *, CostInfo> &InstCostMap, CostInfo *LoopCost) {
1282 LLVM_DEBUG(dbgs() << "Calculating Latency / IPredCost / INonPredCost of loop "
1283 << L->getHeader()->getName() << "\n");
1284 const auto SImap = getSImap(SIGroups);
1285 const auto SGmap = getSGmap(SIGroups);
1286 // Compute instruction and loop-critical-path costs across two iterations for
1287 // both predicated and non-predicated version.
1288 const unsigned Iterations = 2;
1289 for (unsigned Iter = 0; Iter < Iterations; ++Iter) {
1290 // Cost of the loop's critical path.
1291 CostInfo &MaxCost = LoopCost[Iter];
1292 for (BasicBlock *BB : L->getBlocks()) {
1293 for (const Instruction &I : *BB) {
1294 if (I.isDebugOrPseudoInst())
1295 continue;
1296 // Compute the predicated and non-predicated cost of the instruction.
1297 Scaled64 IPredCost = Scaled64::getZero(),
1298 INonPredCost = Scaled64::getZero();
1299
1300 // Assume infinite resources that allow to fully exploit the available
1301 // instruction-level parallelism.
1302 // InstCost = InstLatency + max(Op1Cost, Op2Cost, … OpNCost)
1303 for (const Use &U : I.operands()) {
1304 auto UI = dyn_cast<Instruction>(U.get());
1305 if (!UI)
1306 continue;
1307 if (auto It = InstCostMap.find(UI); It != InstCostMap.end()) {
1308 IPredCost = std::max(IPredCost, It->second.PredCost);
1309 INonPredCost = std::max(INonPredCost, It->second.NonPredCost);
1310 }
1311 }
1312 auto ILatency = computeInstCost(&I);
1313 if (!ILatency) {
1314 OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti", &I);
1315 ORmissL << "Invalid instruction cost preventing analysis and "
1316 "optimization of the inner-most loop containing this "
1317 "instruction. ";
1318 EmitAndPrintRemark(ORE, ORmissL);
1319 return false;
1320 }
1321 IPredCost += Scaled64::get(*ILatency);
1322 INonPredCost += Scaled64::get(*ILatency);
1323
1324 // For a select that can be converted to branch,
1325 // compute its cost as a branch (non-predicated cost).
1326 //
1327 // BranchCost = PredictedPathCost + MispredictCost
1328 // PredictedPathCost = TrueOpCost * TrueProb + FalseOpCost * FalseProb
1329 // MispredictCost = max(MispredictPenalty, CondCost) * MispredictRate
1330 if (auto It = SImap.find(&I); It != SImap.end()) {
1331 auto SI = It->second;
1332 const auto *SG = SGmap.at(&I);
1333 Scaled64 TrueOpCost = SI.getOpCostOnBranch(true, InstCostMap, TTI);
1334 Scaled64 FalseOpCost = SI.getOpCostOnBranch(false, InstCostMap, TTI);
1335 Scaled64 PredictedPathCost =
1336 getPredictedPathCost(TrueOpCost, FalseOpCost, SI);
1337
1338 Scaled64 CondCost = Scaled64::getZero();
1339 if (auto *CI = dyn_cast<Instruction>(SG->Condition))
1340 if (auto It = InstCostMap.find(CI); It != InstCostMap.end())
1341 CondCost = It->second.NonPredCost;
1342 Scaled64 MispredictCost = getMispredictionCost(SI, CondCost);
1343
1344 INonPredCost = PredictedPathCost + MispredictCost;
1345 }
1346 LLVM_DEBUG(dbgs() << " " << ILatency << "/" << IPredCost << "/"
1347 << INonPredCost << " for " << I << "\n");
1348
1349 InstCostMap[&I] = {IPredCost, INonPredCost};
1350 MaxCost.PredCost = std::max(MaxCost.PredCost, IPredCost);
1351 MaxCost.NonPredCost = std::max(MaxCost.NonPredCost, INonPredCost);
1352 }
1353 }
1354 LLVM_DEBUG(dbgs() << "Iteration " << Iter + 1
1355 << " MaxCost = " << MaxCost.PredCost << " "
1356 << MaxCost.NonPredCost << "\n");
1357 }
1358 return true;
1359}
1360
1361SmallDenseMap<const Instruction *, SelectOptimizeImpl::SelectLike, 2>
1362SelectOptimizeImpl::getSImap(const SelectGroups &SIGroups) {
1363  SmallDenseMap<const Instruction *, SelectLike, 2> SImap;
1364  for (const SelectGroup &ASI : SIGroups)
1365 for (const SelectLike &SI : ASI.Selects)
1366 SImap.try_emplace(SI.getI(), SI);
1367 return SImap;
1368}
1369
1370SmallDenseMap<const Instruction *, const SelectOptimizeImpl::SelectGroup *, 2>
1371SelectOptimizeImpl::getSGmap(const SelectGroups &SIGroups) {
1372  SmallDenseMap<const Instruction *, const SelectGroup *, 2> SImap;
1373  for (const SelectGroup &ASI : SIGroups)
1374 for (const SelectLike &SI : ASI.Selects)
1375 SImap.try_emplace(SI.getI(), &ASI);
1376 return SImap;
1377}
1378
1379std::optional<uint64_t>
1380SelectOptimizeImpl::computeInstCost(const Instruction *I) {
1381 InstructionCost ICost =
1382 TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency);
1383 if (ICost.isValid())
1384 return std::optional<uint64_t>(ICost.getValue());
1385 return std::nullopt;
1386}
1387
1388SelectOptimizeImpl::Scaled64
1389SelectOptimizeImpl::getMispredictionCost(const SelectLike SI,
1390 const Scaled64 CondCost) {
1391 uint64_t MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty;
1392
1393 // Account for the default misprediction rate when using a branch
1394 // (conservatively set to 25% by default).
1395 uint64_t MispredictRate = MispredictDefaultRate;
1396 // If the select condition is obviously predictable, then the misprediction
1397 // rate is zero.
1398 if (isSelectHighlyPredictable(SI))
1399 MispredictRate = 0;
1400
1401 // CondCost is included to account for cases where the computation of the
1402 // condition is part of a long dependence chain (potentially loop-carried)
1403 // that would delay detection of a misprediction and increase its cost.
1404 Scaled64 MispredictCost =
1405 std::max(Scaled64::get(MispredictPenalty), CondCost) *
1406 Scaled64::get(MispredictRate);
1407 MispredictCost /= Scaled64::get(100);
1408
1409 return MispredictCost;
1410}
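// Worked example (hypothetical numbers): with a scheduling-model
// MispredictPenalty of 14 cycles, a CondCost of 3 and the default
// MispredictDefaultRate of 25%, the cost is max(14, 3) * 25 / 100 = 3.5
// cycles; for a highly predictable select the rate, and thus the cost, is 0.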
1411
1412// Returns the cost of a branch when the prediction is correct.
1413// TrueCost * TrueProbability + FalseCost * FalseProbability.
1414SelectOptimizeImpl::Scaled64
1415SelectOptimizeImpl::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
1416 const SelectLike SI) {
1417 Scaled64 PredPathCost;
1418 uint64_t TrueWeight, FalseWeight;
1419 if (extractBranchWeights(SI, TrueWeight, FalseWeight)) {
1420 uint64_t SumWeight = TrueWeight + FalseWeight;
1421 if (SumWeight != 0) {
1422 PredPathCost = TrueCost * Scaled64::get(TrueWeight) +
1423 FalseCost * Scaled64::get(FalseWeight);
1424 PredPathCost /= Scaled64::get(SumWeight);
1425 return PredPathCost;
1426 }
1427 }
1428 // Without branch weight metadata, we assume 75% for the one path and 25% for
1429 // the other, and pick the result with the biggest cost.
1430 PredPathCost = std::max(TrueCost * Scaled64::get(3) + FalseCost,
1431 FalseCost * Scaled64::get(3) + TrueCost);
1432 PredPathCost /= Scaled64::get(4);
1433 return PredPathCost;
1434}
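// Worked example (hypothetical costs): with branch weights 3:1 and
// TrueCost = 8, FalseCost = 2, the predicted-path cost is
// (8*3 + 2*1) / 4 = 6.5. Without weights, the 75%/25% fallback takes the
// larger of (8*3 + 2)/4 = 6.5 and (2*3 + 8)/4 = 3.5, i.e. 6.5.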
1435
1436bool SelectOptimizeImpl::isSelectKindSupported(const SelectLike SI) {
1437  TargetLowering::SelectSupportKind SelectKind;
1438  if (SI.getType()->isVectorTy())
1439    SelectKind = TargetLowering::ScalarCondVectorVal;
1440  else
1441    SelectKind = TargetLowering::ScalarValSelect;
1442  return TLI->isSelectSupported(SelectKind);
1443}
Common features for diagnostics dealing with optimization remarks that are used by both IR and MIR pa...
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2780
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isDebugOrPseudoInst() const LLVM_READONLY
Return true if the instruction is a DbgInfoIntrinsic or PseudoProbeInst.
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Analysis pass that exposes the LoopInfo for a function.
Definition LoopInfo.h:569
iterator end() const
iterator begin() const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
The legacy pass manager's analysis pass to compute loop information.
Definition LoopInfo.h:596
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
OptimizationRemarkEmitter legacy analysis pass.
The optimization diagnostic interface.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
Definition Pass.h:99
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
bool isColdBlock(const BBType *BB, BFIT *BFI) const
Returns true if BasicBlock BB is considered cold.
Simple representation of a scaled number.
static ScaledNumber get(uint64_t N)
static ScaledNumber getZero()
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:168
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:356
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Analysis pass providing the TargetTransformInfo.
virtual bool isSelectSupported(SelectSupportKind) const
SelectSupportKind
Enum that describes what type of support for selects the target has.
bool isPredictableSelectExpensive() const
Return true if selects are only cheaper than branches if the branch is unlikely to be predicted right...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
Target-Independent Code Generator Pass Configuration Options.
Provide an instruction scheduling machine model to CodeGen passes.
const MCSchedModel * getMCSchedModel() const
LLVM_ABI void init(const TargetSubtargetInfo *TSInfo, bool EnableSModel=true, bool EnableSItins=true)
Initialize the machine model for instruction scheduling.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetLowering * getTargetLowering() const
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
@ TCK_Latency
The latency of instruction.
LLVM_ABI InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
@ TCC_Expensive
The cost of a 'div' instruction on x86.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:396
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:134
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
bool match(Val *V, const Pattern &P)
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1700
InstructionCost Cost
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
LLVM_ABI FunctionPass * createSelectOptimizePass()
This pass converts conditional moves to conditional jumps when profitable.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2118
LLVM_ABI bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:626
constexpr T divideNearest(U Numerator, V Denominator)
Returns (Numerator / Denominator) rounded by round-half-up.
Definition MathExtras.h:469
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:759
LLVM_ABI void initializeSelectOptimizePass(PassRegistry &)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
TargetTransformInfo TTI
DWARFExpression::Operation Op
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
unsigned MispredictPenalty
Definition MCSchedule.h:311