1//===--- SelectOptimize.cpp - Convert select to branches if profitable ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass converts selects to conditional jumps when profitable.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/SetVector.h"
16#include "llvm/ADT/Statistic.h"
23#include "llvm/CodeGen/Passes.h"
28#include "llvm/IR/BasicBlock.h"
29#include "llvm/IR/Dominators.h"
30#include "llvm/IR/Function.h"
31#include "llvm/IR/IRBuilder.h"
32#include "llvm/IR/Instruction.h"
36#include "llvm/Pass.h"
40#include <algorithm>
41#include <queue>
42#include <stack>
43
44using namespace llvm;
45using namespace llvm::PatternMatch;
46
47#define DEBUG_TYPE "select-optimize"
48
49STATISTIC(NumSelectOptAnalyzed,
50 "Number of select groups considered for conversion to branch");
51STATISTIC(NumSelectConvertedExpColdOperand,
52 "Number of select groups converted due to expensive cold operand");
53STATISTIC(NumSelectConvertedHighPred,
54 "Number of select groups converted due to high-predictability");
55STATISTIC(NumSelectUnPred,
56 "Number of select groups not converted due to unpredictability");
57STATISTIC(NumSelectColdBB,
58 "Number of select groups not converted due to cold basic block");
59STATISTIC(NumSelectConvertedLoop,
60 "Number of select groups converted due to loop-level analysis");
61STATISTIC(NumSelectsConverted, "Number of selects converted");
62
64 "cold-operand-threshold",
65 cl::desc("Maximum frequency of path for an operand to be considered cold."),
66 cl::init(20), cl::Hidden);
67
69 "cold-operand-max-cost-multiplier",
70 cl::desc("Maximum cost multiplier of TCC_expensive for the dependence "
71 "slice of a cold operand to be considered inexpensive."),
73
74static cl::opt<unsigned>
75 GainGradientThreshold("select-opti-loop-gradient-gain-threshold",
76 cl::desc("Gradient gain threshold (%)."),
77 cl::init(25), cl::Hidden);
78
79static cl::opt<unsigned>
80 GainCycleThreshold("select-opti-loop-cycle-gain-threshold",
81 cl::desc("Minimum gain per loop (in cycles) threshold."),
82 cl::init(4), cl::Hidden);
83
85 "select-opti-loop-relative-gain-threshold",
87 "Minimum relative gain per loop threshold (1/X). Defaults to 12.5%"),
89
91 "mispredict-default-rate", cl::Hidden, cl::init(25),
92 cl::desc("Default mispredict rate (initialized to 25%)."));
93
94static cl::opt<bool>
95 DisableLoopLevelHeuristics("disable-loop-level-heuristics", cl::Hidden,
96 cl::init(false),
97 cl::desc("Disable loop-level heuristics."));
98
99namespace {
100
101class SelectOptimizeImpl {
102 const TargetMachine *TM = nullptr;
103 const TargetSubtargetInfo *TSI = nullptr;
104 const TargetLowering *TLI = nullptr;
105 const TargetTransformInfo *TTI = nullptr;
106 const LoopInfo *LI = nullptr;
107 BlockFrequencyInfo *BFI;
108 ProfileSummaryInfo *PSI = nullptr;
109 OptimizationRemarkEmitter *ORE = nullptr;
110 TargetSchedModel TSchedModel;
111
112public:
113 SelectOptimizeImpl() = default;
114 SelectOptimizeImpl(const TargetMachine *TM) : TM(TM){};
115 PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
116 bool runOnFunction(Function &F, Pass &P);
117
118 using Scaled64 = ScaledNumber<uint64_t>;
119
120 struct CostInfo {
121 /// Predicated cost (with selects as conditional moves).
122 Scaled64 PredCost;
123 /// Non-predicated cost (with selects converted to branches).
124 Scaled64 NonPredCost;
125 };
126
127 /// SelectLike is an abstraction over SelectInst and other operations that can
128 /// act like selects. For example Or(Zext(icmp), X) can be treated like
129 /// select(icmp, X|1, X).
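 /// For illustration (example IR, value names hypothetical):
 ///   %c = icmp eq i32 %a, 0
 ///   %z = zext i1 %c to i32
 ///   %r = or i32 %z, %x   ; behaves like: select i1 %c, i32 (or %x, 1), i32 %x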
130 class SelectLike {
131 /// The select (/or) instruction.
132 Instruction *I;
133 /// Whether this select is inverted, "not(cond), FalseVal, TrueVal", as
134 /// opposed to the original condition.
135 bool Inverted = false;
136
137 /// The index of the operand that depends on condition. Only for select-like
138 /// instruction such as Or/Add.
139 unsigned CondIdx;
140
141 public:
142 SelectLike(Instruction *I, bool Inverted = false, unsigned CondIdx = 0)
143 : I(I), Inverted(Inverted), CondIdx(CondIdx) {}
144
145 Instruction *getI() { return I; }
146 const Instruction *getI() const { return I; }
147
148 Type *getType() const { return I->getType(); }
149
150 unsigned getConditionOpIndex() { return CondIdx; };
151
152 /// Return the true value for the SelectLike instruction. Note this may not
153 /// exist for all SelectLike instructions. For example, for `or(zext(c), x)`
154 /// the true value would be `or(x,1)`. As this value does not exist, nullptr
155 /// is returned.
156 Value *getTrueValue(bool HonorInverts = true) const {
157 if (Inverted && HonorInverts)
158 return getFalseValue(/*HonorInverts=*/false);
159 if (auto *Sel = dyn_cast<SelectInst>(I))
160 return Sel->getTrueValue();
161 // Or(zext) case - The true value is Or(X), so return nullptr as the value
162 // does not yet exist.
163 if (isa<BinaryOperator>(I))
164 return nullptr;
165
166 llvm_unreachable("Unhandled case in getTrueValue");
167 }
168
169 /// Return the false value for the SelectLike instruction. For example the
170 /// getFalseValue of a select or `x` in `or(zext(c), x)` (which is
171 /// `select(c, x|1, x)`)
172 Value *getFalseValue(bool HonorInverts = true) const {
173 if (Inverted && HonorInverts)
174 return getTrueValue(/*HonorInverts=*/false);
175 if (auto *Sel = dyn_cast<SelectInst>(I))
176 return Sel->getFalseValue();
177 // We are on the branch where the condition is zero, which means BinOp
178 // does not perform any computation, and we can simply return the operand
179 // that is not related to the condition
180 if (auto *BO = dyn_cast<BinaryOperator>(I))
181 return BO->getOperand(1 - CondIdx);
182
183 llvm_unreachable("Unhandled case in getFalseValue");
184 }
185
186 /// Return the NonPredCost cost of the op on \p isTrue branch, given the
187 /// costs in \p InstCostMap. This may need to be generated for select-like
188 /// instructions.
189 Scaled64 getOpCostOnBranch(
190 bool IsTrue, const DenseMap<const Instruction *, CostInfo> &InstCostMap,
191 const TargetTransformInfo *TTI) {
192 auto *V = IsTrue ? getTrueValue() : getFalseValue();
193 if (V) {
194 if (auto *IV = dyn_cast<Instruction>(V)) {
195 auto It = InstCostMap.find(IV);
196 return It != InstCostMap.end() ? It->second.NonPredCost
197 : Scaled64::getZero();
198 }
199 return Scaled64::getZero();
200 }
201 // If getTrue(False)Value() returns nullptr, it means we are dealing with
202 // a select-like instruction on the branch where the actual computation
203 // happens. In that case the cost is equal to the cost of the computation
204 // plus the cost of the operand that does not depend on the condition.
205 InstructionCost Cost = TTI->getArithmeticInstrCost(
206 getI()->getOpcode(), I->getType(), TargetTransformInfo::TCK_Latency,
207 {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
208 {TTI::OK_UniformConstantValue, TTI::OP_PowerOf2});
209 auto TotalCost = Scaled64::get(*Cost.getValue());
210 if (auto *OpI = dyn_cast<Instruction>(I->getOperand(1 - CondIdx))) {
211 auto It = InstCostMap.find(OpI);
212 if (It != InstCostMap.end())
213 TotalCost += It->second.NonPredCost;
214 }
215 return TotalCost;
216 }
217 };
218
219private:
220 // Select groups consist of consecutive select-like instructions with the same
221 // condition. Between the select-likes there can be any number of auxiliary
222 // instructions related to the condition, such as not, zext, ashr/lshr.
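 // For example (illustrative IR), the two selects below share the condition
 // %c and would be collected into a single SelectGroup:
 //   %c  = icmp ult i32 %a, %b
 //   %s1 = select i1 %c, i32 %x, i32 %y
 //   %s2 = select i1 %c, i32 %p, i32 %q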
223 struct SelectGroup {
224 Value *Condition;
225 SmallVector<SelectLike, 2> Selects;
226 };
227 using SelectGroups = SmallVector<SelectGroup, 2>;
228
229 // Converts select instructions of a function to conditional jumps when deemed
230 // profitable. Returns true if at least one select was converted.
231 bool optimizeSelects(Function &F);
232
233 // Heuristics for determining which select instructions can be profitably
234 // converted to branches. Separate heuristics for selects in inner-most loops
235 // and the rest of code regions (base heuristics for non-inner-most loop
236 // regions).
237 void optimizeSelectsBase(Function &F, SelectGroups &ProfSIGroups);
238 void optimizeSelectsInnerLoops(Function &F, SelectGroups &ProfSIGroups);
239
240 // Converts to branches the select groups that were deemed
241 // profitable-to-convert.
242 void convertProfitableSIGroups(SelectGroups &ProfSIGroups);
243
244 // Splits selects of a given basic block into select groups.
245 void collectSelectGroups(BasicBlock &BB, SelectGroups &SIGroups);
246
247 // Determines for which select groups it is profitable converting to branches
248 // (base and inner-most-loop heuristics).
249 void findProfitableSIGroupsBase(SelectGroups &SIGroups,
250 SelectGroups &ProfSIGroups);
251 void findProfitableSIGroupsInnerLoops(const Loop *L, SelectGroups &SIGroups,
252 SelectGroups &ProfSIGroups);
253
254 // Determines if a select group should be converted to a branch (base
255 // heuristics).
256 bool isConvertToBranchProfitableBase(const SelectGroup &ASI);
257
258 // Returns true if there are expensive instructions in the cold value
259 // operand's (if any) dependence slice of any of the selects of the given
260 // group.
261 bool hasExpensiveColdOperand(const SelectGroup &ASI);
262
263 // For a given source instruction, collect its backwards dependence slice
264 // consisting of instructions exclusively computed for producing the operands
265 // of the source instruction.
266 void getExclBackwardsSlice(Instruction *I, std::stack<Instruction *> &Slice,
267 Instruction *SI, bool ForSinking = false);
268
269 // Returns true if the condition of the select is highly predictable.
270 bool isSelectHighlyPredictable(const SelectLike SI);
271
272 // Loop-level checks to determine if a non-predicated version (with branches)
273 // of the given loop is more profitable than its predicated version.
274 bool checkLoopHeuristics(const Loop *L, const CostInfo LoopDepth[2]);
275
276 // Computes instruction and loop-critical-path costs for both the predicated
277 // and non-predicated version of the given loop.
278 bool computeLoopCosts(const Loop *L, const SelectGroups &SIGroups,
279 DenseMap<const Instruction *, CostInfo> &InstCostMap,
280 CostInfo *LoopCost);
281
282 // Returns a set of all the select instructions in the given select groups.
283 SmallDenseMap<const Instruction *, SelectLike, 2>
284 getSImap(const SelectGroups &SIGroups);
285
286 // Returns a map from select-like instructions to the corresponding select
287 // group.
288 SmallDenseMap<const Instruction *, const SelectGroup *, 2>
289 getSGmap(const SelectGroups &SIGroups);
290
291 // Returns the latency cost of a given instruction.
292 std::optional<uint64_t> computeInstCost(const Instruction *I);
293
294 // Returns the misprediction cost of a given select when converted to branch.
295 Scaled64 getMispredictionCost(const SelectLike SI, const Scaled64 CondCost);
296
297 // Returns the cost of a branch when the prediction is correct.
298 Scaled64 getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
299 const SelectLike SI);
300
301 // Returns true if the target architecture supports lowering a given select.
302 bool isSelectKindSupported(const SelectLike SI);
303};
304
305class SelectOptimize : public FunctionPass {
306 SelectOptimizeImpl Impl;
307
308public:
309 static char ID;
310
311 SelectOptimize() : FunctionPass(ID) {
312 initializeSelectOptimizePass(*PassRegistry::getPassRegistry());
313 }
314
315 bool runOnFunction(Function &F) override {
316 return Impl.runOnFunction(F, *this);
317 }
318
319 void getAnalysisUsage(AnalysisUsage &AU) const override {
320 AU.addRequired<ProfileSummaryInfoWrapperPass>();
321 AU.addRequired<TargetPassConfig>();
322 AU.addRequired<TargetTransformInfoWrapperPass>();
323 AU.addRequired<LoopInfoWrapperPass>();
324 AU.addRequired<BlockFrequencyInfoWrapperPass>();
325 AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
326 }
327};
328
329} // namespace
330
331PreservedAnalyses SelectOptimizePass::run(Function &F,
332 FunctionAnalysisManager &FAM) {
333 SelectOptimizeImpl Impl(TM);
334 return Impl.run(F, FAM);
335}
336
337char SelectOptimize::ID = 0;
338
339INITIALIZE_PASS_BEGIN(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
340 false)
341INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
342INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
343INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
344INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
345INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
346INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
347INITIALIZE_PASS_END(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
348 false)
349
350FunctionPass *llvm::createSelectOptimizePass() { return new SelectOptimize(); }
351
352PreservedAnalyses SelectOptimizeImpl::run(Function &F,
353 FunctionAnalysisManager &FAM) {
354 TSI = TM->getSubtargetImpl(F);
355 TLI = TSI->getTargetLowering();
356
357 // If none of the select types are supported then skip this pass.
358 // This is an optimization pass. Legality issues will be handled by
359 // instruction selection.
360 if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) &&
361 !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) &&
362 !TLI->isSelectSupported(TargetLowering::VectorMaskSelect))
363 return PreservedAnalyses::all();
364
365 TTI = &FAM.getResult<TargetIRAnalysis>(F);
366 if (!TTI->enableSelectOptimize())
367 return PreservedAnalyses::all();
368
369 PSI = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F)
370 .getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
371 assert(PSI && "This pass requires module analysis pass `profile-summary`!");
372 BFI = &FAM.getResult<BlockFrequencyAnalysis>(F);
373
374 // When optimizing for size, selects are preferable over branches.
375 if (llvm::shouldOptimizeForSize(&F, PSI, BFI))
376 return PreservedAnalyses::all();
377
378 LI = &FAM.getResult<LoopAnalysis>(F);
379 ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
380 TSchedModel.init(TSI);
381
382 bool Changed = optimizeSelects(F);
383 return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
384}
385
386bool SelectOptimizeImpl::runOnFunction(Function &F, Pass &P) {
387 TM = &P.getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
388 TSI = TM->getSubtargetImpl(F);
389 TLI = TSI->getTargetLowering();
390
391 // If none of the select types are supported then skip this pass.
392 // This is an optimization pass. Legality issues will be handled by
393 // instruction selection.
394 if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) &&
395 !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) &&
396 !TLI->isSelectSupported(TargetLowering::VectorMaskSelect))
397 return false;
398
399 TTI = &P.getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
400
401 if (!TTI->enableSelectOptimize())
402 return false;
403
404 LI = &P.getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
405 BFI = &P.getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
406 PSI = &P.getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
407 ORE = &P.getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
408 TSchedModel.init(TSI);
409
410 // When optimizing for size, selects are preferable over branches.
411 if (llvm::shouldOptimizeForSize(&F, PSI, BFI))
412 return false;
413
414 return optimizeSelects(F);
415}
416
417bool SelectOptimizeImpl::optimizeSelects(Function &F) {
418 // Determine for which select groups it is profitable converting to branches.
419 SelectGroups ProfSIGroups;
420 // Base heuristics apply only to non-loops and outer loops.
421 optimizeSelectsBase(F, ProfSIGroups);
422 // Separate heuristics for inner-most loops.
423 optimizeSelectsInnerLoops(F, ProfSIGroups);
424
425 // Convert to branches the select groups that were deemed
426 // profitable-to-convert.
427 convertProfitableSIGroups(ProfSIGroups);
428
429 // Code modified if at least one select group was converted.
430 return !ProfSIGroups.empty();
431}
432
433void SelectOptimizeImpl::optimizeSelectsBase(Function &F,
434 SelectGroups &ProfSIGroups) {
435 // Collect all the select groups.
436 SelectGroups SIGroups;
437 for (BasicBlock &BB : F) {
438 // Base heuristics apply only to non-loops and outer loops.
439 Loop *L = LI->getLoopFor(&BB);
440 if (L && L->isInnermost())
441 continue;
442 collectSelectGroups(BB, SIGroups);
443 }
444
445 // Determine for which select groups it is profitable converting to branches.
446 findProfitableSIGroupsBase(SIGroups, ProfSIGroups);
447}
448
449void SelectOptimizeImpl::optimizeSelectsInnerLoops(Function &F,
450 SelectGroups &ProfSIGroups) {
451 SmallVector<Loop *, 2> Loops(LI->begin(), LI->end());
452 // Need to check size on each iteration as we accumulate child loops.
453 for (unsigned long i = 0; i < Loops.size(); ++i)
454 for (Loop *ChildL : Loops[i]->getSubLoops())
455 Loops.push_back(ChildL);
456
457 for (Loop *L : Loops) {
458 if (!L->isInnermost())
459 continue;
460
461 SelectGroups SIGroups;
462 for (BasicBlock *BB : L->getBlocks())
463 collectSelectGroups(*BB, SIGroups);
464
465 findProfitableSIGroupsInnerLoops(L, SIGroups, ProfSIGroups);
466 }
467}
468
469/// Returns optimised value on \p IsTrue branch. For SelectInst that would be
470/// either True or False value. For (BinaryOperator) instructions, where the
471/// condition may be skipped, the operation will use a non-conditional operand.
472/// For example, for `or(V,zext(cond))` this function would return V.
473/// However, if the conditional operand on \p IsTrue branch matters, we create a
474/// clone of instruction at the end of that branch \p B and replace the
475/// condition operand with a constant.
476///
477/// Also \p OptSelects contains previously optimised select-like instructions.
478/// If the current value uses one of the optimised values, we can optimise it
479/// further by replacing it with the corresponding value on the given branch
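/// Illustrative example (hypothetical values): for `%r = add i32 %x, %z` with
/// `%z = zext i1 %c to i32` as the conditional operand, the true-branch value
/// is a clone `add i32 %x, 1` inserted at the end of block \p B, while the
/// false-branch value is simply `%x`.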
480static Value *getTrueOrFalseValue(
481 SelectOptimizeImpl::SelectLike &SI, bool isTrue,
482 SmallDenseMap<Instruction *, std::pair<Value *, Value *>, 2> &OptSelects,
483 BasicBlock *B) {
484 Value *V = isTrue ? SI.getTrueValue() : SI.getFalseValue();
485 if (V) {
486 auto *IV = dyn_cast<Instruction>(V);
487 if (IV && OptSelects.count(IV))
488 return isTrue ? OptSelects[IV].first : OptSelects[IV].second;
489 return V;
490 }
491
492 auto *BO = cast<BinaryOperator>(SI.getI());
493 assert((BO->getOpcode() == Instruction::Add ||
494 BO->getOpcode() == Instruction::Or ||
495 BO->getOpcode() == Instruction::Sub) &&
496 "Only currently handling Add, Or and Sub binary operators.");
497
498 auto *CBO = BO->clone();
499 auto CondIdx = SI.getConditionOpIndex();
500 auto *AuxI = cast<Instruction>(CBO->getOperand(CondIdx));
501 if (isa<ZExtInst>(AuxI) || isa<LShrOperator>(AuxI)) {
502 CBO->setOperand(CondIdx, ConstantInt::get(CBO->getType(), 1));
503 } else {
504 assert((isa<AShrOperator>(AuxI) || isa<SExtInst>(AuxI)) &&
505 "Unexpected opcode");
506 CBO->setOperand(CondIdx, ConstantInt::get(CBO->getType(), -1));
507 }
508
509 unsigned OtherIdx = 1 - CondIdx;
510 if (auto *IV = dyn_cast<Instruction>(CBO->getOperand(OtherIdx))) {
511 if (OptSelects.count(IV))
512 CBO->setOperand(OtherIdx,
513 isTrue ? OptSelects[IV].first : OptSelects[IV].second);
514 }
515 CBO->insertBefore(B->getTerminator());
516 return CBO;
517}
518
519void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
520 for (SelectGroup &ASI : ProfSIGroups) {
521 // The code transformation here is a modified version of the sinking
522 // transformation in CodeGenPrepare::optimizeSelectInst with a more
523 // aggressive strategy of which instructions to sink.
524 //
525 // TODO: eliminate the redundancy of logic transforming selects to branches
526 // by removing CodeGenPrepare::optimizeSelectInst and optimizing here
527 // selects for all cases (with and without profile information).
528
529 // Transform a sequence like this:
530 // start:
531 // %cmp = cmp uge i32 %a, %b
532 // %sel = select i1 %cmp, i32 %c, i32 %d
533 //
534 // Into:
535 // start:
536 // %cmp = cmp uge i32 %a, %b
537 // %cmp.frozen = freeze %cmp
538 // br i1 %cmp.frozen, label %select.true, label %select.false
539 // select.true:
540 // br label %select.end
541 // select.false:
542 // br label %select.end
543 // select.end:
544 // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
545 //
546 // %cmp should be frozen, otherwise it may introduce undefined behavior.
547 // In addition, we may sink instructions that produce %c or %d into the
548 // destination(s) of the new branch.
549 // If the true or false blocks do not contain a sunken instruction, that
550 // block and its branch may be optimized away. In that case, one side of the
551 // first branch will point directly to select.end, and the corresponding PHI
552 // predecessor block will be the start block.
553
554 // Find all the instructions that can be soundly sunk to the true/false
555 // blocks. These are instructions that are computed solely for producing the
556 // operands of the select instructions in the group and can be sunk without
557 // breaking the semantics of the LLVM IR (e.g., cannot sink instructions
558 // with side effects).
559 SmallVector<std::stack<Instruction *>, 2> TrueSlices, FalseSlices;
560 typedef std::stack<Instruction *>::size_type StackSizeType;
561 StackSizeType maxTrueSliceLen = 0, maxFalseSliceLen = 0;
562 for (SelectLike &SI : ASI.Selects) {
563 if (!isa<SelectInst>(SI.getI()))
564 continue;
565 // For each select, compute the sinkable dependence chains of the true and
566 // false operands.
567 if (auto *TI = dyn_cast_or_null<Instruction>(SI.getTrueValue())) {
568 std::stack<Instruction *> TrueSlice;
569 getExclBackwardsSlice(TI, TrueSlice, SI.getI(), true);
570 maxTrueSliceLen = std::max(maxTrueSliceLen, TrueSlice.size());
571 TrueSlices.push_back(TrueSlice);
572 }
573 if (auto *FI = dyn_cast_or_null<Instruction>(SI.getFalseValue())) {
574 if (isa<SelectInst>(SI.getI()) || !FI->hasOneUse()) {
575 std::stack<Instruction *> FalseSlice;
576 getExclBackwardsSlice(FI, FalseSlice, SI.getI(), true);
577 maxFalseSliceLen = std::max(maxFalseSliceLen, FalseSlice.size());
578 FalseSlices.push_back(FalseSlice);
579 }
580 }
581 }
582 // In the case of multiple select instructions in the same group, the order
583 // of non-dependent instructions (instructions of different dependence
584 // slices) in the true/false blocks appears to affect performance.
585 // Interleaving the slices seems to experimentally be the optimal approach.
586 // This interleaving scheduling allows for more ILP (with a natural downside
587 // of increasing a bit register pressure) compared to a simple ordering of
588 // one whole chain after another. One would expect that this ordering would
589 // not matter since the scheduling in the backend of the compiler would
590 // take care of it, but apparently the scheduler fails to deliver optimal
591 // ILP with a naive ordering here.
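 // For example (illustrative), with two true-value slices {A1, A2} and
 // {B1, B2} (stack tops first), the emitted order is A1, B1, A2, B2 rather
 // than A1, A2, B1, B2.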
592 SmallVector<Instruction *, 2> TrueSlicesInterleaved, FalseSlicesInterleaved;
593 for (StackSizeType IS = 0; IS < maxTrueSliceLen; ++IS) {
594 for (auto &S : TrueSlices) {
595 if (!S.empty()) {
596 TrueSlicesInterleaved.push_back(S.top());
597 S.pop();
598 }
599 }
600 }
601 for (StackSizeType IS = 0; IS < maxFalseSliceLen; ++IS) {
602 for (auto &S : FalseSlices) {
603 if (!S.empty()) {
604 FalseSlicesInterleaved.push_back(S.top());
605 S.pop();
606 }
607 }
608 }
609
610 // We split the block containing the select(s) into two blocks.
611 SelectLike &SI = ASI.Selects.front();
612 SelectLike &LastSI = ASI.Selects.back();
613 BasicBlock *StartBlock = SI.getI()->getParent();
614 BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI.getI()));
615 // With RemoveDIs turned off, SplitPt can be a dbg.* intrinsic. With
616 // RemoveDIs turned on, SplitPt would instead point to the next
617 // instruction. To match existing dbg.* intrinsic behaviour with RemoveDIs,
618 // tell splitBasicBlock that we want to include any DbgVariableRecords
619 // attached to SplitPt in the splice.
620 SplitPt.setHeadBit(true);
621 BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
622 BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock));
623 // Delete the unconditional branch that was just created by the split.
624 StartBlock->getTerminator()->eraseFromParent();
625
626 // Move any debug/pseudo and auxiliary instructions that were in-between the
627 // select group to the newly-created end block.
628 SmallVector<Instruction *, 2> SinkInstrs;
629 auto DIt = SI.getI()->getIterator();
630 auto NIt = ASI.Selects.begin();
631 while (&*DIt != LastSI.getI()) {
632 if (NIt != ASI.Selects.end() && &*DIt == NIt->getI())
633 ++NIt;
634 else
635 SinkInstrs.push_back(&*DIt);
636 DIt++;
637 }
638 auto InsertionPoint = EndBlock->getFirstInsertionPt();
639 for (auto *DI : SinkInstrs)
640 DI->moveBeforePreserving(&*InsertionPoint);
641
642 // Duplicate implementation for DbgRecords, the non-instruction debug-info
643 // format. Helper lambda for moving DbgRecords to the end block.
644 auto TransferDbgRecords = [&](Instruction &I) {
645 for (auto &DbgRecord :
646 llvm::make_early_inc_range(I.getDbgRecordRange())) {
647 DbgRecord.removeFromParent();
648 EndBlock->insertDbgRecordBefore(&DbgRecord,
649 EndBlock->getFirstInsertionPt());
650 }
651 };
652
653 // Iterate over all instructions in between SI and LastSI, not including
654 // SI itself. These are all the variable assignments that happen "in the
655 // middle" of the select group.
656 auto R = make_range(std::next(SI.getI()->getIterator()),
657 std::next(LastSI.getI()->getIterator()));
658 llvm::for_each(R, TransferDbgRecords);
659
660 // These are the new basic blocks for the conditional branch.
661 // At least one will become an actual new basic block.
662 BasicBlock *TrueBlock = nullptr, *FalseBlock = nullptr;
663 BranchInst *TrueBranch = nullptr, *FalseBranch = nullptr;
664 // Checks if a select-like instruction would materialise on the given branch.
665 auto HasSelectLike = [](SelectGroup &SG, bool IsTrue) {
666 for (auto &SL : SG.Selects) {
667 if ((IsTrue ? SL.getTrueValue() : SL.getFalseValue()) == nullptr)
668 return true;
669 }
670 return false;
671 };
672 if (!TrueSlicesInterleaved.empty() || HasSelectLike(ASI, true)) {
673 TrueBlock = BasicBlock::Create(EndBlock->getContext(), "select.true.sink",
674 EndBlock->getParent(), EndBlock);
675 TrueBranch = BranchInst::Create(EndBlock, TrueBlock);
676 TrueBranch->setDebugLoc(LastSI.getI()->getDebugLoc());
677 for (Instruction *TrueInst : TrueSlicesInterleaved)
678 TrueInst->moveBefore(TrueBranch);
679 }
680 if (!FalseSlicesInterleaved.empty() || HasSelectLike(ASI, false)) {
681 FalseBlock =
682 BasicBlock::Create(EndBlock->getContext(), "select.false.sink",
683 EndBlock->getParent(), EndBlock);
684 FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
685 FalseBranch->setDebugLoc(LastSI.getI()->getDebugLoc());
686 for (Instruction *FalseInst : FalseSlicesInterleaved)
687 FalseInst->moveBefore(FalseBranch);
688 }
689 // If there was nothing to sink, then arbitrarily choose the 'false' side
690 // for a new input value to the PHI.
691 if (TrueBlock == FalseBlock) {
692 assert(TrueBlock == nullptr &&
693 "Unexpected basic block transform while optimizing select");
694
695 FalseBlock = BasicBlock::Create(StartBlock->getContext(), "select.false",
696 EndBlock->getParent(), EndBlock);
697 auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
698 FalseBranch->setDebugLoc(SI.getI()->getDebugLoc());
699 }
700
701 // Insert the real conditional branch based on the original condition.
702 // If we did not create a new block for one of the 'true' or 'false' paths
703 // of the condition, it means that side of the branch goes to the end block
704 // directly and the path originates from the start block from the point of
705 // view of the new PHI.
706 BasicBlock *TT, *FT;
707 if (TrueBlock == nullptr) {
708 TT = EndBlock;
709 FT = FalseBlock;
710 TrueBlock = StartBlock;
711 } else if (FalseBlock == nullptr) {
712 TT = TrueBlock;
713 FT = EndBlock;
714 FalseBlock = StartBlock;
715 } else {
716 TT = TrueBlock;
717 FT = FalseBlock;
718 }
719 IRBuilder<> IB(SI.getI());
720 auto *CondFr =
721 IB.CreateFreeze(ASI.Condition, ASI.Condition->getName() + ".frozen");
722
723 SmallDenseMap<Instruction *, std::pair<Value *, Value *>, 2> INS;
724
725 // Iterate over the selects in order: a later select may use the value of an
726 // earlier select, and the per-branch values recorded so far are needed to
727 // propagate through the earlier select and form the later PHI operands.
728 InsertionPoint = EndBlock->begin();
729 for (SelectLike &SI : ASI.Selects) {
730 // The select itself is replaced with a PHI Node.
731 PHINode *PN = PHINode::Create(SI.getType(), 2, "");
732 PN->insertBefore(InsertionPoint);
733 PN->takeName(SI.getI());
734 // Current instruction might be a condition of some other group, so we
735 // need to replace it there to avoid dangling pointer
736 if (PN->getType()->isIntegerTy(1)) {
737 for (auto &SG : ProfSIGroups) {
738 if (SG.Condition == SI.getI())
739 SG.Condition = PN;
740 }
741 }
742 SI.getI()->replaceAllUsesWith(PN);
743 auto *TV = getTrueOrFalseValue(SI, true, INS, TrueBlock);
744 auto *FV = getTrueOrFalseValue(SI, false, INS, FalseBlock);
745 INS[PN] = {TV, FV};
746 PN->addIncoming(TV, TrueBlock);
747 PN->addIncoming(FV, FalseBlock);
748 PN->setDebugLoc(SI.getI()->getDebugLoc());
749 ++NumSelectsConverted;
750 }
751 IB.CreateCondBr(CondFr, TT, FT, SI.getI());
752
753 // Remove the old select instructions, now that they are no longer used.
754 for (SelectLike &SI : ASI.Selects)
755 SI.getI()->eraseFromParent();
756 }
757}
758
759void SelectOptimizeImpl::collectSelectGroups(BasicBlock &BB,
760 SelectGroups &SIGroups) {
761 // Represents something that can be considered as a select instruction.
762 // Auxiliary instructions are instructions that depend on a condition and have
763 // zero or some constant value on True/False branch, such as:
764 // * ZExt(1bit)
765 // * SExt(1bit)
766 // * Not(1bit)
767 // * A(L)Shr(Val), ValBitSize - 1, where there is a condition like `Val <= 0`
768 // earlier in the BB. For conditions that check the sign of Val, the compiler
769 // may generate shifts instead of ZExt/SExt.
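 // For example (illustrative IR), given an earlier `%c = icmp slt i32 %v, 0`,
 // the instruction `%s = ashr i32 %v, 31` evaluates to -1 when %c is true and
 // to 0 otherwise, i.e. it acts as sext(%c) and is recorded as an auxiliary
 // instruction with condition %c.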
770 struct SelectLikeInfo {
771 Value *Cond;
772 bool IsAuxiliary;
773 bool IsInverted;
774 unsigned ConditionIdx;
775 };
776
778 // Keeps visited comparisons to help identify AShr/LShr variants of auxiliary
779 // instructions.
781
782 // Check if the instruction is SelectLike or might be part of SelectLike
783 // expression, put information into SelectInfo and return the iterator to the
784 // inserted position.
785 auto ProcessSelectInfo = [&SelectInfo, &SeenCmp](Instruction *I) {
786 if (auto *Cmp = dyn_cast<CmpInst>(I)) {
787 SeenCmp.insert(Cmp);
788 return SelectInfo.end();
789 }
790
791 Value *Cond;
792 if (match(I, m_OneUse(m_ZExtOrSExt(m_Value(Cond)))) &&
793 Cond->getType()->isIntegerTy(1)) {
794 bool Inverted = match(Cond, m_Not(m_Value(Cond)));
795 return SelectInfo.insert({I, {Cond, true, Inverted, 0}}).first;
796 }
797
798 if (match(I, m_Not(m_Value(Cond)))) {
799 return SelectInfo.insert({I, {Cond, true, true, 0}}).first;
800 }
801
802 // Select instructions are what we are usually looking for.
803 if (match(I, m_Select(m_Value(Cond), m_Value(), m_Value()))) {
804 bool Inverted = match(Cond, m_Not(m_Value(Cond)));
805 return SelectInfo.insert({I, {Cond, false, Inverted, 0}}).first;
806 }
807 Value *Val;
808 ConstantInt *Shift;
809 if (match(I, m_Shr(m_Value(Val), m_ConstantInt(Shift))) &&
810 I->getType()->getIntegerBitWidth() == Shift->getZExtValue() + 1) {
811 for (auto *CmpI : SeenCmp) {
812 auto Pred = CmpI->getPredicate();
813 if (Val != CmpI->getOperand(0))
814 continue;
815 if ((Pred == CmpInst::ICMP_SGT &&
816 match(CmpI->getOperand(1), m_ConstantInt<-1>())) ||
817 (Pred == CmpInst::ICMP_SGE &&
818 match(CmpI->getOperand(1), m_Zero())) ||
819 (Pred == CmpInst::ICMP_SLT &&
820 match(CmpI->getOperand(1), m_Zero())) ||
821 (Pred == CmpInst::ICMP_SLE &&
822 match(CmpI->getOperand(1), m_ConstantInt<-1>()))) {
823 bool Inverted =
824 Pred == CmpInst::ICMP_SGT || Pred == CmpInst::ICMP_SGE;
825 return SelectInfo.insert({I, {CmpI, true, Inverted, 0}}).first;
826 }
827 }
828 return SelectInfo.end();
829 }
830
831 // A BinOp(Aux(X), Y) can also be treated like a select, with condition X
832 // and values Y|1 and Y.
833 // `Aux` can be either `ZExt(1bit)`, `SExt(1bit)` or `XShr(Val, ValBitSize
834 // - 1)`; `BinOp` can be Add, Sub or Or.
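 // For example (illustrative IR):
 //   %z = zext i1 %c to i64
 //   %r = or i64 %y, %z   ; treated as: select i1 %c, i64 (or %y, 1), i64 %y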
835 Value *X;
836 auto MatchZExtOrSExtPattern =
837 m_c_BinOp(m_Value(), m_OneUse(m_ZExtOrSExt(m_Value(X))));
838 auto MatchShiftPattern =
839 m_c_BinOp(m_Value(), m_OneUse(m_Shr(m_Value(X), m_ConstantInt(Shift))));
840
841 // This check is unnecessary, but it prevents costly access to the
842 // SelectInfo map.
843 if ((match(I, MatchZExtOrSExtPattern) && X->getType()->isIntegerTy(1)) ||
844 (match(I, MatchShiftPattern) &&
845 X->getType()->getIntegerBitWidth() == Shift->getZExtValue() + 1)) {
846 if (I->getOpcode() != Instruction::Add &&
847 I->getOpcode() != Instruction::Sub &&
848 I->getOpcode() != Instruction::Or)
849 return SelectInfo.end();
850
851 if (I->getOpcode() == Instruction::Or && I->getType()->isIntegerTy(1))
852 return SelectInfo.end();
853
854 // Iterate through the operands and find one that depends on a recognised
855 // sign-extending auxiliary select-like instruction. The operand index does
856 // not matter for Add and Or. However, for Sub, we can only safely
857 // transform when the operand is second.
858 unsigned Idx = I->getOpcode() == Instruction::Sub ? 1 : 0;
859 for (; Idx < 2; Idx++) {
860 auto *Op = I->getOperand(Idx);
861 auto It = SelectInfo.find(Op);
862 if (It != SelectInfo.end() && It->second.IsAuxiliary) {
863 Cond = It->second.Cond;
864 bool Inverted = It->second.IsInverted;
865 return SelectInfo.insert({I, {Cond, false, Inverted, Idx}}).first;
866 }
867 }
868 }
869 return SelectInfo.end();
870 };
871
872 bool AlreadyProcessed = false;
873 BasicBlock::iterator BBIt = BB.begin();
874 DenseMap<Value *, SelectLikeInfo>::iterator It;
875 while (BBIt != BB.end()) {
876 Instruction *I = &*BBIt++;
877 if (I->isDebugOrPseudoInst())
878 continue;
879
880 if (!AlreadyProcessed)
881 It = ProcessSelectInfo(I);
882 else
883 AlreadyProcessed = false;
884
885 if (It == SelectInfo.end() || It->second.IsAuxiliary)
886 continue;
887
888 if (!TTI->shouldTreatInstructionLikeSelect(I))
889 continue;
890
891 Value *Cond = It->second.Cond;
892 // Vector conditions are not supported.
893 if (!Cond->getType()->isIntegerTy(1))
894 continue;
895
896 SelectGroup SIGroup = {Cond, {}};
897 SIGroup.Selects.emplace_back(I, It->second.IsInverted,
898 It->second.ConditionIdx);
899
900 // If the select type is not supported, no point optimizing it.
901 // Instruction selection will take care of it.
902 if (!isSelectKindSupported(SIGroup.Selects.front()))
903 continue;
904
905 while (BBIt != BB.end()) {
906 Instruction *NI = &*BBIt;
907 // Debug/pseudo instructions should be skipped and not prevent the
908 // formation of a select group.
909 if (NI->isDebugOrPseudoInst()) {
910 ++BBIt;
911 continue;
912 }
913
914 It = ProcessSelectInfo(NI);
915 if (It == SelectInfo.end()) {
916 AlreadyProcessed = true;
917 break;
918 }
919
920 // Auxiliary with same condition
921 auto [CurrCond, IsAux, IsRev, CondIdx] = It->second;
922 if (Cond != CurrCond) {
923 AlreadyProcessed = true;
924 break;
925 }
926
927 if (!IsAux)
928 SIGroup.Selects.emplace_back(NI, IsRev, CondIdx);
929 ++BBIt;
930 }
931 LLVM_DEBUG({
932 dbgs() << "New Select group (" << SIGroup.Selects.size() << ") with\n";
933 for (auto &SI : SIGroup.Selects)
934 dbgs() << " " << *SI.getI() << "\n";
935 });
936
937 SIGroups.push_back(SIGroup);
938 }
939}
940
941void SelectOptimizeImpl::findProfitableSIGroupsBase(
942 SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
943 for (SelectGroup &ASI : SIGroups) {
944 ++NumSelectOptAnalyzed;
945 if (isConvertToBranchProfitableBase(ASI))
946 ProfSIGroups.push_back(ASI);
947 }
948}
949
950static void EmitAndPrintRemark(OptimizationRemarkEmitter *ORE,
951 DiagnosticInfoOptimizationBase &Rem) {
952 LLVM_DEBUG(dbgs() << Rem.getMsg() << "\n");
953 ORE->emit(Rem);
954}
955
956void SelectOptimizeImpl::findProfitableSIGroupsInnerLoops(
957 const Loop *L, SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
958 NumSelectOptAnalyzed += SIGroups.size();
959 // For each select group in an inner-most loop,
960 // a branch is more preferable than a select/conditional-move if:
961 // i) conversion to branches for all the select groups of the loop satisfies
962 // loop-level heuristics including reducing the loop's critical path by
963 // some threshold (see SelectOptimizeImpl::checkLoopHeuristics); and
964 // ii) the total cost of the select group is cheaper with a branch compared
965 // to its predicated version. The cost is in terms of latency and the cost
966 // of a select group is the cost of its most expensive select instruction
967 // (assuming infinite resources and thus fully leveraging available ILP).
968
970 CostInfo LoopCost[2] = {{Scaled64::getZero(), Scaled64::getZero()},
971 {Scaled64::getZero(), Scaled64::getZero()}};
972 if (!computeLoopCosts(L, SIGroups, InstCostMap, LoopCost) ||
973 !checkLoopHeuristics(L, LoopCost)) {
974 return;
975 }
976
977 for (SelectGroup &ASI : SIGroups) {
978 // Assuming infinite resources, the cost of a group of instructions is the
979 // cost of the most expensive instruction of the group.
980 Scaled64 SelectCost = Scaled64::getZero(), BranchCost = Scaled64::getZero();
981 for (SelectLike &SI : ASI.Selects) {
982 SelectCost = std::max(SelectCost, InstCostMap[SI.getI()].PredCost);
983 BranchCost = std::max(BranchCost, InstCostMap[SI.getI()].NonPredCost);
984 }
985 if (BranchCost < SelectCost) {
986 OptimizationRemark OR(DEBUG_TYPE, "SelectOpti",
987 ASI.Selects.front().getI());
988 OR << "Profitable to convert to branch (loop analysis). BranchCost="
989 << BranchCost.toString() << ", SelectCost=" << SelectCost.toString()
990 << ". ";
991 EmitAndPrintRemark(ORE, OR);
992 ++NumSelectConvertedLoop;
993 ProfSIGroups.push_back(ASI);
994 } else {
995 OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti",
996 ASI.Selects.front().getI());
997 ORmiss << "Select is more profitable (loop analysis). BranchCost="
998 << BranchCost.toString()
999 << ", SelectCost=" << SelectCost.toString() << ". ";
1000 EmitAndPrintRemark(ORE, ORmiss);
1001 }
1002 }
1003}
1004
1005bool SelectOptimizeImpl::isConvertToBranchProfitableBase(
1006 const SelectGroup &ASI) {
1007 const SelectLike &SI = ASI.Selects.front();
1008 LLVM_DEBUG(dbgs() << "Analyzing select group containing " << *SI.getI()
1009 << "\n");
1010 OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", SI.getI());
1011 OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", SI.getI());
1012
1013 // Skip cold basic blocks. Better to optimize for size for cold blocks.
1014 if (PSI->isColdBlock(SI.getI()->getParent(), BFI)) {
1015 ++NumSelectColdBB;
1016 ORmiss << "Not converted to branch because of cold basic block. ";
1017 EmitAndPrintRemark(ORE, ORmiss);
1018 return false;
1019 }
1020
1021 // If unpredictable, branch form is less profitable.
1022 if (SI.getI()->getMetadata(LLVMContext::MD_unpredictable)) {
1023 ++NumSelectUnPred;
1024 ORmiss << "Not converted to branch because of unpredictable branch. ";
1025 EmitAndPrintRemark(ORE, ORmiss);
1026 return false;
1027 }
1028
1029 // If highly predictable, branch form is more profitable, unless a
1030 // predictable select is inexpensive in the target architecture.
1031 if (isSelectHighlyPredictable(SI) && TLI->isPredictableSelectExpensive()) {
1032 ++NumSelectConvertedHighPred;
1033 OR << "Converted to branch because of highly predictable branch. ";
1034 EmitAndPrintRemark(ORE, OR);
1035 return true;
1036 }
1037
1038 // Look for expensive instructions in the cold operand's (if any) dependence
1039 // slice of any of the selects in the group.
1040 if (hasExpensiveColdOperand(ASI)) {
1041 ++NumSelectConvertedExpColdOperand;
1042 OR << "Converted to branch because of expensive cold operand.";
1043 EmitAndPrintRemark(ORE, OR);
1044 return true;
1045 }
1046
1047 ORmiss << "Not profitable to convert to branch (base heuristic).";
1048 EmitAndPrintRemark(ORE, ORmiss);
1049 return false;
1050}
1051
1052static InstructionCost divideNearest(const InstructionCost &Numerator,
1053 uint64_t Denominator) {
1054 return (Numerator + (Denominator / 2)) / Denominator;
1055}
1056
1057static bool extractBranchWeights(const SelectOptimizeImpl::SelectLike SI,
1058 uint64_t &TrueVal, uint64_t &FalseVal) {
1059 if (isa<SelectInst>(SI.getI()))
1060 return extractBranchWeights(*SI.getI(), TrueVal, FalseVal);
1061 return false;
1062}
1063
1064bool SelectOptimizeImpl::hasExpensiveColdOperand(const SelectGroup &ASI) {
1065 bool ColdOperand = false;
1066 uint64_t TrueWeight, FalseWeight, TotalWeight;
1067 if (extractBranchWeights(ASI.Selects.front(), TrueWeight, FalseWeight)) {
1068 uint64_t MinWeight = std::min(TrueWeight, FalseWeight);
1069 TotalWeight = TrueWeight + FalseWeight;
1070 // Is there a path with frequency <ColdOperandThreshold% (default:20%) ?
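 // For instance (illustrative weights), TrueWeight = 90 and FalseWeight = 10
 // give 100 * 20 = 2000 > 1000 = 100 * 10, so the false path counts as cold.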
1071 ColdOperand = TotalWeight * ColdOperandThreshold > 100 * MinWeight;
1072 } else if (PSI->hasProfileSummary()) {
1073 OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti",
1074 ASI.Selects.front().getI());
1075 ORmiss << "Profile data available but missing branch-weights metadata for "
1076 "select instruction. ";
1077 EmitAndPrintRemark(ORE, ORmiss);
1078 }
1079 if (!ColdOperand)
1080 return false;
1081 // Check if the cold path's dependence slice is expensive for any of the
1082 // selects of the group.
1083 for (SelectLike SI : ASI.Selects) {
1084 Instruction *ColdI = nullptr;
1085 uint64_t HotWeight;
1086 if (TrueWeight < FalseWeight) {
1087 ColdI = dyn_cast_or_null<Instruction>(SI.getTrueValue());
1088 HotWeight = FalseWeight;
1089 } else {
1090 ColdI = dyn_cast_or_null<Instruction>(SI.getFalseValue());
1091 HotWeight = TrueWeight;
1092 }
1093 if (ColdI) {
1094 std::stack<Instruction *> ColdSlice;
1095 getExclBackwardsSlice(ColdI, ColdSlice, SI.getI());
1096 InstructionCost SliceCost = 0;
1097 while (!ColdSlice.empty()) {
1098 SliceCost += TTI->getInstructionCost(ColdSlice.top(),
1099 TargetTransformInfo::TCK_Latency);
1100 ColdSlice.pop();
1101 }
1102 // The colder the cold value operand of the select is the more expensive
1103 // the cmov becomes for computing the cold value operand every time. Thus,
1104 // the colder the cold operand is the more its cost counts.
1105 // Get nearest integer cost adjusted for coldness.
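 // For instance (illustrative numbers), SliceCost = 4, HotWeight = 90 and
 // TotalWeight = 100 give AdjSliceCost = divideNearest(360, 100) = 4.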
1106 InstructionCost AdjSliceCost =
1107 divideNearest(SliceCost * HotWeight, TotalWeight);
1108 if (AdjSliceCost >=
1109 ColdOperandMaxCostMultiplier * TargetTransformInfo::TCC_Expensive)
1110 return true;
1111 }
1112 }
1113 return false;
1114}
1115
1116// Check if it is safe to move LoadI next to the SI.
1117// Conservatively assume it is safe only if there is no instruction
1118// modifying memory in-between the load and the select instruction.
1119static bool isSafeToSinkLoad(Instruction *LoadI, Instruction *SI) {
1120 // Assume loads from different basic blocks are unsafe to move.
1121 if (LoadI->getParent() != SI->getParent())
1122 return false;
1123 auto It = LoadI->getIterator();
1124 while (&*It != SI) {
1125 if (It->mayWriteToMemory())
1126 return false;
1127 It++;
1128 }
1129 return true;
1130}
1131
1132// For a given source instruction, collect its backwards dependence slice
1133// consisting of instructions exclusively computed for the purpose of producing
1134// the operands of the source instruction. As an approximation
1135// (sufficiently-accurate in practice), we populate this set with the
1136// instructions of the backwards dependence slice that only have one use and
1137// form a one-use chain that leads to the source instruction.
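// For example (illustrative IR), for `%t = add i32 %m, 1` with
// `%m = load i32, ptr %p` having %t as its only use, the slice of %t contains
// both %t and %m (provided the load is conservatively safe to sink).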
1138void SelectOptimizeImpl::getExclBackwardsSlice(Instruction *I,
1139 std::stack<Instruction *> &Slice,
1140 Instruction *SI,
1141 bool ForSinking) {
1142 SmallPtrSet<Instruction *, 2> Visited;
1143 std::queue<Instruction *> Worklist;
1144 Worklist.push(I);
1145 while (!Worklist.empty()) {
1146 Instruction *II = Worklist.front();
1147 Worklist.pop();
1148
1149 // Avoid cycles.
1150 if (!Visited.insert(II).second)
1151 continue;
1152
1153 if (!II->hasOneUse())
1154 continue;
1155
1156 // Cannot soundly sink instructions with side-effects.
1157 // Terminator or phi instructions cannot be sunk.
1158 // Avoid sinking other select instructions (should be handled separately).
1159 if (ForSinking && (II->isTerminator() || II->mayHaveSideEffects() ||
1160 isa<SelectInst>(II) || isa<PHINode>(II)))
1161 continue;
1162
1163 // Avoid sinking loads in order not to skip state-modifying instructions,
1164 // that may alias with the loaded address.
1165 // Only allow sinking of loads within the same basic block that are
1166 // conservatively proven to be safe.
1167 if (ForSinking && II->mayReadFromMemory() && !isSafeToSinkLoad(II, SI))
1168 continue;
1169
1170 // Avoid considering instructions with less frequency than the source
1171 // instruction (i.e., avoid colder code regions of the dependence slice).
1172 if (BFI->getBlockFreq(II->getParent()) < BFI->getBlockFreq(I->getParent()))
1173 continue;
1174
1175 // Eligible one-use instruction added to the dependence slice.
1176 Slice.push(II);
1177
1178 // Explore all the operands of the current instruction to expand the slice.
1179 for (Value *Op : II->operand_values())
1180 if (auto *OpI = dyn_cast<Instruction>(Op))
1181 Worklist.push(OpI);
1182 }
1183}
1184
1185bool SelectOptimizeImpl::isSelectHighlyPredictable(const SelectLike SI) {
1186 uint64_t TrueWeight, FalseWeight;
1187 if (extractBranchWeights(SI, TrueWeight, FalseWeight)) {
1188 uint64_t Max = std::max(TrueWeight, FalseWeight);
1189 uint64_t Sum = TrueWeight + FalseWeight;
1190 if (Sum != 0) {
1191 auto Probability = BranchProbability::getBranchProbability(Max, Sum);
1192 if (Probability > TTI->getPredictableBranchThreshold())
1193 return true;
1194 }
1195 }
1196 return false;
1197}
1198
1199bool SelectOptimizeImpl::checkLoopHeuristics(const Loop *L,
1200 const CostInfo LoopCost[2]) {
1201 // Loop-level checks to determine if a non-predicated version (with branches)
1202 // of the loop is more profitable than its predicated version.
1203
1204 if (DisableLoopLevelHeuristics)
1205 return true;
1206
1207 OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti",
1208 L->getHeader()->getFirstNonPHI());
1209
1210 if (LoopCost[0].NonPredCost > LoopCost[0].PredCost ||
1211 LoopCost[1].NonPredCost >= LoopCost[1].PredCost) {
1212 ORmissL << "No select conversion in the loop due to no reduction of loop's "
1213 "critical path. ";
1214 EmitAndPrintRemark(ORE, ORmissL);
1215 return false;
1216 }
1217
1218 Scaled64 Gain[2] = {LoopCost[0].PredCost - LoopCost[0].NonPredCost,
1219 LoopCost[1].PredCost - LoopCost[1].NonPredCost};
1220
1221 // Profitably converting to branches needs to reduce the loop's critical path
1222 // by at least some threshold (absolute gain of GainCycleThreshold cycles and
1223 // relative gain of 12.5%).
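 // For instance (illustrative numbers), with the defaults GainCycleThreshold=4
 // and GainRelativeThreshold=8, a second-iteration gain of 6 cycles on a
 // predicated critical path of 40 cycles passes both checks
 // (6 >= 4 and 6 * 8 >= 40, i.e. a 15% relative gain).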
1224 if (Gain[1] < Scaled64::get(GainCycleThreshold) ||
1225 Gain[1] * Scaled64::get(GainRelativeThreshold) < LoopCost[1].PredCost) {
1226 Scaled64 RelativeGain = Scaled64::get(100) * Gain[1] / LoopCost[1].PredCost;
1227 ORmissL << "No select conversion in the loop due to small reduction of "
1228 "loop's critical path. Gain="
1229 << Gain[1].toString()
1230 << ", RelativeGain=" << RelativeGain.toString() << "%. ";
1231 EmitAndPrintRemark(ORE, ORmissL);
1232 return false;
1233 }
1234
1235 // If the loop's critical path involves loop-carried dependences, the gradient
1236 // of the gain needs to be at least GainGradientThreshold% (defaults to 25%).
1237 // This check ensures that the latency reduction for the loop's critical path
1238 // keeps growing at a sufficient rate beyond the two analyzed loop
1239 // iterations.
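 // For instance (illustrative numbers), Gain = {4, 6} with predicated critical
 // paths {20, 28} gives GradientGain = 100 * (6 - 4) / (28 - 20) = 25%, which
 // just meets the default 25% threshold.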
1240 if (Gain[1] > Gain[0]) {
1241 Scaled64 GradientGain = Scaled64::get(100) * (Gain[1] - Gain[0]) /
1242 (LoopCost[1].PredCost - LoopCost[0].PredCost);
1243 if (GradientGain < Scaled64::get(GainGradientThreshold)) {
1244 ORmissL << "No select conversion in the loop due to small gradient gain. "
1245 "GradientGain="
1246 << GradientGain.toString() << "%. ";
1247 EmitAndPrintRemark(ORE, ORmissL);
1248 return false;
1249 }
1250 }
1251 // If the gain decreases it is not profitable to convert.
1252 else if (Gain[1] < Gain[0]) {
1253 ORmissL
1254 << "No select conversion in the loop due to negative gradient gain. ";
1255 EmitAndPrintRemark(ORE, ORmissL);
1256 return false;
1257 }
1258
1259 // Non-predicated version of the loop is more profitable than its
1260 // predicated version.
1261 return true;
1262}
1263
1264// Computes instruction and loop-critical-path costs for both the predicated
1265// and non-predicated version of the given loop.
1266// Returns false if unable to compute these costs due to invalid cost of loop
1267// instruction(s).
1268bool SelectOptimizeImpl::computeLoopCosts(
1269 const Loop *L, const SelectGroups &SIGroups,
1270 DenseMap<const Instruction *, CostInfo> &InstCostMap, CostInfo *LoopCost) {
1271 LLVM_DEBUG(dbgs() << "Calculating Latency / IPredCost / INonPredCost of loop "
1272 << L->getHeader()->getName() << "\n");
1273 const auto SImap = getSImap(SIGroups);
1274 const auto SGmap = getSGmap(SIGroups);
1275 // Compute instruction and loop-critical-path costs across two iterations for
1276 // both the predicated and non-predicated versions.
1277 const unsigned Iterations = 2;
1278 for (unsigned Iter = 0; Iter < Iterations; ++Iter) {
1279 // Cost of the loop's critical path.
1280 CostInfo &MaxCost = LoopCost[Iter];
1281 for (BasicBlock *BB : L->getBlocks()) {
1282 for (const Instruction &I : *BB) {
1283 if (I.isDebugOrPseudoInst())
1284 continue;
1285 // Compute the predicated and non-predicated cost of the instruction.
1286 Scaled64 IPredCost = Scaled64::getZero(),
1287 INonPredCost = Scaled64::getZero();
1288
1289 // Assume infinite resources that allow us to fully exploit the available
1290 // instruction-level parallelism.
1291 // InstCost = InstLatency + max(Op1Cost, Op2Cost, … OpNCost)
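 // For instance (illustrative numbers), an add with latency 1 whose two
 // operands have accumulated costs 3 and 5 gets cost 1 + max(3, 5) = 6.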
1292 for (const Use &U : I.operands()) {
1293 auto UI = dyn_cast<Instruction>(U.get());
1294 if (!UI)
1295 continue;
1296 if (InstCostMap.count(UI)) {
1297 IPredCost = std::max(IPredCost, InstCostMap[UI].PredCost);
1298 INonPredCost = std::max(INonPredCost, InstCostMap[UI].NonPredCost);
1299 }
1300 }
1301 auto ILatency = computeInstCost(&I);
1302 if (!ILatency) {
1303 OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti", &I);
1304 ORmissL << "Invalid instruction cost preventing analysis and "
1305 "optimization of the inner-most loop containing this "
1306 "instruction. ";
1307 EmitAndPrintRemark(ORE, ORmissL);
1308 return false;
1309 }
1310 IPredCost += Scaled64::get(*ILatency);
1311 INonPredCost += Scaled64::get(*ILatency);
1312
1313 // For a select that can be converted to branch,
1314 // compute its cost as a branch (non-predicated cost).
1315 //
1316 // BranchCost = PredictedPathCost + MispredictCost
1317 // PredictedPathCost = TrueOpCost * TrueProb + FalseOpCost * FalseProb
1318 // MispredictCost = max(MispredictPenalty, CondCost) * MispredictRate
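 // For instance (illustrative numbers), with TrueOpCost = 2, FalseOpCost = 6,
 // no branch weights (so PredictedPathCost = max(2*3+6, 6*3+2)/4 = 5),
 // MispredictPenalty = 14, CondCost = 0 and MispredictRate = 25%, the
 // non-predicated cost becomes 5 + 14 * 0.25 = 8.5 cycles.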
1319 if (SImap.contains(&I)) {
1320 auto SI = SImap.at(&I);
1321 const auto *SG = SGmap.at(&I);
1322 Scaled64 TrueOpCost = SI.getOpCostOnBranch(true, InstCostMap, TTI);
1323 Scaled64 FalseOpCost = SI.getOpCostOnBranch(false, InstCostMap, TTI);
1324 Scaled64 PredictedPathCost =
1325 getPredictedPathCost(TrueOpCost, FalseOpCost, SI);
1326
1327 Scaled64 CondCost = Scaled64::getZero();
1328 if (auto *CI = dyn_cast<Instruction>(SG->Condition))
1329 if (InstCostMap.count(CI))
1330 CondCost = InstCostMap[CI].NonPredCost;
1331 Scaled64 MispredictCost = getMispredictionCost(SI, CondCost);
1332
1333 INonPredCost = PredictedPathCost + MispredictCost;
1334 }
1335 LLVM_DEBUG(dbgs() << " " << ILatency << "/" << IPredCost << "/"
1336 << INonPredCost << " for " << I << "\n");
1337
1338 InstCostMap[&I] = {IPredCost, INonPredCost};
1339 MaxCost.PredCost = std::max(MaxCost.PredCost, IPredCost);
1340 MaxCost.NonPredCost = std::max(MaxCost.NonPredCost, INonPredCost);
1341 }
1342 }
1343 LLVM_DEBUG(dbgs() << "Iteration " << Iter + 1
1344 << " MaxCost = " << MaxCost.PredCost << " "
1345 << MaxCost.NonPredCost << "\n");
1346 }
1347 return true;
1348}
1349
1350SmallDenseMap<const Instruction *, SelectOptimizeImpl::SelectLike, 2>
1351SelectOptimizeImpl::getSImap(const SelectGroups &SIGroups) {
1352 SmallDenseMap<const Instruction *, SelectLike, 2> SImap;
1353 for (const SelectGroup &ASI : SIGroups)
1354 for (const SelectLike &SI : ASI.Selects)
1355 SImap.try_emplace(SI.getI(), SI);
1356 return SImap;
1357}
1358
1359SmallDenseMap<const Instruction *, const SelectOptimizeImpl::SelectGroup *, 2>
1360SelectOptimizeImpl::getSGmap(const SelectGroups &SIGroups) {
1361 SmallDenseMap<const Instruction *, const SelectGroup *, 2> SImap;
1362 for (const SelectGroup &ASI : SIGroups)
1363 for (const SelectLike &SI : ASI.Selects)
1364 SImap.try_emplace(SI.getI(), &ASI);
1365 return SImap;
1366}
1367
1368std::optional<uint64_t>
1369SelectOptimizeImpl::computeInstCost(const Instruction *I) {
1370 InstructionCost ICost =
1371 TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency);
1372 if (auto OC = ICost.getValue())
1373 return std::optional<uint64_t>(*OC);
1374 return std::nullopt;
1375}
1376
1377SelectOptimizeImpl::Scaled64
1378SelectOptimizeImpl::getMispredictionCost(const SelectLike SI,
1379 const Scaled64 CondCost) {
1380 uint64_t MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty;
1381
1382 // Account for the default misprediction rate when using a branch
1383 // (conservatively set to 25% by default).
1384 uint64_t MispredictRate = MispredictDefaultRate;
1385 // If the select condition is obviously predictable, then the misprediction
1386 // rate is zero.
1387 if (isSelectHighlyPredictable(SI))
1388 MispredictRate = 0;
1389
1390 // CondCost is included to account for cases where the computation of the
1391 // condition is part of a long dependence chain (potentially loop-carried)
1392 // that would delay detection of a misprediction and increase its cost.
1393 Scaled64 MispredictCost =
1394 std::max(Scaled64::get(MispredictPenalty), CondCost) *
1395 Scaled64::get(MispredictRate);
1396 MispredictCost /= Scaled64::get(100);
1397
1398 return MispredictCost;
1399}
1400
1401// Returns the cost of a branch when the prediction is correct.
1402// TrueCost * TrueProbability + FalseCost * FalseProbability.
1403SelectOptimizeImpl::Scaled64
1404SelectOptimizeImpl::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
1405 const SelectLike SI) {
1406 Scaled64 PredPathCost;
1407 uint64_t TrueWeight, FalseWeight;
1408 if (extractBranchWeights(SI, TrueWeight, FalseWeight)) {
1409 uint64_t SumWeight = TrueWeight + FalseWeight;
1410 if (SumWeight != 0) {
1411 PredPathCost = TrueCost * Scaled64::get(TrueWeight) +
1412 FalseCost * Scaled64::get(FalseWeight);
1413 PredPathCost /= Scaled64::get(SumWeight);
1414 return PredPathCost;
1415 }
1416 }
1417 // Without branch weight metadata, we assume 75% for the one path and 25% for
1418 // the other, and pick the result with the biggest cost.
1419 PredPathCost = std::max(TrueCost * Scaled64::get(3) + FalseCost,
1420 FalseCost * Scaled64::get(3) + TrueCost);
1421 PredPathCost /= Scaled64::get(4);
1422 return PredPathCost;
1423}
1424
1425bool SelectOptimizeImpl::isSelectKindSupported(const SelectLike SI) {
1426 TargetLowering::SelectSupportKind SelectKind;
1427 if (SI.getType()->isVectorTy())
1428 SelectKind = TargetLowering::ScalarCondVectorVal;
1429 else
1430 SelectKind = TargetLowering::ScalarValSelect;
1431 return TLI->isSelectSupported(SelectKind);
1432}
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static Value * getTrueOrFalseValue(SelectInst *SI, bool isTrue, const SmallPtrSet< const Instruction *, 2 > &Selects)
If isTrue is true, return the true value of SI, otherwise return false value of SI.
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
static bool runOnFunction(Function &F, bool PostInlining)
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define DEBUG_TYPE
Hexagon Hardware Loops
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
uint64_t IntrinsicInst * II
#define P(N)
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSafeToSinkLoad(Instruction *LoadI, Instruction *SI)
Optimize selects
static cl::opt< unsigned > ColdOperandMaxCostMultiplier("cold-operand-max-cost-multiplier", cl::desc("Maximum cost multiplier of TCC_expensive for the dependence " "slice of a cold operand to be considered inexpensive."), cl::init(1), cl::Hidden)
static cl::opt< unsigned > ColdOperandThreshold("cold-operand-threshold", cl::desc("Maximum frequency of path for an operand to be considered cold."), cl::init(20), cl::Hidden)
static cl::opt< bool > DisableLoopLevelHeuristics("disable-loop-level-heuristics", cl::Hidden, cl::init(false), cl::desc("Disable loop-level heuristics."))
static cl::opt< unsigned > GainCycleThreshold("select-opti-loop-cycle-gain-threshold", cl::desc("Minimum gain per loop (in cycles) threshold."), cl::init(4), cl::Hidden)
static cl::opt< unsigned > MispredictDefaultRate("mispredict-default-rate", cl::Hidden, cl::init(25), cl::desc("Default mispredict rate (initialized to 25%)."))
static void EmitAndPrintRemark(OptimizationRemarkEmitter *ORE, DiagnosticInfoOptimizationBase &Rem)
static cl::opt< unsigned > GainGradientThreshold("select-opti-loop-gradient-gain-threshold", cl::desc("Gradient gain threshold (%)."), cl::init(25), cl::Hidden)
static cl::opt< unsigned > GainRelativeThreshold("select-opti-loop-relative-gain-threshold", cl::desc("Minimum relative gain per loop threshold (1/X). Defaults to 12.5%"), cl::init(8), cl::Hidden)
This file contains the declaration of the SelectOptimizePass class, its corresponding pass name is se...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:39
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
This pass exposes codegen information to IR-level passes.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:191
static const uint32_t IV[8]
Definition: blake3_impl.h:78
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator end()
Definition: BasicBlock.h:461
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:448
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:416
void insertDbgRecordBefore(DbgRecord *DR, InstListType::iterator Here)
Insert a DbgRecord into a block at the position given by Here.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:212
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
Definition: BasicBlock.cpp:577
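splitBasicBlock is the core step when a select becomes real control flow. The following is a simplified sketch, not this pass's actual code; the helper name and block names are made up, and it only shows the CFG surgery around the select:
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Hypothetical: carve out a "false" block in front of the select's block.
static void introduceBranchAround(SelectInst *SI) {
  BasicBlock *StartBB = SI->getParent();
  // Everything from SI onward moves into EndBB; StartBB now ends with an
  // unconditional branch to EndBB.
  BasicBlock *EndBB = StartBB->splitBasicBlock(SI->getIterator(), "select.end");
  // New block for the false path, placed just before EndBB.
  BasicBlock *FalseBB = BasicBlock::Create(SI->getContext(), "select.false",
                                           EndBB->getParent(), EndBB);
  BranchInst::Create(EndBB, FalseBB); // FalseBB jumps straight to EndBB.
  // Replace StartBB's unconditional terminator with a conditional branch.
  StartBB->getTerminator()->eraseFromParent();
  BranchInst::Create(/*IfTrue=*/EndBB, /*IfFalse=*/FalseBB, SI->getCondition(),
                     StartBB);
}
A full conversion would also sink the cold-path computation into FalseBB and merge the results with a PHI in the join block (see the PHINode sketch further down).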
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:168
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well formed.
Definition: BasicBlock.h:239
Analysis pass which computes BlockFrequencyInfo.
Legacy analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequencies.
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:702
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:703
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:700
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:701
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate for the type of this constant.
Definition: Constants.h:157
This class represents an Operation in the Expression.
Base class for non-instruction debug metadata records that have positions within IR.
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:156
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition: DenseMap.h:226
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:152
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
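These DenseMap operations are the usual memoization building blocks. A small, self-contained sketch (the helper name and the cost computation are placeholders):
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// Hypothetical per-instruction cache: try_emplace inserts a default entry
// only when the key is new, so the cost is computed at most once.
static unsigned cachedCost(DenseMap<const Instruction *, unsigned> &Cache,
                           const Instruction *I) {
  auto [It, Inserted] = Cache.try_emplace(I, 0u);
  if (Inserted)
    It->second = 1; // placeholder for a real cost computation
  return It->second;
}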
Common features for diagnostics dealing with optimization remarks that are used by both IR and MIR passes.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
This provides a uniform API for creating instructions and inserting them into a basic block: either at the end of a BasicBlock, or at a specific iterator location in a block.
Definition: IRBuilder.h:2697
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
bool isDebugOrPseudoInst() const LLVM_READONLY
Return true if the instruction is a DbgInfoIntrinsic or PseudoProbeInst.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:97
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:92
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:468
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:566
iterator end() const
iterator begin() const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:593
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
OptimizationRemarkEmitter legacy analysis pass.
The optimization diagnostic interface.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
An analysis over an "inner" IR unit that provides access to an analysis manager over an "outer" IR unit.
Definition: PassManager.h:692
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will have (use 0 if you really have no idea).
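When a select is rewritten as a branch, a PHI in the join block takes over the select's role. A hypothetical follow-up to the splitBasicBlock sketch above; TrueBB and FalseBB are assumed to be EndBB's two predecessors:
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Hypothetical: replace SI with a PHI that merges its two operands.
static void replaceSelectWithPHI(SelectInst *SI, BasicBlock *TrueBB,
                                 BasicBlock *FalseBB, BasicBlock *EndBB) {
  PHINode *PN = PHINode::Create(SI->getType(), /*NumReservedValues=*/2, "",
                                EndBB->begin()); // PHIs go first in the block
  PN->takeName(SI);
  PN->addIncoming(SI->getTrueValue(), TrueBB);
  PN->addIncoming(SI->getFalseValue(), FalseBB);
  SI->replaceAllUsesWith(PN);
  SI->eraseFromParent();
}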
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at application-load time.
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:94
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do their job.
Definition: Pass.cpp:98
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:114
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
bool isColdBlock(const BBType *BB, BFIT *BFI) const
Returns true if BasicBlock BB is considered cold.
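A sketch of the usual profile gate (the helper name is hypothetical): without a profile summary there is nothing to say, and with one, cold blocks are skipped.
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/IR/BasicBlock.h"
using namespace llvm;

// Hypothetical: is it worth converting selects in this block?
static bool blockIsWorthConverting(const BasicBlock *BB,
                                   ProfileSummaryInfo *PSI,
                                   BlockFrequencyInfo *BFI) {
  if (!PSI || !PSI->hasProfileSummary())
    return true;                      // no profile data: stay optimistic
  return !PSI->isColdBlock(BB, BFI);  // profile says cold: leave it alone
}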
Simple representation of a scaled number.
Definition: ScaledNumber.h:493
static ScaledNumber get(uint64_t N)
Definition: ScaledNumber.h:526
static ScaledNumber getZero()
Definition: ScaledNumber.h:521
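Scaled64 in this file is ScaledNumber<uint64_t>; it keeps cycle counts with extra dynamic range so repeated scaling by block frequencies does not overflow. A tiny arithmetic sketch with made-up values:
#include "llvm/Support/ScaledNumber.h"
#include <cstdint>

using Scaled64 = llvm::ScaledNumber<uint64_t>;

// Hypothetical: accumulate a few per-instruction latencies into one total.
static Scaled64 totalLatency() {
  Scaled64 Total = Scaled64::getZero();
  for (unsigned Cycles : {3u, 1u, 7u})
    Total += Scaled64::get(Cycles);
  return Total;
}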
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:81
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
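These containers cover the common bookkeeping patterns: SmallVector for ordered storage, SmallPtrSet for cheap membership tests, and the SetVector variants when both uniqueness and deterministic iteration order matter. A small illustrative sketch (the helper is hypothetical):
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// Hypothetical worklist: record each instruction once, in discovery order.
static void enqueueOnce(Instruction *I,
                        SmallVector<Instruction *, 8> &Worklist,
                        SmallPtrSet<Instruction *, 8> &Seen) {
  if (Seen.insert(I).second) // .second is true only on first insertion
    Worklist.push_back(I);
}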
Analysis pass providing the TargetTransformInfo.
virtual bool isSelectSupported(SelectSupportKind) const
SelectSupportKind
Enum that describes what type of support for selects the target has.
bool isPredictableSelectExpensive() const
Return true if selects are only cheaper than branches if the branch is unlikely to be predicted right.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the target instruction selector can accept natively.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
Target-Independent Code Generator Pass Configuration Options.
Provide an instruction scheduling machine model to CodeGen passes.
const MCSchedModel * getMCSchedModel() const
void init(const TargetSubtargetInfo *TSInfo)
Initialize the machine model for instruction scheduling.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetLowering * getTargetLowering() const
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool shouldTreatInstructionLikeSelect(const Instruction *I) const
Should the Select Optimization pass treat the given instruction like a select, potentially converting it to a branch.
@ TCK_Latency
The latency of instruction.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
bool enableSelectOptimize() const
Should the Select Optimization pass be enabled and ran.
BranchProbability getPredictableBranchThreshold() const
If a branch or a select condition is skewed in one direction by more than this factor, it is very likely to be predicted correctly.
@ TCC_Expensive
The cost of a 'div' instruction on x86.
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
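A sketch of how these cost hooks compose (the threshold logic is illustrative, not this pass's exact heuristic): query the latency cost of an instruction and compare it against the TCC_Expensive bucket.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instruction.h"
#include <optional>
using namespace llvm;

// Hypothetical: is this instruction expensive when measured by latency?
static bool isLatencyExpensive(const Instruction *I,
                               const TargetTransformInfo *TTI) {
  InstructionCost Cost =
      TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency);
  std::optional<InstructionCost::CostType> Val = Cost.getValue();
  return Val && *Val >= TargetTransformInfo::TCC_Expensive;
}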
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches right shift operations (both logical and arithmetic).
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:168
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:67
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:612
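A small PatternMatch sketch in the spirit of this file's analysis (the predicate itself is made up): check whether a value is a single-use select whose false arm is a plain integer constant.
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Hypothetical predicate built from the matchers documented above.
static bool isOneUseSelectOfConstant(Value *V) {
  Value *Cond, *TrueVal;
  return match(V, m_OneUse(m_Select(m_Value(Cond), m_Value(TrueVal),
                                    m_ConstantInt())));
}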
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1732
FunctionPass * createSelectOptimizePass()
This pass converts conditional moves to conditional jumps when profitable.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
Definition: STLExtras.h:657
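make_early_inc_range is what allows erasing instructions while walking a block: the iterator is advanced before the current element is visited, so eraseFromParent does not invalidate the loop. A minimal sketch with an arbitrary cleanup criterion:
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Hypothetical cleanup over one block.
static void eraseUnusedSelects(BasicBlock &BB) {
  for (Instruction &I : make_early_inc_range(BB))
    if (isa<SelectInst>(I) && I.use_empty())
      I.eraseFromParent();
}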
constexpr T divideNearest(U Numerator, V Denominator)
Returns (Numerator / Denominator) rounded by round-half-up.
Definition: MathExtras.h:467
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
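Selects can carry !prof branch weights just like branches. A hedged sketch of turning them into a skew check; the 9:1 cutoff is an arbitrary example, not this pass's threshold:
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/ProfDataUtils.h"
#include "llvm/Support/BranchProbability.h"
#include <algorithm>
using namespace llvm;

// Hypothetical: does the profile say one arm of SI is taken almost always?
static bool selectLooksPredictable(const SelectInst *SI) {
  const MDNode *Prof = SI->getMetadata(LLVMContext::MD_prof);
  if (!Prof)
    return false;
  SmallVector<uint32_t, 2> Weights;
  if (!extractBranchWeights(Prof, Weights) || Weights.size() != 2)
    return false;
  uint64_t Max = std::max(Weights[0], Weights[1]);
  uint64_t Sum = uint64_t(Weights[0]) + Weights[1];
  return Sum != 0 &&
         BranchProbability::getBranchProbability(Max, Sum) >=
             BranchProbability(9, 10);
}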
void initializeSelectOptimizePass(PassRegistry &)
unsigned MispredictPenalty
Definition: MCSchedule.h:309
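MispredictPenalty is the scheduling model's estimate of the cycles lost on a mispredicted branch, which is what makes converting an unpredictable select a bad trade. A minimal sketch of reading it through TargetSchedModel (the helper is hypothetical):
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/MCSchedule.h"
using namespace llvm;

// Hypothetical: cycles charged for a branch mispredict on this subtarget.
static unsigned mispredictCycles(const TargetSubtargetInfo *TSI) {
  TargetSchedModel SchedModel;
  SchedModel.init(TSI); // bind the scheduling model to the subtarget
  return SchedModel.getMCSchedModel()->MispredictPenalty;
}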