47#define DEBUG_TYPE "select-optimize"
50 "Number of select groups considered for conversion to branch");
52 "Number of select groups converted due to expensive cold operand");
54 "Number of select groups converted due to high-predictability");
56 "Number of select groups not converted due to unpredictability");
58 "Number of select groups not converted due to cold basic block");
60 "Number of select groups converted due to loop-level analysis");
61STATISTIC(NumSelectsConverted,
"Number of selects converted");
64 "cold-operand-threshold",
65 cl::desc(
"Maximum frequency of path for an operand to be considered cold."),
69 "cold-operand-max-cost-multiplier",
70 cl::desc(
"Maximum cost multiplier of TCC_expensive for the dependence "
71 "slice of a cold operand to be considered inexpensive."),
76 cl::desc(
"Gradient gain threshold (%)."),
81 cl::desc(
"Minimum gain per loop (in cycles) threshold."),
85 "select-opti-loop-relative-gain-threshold",
87 "Minimum relative gain per loop threshold (1/X). Defaults to 12.5%"),
92 cl::desc(
"Default mispredict rate (initialized to 25%)."));
97 cl::desc(
"Disable loop-level heuristics."));
101class SelectOptimizeImpl {
113 SelectOptimizeImpl() =
default;
124 Scaled64 NonPredCost;
135 bool Inverted =
false;
142 SelectLike(
Instruction *I,
bool Inverted =
false,
unsigned CondIdx = 0)
143 :
I(
I), Inverted(Inverted), CondIdx(CondIdx) {}
150 unsigned getConditionOpIndex() {
return CondIdx; };
156 Value *getTrueValue(
bool HonorInverts =
true)
const {
157 if (Inverted && HonorInverts)
158 return getFalseValue(
false);
159 if (
auto *Sel = dyn_cast<SelectInst>(I))
160 return Sel->getTrueValue();
163 if (isa<BinaryOperator>(I))
172 Value *getFalseValue(
bool HonorInverts =
true)
const {
173 if (Inverted && HonorInverts)
174 return getTrueValue(
false);
175 if (
auto *Sel = dyn_cast<SelectInst>(I))
176 return Sel->getFalseValue();
180 if (
auto *BO = dyn_cast<BinaryOperator>(I))
181 return BO->getOperand(1 - CondIdx);
189 Scaled64 getOpCostOnBranch(
192 auto *
V = IsTrue ? getTrueValue() : getFalseValue();
194 if (
auto *
IV = dyn_cast<Instruction>(V)) {
195 auto It = InstCostMap.
find(
IV);
196 return It != InstCostMap.
end() ? It->second.NonPredCost
207 {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
208 {TTI::OK_UniformConstantValue, TTI::OP_PowerOf2});
210 if (
auto *OpI = dyn_cast<Instruction>(
I->getOperand(1 - CondIdx))) {
211 auto It = InstCostMap.
find(OpI);
212 if (It != InstCostMap.
end())
213 TotalCost += It->second.NonPredCost;
237 void optimizeSelectsBase(
Function &
F, SelectGroups &ProfSIGroups);
238 void optimizeSelectsInnerLoops(
Function &
F, SelectGroups &ProfSIGroups);
242 void convertProfitableSIGroups(SelectGroups &ProfSIGroups);
245 void collectSelectGroups(
BasicBlock &BB, SelectGroups &SIGroups);
249 void findProfitableSIGroupsBase(SelectGroups &SIGroups,
250 SelectGroups &ProfSIGroups);
251 void findProfitableSIGroupsInnerLoops(
const Loop *L, SelectGroups &SIGroups,
252 SelectGroups &ProfSIGroups);
256 bool isConvertToBranchProfitableBase(
const SelectGroup &ASI);
261 bool hasExpensiveColdOperand(
const SelectGroup &ASI);
266 void getExclBackwardsSlice(
Instruction *
I, std::stack<Instruction *> &Slice,
270 bool isSelectHighlyPredictable(
const SelectLike SI);
274 bool checkLoopHeuristics(
const Loop *L,
const CostInfo LoopDepth[2]);
278 bool computeLoopCosts(
const Loop *L,
const SelectGroups &SIGroups,
284 getSImap(
const SelectGroups &SIGroups);
289 getSGmap(
const SelectGroups &SIGroups);
292 std::optional<uint64_t> computeInstCost(
const Instruction *
I);
295 Scaled64 getMispredictionCost(
const SelectLike SI,
const Scaled64 CondCost);
298 Scaled64 getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
299 const SelectLike SI);
302 bool isSelectKindSupported(
const SelectLike SI);
306 SelectOptimizeImpl Impl;
316 return Impl.runOnFunction(
F, *
this);
333 SelectOptimizeImpl Impl(TM);
334 return Impl.run(
F,
FAM);
337char SelectOptimize::ID = 0;
354 TSI = TM->getSubtargetImpl(
F);
370 .getCachedResult<ProfileSummaryAnalysis>(*
F.getParent());
371 assert(PSI &&
"This pass requires module analysis pass `profile-summary`!");
380 TSchedModel.
init(TSI);
382 bool Changed = optimizeSelects(
F);
388 TSI =
TM->getSubtargetImpl(
F);
408 TSchedModel.
init(TSI);
414 return optimizeSelects(
F);
417bool SelectOptimizeImpl::optimizeSelects(
Function &
F) {
419 SelectGroups ProfSIGroups;
421 optimizeSelectsBase(
F, ProfSIGroups);
423 optimizeSelectsInnerLoops(
F, ProfSIGroups);
427 convertProfitableSIGroups(ProfSIGroups);
430 return !ProfSIGroups.empty();
433void SelectOptimizeImpl::optimizeSelectsBase(
Function &
F,
434 SelectGroups &ProfSIGroups) {
436 SelectGroups SIGroups;
440 if (L &&
L->isInnermost())
442 collectSelectGroups(BB, SIGroups);
446 findProfitableSIGroupsBase(SIGroups, ProfSIGroups);
449void SelectOptimizeImpl::optimizeSelectsInnerLoops(
Function &
F,
450 SelectGroups &ProfSIGroups) {
453 for (
unsigned long i = 0; i <
Loops.size(); ++i)
454 for (
Loop *ChildL :
Loops[i]->getSubLoops())
455 Loops.push_back(ChildL);
458 if (!
L->isInnermost())
461 SelectGroups SIGroups;
463 collectSelectGroups(*BB, SIGroups);
465 findProfitableSIGroupsInnerLoops(L, SIGroups, ProfSIGroups);
481 SelectOptimizeImpl::SelectLike &SI,
bool isTrue,
484 Value *V = isTrue ? SI.getTrueValue() : SI.getFalseValue();
486 auto *
IV = dyn_cast<Instruction>(V);
487 if (
IV && OptSelects.count(
IV))
488 return isTrue ? OptSelects[
IV].first : OptSelects[
IV].second;
492 auto *BO = cast<BinaryOperator>(SI.getI());
493 assert((BO->getOpcode() == Instruction::Add ||
494 BO->getOpcode() == Instruction::Or ||
495 BO->getOpcode() == Instruction::Sub) &&
496 "Only currently handling Add, Or and Sub binary operators.");
498 auto *CBO = BO->clone();
499 auto CondIdx = SI.getConditionOpIndex();
500 auto *AuxI = cast<Instruction>(CBO->getOperand(CondIdx));
501 if (isa<ZExtInst>(AuxI) || isa<LShrOperator>(AuxI)) {
502 CBO->setOperand(CondIdx, ConstantInt::get(CBO->getType(), 1));
504 assert((isa<AShrOperator>(AuxI) || isa<SExtInst>(AuxI)) &&
505 "Unexpected opcode");
506 CBO->setOperand(CondIdx, ConstantInt::get(CBO->getType(), -1));
509 unsigned OtherIdx = 1 - CondIdx;
510 if (
auto *
IV = dyn_cast<Instruction>(CBO->getOperand(OtherIdx))) {
511 if (OptSelects.count(
IV))
512 CBO->setOperand(OtherIdx,
513 isTrue ? OptSelects[
IV].first : OptSelects[
IV].second);
515 CBO->insertBefore(
B->getTerminator());
519void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
520 for (SelectGroup &ASI : ProfSIGroups) {
560 typedef std::stack<Instruction *>::size_type StackSizeType;
561 StackSizeType maxTrueSliceLen = 0, maxFalseSliceLen = 0;
562 for (SelectLike &SI : ASI.Selects) {
563 if (!isa<SelectInst>(
SI.getI()))
567 if (
auto *TI = dyn_cast_or_null<Instruction>(
SI.getTrueValue())) {
568 std::stack<Instruction *> TrueSlice;
569 getExclBackwardsSlice(TI, TrueSlice,
SI.getI(),
true);
570 maxTrueSliceLen = std::max(maxTrueSliceLen, TrueSlice.size());
573 if (
auto *FI = dyn_cast_or_null<Instruction>(
SI.getFalseValue())) {
574 if (isa<SelectInst>(
SI.getI()) || !FI->hasOneUse()) {
575 std::stack<Instruction *> FalseSlice;
576 getExclBackwardsSlice(FI, FalseSlice,
SI.getI(),
true);
577 maxFalseSliceLen = std::max(maxFalseSliceLen, FalseSlice.size());
593 for (StackSizeType IS = 0; IS < maxTrueSliceLen; ++IS) {
594 for (
auto &S : TrueSlices) {
596 TrueSlicesInterleaved.
push_back(S.top());
601 for (StackSizeType IS = 0; IS < maxFalseSliceLen; ++IS) {
602 for (
auto &S : FalseSlices) {
604 FalseSlicesInterleaved.
push_back(S.top());
611 SelectLike &
SI = ASI.Selects.front();
612 SelectLike &LastSI = ASI.Selects.back();
620 SplitPt.setHeadBit(
true);
622 BFI->setBlockFreq(EndBlock,
BFI->getBlockFreq(StartBlock));
629 auto DIt =
SI.getI()->getIterator();
630 auto NIt = ASI.Selects.begin();
631 while (&*DIt != LastSI.getI()) {
632 if (NIt != ASI.Selects.end() && &*DIt == NIt->getI())
639 for (
auto *DI : SinkInstrs)
657 std::next(LastSI.getI()->getIterator()));
662 BasicBlock *TrueBlock =
nullptr, *FalseBlock =
nullptr;
663 BranchInst *TrueBranch =
nullptr, *FalseBranch =
nullptr;
665 auto HasSelectLike = [](SelectGroup &SG,
bool IsTrue) {
666 for (
auto &SL : SG.Selects) {
667 if ((IsTrue ? SL.getTrueValue() : SL.getFalseValue()) ==
nullptr)
672 if (!TrueSlicesInterleaved.
empty() || HasSelectLike(ASI,
true)) {
676 TrueBranch->
setDebugLoc(LastSI.getI()->getDebugLoc());
677 for (
Instruction *TrueInst : TrueSlicesInterleaved)
678 TrueInst->moveBefore(TrueBranch);
680 if (!FalseSlicesInterleaved.
empty() || HasSelectLike(ASI,
false)) {
685 FalseBranch->setDebugLoc(LastSI.getI()->getDebugLoc());
686 for (
Instruction *FalseInst : FalseSlicesInterleaved)
687 FalseInst->moveBefore(FalseBranch);
691 if (TrueBlock == FalseBlock) {
692 assert(TrueBlock ==
nullptr &&
693 "Unexpected basic block transform while optimizing select");
698 FalseBranch->setDebugLoc(
SI.getI()->getDebugLoc());
707 if (TrueBlock ==
nullptr) {
710 TrueBlock = StartBlock;
711 }
else if (FalseBlock ==
nullptr) {
714 FalseBlock = StartBlock;
721 IB.CreateFreeze(ASI.Condition, ASI.Condition->getName() +
".frozen");
729 for (SelectLike &SI : ASI.Selects) {
737 for (
auto &SG : ProfSIGroups) {
738 if (SG.Condition ==
SI.getI())
742 SI.getI()->replaceAllUsesWith(PN);
749 ++NumSelectsConverted;
751 IB.CreateCondBr(CondFr, TT, FT,
SI.getI());
754 for (SelectLike &SI : ASI.Selects)
755 SI.getI()->eraseFromParent();
759void SelectOptimizeImpl::collectSelectGroups(
BasicBlock &BB,
760 SelectGroups &SIGroups) {
770 struct SelectLikeInfo {
774 unsigned ConditionIdx;
785 auto ProcessSelectInfo = [&SelectInfo, &SeenCmp](
Instruction *
I) {
786 if (
auto *Cmp = dyn_cast<CmpInst>(
I)) {
788 return SelectInfo.
end();
793 Cond->getType()->isIntegerTy(1)) {
795 return SelectInfo.
insert({
I, {
Cond,
true, Inverted, 0}}).first;
799 return SelectInfo.
insert({
I, {
Cond,
true,
true, 0}}).first;
805 return SelectInfo.
insert({
I, {
Cond,
false, Inverted, 0}}).first;
810 I->getType()->getIntegerBitWidth() == Shift->
getZExtValue() + 1) {
811 for (
auto *CmpI : SeenCmp) {
812 auto Pred = CmpI->getPredicate();
813 if (Val != CmpI->getOperand(0))
816 match(CmpI->getOperand(1), m_ConstantInt<-1>())) ||
822 match(CmpI->getOperand(1), m_ConstantInt<-1>()))) {
825 return SelectInfo.
insert({
I, {CmpI,
true, Inverted, 0}}).first;
828 return SelectInfo.
end();
836 auto MatchZExtOrSExtPattern =
838 auto MatchShiftPattern =
843 if ((
match(
I, MatchZExtOrSExtPattern) &&
X->getType()->isIntegerTy(1)) ||
844 (
match(
I, MatchShiftPattern) &&
845 X->getType()->getIntegerBitWidth() == Shift->
getZExtValue() + 1)) {
846 if (
I->getOpcode() != Instruction::Add &&
847 I->getOpcode() != Instruction::Sub &&
848 I->getOpcode() != Instruction::Or)
849 return SelectInfo.
end();
851 if (
I->getOpcode() == Instruction::Or &&
I->getType()->isIntegerTy(1))
852 return SelectInfo.
end();
858 unsigned Idx =
I->getOpcode() == Instruction::Sub ? 1 : 0;
860 auto *
Op =
I->getOperand(
Idx);
861 auto It = SelectInfo.
find(
Op);
862 if (It != SelectInfo.
end() && It->second.IsAuxiliary) {
863 Cond = It->second.Cond;
864 bool Inverted = It->second.IsInverted;
865 return SelectInfo.
insert({
I, {
Cond,
false, Inverted,
Idx}}).first;
869 return SelectInfo.
end();
872 bool AlreadyProcessed =
false;
875 while (BBIt != BB.
end()) {
877 if (
I->isDebugOrPseudoInst())
880 if (!AlreadyProcessed)
881 It = ProcessSelectInfo(
I);
883 AlreadyProcessed =
false;
885 if (It == SelectInfo.
end() || It->second.IsAuxiliary)
893 if (!
Cond->getType()->isIntegerTy(1))
896 SelectGroup SIGroup = {
Cond, {}};
897 SIGroup.Selects.emplace_back(
I, It->second.IsInverted,
898 It->second.ConditionIdx);
902 if (!isSelectKindSupported(SIGroup.Selects.front()))
905 while (BBIt != BB.
end()) {
914 It = ProcessSelectInfo(NI);
915 if (It == SelectInfo.
end()) {
916 AlreadyProcessed =
true;
921 auto [CurrCond, IsAux, IsRev, CondIdx] = It->second;
922 if (
Cond != CurrCond) {
923 AlreadyProcessed =
true;
928 SIGroup.Selects.emplace_back(NI, IsRev, CondIdx);
932 dbgs() <<
"New Select group (" << SIGroup.Selects.size() <<
") with\n";
933 for (
auto &SI : SIGroup.Selects)
934 dbgs() <<
" " << *
SI.getI() <<
"\n";
937 SIGroups.push_back(SIGroup);
941void SelectOptimizeImpl::findProfitableSIGroupsBase(
942 SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
943 for (SelectGroup &ASI : SIGroups) {
944 ++NumSelectOptAnalyzed;
945 if (isConvertToBranchProfitableBase(ASI))
946 ProfSIGroups.push_back(ASI);
956void SelectOptimizeImpl::findProfitableSIGroupsInnerLoops(
957 const Loop *L, SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
958 NumSelectOptAnalyzed += SIGroups.size();
970 CostInfo LoopCost[2] = {{Scaled64::getZero(), Scaled64::getZero()},
971 {Scaled64::getZero(), Scaled64::getZero()}};
972 if (!computeLoopCosts(L, SIGroups, InstCostMap, LoopCost) ||
973 !checkLoopHeuristics(L, LoopCost)) {
977 for (SelectGroup &ASI : SIGroups) {
980 Scaled64 SelectCost = Scaled64::getZero(), BranchCost = Scaled64::getZero();
981 for (SelectLike &SI : ASI.Selects) {
982 SelectCost = std::max(SelectCost, InstCostMap[
SI.getI()].PredCost);
983 BranchCost = std::max(BranchCost, InstCostMap[
SI.getI()].NonPredCost);
985 if (BranchCost < SelectCost) {
987 ASI.Selects.front().getI());
988 OR <<
"Profitable to convert to branch (loop analysis). BranchCost="
989 << BranchCost.toString() <<
", SelectCost=" << SelectCost.toString()
992 ++NumSelectConvertedLoop;
993 ProfSIGroups.push_back(ASI);
996 ASI.Selects.front().getI());
997 ORmiss <<
"Select is more profitable (loop analysis). BranchCost="
998 << BranchCost.toString()
999 <<
", SelectCost=" << SelectCost.toString() <<
". ";
1005bool SelectOptimizeImpl::isConvertToBranchProfitableBase(
1006 const SelectGroup &ASI) {
1007 const SelectLike &
SI = ASI.Selects.front();
1016 ORmiss <<
"Not converted to branch because of cold basic block. ";
1022 if (
SI.getI()->getMetadata(LLVMContext::MD_unpredictable)) {
1024 ORmiss <<
"Not converted to branch because of unpredictable branch. ";
1032 ++NumSelectConvertedHighPred;
1033 OR <<
"Converted to branch because of highly predictable branch. ";
1040 if (hasExpensiveColdOperand(ASI)) {
1041 ++NumSelectConvertedExpColdOperand;
1042 OR <<
"Converted to branch because of expensive cold operand.";
1047 ORmiss <<
"Not profitable to convert to branch (base heuristic).";
1054 return (Numerator + (Denominator / 2)) / Denominator;
1059 if (isa<SelectInst>(SI.getI()))
1064bool SelectOptimizeImpl::hasExpensiveColdOperand(
const SelectGroup &ASI) {
1065 bool ColdOperand =
false;
1066 uint64_t TrueWeight, FalseWeight, TotalWeight;
1068 uint64_t MinWeight = std::min(TrueWeight, FalseWeight);
1069 TotalWeight = TrueWeight + FalseWeight;
1074 ASI.Selects.front().getI());
1075 ORmiss <<
"Profile data available but missing branch-weights metadata for "
1076 "select instruction. ";
1083 for (SelectLike SI : ASI.Selects) {
1086 if (TrueWeight < FalseWeight) {
1087 ColdI = dyn_cast_or_null<Instruction>(
SI.getTrueValue());
1088 HotWeight = FalseWeight;
1090 ColdI = dyn_cast_or_null<Instruction>(
SI.getFalseValue());
1091 HotWeight = TrueWeight;
1094 std::stack<Instruction *> ColdSlice;
1095 getExclBackwardsSlice(ColdI, ColdSlice,
SI.getI());
1097 while (!ColdSlice.empty()) {
1121 if (LoadI->
getParent() != SI->getParent())
1124 while (&*It != SI) {
1125 if (It->mayWriteToMemory())
1138void SelectOptimizeImpl::getExclBackwardsSlice(
Instruction *
I,
1139 std::stack<Instruction *> &Slice,
1143 std::queue<Instruction *> Worklist;
1145 while (!Worklist.empty()) {
1153 if (!
II->hasOneUse())
1159 if (ForSinking && (
II->isTerminator() ||
II->mayHaveSideEffects() ||
1160 isa<SelectInst>(
II) || isa<PHINode>(
II)))
1172 if (
BFI->getBlockFreq(
II->getParent()) <
BFI->getBlockFreq(
I->getParent()))
1180 if (
auto *OpI = dyn_cast<Instruction>(
Op))
1185bool SelectOptimizeImpl::isSelectHighlyPredictable(
const SelectLike SI) {
1189 uint64_t Sum = TrueWeight + FalseWeight;
1199bool SelectOptimizeImpl::checkLoopHeuristics(
const Loop *L,
1200 const CostInfo LoopCost[2]) {
1208 L->getHeader()->getFirstNonPHI());
1210 if (LoopCost[0].NonPredCost > LoopCost[0].PredCost ||
1211 LoopCost[1].NonPredCost >= LoopCost[1].PredCost) {
1212 ORmissL <<
"No select conversion in the loop due to no reduction of loop's "
1218 Scaled64 Gain[2] = {LoopCost[0].PredCost - LoopCost[0].NonPredCost,
1219 LoopCost[1].PredCost - LoopCost[1].NonPredCost};
1226 Scaled64 RelativeGain = Scaled64::get(100) * Gain[1] / LoopCost[1].PredCost;
1227 ORmissL <<
"No select conversion in the loop due to small reduction of "
1228 "loop's critical path. Gain="
1229 << Gain[1].toString()
1230 <<
", RelativeGain=" << RelativeGain.toString() <<
"%. ";
1240 if (Gain[1] > Gain[0]) {
1241 Scaled64 GradientGain = Scaled64::get(100) * (Gain[1] - Gain[0]) /
1242 (LoopCost[1].PredCost - LoopCost[0].PredCost);
1244 ORmissL <<
"No select conversion in the loop due to small gradient gain. "
1246 << GradientGain.toString() <<
"%. ";
1252 else if (Gain[1] < Gain[0]) {
1254 <<
"No select conversion in the loop due to negative gradient gain. ";
1268bool SelectOptimizeImpl::computeLoopCosts(
1269 const Loop *L,
const SelectGroups &SIGroups,
1271 LLVM_DEBUG(
dbgs() <<
"Calculating Latency / IPredCost / INonPredCost of loop "
1272 <<
L->getHeader()->getName() <<
"\n");
1273 const auto SImap = getSImap(SIGroups);
1274 const auto SGmap = getSGmap(SIGroups);
1277 const unsigned Iterations = 2;
1278 for (
unsigned Iter = 0; Iter < Iterations; ++Iter) {
1280 CostInfo &MaxCost = LoopCost[Iter];
1283 if (
I.isDebugOrPseudoInst())
1286 Scaled64 IPredCost = Scaled64::getZero(),
1287 INonPredCost = Scaled64::getZero();
1292 for (
const Use &U :
I.operands()) {
1293 auto UI = dyn_cast<Instruction>(
U.get());
1296 if (InstCostMap.
count(UI)) {
1297 IPredCost = std::max(IPredCost, InstCostMap[UI].PredCost);
1298 INonPredCost = std::max(INonPredCost, InstCostMap[UI].NonPredCost);
1301 auto ILatency = computeInstCost(&
I);
1304 ORmissL <<
"Invalid instruction cost preventing analysis and "
1305 "optimization of the inner-most loop containing this "
1310 IPredCost += Scaled64::get(*ILatency);
1311 INonPredCost += Scaled64::get(*ILatency);
1319 if (SImap.contains(&
I)) {
1320 auto SI = SImap.at(&
I);
1321 const auto *SG = SGmap.at(&
I);
1322 Scaled64 TrueOpCost =
SI.getOpCostOnBranch(
true, InstCostMap,
TTI);
1323 Scaled64 FalseOpCost =
SI.getOpCostOnBranch(
false, InstCostMap,
TTI);
1324 Scaled64 PredictedPathCost =
1325 getPredictedPathCost(TrueOpCost, FalseOpCost, SI);
1327 Scaled64 CondCost = Scaled64::getZero();
1328 if (
auto *CI = dyn_cast<Instruction>(SG->Condition))
1329 if (InstCostMap.
count(CI))
1330 CondCost = InstCostMap[CI].NonPredCost;
1331 Scaled64 MispredictCost = getMispredictionCost(SI, CondCost);
1333 INonPredCost = PredictedPathCost + MispredictCost;
1336 << INonPredCost <<
" for " <<
I <<
"\n");
1338 InstCostMap[&
I] = {IPredCost, INonPredCost};
1339 MaxCost.PredCost = std::max(MaxCost.PredCost, IPredCost);
1340 MaxCost.NonPredCost = std::max(MaxCost.NonPredCost, INonPredCost);
1344 <<
" MaxCost = " << MaxCost.PredCost <<
" "
1345 << MaxCost.NonPredCost <<
"\n");
1351SelectOptimizeImpl::getSImap(
const SelectGroups &SIGroups) {
1353 for (
const SelectGroup &ASI : SIGroups)
1354 for (
const SelectLike &SI : ASI.Selects)
1360SelectOptimizeImpl::getSGmap(
const SelectGroups &SIGroups) {
1362 for (
const SelectGroup &ASI : SIGroups)
1363 for (
const SelectLike &SI : ASI.Selects)
1368std::optional<uint64_t>
1369SelectOptimizeImpl::computeInstCost(
const Instruction *
I) {
1373 return std::optional<uint64_t>(*OC);
1374 return std::nullopt;
1378SelectOptimizeImpl::getMispredictionCost(
const SelectLike SI,
1379 const Scaled64 CondCost) {
1387 if (isSelectHighlyPredictable(SI))
1393 Scaled64 MispredictCost =
1394 std::max(Scaled64::get(MispredictPenalty), CondCost) *
1395 Scaled64::get(MispredictRate);
1396 MispredictCost /= Scaled64::get(100);
1398 return MispredictCost;
1404SelectOptimizeImpl::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
1405 const SelectLike SI) {
1406 Scaled64 PredPathCost;
1409 uint64_t SumWeight = TrueWeight + FalseWeight;
1410 if (SumWeight != 0) {
1411 PredPathCost = TrueCost * Scaled64::get(TrueWeight) +
1412 FalseCost * Scaled64::get(FalseWeight);
1413 PredPathCost /= Scaled64::get(SumWeight);
1414 return PredPathCost;
1419 PredPathCost = std::max(TrueCost * Scaled64::get(3) + FalseCost,
1420 FalseCost * Scaled64::get(3) + TrueCost);
1421 PredPathCost /= Scaled64::get(4);
1422 return PredPathCost;
1425bool SelectOptimizeImpl::isSelectKindSupported(
const SelectLike SI) {
1427 if (
SI.getType()->isVectorTy())
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static Value * getTrueOrFalseValue(SelectInst *SI, bool isTrue, const SmallPtrSet< const Instruction *, 2 > &Selects)
If isTrue is true, return the true value of SI, otherwise return false value of SI.
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static bool runOnFunction(Function &F, bool PostInlining)
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSafeToSinkLoad(Instruction *LoadI, Instruction *SI)
static cl::opt< unsigned > ColdOperandMaxCostMultiplier("cold-operand-max-cost-multiplier", cl::desc("Maximum cost multiplier of TCC_expensive for the dependence " "slice of a cold operand to be considered inexpensive."), cl::init(1), cl::Hidden)
static cl::opt< unsigned > ColdOperandThreshold("cold-operand-threshold", cl::desc("Maximum frequency of path for an operand to be considered cold."), cl::init(20), cl::Hidden)
static cl::opt< bool > DisableLoopLevelHeuristics("disable-loop-level-heuristics", cl::Hidden, cl::init(false), cl::desc("Disable loop-level heuristics."))
static cl::opt< unsigned > GainCycleThreshold("select-opti-loop-cycle-gain-threshold", cl::desc("Minimum gain per loop (in cycles) threshold."), cl::init(4), cl::Hidden)
static cl::opt< unsigned > MispredictDefaultRate("mispredict-default-rate", cl::Hidden, cl::init(25), cl::desc("Default mispredict rate (initialized to 25%)."))
static void EmitAndPrintRemark(OptimizationRemarkEmitter *ORE, DiagnosticInfoOptimizationBase &Rem)
static cl::opt< unsigned > GainGradientThreshold("select-opti-loop-gradient-gain-threshold", cl::desc("Gradient gain threshold (%)."), cl::init(25), cl::Hidden)
static cl::opt< unsigned > GainRelativeThreshold("select-opti-loop-relative-gain-threshold", cl::desc("Minimum relative gain per loop threshold (1/X). Defaults to 12.5%"), cl::init(8), cl::Hidden)
This file contains the declaration of the SelectOptimizePass class, its corresponding pass name is se...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static SymbolRef::Type getType(const Symbol *Sym)
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
static const uint32_t IV[8]
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
void insertDbgRecordBefore(DbgRecord *DR, InstListType::iterator Here)
Insert a DbgRecord into a block at the position given by Here.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
InstListType::iterator iterator
Instruction iterators...
LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Analysis pass which computes BlockFrequencyInfo.
Legacy analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ ICMP_SGT
signed greater than
@ ICMP_SGE
signed greater or equal
This is the shared class of boolean and integer constants.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
This class represents an Operation in the Expression.
Base class for non-instruction debug metadata records that have positions within IR.
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Common features for diagnostics dealing with optimization remarks that are used by both IR and MIR pa...
std::string getMsg() const
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
bool isDebugOrPseudoInst() const LLVM_READONLY
Return true if the instruction is a DbgInfoIntrinsic or PseudoProbeInst.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Analysis pass that exposes the LoopInfo for a function.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
The legacy pass manager's analysis pass to compute loop information.
Represents a single loop in the control flow graph.
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
bool isColdBlock(const BBType *BB, BFIT *BFI) const
Returns true if BasicBlock BB is considered cold.
Simple representation of a scaled number.
static ScaledNumber get(uint64_t N)
static ScaledNumber getZero()
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
bool insert(const value_type &X)
Insert a new element into the SetVector.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Analysis pass providing the TargetTransformInfo.
virtual bool isSelectSupported(SelectSupportKind) const
SelectSupportKind
Enum that describes what type of support for selects the target has.
bool isPredictableSelectExpensive() const
Return true if selects are only cheaper than branches if the branch is unlikely to be predicted right...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
Target-Independent Code Generator Pass Configuration Options.
Provide an instruction scheduling machine model to CodeGen passes.
const MCSchedModel * getMCSchedModel() const
void init(const TargetSubtargetInfo *TSInfo)
Initialize the machine model for instruction scheduling.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetLowering * getTargetLowering() const
The instances of the Type class are immutable: once they are created, they are never changed.
bool isIntegerTy() const
True if this is an instance of IntegerType.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
void takeName(Value *V)
Transfer the name from V to this value.
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
bool match(Val *V, const Pattern &P)
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
OneUse_match< T > m_OneUse(const T &SubPattern)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
FunctionPass * createSelectOptimizePass()
This pass converts conditional moves to conditional jumps when profitable.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
constexpr T divideNearest(U Numerator, V Denominator)
Returns (Numerator / Denominator) rounded by round-half-up.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
void initializeSelectOptimizePass(PassRegistry &)
unsigned MispredictPenalty