45 #define DEBUG_TYPE "select-optimize"
48 "Number of select groups considered for conversion to branch");
49 STATISTIC(NumSelectConvertedExpColdOperand,
50 "Number of select groups converted due to expensive cold operand");
52 "Number of select groups converted due to high-predictability");
54 "Number of select groups not converted due to unpredictability");
56 "Number of select groups not converted due to cold basic block");
58 "Number of select groups converted due to loop-level analysis");
59 STATISTIC(NumSelectsConverted,
"Number of selects converted");
62 "cold-operand-threshold",
63 cl::desc(
"Maximum frequency of path for an operand to be considered cold."),
67 "cold-operand-max-cost-multiplier",
68 cl::desc(
"Maximum cost multiplier of TCC_expensive for the dependence "
69 "slice of a cold operand to be considered inexpensive."),
74 cl::desc(
"Gradient gain threshold (%)."),
79 cl::desc(
"Minimum gain per loop (in cycles) threshold."),
83 "select-opti-loop-relative-gain-threshold",
85 "Minimum relative gain per loop threshold (1/X). Defaults to 12.5%"),
90 cl::desc(
"Default mispredict rate (initialized to 25%)."));
95 cl::desc(
"Disable loop-level heuristics."));
106 std::unique_ptr<BlockFrequencyInfo>
BFI;
107 std::unique_ptr<BranchProbabilityInfo> BPI;
153 void optimizeSelectsBase(
Function &
F, SelectGroups &ProfSIGroups);
154 void optimizeSelectsInnerLoops(
Function &
F, SelectGroups &ProfSIGroups);
158 void convertProfitableSIGroups(SelectGroups &ProfSIGroups);
161 void collectSelectGroups(
BasicBlock &
BB, SelectGroups &SIGroups);
165 void findProfitableSIGroupsBase(SelectGroups &SIGroups,
166 SelectGroups &ProfSIGroups);
167 void findProfitableSIGroupsInnerLoops(
const Loop *L, SelectGroups &SIGroups,
168 SelectGroups &ProfSIGroups);
182 void getExclBackwardsSlice(
Instruction *
I, std::stack<Instruction *> &Slice,
183 bool ForSinking =
false);
190 bool checkLoopHeuristics(
const Loop *L,
const CostInfo LoopDepth[2]);
194 bool computeLoopCosts(
const Loop *L,
const SelectGroups &SIGroups,
233 TSI =
TM->getSubtargetImpl(
F);
234 TLI = TSI->getTargetLowering();
244 TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
F);
245 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
246 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
249 PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
250 ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
251 TSchedModel.init(TSI);
257 return optimizeSelects(
F);
260 bool SelectOptimize::optimizeSelects(
Function &
F) {
262 SelectGroups ProfSIGroups;
264 optimizeSelectsBase(
F, ProfSIGroups);
266 optimizeSelectsInnerLoops(
F, ProfSIGroups);
270 convertProfitableSIGroups(ProfSIGroups);
273 return !ProfSIGroups.empty();
276 void SelectOptimize::optimizeSelectsBase(
Function &
F,
277 SelectGroups &ProfSIGroups) {
279 SelectGroups SIGroups;
282 Loop *L = LI->getLoopFor(&
BB);
285 collectSelectGroups(
BB, SIGroups);
289 findProfitableSIGroupsBase(SIGroups, ProfSIGroups);
292 void SelectOptimize::optimizeSelectsInnerLoops(
Function &
F,
293 SelectGroups &ProfSIGroups) {
296 for (
unsigned long i = 0;
i <
Loops.size(); ++
i)
298 Loops.push_back(ChildL);
304 SelectGroups SIGroups;
306 collectSelectGroups(*
BB, SIGroups);
308 findProfitableSIGroupsInnerLoops(L, SIGroups, ProfSIGroups);
321 DefSI = dyn_cast<SelectInst>(V)) {
322 assert(DefSI->getCondition() ==
SI->getCondition() &&
323 "The condition of DefSI does not match with SI");
324 V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
326 assert(V &&
"Failed to get select true/false value");
330 void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
331 for (SelectGroup &ASI : ProfSIGroups) {
371 typedef std::stack<Instruction *>::size_type StackSizeType;
372 StackSizeType maxTrueSliceLen = 0, maxFalseSliceLen = 0;
376 if (
auto *TI = dyn_cast<Instruction>(
SI->getTrueValue())) {
377 std::stack<Instruction *> TrueSlice;
378 getExclBackwardsSlice(TI, TrueSlice,
true);
379 maxTrueSliceLen =
std::max(maxTrueSliceLen, TrueSlice.size());
380 TrueSlices.push_back(TrueSlice);
382 if (
auto *FI = dyn_cast<Instruction>(
SI->getFalseValue())) {
383 std::stack<Instruction *> FalseSlice;
384 getExclBackwardsSlice(FI, FalseSlice,
true);
385 maxFalseSliceLen =
std::max(maxFalseSliceLen, FalseSlice.size());
386 FalseSlices.push_back(FalseSlice);
400 for (StackSizeType IS = 0; IS < maxTrueSliceLen; ++IS) {
401 for (
auto &
S : TrueSlices) {
403 TrueSlicesInterleaved.push_back(
S.top());
408 for (StackSizeType IS = 0; IS < maxFalseSliceLen; ++IS) {
409 for (
auto &
S : FalseSlices) {
411 FalseSlicesInterleaved.push_back(
S.top());
423 BFI->setBlockFreq(EndBlock,
BFI->getBlockFreq(StartBlock).getFrequency());
430 auto DIt =
SI->getIterator();
431 while (&*DIt != LastSI) {
432 if (DIt->isDebugOrPseudoInst())
433 DebugPseudoINS.push_back(&*DIt);
436 for (
auto DI : DebugPseudoINS) {
442 BasicBlock *TrueBlock =
nullptr, *FalseBlock =
nullptr;
443 BranchInst *TrueBranch =
nullptr, *FalseBranch =
nullptr;
444 if (!TrueSlicesInterleaved.empty()) {
449 for (
Instruction *TrueInst : TrueSlicesInterleaved)
450 TrueInst->moveBefore(TrueBranch);
452 if (!FalseSlicesInterleaved.empty()) {
457 for (
Instruction *FalseInst : FalseSlicesInterleaved)
458 FalseInst->moveBefore(FalseBranch);
462 if (TrueBlock == FalseBlock) {
463 assert(TrueBlock ==
nullptr &&
464 "Unexpected basic block transform while optimizing select");
469 FalseBranch->setDebugLoc(
SI->getDebugLoc());
478 if (TrueBlock ==
nullptr) {
481 TrueBlock = StartBlock;
482 }
else if (FalseBlock ==
nullptr) {
485 FalseBlock = StartBlock;
492 IB.CreateFreeze(
SI->getCondition(),
SI->getName() +
".frozen");
493 IB.CreateCondBr(CondFr, TT, FT,
SI);
496 INS.
insert(ASI.begin(), ASI.end());
500 for (
auto It = ASI.rbegin(); It != ASI.rend(); ++It) {
509 SI->replaceAllUsesWith(PN);
510 SI->eraseFromParent();
512 ++NumSelectsConverted;
517 void SelectOptimize::collectSelectGroups(
BasicBlock &
BB,
518 SelectGroups &SIGroups) {
520 while (BBIt !=
BB.end()) {
524 SIGroup.push_back(
SI);
525 while (BBIt !=
BB.end()) {
529 SIGroup.push_back(NSI);
540 if (!isSelectKindSupported(
SI))
543 SIGroups.push_back(SIGroup);
548 void SelectOptimize::findProfitableSIGroupsBase(SelectGroups &SIGroups,
549 SelectGroups &ProfSIGroups) {
550 for (SelectGroup &ASI : SIGroups) {
551 ++NumSelectOptAnalyzed;
552 if (isConvertToBranchProfitableBase(ASI))
553 ProfSIGroups.push_back(ASI);
557 void SelectOptimize::findProfitableSIGroupsInnerLoops(
558 const Loop *L, SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
559 NumSelectOptAnalyzed += SIGroups.size();
571 CostInfo LoopCost[2] = {{Scaled64::getZero(), Scaled64::getZero()},
572 {Scaled64::getZero(), Scaled64::getZero()}};
573 if (!computeLoopCosts(L, SIGroups, InstCostMap, LoopCost) ||
574 !checkLoopHeuristics(L, LoopCost)) {
578 for (SelectGroup &ASI : SIGroups) {
581 Scaled64 SelectCost = Scaled64::getZero(), BranchCost = Scaled64::getZero();
583 SelectCost =
std::max(SelectCost, InstCostMap[
SI].PredCost);
584 BranchCost =
std::max(BranchCost, InstCostMap[
SI].NonPredCost);
586 if (BranchCost < SelectCost) {
588 OR <<
"Profitable to convert to branch (loop analysis). BranchCost="
589 << BranchCost.toString() <<
", SelectCost=" << SelectCost.
toString()
592 ++NumSelectConvertedLoop;
593 ProfSIGroups.push_back(ASI);
596 ORmiss <<
"Select is more profitable (loop analysis). BranchCost="
597 << BranchCost.toString()
598 <<
", SelectCost=" << SelectCost.
toString() <<
". ";
604 bool SelectOptimize::isConvertToBranchProfitableBase(
611 if (PSI->isColdBlock(
SI->getParent(),
BFI.get())) {
613 ORmiss <<
"Not converted to branch because of cold basic block. ";
619 if (
SI->getMetadata(LLVMContext::MD_unpredictable)) {
621 ORmiss <<
"Not converted to branch because of unpredictable branch. ";
628 if (isSelectHighlyPredictable(
SI) && TLI->isPredictableSelectExpensive()) {
629 ++NumSelectConvertedHighPred;
630 OR <<
"Converted to branch because of highly predictable branch. ";
637 if (hasExpensiveColdOperand(ASI)) {
638 ++NumSelectConvertedExpColdOperand;
639 OR <<
"Converted to branch because of expensive cold operand.";
644 ORmiss <<
"Not profitable to convert to branch (base heuristic).";
651 return (Numerator + (Denominator / 2)) / Denominator;
654 bool SelectOptimize::hasExpensiveColdOperand(
656 bool ColdOperand =
false;
657 uint64_t TrueWeight, FalseWeight, TotalWeight;
658 if (ASI.front()->extractProfMetadata(TrueWeight, FalseWeight)) {
660 TotalWeight = TrueWeight + FalseWeight;
663 }
else if (PSI->hasProfileSummary()) {
665 ORmiss <<
"Profile data available but missing branch-weights metadata for "
666 "select instruction. ";
676 if (TrueWeight < FalseWeight) {
677 ColdI = dyn_cast<Instruction>(
SI->getTrueValue());
678 HotWeight = FalseWeight;
680 ColdI = dyn_cast<Instruction>(
SI->getFalseValue());
681 HotWeight = TrueWeight;
684 std::stack<Instruction *> ColdSlice;
685 getExclBackwardsSlice(ColdI, ColdSlice);
687 while (!ColdSlice.empty()) {
712 void SelectOptimize::getExclBackwardsSlice(
Instruction *
I,
713 std::stack<Instruction *> &Slice,
716 std::queue<Instruction *> Worklist;
718 while (!Worklist.empty()) {
723 if (!Visited.
insert(II).second)
733 isa<SelectInst>(II) || isa<PHINode>(II)))
746 if (
auto *OpI = dyn_cast<Instruction>(II->
getOperand(k)))
751 bool SelectOptimize::isSelectHighlyPredictable(
const SelectInst *
SI) {
753 if (
SI->extractProfMetadata(TrueWeight, FalseWeight)) {
755 uint64_t Sum = TrueWeight + FalseWeight;
765 bool SelectOptimize::checkLoopHeuristics(
const Loop *L,
766 const CostInfo LoopCost[2]) {
776 if (LoopCost[0].NonPredCost > LoopCost[0].PredCost ||
777 LoopCost[1].NonPredCost >= LoopCost[1].PredCost) {
778 ORmissL <<
"No select conversion in the loop due to no reduction of loop's "
784 Scaled64 Gain[2] = {LoopCost[0].PredCost - LoopCost[0].NonPredCost,
785 LoopCost[1].PredCost - LoopCost[1].NonPredCost};
793 ORmissL <<
"No select conversion in the loop due to small reduction of "
794 "loop's critical path. Gain="
796 <<
", RelativeGain=" << RelativeGain.
toString() <<
"%. ";
806 if (Gain[1] > Gain[0]) {
808 (LoopCost[1].PredCost - LoopCost[0].PredCost);
810 ORmissL <<
"No select conversion in the loop due to small gradient gain. "
812 << GradientGain.
toString() <<
"%. ";
818 else if (Gain[1] < Gain[0]) {
820 <<
"No select conversion in the loop due to negative gradient gain. ";
834 bool SelectOptimize::computeLoopCosts(
835 const Loop *L,
const SelectGroups &SIGroups,
837 const auto &SIset = getSIset(SIGroups);
840 const unsigned Iterations = 2;
841 for (
unsigned Iter = 0; Iter < Iterations; ++Iter) {
843 CostInfo &MaxCost = LoopCost[Iter];
846 if (
I.isDebugOrPseudoInst())
849 Scaled64 IPredCost = Scaled64::getZero(),
850 INonPredCost = Scaled64::getZero();
855 for (
const Use &U :
I.operands()) {
856 auto UI = dyn_cast<Instruction>(U.get());
859 if (InstCostMap.
count(UI)) {
860 IPredCost =
std::max(IPredCost, InstCostMap[UI].PredCost);
861 INonPredCost =
std::max(INonPredCost, InstCostMap[UI].NonPredCost);
864 auto ILatency = computeInstCost(&
I);
867 ORmissL <<
"Invalid instruction cost preventing analysis and "
868 "optimization of the inner-most loop containing this "
882 if (SIset.contains(&
I)) {
883 auto SI = dyn_cast<SelectInst>(&
I);
885 Scaled64 TrueOpCost = Scaled64::getZero(),
886 FalseOpCost = Scaled64::getZero();
887 if (
auto *TI = dyn_cast<Instruction>(
SI->getTrueValue()))
888 if (InstCostMap.
count(TI))
889 TrueOpCost = InstCostMap[TI].NonPredCost;
890 if (
auto *FI = dyn_cast<Instruction>(
SI->getFalseValue()))
891 if (InstCostMap.
count(FI))
892 FalseOpCost = InstCostMap[FI].NonPredCost;
894 getPredictedPathCost(TrueOpCost, FalseOpCost,
SI);
896 Scaled64 CondCost = Scaled64::getZero();
897 if (
auto *CI = dyn_cast<Instruction>(
SI->getCondition()))
898 if (InstCostMap.
count(CI))
899 CondCost = InstCostMap[CI].NonPredCost;
900 Scaled64 MispredictCost = getMispredictionCost(
SI, CondCost);
902 INonPredCost = PredictedPathCost + MispredictCost;
905 InstCostMap[&
I] = {IPredCost, INonPredCost};
906 MaxCost.PredCost =
std::max(MaxCost.PredCost, IPredCost);
907 MaxCost.NonPredCost =
std::max(MaxCost.NonPredCost, INonPredCost);
915 SelectOptimize::getSIset(
const SelectGroups &SIGroups) {
917 for (
const SelectGroup &ASI : SIGroups)
932 SelectOptimize::getMispredictionCost(
const SelectInst *
SI,
934 uint64_t MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty;
941 if (isSelectHighlyPredictable(
SI))
952 return MispredictCost;
962 if (
SI->extractProfMetadata(TrueWeight, FalseWeight)) {
963 uint64_t SumWeight = TrueWeight + FalseWeight;
964 if (SumWeight != 0) {
979 bool SelectOptimize::isSelectKindSupported(
SelectInst *
SI) {
980 bool VectorCond = !
SI->getCondition()->getType()->isIntegerTy(1);
984 if (
SI->getType()->isVectorTy())
988 return TLI->isSelectSupported(SelectKind);