#define DEBUG_TYPE "select-optimize"

STATISTIC(NumSelectOptAnalyzed,
          "Number of select groups considered for conversion to branch");
STATISTIC(NumSelectConvertedExpColdOperand,
          "Number of select groups converted due to expensive cold operand");
STATISTIC(NumSelectConvertedHighPred,
          "Number of select groups converted due to high-predictability");
STATISTIC(NumSelectUnPred,
          "Number of select groups not converted due to unpredictability");
STATISTIC(NumSelectColdBB,
          "Number of select groups not converted due to cold basic block");
STATISTIC(NumSelectConvertedLoop,
          "Number of select groups converted due to loop-level analysis");
STATISTIC(NumSelectsConverted, "Number of selects converted");
 
static cl::opt<unsigned> ColdOperandThreshold(
    "cold-operand-threshold",
    cl::desc("Maximum frequency of path for an operand to be considered cold."),
    cl::init(20), cl::Hidden);

static cl::opt<unsigned> ColdOperandMaxCostMultiplier(
    "cold-operand-max-cost-multiplier",
    cl::desc("Maximum cost multiplier of TCC_expensive for the dependence "
             "slice of a cold operand to be considered inexpensive."),
    cl::init(1), cl::Hidden);

static cl::opt<unsigned>
    GainGradientThreshold("select-opti-loop-gradient-gain-threshold",
                          cl::desc("Gradient gain threshold (%)."),
                          cl::init(25), cl::Hidden);

static cl::opt<unsigned>
    GainCycleThreshold("select-opti-loop-cycle-gain-threshold",
                       cl::desc("Minimum gain per loop (in cycles) threshold."),
                       cl::init(4), cl::Hidden);

static cl::opt<unsigned> GainRelativeThreshold(
    "select-opti-loop-relative-gain-threshold",
    cl::desc(
        "Minimum relative gain per loop threshold (1/X). Defaults to 12.5%"),
    cl::init(8), cl::Hidden);

static cl::opt<unsigned> MispredictDefaultRate(
    "mispredict-default-rate", cl::Hidden, cl::init(25),
    cl::desc("Default mispredict rate (initialized to 25%)."));

static cl::opt<bool>
    DisableLoopLevelHeuristics("disable-loop-level-heuristics", cl::Hidden,
                               cl::init(false),
                               cl::desc("Disable loop-level heuristics."));
 
class SelectOptimizeImpl {

  SelectOptimizeImpl() = default;

  using Scaled64 = ScaledNumber<uint64_t>;

    Scaled64 NonPredCost;

    bool Inverted = false;

    SelectLike(Instruction *I, bool Inverted = false, unsigned CondIdx = 0)
        : I(I), Inverted(Inverted), CondIdx(CondIdx) {}

    unsigned getConditionOpIndex() { return CondIdx; };

    Value *getTrueValue(bool HonorInverts = true) const {
      if (Inverted && HonorInverts)
        return getFalseValue(false);

        return Sel->getTrueValue();

    Value *getFalseValue(bool HonorInverts = true) const {
      if (Inverted && HonorInverts)
        return getTrueValue(false);

        return Sel->getFalseValue();

        return BO->getOperand(1 - CondIdx);

    Scaled64 getOpCostOnBranch(
        bool IsTrue, const DenseMap<const Instruction *, CostInfo> &InstCostMap,
        const TargetTransformInfo *TTI) {
      auto *V = IsTrue ? getTrueValue() : getFalseValue();

          auto It = InstCostMap.find(IV);
          return It != InstCostMap.end() ? It->second.NonPredCost

          {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
          {TTI::OK_UniformConstantValue, TTI::OP_PowerOf2});

        auto It = InstCostMap.find(OpI);
        if (It != InstCostMap.end())
          TotalCost += It->second.NonPredCost;

  using SelectGroups = SmallVector<SelectGroup, 2>;

  bool optimizeSelects(Function &F);

  void optimizeSelectsBase(Function &F, SelectGroups &ProfSIGroups);
  void optimizeSelectsInnerLoops(Function &F, SelectGroups &ProfSIGroups);

  void convertProfitableSIGroups(SelectGroups &ProfSIGroups);

  void collectSelectGroups(BasicBlock &BB, SelectGroups &SIGroups);

  void findProfitableSIGroupsBase(SelectGroups &SIGroups,
                                  SelectGroups &ProfSIGroups);
  void findProfitableSIGroupsInnerLoops(const Loop *L, SelectGroups &SIGroups,
                                        SelectGroups &ProfSIGroups);

  bool isConvertToBranchProfitableBase(const SelectGroup &ASI);

  bool hasExpensiveColdOperand(const SelectGroup &ASI);

  void getExclBackwardsSlice(Instruction *I, std::stack<Instruction *> &Slice,
                             Instruction *SI, bool ForSinking = false);

  bool isSelectHighlyPredictable(const SelectLike SI);

  bool checkLoopHeuristics(const Loop *L, const CostInfo LoopDepth[2]);

  bool computeLoopCosts(const Loop *L, const SelectGroups &SIGroups,
                        DenseMap<const Instruction *, CostInfo> &InstCostMap,

  SmallDenseMap<const Instruction *, SelectLike, 2>
  getSImap(const SelectGroups &SIGroups);

  SmallDenseMap<const Instruction *, const SelectGroup *, 2>
  getSGmap(const SelectGroups &SIGroups);

  std::optional<uint64_t> computeInstCost(const Instruction *I);

  Scaled64 getMispredictionCost(const SelectLike SI, const Scaled64 CondCost);

  Scaled64 getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
                                const SelectLike SI);

  bool isSelectKindSupported(const SelectLike SI);
 
  SelectOptimizeImpl Impl;

  SelectOptimize() : FunctionPass(ID) {

    return Impl.runOnFunction(F, *this);

  void getAnalysisUsage(AnalysisUsage &AU) const override {

    AU.addRequired<OptimizationRemarkEmitterWrapperPass>();

  SelectOptimizeImpl Impl(TM);
  return Impl.run(F, FAM);

char SelectOptimize::ID = 0;

  TSI = TM->getSubtargetImpl(F);

  if (!TTI->enableSelectOptimize())

            .getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
  assert(PSI && "This pass requires module analysis pass `profile-summary`!");

  TSchedModel.init(TSI);

  TSI = TM->getSubtargetImpl(F);

  if (!TTI->enableSelectOptimize())

  TSchedModel.init(TSI);

  return optimizeSelects(F);
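// Driver: as the fragments below show, base heuristics handle select groups in
// blocks outside innermost loops, loop-level analysis handles innermost loops,
// and every group found profitable by either phase is then converted to branches.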
 
bool SelectOptimizeImpl::optimizeSelects(Function &F) {

  SelectGroups ProfSIGroups;

  optimizeSelectsBase(F, ProfSIGroups);

  optimizeSelectsInnerLoops(F, ProfSIGroups);

  convertProfitableSIGroups(ProfSIGroups);

  return !ProfSIGroups.empty();

void SelectOptimizeImpl::optimizeSelectsBase(Function &F,
                                             SelectGroups &ProfSIGroups) {

  SelectGroups SIGroups;

    if (L && L->isInnermost())

    collectSelectGroups(BB, SIGroups);

  findProfitableSIGroupsBase(SIGroups, ProfSIGroups);

void SelectOptimizeImpl::optimizeSelectsInnerLoops(Function &F,
                                                   SelectGroups &ProfSIGroups) {

  for (unsigned long i = 0; i < Loops.size(); ++i)

    if (!L->isInnermost())

    SelectGroups SIGroups;

      collectSelectGroups(*BB, SIGroups);

    findProfitableSIGroupsInnerLoops(L, SIGroups, ProfSIGroups);
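// getTrueOrFalseValue (fragments below): for a plain select the requested arm
// already exists as an operand. For select-like binary operators (add/or/sub with
// a zext/sext of the condition folded into one operand) that arm has no standalone
// IR value, so the code clones the operator, substitutes a +1 or -1 constant for
// the condition operand (depending on the matched pattern), and inserts the clone
// into the corresponding branch block. This is a reading of the fragments, not a
// comment carried over from the source.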
 
    SelectOptimizeImpl::SelectLike &SI, bool isTrue,

  Value *V = isTrue ? SI.getTrueValue() : SI.getFalseValue();

      if (auto It = OptSelects.find(IV); It != OptSelects.end())
        return isTrue ? It->second.first : It->second.second;

  assert((BO->getOpcode() == Instruction::Add ||
          BO->getOpcode() == Instruction::Or ||
          BO->getOpcode() == Instruction::Sub) &&
         "Only currently handling Add, Or and Sub binary operators.");

  auto *CBO = BO->clone();
  auto CondIdx = SI.getConditionOpIndex();

    CBO->setOperand(CondIdx, ConstantInt::get(CBO->getType(), 1));

           "Unexpected opcode");
    CBO->setOperand(CondIdx, ConstantInt::get(CBO->getType(), -1));

  unsigned OtherIdx = 1 - CondIdx;

    if (auto It = OptSelects.find(IV); It != OptSelects.end())
      CBO->setOperand(OtherIdx, isTrue ? It->second.first : It->second.second);

  CBO->insertBefore(B->getTerminator()->getIterator());
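// convertProfitableSIGroups (fragments below) rewrites each profitable group as
// explicit control flow: the block is split after the group's last select, cold
// dependence slices are sunk into newly created true/false blocks, the group's
// condition is frozen, and every select is replaced by a PHI in the tail block.
// A rough sketch for a single-select group (block names are illustrative, not
// taken from the source):
//
//     %sel = select i1 %cond, i32 %a, i32 %b
//
// becomes approximately:
//
//   start:
//     %cond.frozen = freeze i1 %cond
//     br i1 %cond.frozen, label %select.true.sink, label %select.end
//   select.true.sink:                ; backward slice of %a sunk here
//     br label %select.end
//   select.end:
//     %sel = phi i32 [ %a, %select.true.sink ], [ %b, %start ]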
 
 
void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
  for (SelectGroup &ASI : ProfSIGroups) {

    typedef std::stack<Instruction *>::size_type StackSizeType;
    StackSizeType maxTrueSliceLen = 0, maxFalseSliceLen = 0;
    for (SelectLike &SI : ASI.Selects) {

        std::stack<Instruction *> TrueSlice;
        getExclBackwardsSlice(TI, TrueSlice, SI.getI(), true);
        maxTrueSliceLen = std::max(maxTrueSliceLen, TrueSlice.size());

          std::stack<Instruction *> FalseSlice;
          getExclBackwardsSlice(FI, FalseSlice, SI.getI(), true);
          maxFalseSliceLen = std::max(maxFalseSliceLen, FalseSlice.size());

    for (StackSizeType IS = 0; IS < maxTrueSliceLen; ++IS) {
      for (auto &S : TrueSlices) {

          TrueSlicesInterleaved.push_back(S.top());

    for (StackSizeType IS = 0; IS < maxFalseSliceLen; ++IS) {
      for (auto &S : FalseSlices) {

          FalseSlicesInterleaved.push_back(S.top());

    SelectLike &SI = ASI.Selects.front();
    SelectLike &LastSI = ASI.Selects.back();

    SplitPt.setHeadBit(true);

    BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock));

    auto DIt = SI.getI()->getIterator();
    auto NIt = ASI.Selects.begin();
    while (&*DIt != LastSI.getI()) {
      if (NIt != ASI.Selects.end() && &*DIt == NIt->getI())

    for (auto *DI : SinkInstrs)
      DI->moveBeforePreserving(InsertionPoint);

                        std::next(LastSI.getI()->getIterator()));

    BasicBlock *TrueBlock = nullptr, *FalseBlock = nullptr;
    BranchInst *TrueBranch = nullptr, *FalseBranch = nullptr;

    auto HasSelectLike = [](SelectGroup &SG, bool IsTrue) {
      for (auto &SL : SG.Selects) {
        if ((IsTrue ? SL.getTrueValue() : SL.getFalseValue()) == nullptr)

    if (!TrueSlicesInterleaved.empty() || HasSelectLike(ASI, true)) {

      TrueBranch->setDebugLoc(LastSI.getI()->getDebugLoc());
      for (Instruction *TrueInst : TrueSlicesInterleaved)

    if (!FalseSlicesInterleaved.empty() || HasSelectLike(ASI, false)) {

      FalseBranch->setDebugLoc(LastSI.getI()->getDebugLoc());
      for (Instruction *FalseInst : FalseSlicesInterleaved)
        FalseInst->moveBefore(FalseBranch->getIterator());

    if (TrueBlock == FalseBlock) {
      assert(TrueBlock == nullptr &&
             "Unexpected basic block transform while optimizing select");

      FalseBranch->setDebugLoc(SI.getI()->getDebugLoc());

    if (TrueBlock == nullptr) {

      TrueBlock = StartBlock;
    } else if (FalseBlock == nullptr) {

      FalseBlock = StartBlock;

        IB.CreateFreeze(ASI.Condition, ASI.Condition->getName() + ".frozen");

    InsertionPoint = EndBlock->begin();
    for (SelectLike &SI : ASI.Selects) {

        for (auto &SG : ProfSIGroups) {
          if (SG.Condition == SI.getI())

      SI.getI()->replaceAllUsesWith(PN);

      ++NumSelectsConverted;

    IB.CreateCondBr(CondFr, TT, FT, SI.getI());

    for (SelectLike &SI : ASI.Selects)
      SI.getI()->eraseFromParent();
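// collectSelectGroups (fragments below) walks a basic block and forms groups of
// "select-like" instructions sharing one scalar i1 condition. Plain selects join a
// group directly; zext/sext (possibly inverted) of the condition, and the
// equivalent shift-of-sign-bit patterns, are only recorded as auxiliary entries in
// SelectInfo; an add/sub/or that consumes such an auxiliary operand then becomes a
// select-like group member, with ConditionIdx remembering which operand carries
// the condition. A group is closed when an instruction with a different condition
// is reached. (Summary of the visible fragments, not a comment from the source.)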
 
void SelectOptimizeImpl::collectSelectGroups(BasicBlock &BB,
                                             SelectGroups &SIGroups) {

  struct SelectLikeInfo {

    unsigned ConditionIdx;

  auto ProcessSelectInfo = [&SelectInfo, &SeenCmp](Instruction *I) {

      return SelectInfo.end();

        Cond->getType()->isIntegerTy(1)) {

      return SelectInfo.insert({I, {Cond, true, Inverted, 0}}).first;

      return SelectInfo.insert({I, {Cond, true, true, 0}}).first;

      return SelectInfo.insert({I, {Cond, false, Inverted, 0}}).first;

        I->getType()->getIntegerBitWidth() == Shift->getZExtValue() + 1) {
      for (auto *CmpI : SeenCmp) {
        auto Pred = CmpI->getPredicate();
        if (Val != CmpI->getOperand(0))

          return SelectInfo.insert({I, {CmpI, true, Inverted, 0}}).first;

      return SelectInfo.end();

    auto MatchZExtOrSExtPattern =

    auto MatchShiftPattern =

    if ((match(I, MatchZExtOrSExtPattern) && X->getType()->isIntegerTy(1)) ||
        (match(I, MatchShiftPattern) &&
         X->getType()->getIntegerBitWidth() == Shift->getZExtValue() + 1)) {
      if (I->getOpcode() != Instruction::Add &&
          I->getOpcode() != Instruction::Sub &&
          I->getOpcode() != Instruction::Or)
        return SelectInfo.end();

      if (I->getOpcode() == Instruction::Or && I->getType()->isIntegerTy(1))
        return SelectInfo.end();

      unsigned Idx = I->getOpcode() == Instruction::Sub ? 1 : 0;
      for (; Idx < 2; Idx++) {
        auto *Op = I->getOperand(Idx);
        auto It = SelectInfo.find(Op);
        if (It != SelectInfo.end() && It->second.IsAuxiliary) {
          Cond = It->second.Cond;
          bool Inverted = It->second.IsInverted;
          return SelectInfo.insert({I, {Cond, false, Inverted, Idx}}).first;

    return SelectInfo.end();

  bool AlreadyProcessed = false;

  while (BBIt != BB.end()) {

    if (I->isDebugOrPseudoInst())

    if (!AlreadyProcessed)
      It = ProcessSelectInfo(I);

      AlreadyProcessed = false;

    if (It == SelectInfo.end() || It->second.IsAuxiliary)

    if (!TTI->shouldTreatInstructionLikeSelect(I))

    if (!Cond->getType()->isIntegerTy(1))

    SelectGroup SIGroup = {Cond, {}};
    SIGroup.Selects.emplace_back(I, It->second.IsInverted,
                                 It->second.ConditionIdx);

    if (!isSelectKindSupported(SIGroup.Selects.front()))

    while (BBIt != BB.end()) {

      It = ProcessSelectInfo(NI);
      if (It == SelectInfo.end()) {
        AlreadyProcessed = true;

      auto [CurrCond, IsAux, IsRev, CondIdx] = It->second;
      if (Cond != CurrCond) {
        AlreadyProcessed = true;

        SIGroup.Selects.emplace_back(NI, IsRev, CondIdx);

      dbgs() << "New Select group (" << SIGroup.Selects.size() << ") with\n";
      for (auto &SI : SIGroup.Selects)
        dbgs() << "  " << *SI.getI() << "\n";

    SIGroups.push_back(SIGroup);
 
void SelectOptimizeImpl::findProfitableSIGroupsBase(
    SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
  for (SelectGroup &ASI : SIGroups) {
    ++NumSelectOptAnalyzed;
    if (isConvertToBranchProfitableBase(ASI))
      ProfSIGroups.push_back(ASI);

void SelectOptimizeImpl::findProfitableSIGroupsInnerLoops(
    const Loop *L, SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
  NumSelectOptAnalyzed += SIGroups.size();

  CostInfo LoopCost[2] = {{Scaled64::getZero(), Scaled64::getZero()},
                          {Scaled64::getZero(), Scaled64::getZero()}};
  if (!computeLoopCosts(L, SIGroups, InstCostMap, LoopCost) ||
      !checkLoopHeuristics(L, LoopCost)) {

  for (SelectGroup &ASI : SIGroups) {

    Scaled64 SelectCost = Scaled64::getZero(), BranchCost = Scaled64::getZero();
    for (SelectLike &SI : ASI.Selects) {
      const auto &ICM = InstCostMap[SI.getI()];
      SelectCost = std::max(SelectCost, ICM.PredCost);
      BranchCost = std::max(BranchCost, ICM.NonPredCost);

    if (BranchCost < SelectCost) {

                            ASI.Selects.front().getI());
      OR << "Profitable to convert to branch (loop analysis). BranchCost="
         << BranchCost.toString() << ", SelectCost=" << SelectCost.toString()

      ++NumSelectConvertedLoop;
      ProfSIGroups.push_back(ASI);

                                      ASI.Selects.front().getI());
      ORmiss << "Select is more profitable (loop analysis). BranchCost="
             << BranchCost.toString()
             << ", SelectCost=" << SelectCost.toString() << ". ";
 
bool SelectOptimizeImpl::isConvertToBranchProfitableBase(
    const SelectGroup &ASI) {
  const SelectLike &SI = ASI.Selects.front();

    ORmiss << "Not converted to branch because of cold basic block. ";

  if (SI.getI()->getMetadata(LLVMContext::MD_unpredictable)) {

    ORmiss << "Not converted to branch because of unpredictable branch. ";

    ++NumSelectConvertedHighPred;
    OR << "Converted to branch because of highly predictable branch. ";

  if (hasExpensiveColdOperand(ASI)) {
    ++NumSelectConvertedExpColdOperand;
    OR << "Converted to branch because of expensive cold operand.";

  auto *BB = SI.getI()->getParent();

  if (L && !L->isInnermost() && L->getLoopLatch() == BB &&
      ASI.Selects.size() >= 3) {
    OR << "Converted to branch because select group in the latch block is big.";

  ORmiss << "Not profitable to convert to branch (base heuristic).";

  return (Numerator + (Denominator / 2)) / Denominator;
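// (The return statement above is round-to-nearest integer division: for example,
//  Numerator = 7 and Denominator = 2 give (7 + 1) / 2 = 4 rather than the
//  truncated 3.)
//
// hasExpensiveColdOperand (fragments below): using branch weights, an arm is
// treated as cold when its weight is a small fraction of the total (measured
// against ColdOperandThreshold); the group is worth converting when the cold arm's
// exclusive backward slice costs more than ColdOperandMaxCostMultiplier times
// TCC_Expensive, since that slice can then be sunk into the rarely taken block.
// (Interpretation of the fragments and option descriptions, not source text.)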
 
 
bool SelectOptimizeImpl::hasExpensiveColdOperand(const SelectGroup &ASI) {
  bool ColdOperand = false;
  uint64_t TrueWeight, FalseWeight, TotalWeight;

    uint64_t MinWeight = std::min(TrueWeight, FalseWeight);
    TotalWeight = TrueWeight + FalseWeight;

                                    ASI.Selects.front().getI());
    ORmiss << "Profile data available but missing branch-weights metadata for "
              "select instruction. ";

  for (SelectLike SI : ASI.Selects) {

    if (TrueWeight < FalseWeight) {

      HotWeight = FalseWeight;

      HotWeight = TrueWeight;

      std::stack<Instruction *> ColdSlice;
      getExclBackwardsSlice(ColdI, ColdSlice, SI.getI());

      while (!ColdSlice.empty()) {
        SliceCost += TTI->getInstructionCost(ColdSlice.top(),

// The fragment below belongs to isSafeToSinkLoad (signature per the
// cross-reference data: static bool isSafeToSinkLoad(Instruction *LoadI,
// Instruction *SI)); it scans for intervening stores.
  while (&*It != SI) {
    if (It->mayWriteToMemory())
 
void SelectOptimizeImpl::getExclBackwardsSlice(Instruction *I,
                                               std::stack<Instruction *> &Slice,

  std::queue<Instruction *> Worklist;

  while (!Worklist.empty()) {

    if (!II->hasOneUse())

    if (ForSinking && (II->isTerminator() || II->mayHaveSideEffects() ||

    if (BFI->getBlockFreq(II->getParent()) < BFI->getBlockFreq(I->getParent()))
 
bool SelectOptimizeImpl::isSelectHighlyPredictable(const SelectLike SI) {
  uint64_t TrueWeight, FalseWeight;

    uint64_t Max = std::max(TrueWeight, FalseWeight);
    uint64_t Sum = TrueWeight + FalseWeight;

      if (Probability > TTI->getPredictableBranchThreshold())
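// checkLoopHeuristics (fragments below) compares the loop's critical path with
// selects kept predicated (PredCost) against the path with selects turned into
// branches (NonPredCost), measured over two consecutive iterations. Writing
// Gain[i] = PredCost[i] - NonPredCost[i], conversion is rejected unless the gain
// is positive, large enough both in absolute cycles and relative to PredCost, and
// not shrinking from the first to the second iteration; when it grows, the growth
// must also clear GainGradientThreshold. (Reading of the visible fragments.)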
 
bool SelectOptimizeImpl::checkLoopHeuristics(const Loop *L,
                                             const CostInfo LoopCost[2]) {

                                   &*L->getHeader()->getFirstNonPHIIt());

  if (LoopCost[0].NonPredCost > LoopCost[0].PredCost ||
      LoopCost[1].NonPredCost >= LoopCost[1].PredCost) {
    ORmissL << "No select conversion in the loop due to no reduction of loop's "

  Scaled64 Gain[2] = {LoopCost[0].PredCost - LoopCost[0].NonPredCost,
                      LoopCost[1].PredCost - LoopCost[1].NonPredCost};

    Scaled64 RelativeGain = Scaled64::get(100) * Gain[1] / LoopCost[1].PredCost;
    ORmissL << "No select conversion in the loop due to small reduction of "
               "loop's critical path. Gain="
            << Gain[1].toString()
            << ", RelativeGain=" << RelativeGain.toString() << "%. ";

  if (Gain[1] > Gain[0]) {
    Scaled64 GradientGain = Scaled64::get(100) * (Gain[1] - Gain[0]) /
                            (LoopCost[1].PredCost - LoopCost[0].PredCost);

      ORmissL << "No select conversion in the loop due to small gradient gain. "

              << GradientGain.toString() << "%. ";

  else if (Gain[1] < Gain[0]) {

        << "No select conversion in the loop due to negative gradient gain. ";
 
bool SelectOptimizeImpl::computeLoopCosts(
    const Loop *L, const SelectGroups &SIGroups,

  LLVM_DEBUG(dbgs() << "Calculating Latency / IPredCost / INonPredCost of loop "
                    << L->getHeader()->getName() << "\n");
  const auto SImap = getSImap(SIGroups);
  const auto SGmap = getSGmap(SIGroups);

  const unsigned Iterations = 2;
  for (unsigned Iter = 0; Iter < Iterations; ++Iter) {

    CostInfo &MaxCost = LoopCost[Iter];

        if (I.isDebugOrPseudoInst())

        Scaled64 IPredCost = Scaled64::getZero(),
                 INonPredCost = Scaled64::getZero();

        for (const Use &U : I.operands()) {

          if (auto It = InstCostMap.find(UI); It != InstCostMap.end()) {
            IPredCost = std::max(IPredCost, It->second.PredCost);
            INonPredCost = std::max(INonPredCost, It->second.NonPredCost);

        auto ILatency = computeInstCost(&I);

          ORmissL << "Invalid instruction cost preventing analysis and "
                     "optimization of the inner-most loop containing this "

        IPredCost += Scaled64::get(*ILatency);
        INonPredCost += Scaled64::get(*ILatency);

        if (auto It = SImap.find(&I); It != SImap.end()) {
          auto SI = It->second;
          const auto *SG = SGmap.at(&I);
          Scaled64 TrueOpCost = SI.getOpCostOnBranch(true, InstCostMap, TTI);
          Scaled64 FalseOpCost = SI.getOpCostOnBranch(false, InstCostMap, TTI);
          Scaled64 PredictedPathCost =
              getPredictedPathCost(TrueOpCost, FalseOpCost, SI);

          Scaled64 CondCost = Scaled64::getZero();

            if (auto It = InstCostMap.find(CI); It != InstCostMap.end())
              CondCost = It->second.NonPredCost;
          Scaled64 MispredictCost = getMispredictionCost(SI, CondCost);

          INonPredCost = PredictedPathCost + MispredictCost;

                          << INonPredCost << " for " << I << "\n");

        InstCostMap[&I] = {IPredCost, INonPredCost};
        MaxCost.PredCost = std::max(MaxCost.PredCost, IPredCost);
        MaxCost.NonPredCost = std::max(MaxCost.NonPredCost, INonPredCost);

                      << " MaxCost = " << MaxCost.PredCost << " "
                      << MaxCost.NonPredCost << "\n");
 
SelectOptimizeImpl::getSImap(const SelectGroups &SIGroups) {

  for (const SelectGroup &ASI : SIGroups)
    for (const SelectLike &SI : ASI.Selects)

SelectOptimizeImpl::getSGmap(const SelectGroups &SIGroups) {

  for (const SelectGroup &ASI : SIGroups)
    for (const SelectLike &SI : ASI.Selects)
 
std::optional<uint64_t>
SelectOptimizeImpl::computeInstCost(const Instruction *I) {

    return std::optional<uint64_t>(ICost.getValue());
  return std::nullopt;
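// Misprediction cost, per the fragments below:
//   MispredictCost = max(MispredictPenalty, CondCost) * MispredictRate / 100
// where MispredictPenalty comes from the target scheduling model and the rate
// starts from MispredictDefaultRate (25%); the isSelectHighlyPredictable check
// adjusts the rate, though the exact adjustment is not visible here. As a purely
// illustrative example, a 14-cycle penalty at the default rate contributes
// 14 * 25 / 100 = 3.5 cycles to the branch-form cost.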
 
SelectOptimizeImpl::getMispredictionCost(const SelectLike SI,
                                         const Scaled64 CondCost) {

  if (isSelectHighlyPredictable(SI))

  Scaled64 MispredictCost =
      std::max(Scaled64::get(MispredictPenalty), CondCost) *
      Scaled64::get(MispredictRate);
  MispredictCost /= Scaled64::get(100);

  return MispredictCost;
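// Predicted-path cost, per the code below: with branch weights available,
//   PredPathCost = (TrueCost * W_true + FalseCost * W_false) / (W_true + W_false),
// i.e. the weighted average of the two arms. Without usable weights it falls back
// to a pessimistic 75/25 mix dominated by the more expensive arm:
//   PredPathCost = max(3 * TrueCost + FalseCost, 3 * FalseCost + TrueCost) / 4.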
 
SelectOptimizeImpl::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
                                         const SelectLike SI) {
  Scaled64 PredPathCost;
  uint64_t TrueWeight, FalseWeight;

    uint64_t SumWeight = TrueWeight + FalseWeight;
    if (SumWeight != 0) {
      PredPathCost = TrueCost * Scaled64::get(TrueWeight) +
                     FalseCost * Scaled64::get(FalseWeight);
      PredPathCost /= Scaled64::get(SumWeight);
      return PredPathCost;

  PredPathCost = std::max(TrueCost * Scaled64::get(3) + FalseCost,
                          FalseCost * Scaled64::get(3) + TrueCost);
  PredPathCost /= Scaled64::get(4);
  return PredPathCost;
 
bool SelectOptimizeImpl::isSelectKindSupported(const SelectLike SI) {

  if (SI.getType()->isVectorTy())
 