78#define DEBUG_TYPE "x86-cmov-conversion"
80STATISTIC(NumOfSkippedCmovGroups,
"Number of unsupported CMOV-groups");
81STATISTIC(NumOfCmovGroupCandidate,
"Number of CMOV-group candidates");
82STATISTIC(NumOfLoopCandidate,
"Number of CMOV-conversion profitable loops");
83STATISTIC(NumOfOptimizedCmovGroups,
"Number of optimized CMOV-groups");
88 cl::desc(
"Enable the X86 cmov-to-branch optimization."),
93 cl::desc(
"Minimum gain per loop (in cycles) threshold."),
97 "x86-cmov-converter-force-mem-operand",
98 cl::desc(
"Convert cmovs to branches whenever they have memory operands."),
102 "x86-cmov-converter-force-all",
103 cl::desc(
"Convert all cmovs to branches."),
138 CmovGroups &CmovInstGroups,
139 bool IncludeLoads =
false);
148 CmovGroups &CmovInstGroups);
158char X86CmovConverterPass::ID = 0;
160void X86CmovConverterPass::getAnalysisUsage(
AnalysisUsage &AU)
const {
178 bool Changed =
false;
179 MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
184 TSchedModel.init(&STI);
192 CmovGroups AllCmovGroups;
196 if (collectCmovCandidates(
Blocks, AllCmovGroups,
true)) {
197 for (
auto &Group : AllCmovGroups) {
207 convertCmovInstsToBranches(Group);
242 for (
int i = 0; i < (int)
Loops.size(); ++i)
244 Loops.push_back(Child);
248 if (!CurrLoop->getSubLoops().empty())
252 CmovGroups CmovInstGroups;
254 if (!collectCmovCandidates(CurrLoop->getBlocks(), CmovInstGroups))
257 if (!checkForProfitableCmovCandidates(CurrLoop->getBlocks(),
262 for (
auto &Group : CmovInstGroups)
263 convertCmovInstsToBranches(Group);
269bool X86CmovConverterPass::collectCmovCandidates(
300 bool FoundNonCMOVInst =
false;
302 bool SkipGroup =
false;
304 for (
auto &
I : *
MBB) {
306 if (
I.isDebugInstr())
313 !
I.getFlag(MachineInstr::MIFlag::Unpredictable) &&
314 (IncludeLoads || !
I.mayLoad())) {
321 FoundNonCMOVInst =
false;
327 if (FoundNonCMOVInst || (
CC != FirstCC &&
CC != FirstOppCC))
334 else if (
CC != MemOpCC)
341 MRI->use_nodbg_instructions(
I.defs().begin()->getReg()),
343 return UseI.getOpcode() == X86::SUBREG_TO_REG;
355 FoundNonCMOVInst =
true;
358 if (
I.definesRegister(X86::EFLAGS,
nullptr)) {
362 CmovInstGroups.push_back(Group);
364 ++NumOfSkippedCmovGroups;
373 CmovInstGroups.push_back(Group);
375 ++NumOfSkippedCmovGroups;
378 NumOfCmovGroupCandidate += CmovInstGroups.size();
379 return !CmovInstGroups.empty();
391 divideCeil(TrueOpDepth * 3 + FalseOpDepth, 4),
392 divideCeil(FalseOpDepth * 3 + TrueOpDepth, 4));
395bool X86CmovConverterPass::checkForProfitableCmovCandidates(
404 static const unsigned LoopIterations = 2;
406 DepthInfo LoopDepth[LoopIterations] = {{0, 0}, {0, 0}};
407 enum { PhyRegType = 0, VirRegType = 1, RegTypeNum = 2 };
415 DepthMap[
nullptr] = {0, 0};
418 for (
auto &Group : CmovInstGroups)
419 CmovInstructions.
insert(Group.begin(), Group.end());
444 for (DepthInfo &
MaxDepth : LoopDepth) {
447 RegDefMaps[PhyRegType].
clear();
450 if (
MI.isDebugInstr())
452 unsigned MIDepth = 0;
453 unsigned MIDepthOpt = 0;
454 bool IsCMOV = CmovInstructions.
count(&
MI);
455 for (
auto &MO :
MI.uses()) {
457 if (!MO.isReg() || !MO.isUse())
460 auto &RDM = RegDefMaps[
Reg.isVirtual()];
462 OperandToDefMap[&MO] =
DefMI;
464 MIDepth = std::max(MIDepth,
Info.Depth);
466 MIDepthOpt = std::max(MIDepthOpt,
Info.OptDepth);
472 DepthMap[OperandToDefMap.
lookup(&
MI.getOperand(1))].OptDepth,
473 DepthMap[OperandToDefMap.
lookup(&
MI.getOperand(2))].OptDepth);
476 for (
auto &MO :
MI.operands()) {
477 if (!MO.isReg() || !MO.isDef())
480 RegDefMaps[
Reg.isVirtual()][
Reg] = &
MI;
483 unsigned Latency = TSchedModel.computeInstrLatency(&
MI);
491 unsigned Diff[LoopIterations] = {LoopDepth[0].Depth - LoopDepth[0].OptDepth,
492 LoopDepth[1].Depth - LoopDepth[1].OptDepth};
524 bool WorthOptLoop =
false;
525 if (Diff[1] == Diff[0])
526 WorthOptLoop = Diff[0] * 8 >= LoopDepth[0].Depth;
527 else if (Diff[1] > Diff[0])
529 (Diff[1] - Diff[0]) * 2 >= (LoopDepth[1].
Depth - LoopDepth[0].
Depth) &&
530 (Diff[1] * 8 >= LoopDepth[1].Depth);
535 ++NumOfLoopCandidate;
548 unsigned MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty;
549 CmovGroups TempGroups;
551 for (
auto &Group : TempGroups) {
552 bool WorthOpGroup =
true;
553 for (
auto *
MI : Group) {
557 auto UIs =
MRI->use_instructions(
MI->defs().begin()->getReg());
558 if (!UIs.empty() && ++UIs.begin() == UIs.end()) {
559 unsigned Op = UIs.begin()->getOpcode();
560 if (
Op == X86::MOV64rm ||
Op == X86::MOV32rm) {
561 WorthOpGroup =
false;
567 DepthMap[OperandToDefMap.
lookup(&
MI->getOperand(4))].Depth;
569 DepthMap[OperandToDefMap.
lookup(&
MI->getOperand(1))].Depth,
570 DepthMap[OperandToDefMap.
lookup(&
MI->getOperand(2))].Depth);
571 if (ValCost > CondCost || (CondCost - ValCost) * 4 < MispredictPenalty) {
572 WorthOpGroup =
false;
578 CmovInstGroups.push_back(Group);
581 return !CmovInstGroups.empty();
585 if (
MI->killsRegister(X86::EFLAGS,
nullptr))
594 for (
auto I = std::next(ItrMI), E = BB->
end();
I != E; ++
I) {
595 if (
I->readsRegister(X86::EFLAGS,
nullptr))
597 if (
I->definesRegister(X86::EFLAGS,
nullptr))
603 if (Succ->isLiveIn(X86::EFLAGS))
615 "Last instruction in a CMOV group must be a CMOV instruction");
618 for (
auto I =
First->getIterator(), E =
Last->getIterator();
I != E;
I++) {
619 if (
I->isDebugInstr())
625 for (
auto *
MI : DBGInstructions)
629void X86CmovConverterPass::convertCmovInstsToBranches(
631 assert(!Group.
empty() &&
"No CMOV instructions to convert");
632 ++NumOfOptimizedCmovGroups;
690 F->insert(It, FalseMBB);
691 F->insert(It, SinkMBB);
736 auto FRIt = FalseBBRegRewriteTable.
find(FalseReg);
737 if (FRIt == FalseBBRegRewriteTable.
end())
739 FalseReg = FRIt->second;
741 FalseBBRegRewriteTable[
MI.getOperand(0).getReg()] = FalseReg;
749 "Can only handle memory-operand cmov instructions with a condition "
750 "opposite to the selected branch direction.");
778 unsigned OldDebugInstrNum =
MI.peekDebugInstrNum();
784 assert(Unfolded &&
"Should never fail to unfold a loading cmov!");
790 "Last new instruction isn't the expected CMOV!");
793 if (&*MIItBegin == &
MI)
796 if (OldDebugInstrNum)
797 NewCMOV->setDebugInstrNum(OldDebugInstrNum);
801 for (
auto *NewMI : NewMIs) {
803 FalseMBB->
insert(FalseInsertionPoint, NewMI);
805 for (
auto &MOp : NewMI->uses()) {
808 auto It = FalseBBRegRewriteTable.
find(MOp.getReg());
809 if (It == FalseBBRegRewriteTable.
end())
812 MOp.setReg(It->second);
818 MOp.setIsKill(
false);
824 FalseBBRegRewriteTable[NewCMOV->getOperand(0).getReg()] = TmpReg;
836 Register DestReg = MIIt->getOperand(0).getReg();
837 Register Op1Reg = MIIt->getOperand(1).getReg();
838 Register Op2Reg = MIIt->getOperand(2).getReg();
846 auto Op1Itr = RegRewriteTable.
find(Op1Reg);
847 if (Op1Itr != RegRewriteTable.
end())
848 Op1Reg = Op1Itr->second.first;
850 auto Op2Itr = RegRewriteTable.
find(Op2Reg);
851 if (Op2Itr != RegRewriteTable.
end())
852 Op2Reg = Op2Itr->second.second;
857 MIB =
BuildMI(*SinkMBB, SinkInsertionPoint,
DL,
TII->get(X86::PHI), DestReg)
868 if (
unsigned InstrNum = MIIt->peekDebugInstrNum())
872 RegRewriteTable[DestReg] = std::make_pair(Op1Reg, Op2Reg);
877 if (MIItBegin != MIItEnd)
878 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
885 L->addBasicBlockToLoop(FalseMBB, *MLI);
886 L->addBasicBlockToLoop(SinkMBB, *MLI);
897 return new X86CmovConverterPass();
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Analysis containing CSE Info
This file defines the DenseMap class.
DenseMap< Block *, BlockRelaxAux > Blocks
const HexagonInstrInfo * TII
static const unsigned MaxDepth
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static cl::opt< bool > DisableSelectOptimize("disable-select-optimize", cl::init(true), cl::Hidden, cl::desc("Disable the select-optimization pass from running"))
Disable the select optimization pass.
static cl::opt< bool > ForceAll("x86-cmov-converter-force-all", cl::desc("Convert all cmovs to branches."), cl::init(false), cl::Hidden)
static bool checkEFLAGSLive(MachineInstr *MI)
static unsigned getDepthOfOptCmov(unsigned TrueOpDepth, unsigned FalseOpDepth)
static cl::opt< unsigned > GainCycleThreshold("x86-cmov-converter-threshold", cl::desc("Minimum gain per loop (in cycles) threshold."), cl::init(4), cl::Hidden)
static cl::opt< bool > ForceMemOperand("x86-cmov-converter-force-mem-operand", cl::desc("Convert cmovs to branches whenever they have memory operands."), cl::init(true), cl::Hidden)
static void packCmovGroup(MachineInstr *First, MachineInstr *Last)
Given /p First CMOV instruction and /p Last CMOV instruction representing a group of CMOV instruction...
static cl::opt< bool > EnableCmovConverter("x86-cmov-converter", cl::desc("Enable the X86 cmov-to-branch optimization."), cl::init(true), cl::Hidden)
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM Basic Block Representation.
This class represents an Operation in the Expression.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
FunctionPass class - This class is used to implement most global optimizations.
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< succ_iterator > successors()
iterator insertAfter(iterator I, MachineInstr *MI)
Insert MI into the instruction list after I.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
void setDebugInstrNum(unsigned Num)
Set instruction number of this MachineInstr.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Wrapper class representing virtual and physical registers.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Provide an instruction scheduling machine model to CodeGen passes.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
self_iterator getIterator()
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Reg
All possible values of the reg field in the ModR/M byte.
CondCode GetOppositeBranchCondition(CondCode CC)
GetOppositeBranchCondition - Return the inverse of the specified cond, e.g.
CondCode getCondFromCMov(const MachineInstr &MI)
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
FunctionPass * createX86CmovConverterPass()
This pass converts X86 cmov instructions into branch when profitable.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
CGPassBuilderOption getCGPassBuilderOption()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.