#define DEBUG_TYPE "x86-cmov-conversion"
STATISTIC(NumOfSkippedCmovGroups, "Number of unsupported CMOV-groups");
STATISTIC(NumOfCmovGroupCandidate, "Number of CMOV-group candidates");
STATISTIC(NumOfLoopCandidate, "Number of CMOV-conversion profitable loops");
STATISTIC(NumOfOptimizedCmovGroups, "Number of optimized CMOV-groups");
static cl::opt<bool> EnableCmovConverter(
    "x86-cmov-converter",
    cl::desc("Enable the X86 cmov-to-branch optimization."), cl::init(true),
    cl::Hidden);

static cl::opt<unsigned> GainCycleThreshold(
    "x86-cmov-converter-threshold",
    cl::desc("Minimum gain per loop (in cycles) threshold."), cl::init(4),
    cl::Hidden);

static cl::opt<bool> ForceMemOperand(
    "x86-cmov-converter-force-mem-operand",
    cl::desc("Convert cmovs to branches whenever they have memory operands."),
    cl::init(true), cl::Hidden);

static cl::opt<bool> ForceAll(
    "x86-cmov-converter-force-all", cl::desc("Convert all cmovs to branches."),
    cl::init(false), cl::Hidden);
  /// Collect the CMOV-group candidates found in \p Blocks.
  bool collectCmovCandidates(ArrayRef<MachineBasicBlock *> Blocks,
                             CmovGroups &CmovInstGroups,
                             bool IncludeLoads = false);

  /// Check whether the collected CMOV-group candidates are profitable.
  bool checkForProfitableCmovCandidates(ArrayRef<MachineBasicBlock *> Blocks,
                                        CmovGroups &CmovInstGroups);
char X86CmovConverterPass::ID = 0;
void X86CmovConverterPass::getAnalysisUsage(AnalysisUsage &AU) const {
  MachineFunctionPass::getAnalysisUsage(AU);
  AU.addRequired<MachineLoopInfo>();
}
  bool Changed = false;
  MLI = &getAnalysis<MachineLoopInfo>();
  // ...
  TSchedModel.init(&STI);
  // ...
  // When forced (see ForceMemOperand/ForceAll above), collect candidates from
  // the whole function, including loading CMOVs, and convert them directly.
  CmovGroups AllCmovGroups;
  // ...
  if (collectCmovCandidates(Blocks, AllCmovGroups, /*IncludeLoads=*/true)) {
    for (auto &Group : AllCmovGroups) {
      // ...
      convertCmovInstsToBranches(Group);
    }
  }
  // Grow-and-scan worklist: re-read Loops.size() every iteration, because the
  // body appends child loops to the vector while it is being walked.
  for (int i = 0; i < (int)Loops.size(); ++i)
    for (MachineLoop *Child : Loops[i]->getSubLoops())
      Loops.push_back(Child);
  for (MachineLoop *CurrLoop : Loops) {
    // Optimize only innermost loops.
    if (!CurrLoop->getSubLoops().empty())
      continue;

    CmovGroups CmovInstGroups;

    if (!collectCmovCandidates(CurrLoop->getBlocks(), CmovInstGroups))
      continue;

    if (!checkForProfitableCmovCandidates(CurrLoop->getBlocks(),
                                          CmovInstGroups))
      continue;

    Changed = true;
    for (auto &Group : CmovInstGroups)
      convertCmovInstsToBranches(Group);
  }
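// A minimal standalone sketch (hypothetical "Node" type, not from this pass;
// assumes <vector> and <cstddef>) of the grow-while-scanning worklist idiom
// used above: an index-based loop re-reads size() each iteration, so appending
// during traversal is safe even across reallocation, whereas a range-for's
// iterators would be invalidated by push_back.
namespace worklist_sketch {
struct Node {
  std::vector<Node *> Children;
};
inline void flatten(std::vector<Node *> &Work) {
  for (size_t I = 0; I < Work.size(); ++I) // size() re-read every pass
    for (Node *Child : Work[I]->Children)
      Work.push_back(Child); // safe: indices stay valid after reallocation
}
} // namespace worklist_sketch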
bool X86CmovConverterPass::collectCmovCandidates(
    ArrayRef<MachineBasicBlock *> Blocks, CmovGroups &CmovInstGroups,
    bool IncludeLoads) {
  // ...
  // Indicator of a non-CMOVrr instruction in the current processed range.
  bool FoundNonCMOVInst = false;
  // Indicator for whether the current CMOV-group should be skipped.
  bool SkipGroup = false;

  for (auto &I : *MBB) {
    // Skip debug instructions.
    if (I.isDebugInstr())
      continue;
    // ...
      // First CMOV of a new range: reset the group state.
      FoundNonCMOVInst = false;
    // ...
    // A non-consecutive CMOV, or one whose condition matches neither the
    // first CMOV nor its opposite, disqualifies the whole group.
    if (FoundNonCMOVInst || (CC != FirstCC && CC != FirstOppCC))
      SkipGroup = true;
    // ...
    else if (CC != MemOpCC)
      // Can't handle mixed conditions with memory operands.
      SkipGroup = true;
    // ...
    if (any_of(MRI->use_nodbg_instructions(I.defs().begin()->getReg()),
               [&](MachineInstr &UseI) {
                 return UseI.getOpcode() == X86::SUBREG_TO_REG;
               }))
      // Users that zero-extend the result via SUBREG_TO_REG are not supported.
      SkipGroup = true;
    // ...
    // We found a non-CMOVrr instruction.
    FoundNonCMOVInst = true;
    // An EFLAGS def ends the current range: no later instruction can consume
    // the EFLAGS value the group depends on.
    if (I.definesRegister(X86::EFLAGS)) {
      if (!SkipGroup)
        CmovInstGroups.push_back(Group);
      else
        ++NumOfSkippedCmovGroups;
      // ...
    }
  }

  // The end of the basic block also ends the range.
  if (!SkipGroup)
    CmovInstGroups.push_back(Group);
  else
    ++NumOfSkippedCmovGroups;

  NumOfCmovGroupCandidate += CmovInstGroups.size();
  return !CmovInstGroups.empty();
}
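// An illustrative source-level sketch (hypothetical function, not from this
// file): two selects guarded by one compare typically lower to a single CMP
// feeding two consecutive CMOVs that read the same EFLAGS def, i.e. exactly
// one "CMOV group" as collected above (cmp; cmovl; cmovl).
static inline int cmovGroupExample(int A, int B, int X, int Y, int U, int V) {
  bool C = A < B;     // one flags-setting compare...
  int R0 = C ? X : Y; // ...feeding two branchless selects
  int R1 = C ? U : V;
  return R0 + R1;
}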
static unsigned getDepthOfOptCmov(unsigned TrueOpDepth, unsigned FalseOpDepth) {
  // With no branch-weight information, assume a 75%/25% split in either
  // direction and keep the worse of the two weighted-average depths.
  return std::max(divideCeil(TrueOpDepth * 3 + FalseOpDepth, 4),
                  divideCeil(FalseOpDepth * 3 + TrueOpDepth, 4));
}
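// Worked example for getDepthOfOptCmov (made-up depths, a sketch only): for
// TrueOpDepth = 8 and FalseOpDepth = 2,
//   divideCeil(8 * 3 + 2, 4) = 7   and   divideCeil(2 * 3 + 8, 4) = 4,
// so the estimated post-conversion depth is max(7, 4) = 7 cycles. The
// static_asserts below re-derive the two ceilings with integer arithmetic.
static_assert((8 * 3 + 2 + 3) / 4 == 7, "true-op-heavy 75/25 average");
static_assert((2 * 3 + 8 + 3) / 4 == 4, "false-op-heavy 75/25 average");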
bool X86CmovConverterPass::checkForProfitableCmovCandidates(
    ArrayRef<MachineBasicBlock *> Blocks, CmovGroups &CmovInstGroups) {
  // Depth is simulated for two loop iterations so the estimate converges.
  static const unsigned LoopIterations = 2;
  // ...
  DepthInfo LoopDepth[LoopIterations] = {{0, 0}, {0, 0}};
  enum { PhyRegType = 0, VirRegType = 1, RegTypeNum = 2 };
  // ...
  DepthMap[nullptr] = {0, 0};

  SmallPtrSet<MachineInstr *, 4> CmovInstructions;
  for (auto &Group : CmovInstGroups)
    CmovInstructions.insert(Group.begin(), Group.end());
  for (DepthInfo &MaxDepth : LoopDepth) {
    for (auto *MBB : Blocks) {
      // Clear physical-register definitions at block boundaries.
      RegDefMaps[PhyRegType].clear();
      for (MachineInstr &MI : *MBB) {
        // Skip debug instructions.
        if (MI.isDebugInstr())
          continue;
        unsigned MIDepth = 0;
        unsigned MIDepthOpt = 0;
        bool IsCMOV = CmovInstructions.count(&MI);
        for (auto &MO : MI.uses()) {
          // Checks for "isUse()" as "uses()" returns also implicit definitions.
          if (!MO.isReg() || !MO.isUse())
            continue;
          Register Reg = MO.getReg();
          auto &RDM = RegDefMaps[Reg.isVirtual()];
          if (MachineInstr *DefMI = RDM.lookup(Reg)) {
            OperandToDefMap[&MO] = DefMI;
            DepthInfo Info = DepthMap.lookup(DefMI);
            MIDepth = std::max(MIDepth, Info.Depth);
            if (!IsCMOV)
              MIDepthOpt = std::max(MIDepthOpt, Info.OptDepth);
          }
        }

        // A converted CMOV costs only the depth of its selected operands,
        // estimated by getDepthOfOptCmov.
        if (IsCMOV)
          MIDepthOpt = getDepthOfOptCmov(
              DepthMap[OperandToDefMap.lookup(&MI.getOperand(1))].OptDepth,
              DepthMap[OperandToDefMap.lookup(&MI.getOperand(2))].OptDepth);

        // Remember the last definition seen for each register.
        for (auto &MO : MI.operands()) {
          if (!MO.isReg() || !MO.isDef())
            continue;
          Register Reg = MO.getReg();
          RegDefMaps[Reg.isVirtual()][Reg] = &MI;
        }

        unsigned Latency = TSchedModel.computeInstrLatency(&MI);
        DepthMap[&MI] = {MIDepth += Latency, MIDepthOpt += Latency};
        MaxDepth.Depth = std::max(MaxDepth.Depth, MIDepth);
        MaxDepth.OptDepth = std::max(MaxDepth.OptDepth, MIDepthOpt);
      }
    }
  }
  unsigned Diff[LoopIterations] = {LoopDepth[0].Depth - LoopDepth[0].OptDepth,
                                   LoopDepth[1].Depth - LoopDepth[1].OptDepth};
  bool WorthOptLoop = false;
  if (Diff[1] == Diff[0])
    WorthOptLoop = Diff[0] * 8 >= LoopDepth[0].Depth;
  else if (Diff[1] > Diff[0])
    WorthOptLoop =
        (Diff[1] - Diff[0]) * 2 >= (LoopDepth[1].Depth - LoopDepth[0].Depth) &&
        (Diff[1] * 8 >= LoopDepth[1].Depth);
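  // Worked example (made-up numbers): LoopDepth[0] = {40, 30} and
  // LoopDepth[1] = {80, 60} give Diff = {10, 20}. Since Diff[1] > Diff[0],
  // the loop is worthwhile only if (20 - 10) * 2 >= (80 - 40), i.e. 20 >= 40,
  // which fails: the extra gain of the second iteration does not keep pace
  // with the loop-carried depth growth, so this loop would be rejected.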
  if (!WorthOptLoop)
    return false;

  ++NumOfLoopCandidate;
  unsigned MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty;
  CmovGroups TempGroups;
  std::swap(TempGroups, CmovInstGroups);
  for (auto &Group : TempGroups) {
    bool WorthOpGroup = true;
    for (auto *MI : Group) {
      // Avoid a CMOV whose result is used as a pointer to load from: the
      // load would then sit on the critical path behind the select.
      auto UIs = MRI->use_instructions(MI->defs().begin()->getReg());
      if (!UIs.empty() && ++UIs.begin() == UIs.end()) {
        unsigned Op = UIs.begin()->getOpcode();
        if (Op == X86::MOV64rm || Op == X86::MOV32rm) {
          WorthOpGroup = false;
          break;
        }
      }
      // Operand 4 is the CMOV's implicit EFLAGS use; its def's depth is the
      // cost of waiting for the condition.
      unsigned CondCost =
          DepthMap[OperandToDefMap.lookup(&MI->getOperand(4))].Depth;
      unsigned ValCost = getDepthOfOptCmov(
          DepthMap[OperandToDefMap.lookup(&MI->getOperand(1))].Depth,
          DepthMap[OperandToDefMap.lookup(&MI->getOperand(2))].Depth);
      if (ValCost > CondCost || (CondCost - ValCost) * 4 < MispredictPenalty) {
        WorthOpGroup = false;
        break;
      }
    }

    if (WorthOpGroup)
      CmovInstGroups.push_back(Group);
  }

  return !CmovInstGroups.empty();
}
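// Worked example for the per-group filter above (hypothetical costs): with
// CondCost = 12, ValCost = 4 and MispredictPenalty = 20, the rejection test
// "ValCost > CondCost || (CondCost - ValCost) * 4 < MispredictPenalty" is
// (4 > 12) || (32 < 20) == false, so the group is kept: the condition becomes
// ready well after the operands, and the headroom beats a quarter of the
// misprediction penalty.
static_assert(!(4 > 12 || (12 - 4) * 4 < 20),
              "sample group survives the profitability filter");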
static bool checkEFLAGSLive(MachineInstr *MI) {
  if (MI->killsRegister(X86::EFLAGS))
    return false;

  // The EFLAGS operand of MI might be missing a kill marker: scan forward
  // through the block for a use or def of EFLAGS.
  MachineBasicBlock *BB = MI->getParent();
  MachineBasicBlock::iterator ItrMI = MI;
  for (auto I = std::next(ItrMI), E = BB->end(); I != E; ++I) {
    if (I->readsRegister(X86::EFLAGS))
      return true;
    if (I->definesRegister(X86::EFLAGS))
      return false;
  }

  // We hit the end of the block: EFLAGS is live if it is live-in to any
  // successor.
  for (auto *Succ : BB->successors())
    if (Succ->isLiveIn(X86::EFLAGS))
      return true;

  return false;
}
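// Illustration (a sketch, not from this file) of what the scan above catches:
//
//   CMP32rr %a, %b, implicit-def $eflags
//   %x = CMOV32rr %t, %f, 4, implicit $eflags   ; the group being converted
//   JCC_1 %bb.2, 4, implicit $eflags            ; later reader of EFLAGS
//
// Here checkEFLAGSLive() returns true, and the caller must keep EFLAGS
// available across the new blocks (e.g. via addLiveIn(X86::EFLAGS)).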
611 "Last instruction in a CMOV group must be a CMOV instruction");
614 for (
auto I = First->getIterator(),
E =
Last->getIterator();
I !=
E;
I++) {
615 if (
I->isDebugInstr())
621 for (
auto *
MI : DBGInstructions)
void X86CmovConverterPass::convertCmovInstsToBranches(
    SmallVectorImpl<MachineInstr *> &Group) const {
  assert(!Group.empty() && "No CMOV instructions to convert");
  ++NumOfOptimizedCmovGroups;
  F->insert(It, FalseMBB);
  F->insert(It, SinkMBB);
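  // Shape of the resulting CFG (a sketch; block names match the code above):
  //
  //   MBB:                       ; ends in a conditional jump to SinkMBB
  //     JCC SinkMBB
  //   FalseMBB:                  ; computes the "false" values, falls through
  //   SinkMBB:
  //     %dst = PHI [ %false, FalseMBB ], [ %true, MBB ]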
      // Walk the rewrite chain to the final replacement for FalseReg.
      while (true) {
        auto FRIt = FalseBBRegRewriteTable.find(FalseReg);
        if (FRIt == FalseBBRegRewriteTable.end())
          break;
        FalseReg = FRIt->second;
      }
      FalseBBRegRewriteTable[MI.getOperand(0).getReg()] = FalseReg;
745 "Can only handle memory-operand cmov instructions with a condition "
746 "opposite to the selected branch direction.");
    assert(Unfolded && "Should never fail to unfold a loading cmov!");
    // ...
    assert(X86::getCondFromCMov(*NewCMOV) == OppCC &&
           "Last new instruction isn't the expected CMOV!");
    if (&*MIItBegin == &MI)
      MIItBegin = MachineBasicBlock::iterator(NewCMOV);
    // ...
    // Sink whatever instructions were needed to produce the unfolded operand
    // into the false block.
    for (auto *NewMI : NewMIs) {
      // ...
      FalseMBB->insert(FalseInsertionPoint, NewMI);
      // Re-map any operands to registers already rewritten in the false block.
      for (auto &MOp : NewMI->uses()) {
        if (!MOp.isReg())
          continue;
        auto It = FalseBBRegRewriteTable.find(MOp.getReg());
        if (It == FalseBBRegRewriteTable.end())
          continue;

        MOp.setReg(It->second);
        // The operand may have been a kill of the cmov result, but it won't
        // necessarily kill the rewritten register.
        MOp.setIsKill(false);
      }
    }
    // ...
    FalseBBRegRewriteTable[NewCMOV->getOperand(0).getReg()] = TmpReg;
    Register DestReg = MIIt->getOperand(0).getReg();
    Register Op1Reg = MIIt->getOperand(1).getReg();
    Register Op2Reg = MIIt->getOperand(2).getReg();

    // If an operand was produced by an earlier CMOV in this group, substitute
    // the per-edge value recorded for it.
    auto Op1Itr = RegRewriteTable.find(Op1Reg);
    if (Op1Itr != RegRewriteTable.end())
      Op1Reg = Op1Itr->second.first;

    auto Op2Itr = RegRewriteTable.find(Op2Reg);
    if (Op2Itr != RegRewriteTable.end())
      Op2Reg = Op2Itr->second.second;

    // SinkMBB:
    //   %Result = phi [ %FalseValue, FalseMBB ], [ %TrueValue, MBB ]
    MIB = BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(X86::PHI), DestReg)
              .addReg(Op1Reg)
              .addMBB(FalseMBB)
              .addReg(Op2Reg)
              .addMBB(MBB);
    // ...
    RegRewriteTable[DestReg] = std::make_pair(Op1Reg, Op2Reg);
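    // Example of the rewrite table in action (hypothetical vregs): for the
    // group
    //   %3 = CMOV32rr %1, %2, CC     ; RegRewriteTable[%3] = (%1, %2)
    //   %5 = CMOV32rr %3, %4, CC
    // the second PHI must not reference %3, which no longer exists once the
    // CMOVs are gone; the table substitutes the value %3 carries on each
    // incoming edge:
    //   %3 = PHI [ %1, FalseMBB ], [ %2, MBB ]
    //   %5 = PHI [ %1, FalseMBB ], [ %4, MBB ]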
  // Register the new blocks with MachineLoopInfo.
  if (MachineLoop *L = MLI->getLoopFor(MBB)) {
    L->addBasicBlockToLoop(FalseMBB, MLI->getBase());
    L->addBasicBlockToLoop(SinkMBB, MLI->getBase());
  }
FunctionPass *llvm::createX86CmovConverterPass() {
  return new X86CmovConverterPass();
}
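// Hedged wiring note (the call site lives in the X86 target setup, not in
// this file): the target pipeline is expected to instantiate the pass through
// this factory, along the lines of
//   addPass(createX86CmovConverterPass());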