#define DEBUG_TYPE "x86-cmov-conversion"

STATISTIC(NumOfSkippedCmovGroups, "Number of unsupported CMOV-groups");
STATISTIC(NumOfCmovGroupCandidate, "Number of CMOV-group candidates");
STATISTIC(NumOfLoopCandidate, "Number of CMOV-conversion profitable loops");
STATISTIC(NumOfOptimizedCmovGroups, "Number of optimized CMOV-groups");
static cl::opt<bool> EnableCmovConverter(
    "x86-cmov-converter",
    cl::desc("Enable the X86 cmov-to-branch optimization."), cl::init(true),
    cl::Hidden);

static cl::opt<unsigned> GainCycleThreshold(
    "x86-cmov-converter-threshold",
    cl::desc("Minimum gain per loop (in cycles) threshold."), cl::init(4),
    cl::Hidden);

static cl::opt<bool> ForceMemOperand(
    "x86-cmov-converter-force-mem-operand",
    cl::desc("Convert cmovs to branches whenever they have memory operands."),
    cl::init(true), cl::Hidden);

static cl::opt<bool> ForceAll("x86-cmov-converter-force-all",
                              cl::desc("Convert all cmovs to branches."),
                              cl::init(false), cl::Hidden);
  StringRef getPassName() const override { return "X86 cmov Conversion"; }

  // ...

  /// Collect all CMOV-group candidates in \p Blocks and update
  /// \p CmovInstGroups accordingly.
  bool collectCmovCandidates(ArrayRef<MachineBasicBlock *> Blocks,
                             CmovGroups &CmovInstGroups,
                             bool IncludeLoads = false);

  // ...

  /// Check whether converting each CMOV-group candidate is profitable.
  bool checkForProfitableCmovCandidates(ArrayRef<MachineBasicBlock *> Blocks,
                                        CmovGroups &CmovInstGroups);
void X86CmovConverterPass::getAnalysisUsage(AnalysisUsage &AU) const {
  MachineFunctionPass::getAnalysisUsage(AU);
  AU.addRequired<MachineLoopInfo>();
}

bool X86CmovConverterPass::runOnMachineFunction(MachineFunction &MF) {
  if (!EnableCmovConverter)
    return false;
  // ...
  bool Changed = false;
  MLI = &getAnalysis<MachineLoopInfo>();
  const TargetSubtargetInfo &STI = MF.getSubtarget();
  MRI = &MF.getRegInfo();
  TII = STI.getInstrInfo();
  TSchedModel.init(&STI);
  // When conversion is forced (see ForceMemOperand / ForceAll), process the
  // whole function rather than only innermost loops.
  if (ForceMemOperand || ForceAll) {
    CmovGroups AllCmovGroups;
    SmallVector<MachineBasicBlock *, 4> Blocks;
    for (auto &MBB : MF)
      Blocks.push_back(&MBB);
    if (collectCmovCandidates(Blocks, AllCmovGroups, /*IncludeLoads=*/true)) {
      for (auto &Group : AllCmovGroups) {
        // Skip groups without a memory-operand CMOV unless ForceAll is set.
        // ...
        Changed = true;
        convertCmovInstsToBranches(Group);
      }
    }
  }
  // Collect all loops, then visit only the innermost ones.
  SmallVector<MachineLoop *, 4> Loops(MLI->begin(), MLI->end());
  for (int i = 0; i < (int)Loops.size(); ++i)
    for (MachineLoop *Child : Loops[i]->getSubLoops())
      Loops.push_back(Child);

  for (MachineLoop *CurrLoop : Loops) {
    // Optimize only innermost loops.
    if (!CurrLoop->getSubLoops().empty())
      continue;

    CmovGroups CmovInstGroups;

    if (!collectCmovCandidates(CurrLoop->getBlocks(), CmovInstGroups))
      continue;

    if (!checkForProfitableCmovCandidates(CurrLoop->getBlocks(),
                                          CmovInstGroups))
      continue;

    Changed = true;
    for (auto &Group : CmovInstGroups)
      convertCmovInstsToBranches(Group);
  }
  return Changed;
}
bool X86CmovConverterPass::collectCmovCandidates(
    ArrayRef<MachineBasicBlock *> Blocks, CmovGroups &CmovInstGroups,
    bool IncludeLoads) {
  // ...
  for (auto *MBB : Blocks) {
    CmovGroup Group;
    // Condition code of the group's first CMOV and its opposite.
    X86::CondCode FirstCC = X86::COND_INVALID, FirstOppCC = X86::COND_INVALID,
                  MemOpCC = X86::COND_INVALID;
    bool FoundNonCMOVInst = false;
    bool SkipGroup = false;

    for (auto &I : *MBB) {
      // Skip debug instructions.
      if (I.isDebugInstr())
        continue;
      X86::CondCode CC = X86::getCondFromCMov(I);
      if (CC != X86::COND_INVALID && (IncludeLoads || !I.mayLoad())) {
        if (Group.empty()) {
          FirstCC = CC;
          FirstOppCC = X86::GetOppositeBranchCondition(CC);
          MemOpCC = X86::COND_INVALID;
          FoundNonCMOVInst = false;
          SkipGroup = false;
        }
        Group.push_back(&I);
        // Every CMOV in a group must use the group's condition code or its
        // opposite, with no intervening non-CMOV instruction.
        if (FoundNonCMOVInst || (CC != FirstCC && CC != FirstOppCC))
          SkipGroup = true;
        if (I.mayLoad()) {
          if (MemOpCC == X86::COND_INVALID)
            MemOpCC = CC;
          else if (CC != MemOpCC)
            // Mixed conditions on memory-operand CMOVs are not handled.
            SkipGroup = true;
        }
        // Check if we were relying on zero-extending behavior of the CMOV.
        if (!SkipGroup &&
            llvm::any_of(
                MRI->use_nodbg_instructions(I.defs().begin()->getReg()),
                [&](MachineInstr &UseI) {
                  return UseI.getOpcode() == X86::SUBREG_TO_REG;
                }))
          SkipGroup = true;
        continue;
      }
      if (Group.empty())
        continue;
      FoundNonCMOVInst = true;
      // An instruction that clobbers EFLAGS ends the current group.
      if (I.definesRegister(X86::EFLAGS)) {
        if (!SkipGroup)
          CmovInstGroups.push_back(Group);
        else
          ++NumOfSkippedCmovGroups;
        Group.clear();
      }
    }
    // The end of the block ends any open group as well.
    if (!Group.empty()) {
      if (!SkipGroup)
        CmovInstGroups.push_back(Group);
      else
        ++NumOfSkippedCmovGroups;
    }
  }

  NumOfCmovGroupCandidate += CmovInstGroups.size();
  return !CmovInstGroups.empty();
}
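// For illustration (hand-written, schematic machine IR rather than output of
// the pass): a candidate group is a run of CMOVs reading one EFLAGS
// definition, e.g.
//
//   CMP32rr %a, %b, implicit-def $eflags
//   %x:gr32 = CMOV32rr %t1, %f1, 4, implicit $eflags
//   %y:gr32 = CMOV32rr %t2, %f2, 4, implicit $eflags
//
// Both CMOVs land in the same group and are later converted together around
// a single branch.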
static unsigned getDepthOfOptCmov(unsigned TrueOpDepth, unsigned FalseOpDepth) {
  // The depth of the result after conversion is a weighted sum of the two
  // operand depths: with no branch-weight information, assume one side is
  // taken 75% of the time and take the worse of the two assignments.
  return std::max(divideCeil(TrueOpDepth * 3 + FalseOpDepth, 4),
                  divideCeil(FalseOpDepth * 3 + TrueOpDepth, 4));
}
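// For example, with TrueOpDepth = 8 and FalseOpDepth = 4 the two weighted
// estimates are divideCeil(8*3 + 4, 4) = 7 and divideCeil(4*3 + 8, 4) = 5,
// so the assumed post-conversion depth is max(7, 5) = 7 cycles.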
bool X86CmovConverterPass::checkForProfitableCmovCandidates(
    ArrayRef<MachineBasicBlock *> Blocks, CmovGroups &CmovInstGroups) {
  struct DepthInfo {
    unsigned Depth;    // Depth of the original loop code.
    unsigned OptDepth; // Depth assuming the CMOV groups are converted.
  };
  /// Number of loop iterations to simulate when computing depths.
  static const unsigned LoopIterations = 2;
  DenseMap<MachineInstr *, DepthInfo> DepthMap;
  DepthInfo LoopDepth[LoopIterations] = {{0, 0}, {0, 0}};
  enum { PhyRegType = 0, VirRegType = 1, RegTypeNum = 2 };
  /// Maps a (physical or virtual) register to its most recent definition.
  DenseMap<Register, MachineInstr *> RegDefMaps[RegTypeNum];
  /// Maps a use operand to the instruction defining the used register.
  DenseMap<MachineOperand *, MachineInstr *> OperandToDefMap;
  // Uses with no recorded def get depth zero.
  DepthMap[nullptr] = {0, 0};

  SmallPtrSet<MachineInstr *, 4> CmovInstructions;
  for (auto &Group : CmovInstGroups)
    CmovInstructions.insert(Group.begin(), Group.end());
  // Simulate the loop iterations, computing for every instruction both its
  // current depth and its depth assuming all CMOV groups are converted.
  for (unsigned I = 0; I < LoopIterations; ++I) {
    DepthInfo &MaxDepth = LoopDepth[I];
    for (auto *MBB : Blocks) {
      // Clear physical-register defs at block boundaries.
      RegDefMaps[PhyRegType].clear();
      for (MachineInstr &MI : *MBB) {
        // Skip debug instructions.
        if (MI.isDebugInstr())
          continue;
        unsigned MIDepth = 0;
        unsigned MIDepthOpt = 0;
        bool IsCMOV = CmovInstructions.count(&MI);
        for (auto &MO : MI.uses()) {
          // Checks for "isUse()" as "uses()" also returns implicit defs.
          if (!MO.isReg() || !MO.isUse())
            continue;
          Register Reg = MO.getReg();
          auto &RDM = RegDefMaps[Reg.isVirtual()];
          MachineInstr *DefMI = RDM.lookup(Reg);
          OperandToDefMap[&MO] = DefMI;
          DepthInfo Info = DepthMap.lookup(DefMI);
          MIDepth = std::max(MIDepth, Info.Depth);
          if (!IsCMOV)
            MIDepthOpt = std::max(MIDepthOpt, Info.OptDepth);
        }

        // A CMOV's optimized depth is the weighted depth of its two value
        // operands: the condition moves onto a branch and leaves the chain.
        if (IsCMOV)
          MIDepthOpt = getDepthOfOptCmov(
              DepthMap[OperandToDefMap.lookup(&MI.getOperand(1))].OptDepth,
              DepthMap[OperandToDefMap.lookup(&MI.getOperand(2))].OptDepth);

        // Iterate over all operands so implicit definitions are recorded too.
        for (auto &MO : MI.operands()) {
          if (!MO.isReg() || !MO.isDef())
            continue;
          Register Reg = MO.getReg();
          RegDefMaps[Reg.isVirtual()][Reg] = &MI;
        }

        unsigned Latency = TSchedModel.computeInstrLatency(&MI);
        DepthMap[&MI] = {MIDepth += Latency, MIDepthOpt += Latency};
        MaxDepth.Depth = std::max(MaxDepth.Depth, MIDepth);
        MaxDepth.OptDepth = std::max(MaxDepth.OptDepth, MIDepthOpt);
      }
    }
  }
  unsigned Diff[LoopIterations] = {LoopDepth[0].Depth - LoopDepth[0].OptDepth,
                                   LoopDepth[1].Depth - LoopDepth[1].OptDepth};
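  // Roughly: Diff[0] is the critical-path saving after one simulated
  // iteration and Diff[1] the saving after two. Comparing them distinguishes
  // a gain that repeats every iteration (Diff keeps growing) from a one-off
  // gain confined to the first iteration.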
  bool WorthOptLoop = false;
  if (Diff[1] == Diff[0])
    // Iteration-independent gain: require it to be at least 12.5% (1/8) of
    // the loop's critical path.
    WorthOptLoop = Diff[0] * 8 >= LoopDepth[0].Depth;
  else if (Diff[1] > Diff[0])
    // Gain that grows across iterations: require the growth to keep pace
    // with half the critical-path growth, and the accumulated gain to be at
    // least 12.5% of the two-iteration critical path.
    WorthOptLoop =
        (Diff[1] - Diff[0]) * 2 >= (LoopDepth[1].Depth - LoopDepth[0].Depth) &&
        (Diff[1] * 8 >= LoopDepth[1].Depth);
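  // A worked example: if one iteration's critical path is
  // LoopDepth[0].Depth = 24 cycles and conversion saves Diff[0] = 4 cycles
  // per iteration (Diff[1] == Diff[0]), then 4 * 8 = 32 >= 24 and the loop
  // qualifies; a 2-cycle saving (2 * 8 = 16 < 24) would not.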
  if (!WorthOptLoop)
    return false;

  ++NumOfLoopCandidate;
  // ...
  unsigned MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty;
  CmovGroups TempGroups;
  std::swap(TempGroups, CmovInstGroups);
  for (auto &Group : TempGroups) {
    bool WorthOpGroup = true;
    for (auto *MI : Group) {
      // Avoid CMOV instructions whose value is used as a pointer to load
      // from: in tree-search-like code with an unpredictable branch,
      // converting such a CMOV serializes the load behind the branch.
      auto UIs = MRI->use_instructions(MI->defs().begin()->getReg());
      if (!UIs.empty() && ++UIs.begin() == UIs.end()) {
        unsigned Op = UIs.begin()->getOpcode();
        if (Op == X86::MOV64rm || Op == X86::MOV32rm) {
          WorthOpGroup = false;
          break;
        }
      }

      unsigned CondCost =
          DepthMap[OperandToDefMap.lookup(&MI->getOperand(4))].Depth;
      unsigned ValCost = getDepthOfOptCmov(
          DepthMap[OperandToDefMap.lookup(&MI->getOperand(1))].Depth,
          DepthMap[OperandToDefMap.lookup(&MI->getOperand(2))].Depth);
      // Converting pays off only when the condition arrives well after the
      // values; otherwise the branch saves nothing and risks a mispredict.
      if (ValCost > CondCost || (CondCost - ValCost) * 4 < MispredictPenalty) {
        WorthOpGroup = false;
        break;
      }
    }

    if (WorthOpGroup)
      CmovInstGroups.push_back(Group);
  }

  return !CmovInstGroups.empty();
}
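// As a numeric illustration of the per-group check above (penalty value
// hypothetical): with MispredictPenalty = 14, a CMOV whose condition becomes
// ready at depth CondCost = 20 while its values are ready at ValCost = 6 is
// converted, since (20 - 6) * 4 = 56 >= 14; with CondCost = 8 the slack
// (8 - 6) * 4 = 8 < 14 and the group stays a CMOV.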
static bool checkEFLAGSLive(MachineInstr *MI) {
  if (MI->killsRegister(X86::EFLAGS))
    return false;

  // The EFLAGS operand of MI might be missing a kill marker: figure out
  // whether EFLAGS is live past MI by scanning forward through the block.
  MachineBasicBlock *BB = MI->getParent();
  MachineBasicBlock::iterator ItrMI = MI;
  for (auto I = std::next(ItrMI), E = BB->end(); I != E; ++I) {
    if (I->readsRegister(X86::EFLAGS))
      return true;
    if (I->definesRegister(X86::EFLAGS))
      return false;
  }

  // We hit the end of the block: check whether EFLAGS is live into a
  // successor.
  for (auto *Succ : BB->successors())
    if (Succ->isLiveIn(X86::EFLAGS))
      return true;

  return false;
}
607 "Last instruction in a CMOV group must be a CMOV instruction");
610 for (
auto I =
First->getIterator(),
E = Last->getIterator();
I !=
E;
I++) {
611 if (
I->isDebugInstr())
612 DBGInstructions.push_back(&*
I);
617 for (
auto *
MI : DBGInstructions)
void X86CmovConverterPass::convertCmovInstsToBranches(
    SmallVectorImpl<MachineInstr *> &Group) const {
  assert(!Group.empty() && "No CMOV instructions to convert");
  ++NumOfOptimizedCmovGroups;
  // ...
  // Create FalseMBB (computes the "false" values) and SinkMBB (joins the two
  // paths) right after the current block.
  F->insert(It, FalseMBB);
  F->insert(It, SinkMBB);
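  // Shape of the emitted control flow (a sketch; the "true" value is
  // computed in the original block MBB, so the diamond degenerates to a
  // triangle):
  //
  //   MBB:      ... ; JCC %SinkMBB (on the CMOV's condition)
  //     |   \
  //     |    FalseMBB: materialize the "false" values, fall through
  //     |   /
  //   SinkMBB:  PHIs merge the values from MBB and FalseMBB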
      // Walk the rewrite table to find the register that holds this
      // operand's "false" value after earlier rewrites in FalseMBB.
      while (true) {
        auto FRIt = FalseBBRegRewriteTable.find(FalseReg);
        if (FRIt == FalseBBRegRewriteTable.end())
          break;
        FalseReg = FRIt->second;
      }
      // ...
      FalseBBRegRewriteTable[MI.getOperand(0).getReg()] = FalseReg;
    }
    // ...
    assert(X86::getCondFromCMov(MI) == OppCC &&
           "Can only handle memory-operand cmov instructions with a condition "
           "opposite to the selected branch direction.");
    // Unfold the memory operand of the loading CMOV into an explicit load in
    // FalseMBB (details elided).
    // ...
    assert(Unfolded && "Should never fail to unfold a loading cmov!");

    // The unfolding produces a load followed by a register CMOV; the CMOV is
    // expected to come out last.
    MachineInstr *NewCMOV = NewMIs.pop_back_val();
    assert(X86::getCondFromCMov(*NewCMOV) == OppCC &&
           "Last new instruction isn't the expected CMOV!");
    // ...
    if (&*MIItBegin == &MI)
      MIItBegin = MachineBasicBlock::iterator(NewCMOV);

    // Move the unfolded instructions into FalseMBB, rewriting any registers
    // already remapped by earlier unfolded loads.
    for (auto *NewMI : NewMIs) {
      // ...
      FalseMBB->insert(FalseInsertionPoint, NewMI);
      for (auto &MOp : NewMI->uses()) {
        if (!MOp.isReg())
          continue;
        auto It = FalseBBRegRewriteTable.find(MOp.getReg());
        if (It == FalseBBRegRewriteTable.end())
          continue;

        MOp.setReg(It->second);
        // The rewritten register may have further uses in FalseMBB, so drop
        // any stale kill flag.
        MOp.setIsKill(false);
      }
    }
    // ...
    FalseBBRegRewriteTable[NewCMOV->getOperand(0).getReg()] = TmpReg;
  // Build one PHI in SinkMBB per CMOV in the group, in order.
  for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; ++MIIt) {
    Register DestReg = MIIt->getOperand(0).getReg();
    Register Op1Reg = MIIt->getOperand(1).getReg();
    Register Op2Reg = MIIt->getOperand(2).getReg();

    // If this CMOV uses the opposite condition from the jump we generated,
    // the operands of its PHI must be swapped.
    if (X86::getCondFromCMov(*MIIt) == OppCC)
      std::swap(Op1Reg, Op2Reg);

    // If a previous PHI already rewrote one of the operands, use the
    // rewritten register for the corresponding incoming edge.
    auto Op1Itr = RegRewriteTable.find(Op1Reg);
    if (Op1Itr != RegRewriteTable.end())
      Op1Reg = Op1Itr->second.first;

    auto Op2Itr = RegRewriteTable.find(Op2Reg);
    if (Op2Itr != RegRewriteTable.end())
      Op2Reg = Op2Itr->second.second;
    // SinkMBB:
    //   %Result = phi [ %FalseValue, FalseMBB ], [ %TrueValue, MBB ]
    MIB = BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(X86::PHI), DestReg)
              .addReg(Op1Reg)
              .addMBB(FalseMBB)
              .addReg(Op2Reg)
              .addMBB(MBB);
    // ...
    // Add this PHI to the rewrite table.
    RegRewriteTable[DestReg] = std::make_pair(Op1Reg, Op2Reg);
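    // Note on chained groups (reasoning not spelled out in the fragment
    // above): a later CMOV in the group may use an earlier CMOV's result,
    // but a PHI's incoming values must be defined in the predecessor blocks,
    // so the rewrite table substitutes the per-edge value of the earlier
    // result instead of the PHI register defined in SinkMBB itself.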
  // Register the new blocks with MachineLoopInfo if the CMOV was in a loop.
  // ...
  L->addBasicBlockToLoop(FalseMBB, MLI->getBase());
  L->addBasicBlockToLoop(SinkMBB, MLI->getBase());
}

FunctionPass *llvm::createX86CmovConverterPass() {
  return new X86CmovConverterPass();
}
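// A rough before/after sketch of the whole transformation (hand-written
// assembly for illustration only; the pass itself runs on machine IR before
// register allocation):
//
//   Before:                          After:
//     cmpq  %rsi, %rdi                 cmpq  %rsi, %rdi
//     cmovl %rsi, %rax                 jge   .Lsink
//                                      movq  %rsi, %rax
//                                    .Lsink:
//
// The data dependence of %rax on the compare is replaced by a predictable
// branch; when the branch would mispredict often, the CMOV form is the
// better choice, which is exactly what the profitability checks estimate.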