42 #define DEBUG_TYPE "aarch64-ccmp"
48 cl::desc(
"Maximum number of instructions per speculated block."));
// Pass statistics. Each STATISTIC(Name, Description) entry pairs a counter
// name with the description string attached to it. The "rejected" entries
// carry the rejection reason in parentheses, so the description alone says
// which check the counter corresponds to.
// NOTE(review): the sites that update these counters are not visible in this
// listing; confirm each counter against the code path that bumps it.

// Candidates examined.
54 STATISTIC(NumConsidered,
"Number of ccmps considered");

// Rejections, one counter per reason.
55 STATISTIC(NumPhiRejs,
"Number of ccmps rejected (PHI)");
56 STATISTIC(NumPhysRejs,
"Number of ccmps rejected (Physregs)");
57 STATISTIC(NumPhi2Rejs,
"Number of ccmps rejected (PHI2)");
58 STATISTIC(NumHeadBranchRejs,
"Number of ccmps rejected (Head branch)");
59 STATISTIC(NumCmpBranchRejs,
"Number of ccmps rejected (CmpBB branch)");
60 STATISTIC(NumCmpTermRejs,
"Number of ccmps rejected (CmpBB is cbz...)");
61 STATISTIC(NumImmRangeRejs,
"Number of ccmps rejected (Imm out of range)");
62 STATISTIC(NumLiveDstRejs,
"Number of ccmps rejected (Cmp dest live)");
63 STATISTIC(NumMultNZCVUses,
"Number of ccmps rejected (NZCV used)");
64 STATISTIC(NumUnknNZCVDefs,
"Number of ccmps rejected (NZCV def unknown)");
66 STATISTIC(NumSpeculateRejs,
"Number of ccmps rejected (Can't speculate)");

// Successful transformations.
68 STATISTIC(NumConverted,
"Number of ccmp instructions created");
69 STATISTIC(NumCompBranches,
"Number of cbz/cbnz branches converted");
171 bool trivialTailPHIs();
174 void updateTailPHIs();
177 bool isDeadDef(
unsigned DstReg);
206 int expectedCodeSizeDelta()
const;
212 bool SSACCmpConv::trivialTailPHIs() {
213 for (
auto &
I : *Tail) {
216 unsigned HeadReg = 0, CmpBBReg = 0;
218 for (
unsigned oi = 1, oe =
I.getNumOperands(); oi != oe; oi += 2) {
220 unsigned Reg =
I.getOperand(oi).getReg();
222 assert((!HeadReg || HeadReg == Reg) &&
"Inconsistent PHI operands");
226 assert((!CmpBBReg || CmpBBReg == Reg) &&
"Inconsistent PHI operands");
230 if (HeadReg != CmpBBReg)
238 void SSACCmpConv::updateTailPHIs() {
239 for (
auto &
I : *Tail) {
243 for (
unsigned oi =
I.getNumOperands(); oi > 2; oi -= 2) {
245 if (
I.getOperand(oi - 1).getMBB() == CmpBB) {
246 I.RemoveOperand(oi - 1);
247 I.RemoveOperand(oi - 2);
255 bool SSACCmpConv::isDeadDef(
unsigned DstReg) {
257 if (DstReg == AArch64::WZR || DstReg == AArch64::XZR)
263 return MRI->use_nodbg_empty(DstReg);
271 if (Cond[0].getImm() != -1) {
272 assert(Cond.
size() == 1 &&
"Unknown Cond array format");
277 switch (Cond[1].getImm()) {
284 assert(Cond.
size() == 3 &&
"Unknown Cond array format");
289 assert(Cond.
size() == 3 &&
"Unknown Cond array format");
300 if (!I->readsRegister(AArch64::NZCV)) {
301 switch (I->getOpcode()) {
310 DEBUG(
dbgs() <<
"Flags not used by terminator: " << *I);
317 assert(!I->isTerminator() &&
"Spurious terminator");
318 switch (I->getOpcode()) {
320 case AArch64::SUBSWri:
321 case AArch64::SUBSXri:
323 case AArch64::ADDSWri:
324 case AArch64::ADDSXri:
327 if (I->getOperand(3).getImm() || !isUInt<5>(I->getOperand(2).getImm())) {
328 DEBUG(
dbgs() <<
"Immediate out of range for ccmp: " << *I);
333 case AArch64::SUBSWrr:
334 case AArch64::SUBSXrr:
335 case AArch64::ADDSWrr:
336 case AArch64::ADDSXrr:
337 if (isDeadDef(I->getOperand(0).getReg()))
339 DEBUG(
dbgs() <<
"Can't convert compare with live destination: " << *I);
342 case AArch64::FCMPSrr:
343 case AArch64::FCMPDrr:
344 case AArch64::FCMPESrr:
345 case AArch64::FCMPEDrr:
350 MIOperands::PhysRegInfo PRI =
357 DEBUG(
dbgs() <<
"Can't create ccmp with multiple uses: " << *I);
362 if (PRI.Defined || PRI.Clobbered) {
363 DEBUG(
dbgs() <<
"Not convertible compare: " << *I);
391 if (I.isDebugValue())
410 DEBUG(
dbgs() <<
"Won't speculate load: " << I);
415 bool DontMoveAcrossStore =
true;
416 if (!I.isSafeToMove(
nullptr, DontMoveAcrossStore)) {
417 DEBUG(
dbgs() <<
"Can't speculate: " << I);
422 if (&I != CmpMI && I.modifiesRegister(AArch64::NZCV, TRI)) {
423 DEBUG(
dbgs() <<
"Clobbers flags: " << I);
435 Tail = CmpBB =
nullptr;
437 if (Head->succ_size() != 2)
453 if (!CmpBB->isSuccessor(Tail))
457 DEBUG(
dbgs() <<
"\nTriangle: BB#" << Head->getNumber() <<
" -> BB#"
458 << CmpBB->getNumber() <<
" -> BB#" << Tail->getNumber() <<
'\n');
467 if (!trivialTailPHIs()) {
468 DEBUG(
dbgs() <<
"Can't handle phis in Tail.\n");
473 if (!Tail->livein_empty()) {
474 DEBUG(
dbgs() <<
"Can't handle live-in physregs in Tail.\n");
481 if (!CmpBB->empty() && CmpBB->front().isPHI()) {
482 DEBUG(
dbgs() <<
"Can't handle phis in CmpBB.\n");
487 if (!CmpBB->livein_empty()) {
488 DEBUG(
dbgs() <<
"Can't handle live-in physregs in CmpBB.\n");
497 DEBUG(
dbgs() <<
"Head branch not analyzable.\n");
504 if (!TBB || HeadCond.
empty()) {
505 DEBUG(
dbgs() <<
"AnalyzeBranch didn't find conditional branch in Head.\n");
511 DEBUG(
dbgs() <<
"Unsupported branch type on Head\n");
518 assert(TBB == Tail &&
"Unexpected TBB");
525 DEBUG(
dbgs() <<
"CmpBB branch not analyzable.\n");
530 if (!TBB || CmpBBCond.
empty()) {
531 DEBUG(
dbgs() <<
"AnalyzeBranch didn't find conditional branch in CmpBB.\n");
536 if (!
parseCond(CmpBBCond, CmpBBTailCC)) {
537 DEBUG(
dbgs() <<
"Unsupported branch type on CmpBB\n");
549 CmpMI = findConvertibleCompare(CmpBB);
553 if (!canSpeculateInstrs(CmpBB, CmpMI)) {
561 DEBUG(
dbgs() <<
"Merging BB#" << CmpBB->getNumber() <<
" into BB#"
562 << Head->getNumber() <<
":\n" << *CmpBB);
567 Head->removeSuccessor(CmpBB,
true);
568 CmpBB->removeSuccessor(Tail,
true);
569 Head->transferSuccessorsAndUpdatePHIs(CmpBB);
570 DebugLoc TermDL = Head->getFirstTerminator()->getDebugLoc();
575 if (HeadCond[0].getImm() == -1) {
578 switch (HeadCond[1].getImm()) {
581 Opc = AArch64::SUBSWri;
585 Opc = AArch64::SUBSXri;
593 MRI->createVirtualRegister(
TII->getRegClass(MCID, 0, TRI, *MF));
595 BuildMI(*Head, Head->end(), TermDL, MCID)
601 MRI->constrainRegClass(HeadCond[2].
getReg(),
602 TII->getRegClass(MCID, 1, TRI, *MF));
605 Head->splice(Head->end(), CmpBB, CmpBB->begin(), CmpBB->end());
610 unsigned FirstOp = 1;
611 bool isZBranch =
false;
615 case AArch64::SUBSWri: Opc = AArch64::CCMPWi;
break;
616 case AArch64::SUBSWrr: Opc = AArch64::CCMPWr;
break;
617 case AArch64::SUBSXri: Opc = AArch64::CCMPXi;
break;
618 case AArch64::SUBSXrr: Opc = AArch64::CCMPXr;
break;
619 case AArch64::ADDSWri: Opc = AArch64::CCMNWi;
break;
620 case AArch64::ADDSWrr: Opc = AArch64::CCMNWr;
break;
621 case AArch64::ADDSXri: Opc = AArch64::CCMNXi;
break;
622 case AArch64::ADDSXrr: Opc = AArch64::CCMNXr;
break;
623 case AArch64::FCMPSrr: Opc = AArch64::FCCMPSrr; FirstOp = 0;
break;
624 case AArch64::FCMPDrr: Opc = AArch64::FCCMPDrr; FirstOp = 0;
break;
625 case AArch64::FCMPESrr: Opc = AArch64::FCCMPESrr; FirstOp = 0;
break;
626 case AArch64::FCMPEDrr: Opc = AArch64::FCCMPEDrr; FirstOp = 0;
break;
629 Opc = AArch64::CCMPWi;
635 Opc = AArch64::CCMPXi;
649 TII->getRegClass(MCID, 0, TRI, *MF));
652 TII->getRegClass(MCID, 1, TRI, *MF));
665 bool isNZ = CmpMI->
getOpcode() == AArch64::CBNZW ||
672 Head->updateTerminator();
675 CmpBB->eraseFromParent();
680 int SSACCmpConv::expectedCodeSizeDelta()
const {
685 if (HeadCond[0].getImm() == -1) {
686 switch (HeadCond[1].getImm()) {
741 return "AArch64 Conditional Compares";
748 void invalidateTraces();
756 "AArch64 CCMP Pass",
false,
false)
763 return new AArch64ConditionalCompares();
766 void AArch64ConditionalCompares::getAnalysisUsage(
AnalysisUsage &AU)
const {
777 void AArch64ConditionalCompares::updateDomTree(
784 assert(Node != HeadNode &&
"Cannot erase the head node");
785 assert(Node->
getIDom() == HeadNode &&
"CmpBB should be dominated by Head");
787 DomTree->changeImmediateDominator(Node->
getChildren().back(), HeadNode);
788 DomTree->eraseNode(RemovedMBB);
798 Loops->removeBlock(RemovedMBB);
802 void AArch64ConditionalCompares::invalidateTraces() {
803 Traces->invalidate(CmpConv.Head);
804 Traces->invalidate(CmpConv.CmpBB);
822 int CodeSizeDelta = CmpConv.expectedCodeSizeDelta();
823 DEBUG(
dbgs() <<
"Code size delta: " << CodeSizeDelta <<
'\n');
826 if (CodeSizeDelta < 0)
828 if (CodeSizeDelta > 0) {
829 DEBUG(
dbgs() <<
"Code size is increasing, give up on this one.\n");
841 unsigned DelayLimit = SchedModel.MispredictPenalty * 3 / 4;
846 unsigned CmpBBDepth =
848 DEBUG(
dbgs() <<
"Head depth: " << HeadDepth
849 <<
"\nCmpBB depth: " << CmpBBDepth <<
'\n');
850 if (CmpBBDepth > HeadDepth + DelayLimit) {
851 DEBUG(
dbgs() <<
"Branch delay would be larger than " << DelayLimit
859 DEBUG(
dbgs() <<
"Resources: " << ResDepth <<
'\n');
864 if (ResDepth > HeadDepth) {
865 DEBUG(
dbgs() <<
"Too many instructions to speculate.\n");
872 bool Changed =
false;
876 CmpConv.convert(RemovedBlocks);
878 updateDomTree(RemovedBlocks);
879 updateLoops(RemovedBlocks);
884 bool AArch64ConditionalCompares::runOnMachineFunction(
MachineFunction &MF) {
885 DEBUG(
dbgs() <<
"********** AArch64 Conditional Compares **********\n"
886 <<
"********** Function: " << MF.
getName() <<
'\n');
894 DomTree = &getAnalysis<MachineDominatorTree>();
895 Loops = getAnalysisIfAvailable<MachineLoopInfo>();
896 Traces = &getAnalysis<MachineTraceMetrics>();
900 bool Changed =
false;
901 CmpConv.runOnMachineFunction(MF);
909 if (tryConvert(I->getBlock()))
Pass interface - Implemented by all 'passes'.
unsigned succ_size() const
void push_back(const T &Elt)
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at application launch and destroyed by llvm_shutdown.
const std::vector< DomTreeNodeBase< NodeT > * > & getChildren() const
STATISTIC(NumFunctions,"Total number of functions")
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFunction yet, in which case this will return -1.
aarch64 AArch64 CCMP false
static CondCode getInvertedCondCode(CondCode Code)
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Describe properties that are true of each instruction in the target description file.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
INITIALIZE_PASS_BEGIN(AArch64ConditionalCompares,"aarch64-ccmp","AArch64 CCMP Pass", false, false) INITIALIZE_PASS_END(AArch64ConditionalCompares
A trace ensemble is a collection of traces selected using the same strategy, for example 'minimum resource height'.
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
FunctionPass * createAArch64ConditionalCompares()
static unsigned InstrCount
bool optForMinSize() const
Optimize this function for minimum size (-Oz).
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
static cl::opt< unsigned > BlockInstrLimit("aarch64-ccmp-limit", cl::init(30), cl::Hidden, cl::desc("Maximum number of instructions per speculated block."))
DomTreeNodeBase< NodeT > * getIDom() const
InstrCycles getInstrCycles(const MachineInstr &MI) const
Return the depth and height of MI.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e.g. it's a switch dispatch or isn't implemented for a target).
AnalysisUsage & addRequired()
static cl::opt< bool > Stress("aarch64-stress-ccmp", cl::Hidden, cl::desc("Turn all knobs to 11"))
#define INITIALIZE_PASS_DEPENDENCY(depName)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.
const HexagonInstrInfo * TII
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
Reg
All possible values of the reg field in the ModR/M byte.
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
static const char * getCondCodeName(CondCode Code)
PhysRegInfo analyzePhysReg(unsigned Reg, const TargetRegisterInfo *TRI)
analyzePhysReg - Analyze how the current instruction or bundle uses a physical register.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Select the trace through a block that has the fewest instructions.
bool livein_empty() const
Base class for the actual dominator tree node.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
static GCRegistry::Add< OcamlGC > B("ocaml","ocaml 3.10-compatible GC")
size_t size() const
size - Get the array size.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
TargetInstrInfo - Interface to description of machine instruction set.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
initializer< Ty > init(const Ty &Val)
unsigned const MachineRegisterInfo * MRI
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const MachineOperand & getOperand(unsigned i) const
Represent the analysis usage information of a pass.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,"Assign register bank of generic virtual registers", false, false) RegBankSelect
FunctionPass class - This class is used to implement most global optimizations.
succ_iterator succ_begin()
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
A trace represents a plausible sequence of executed basic blocks that passes through the current basi...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
static bool parseCond(ArrayRef< MachineOperand > Cond, AArch64CC::CondCode &CC)
MIOperands - Iterate over operands of a single instruction.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
static unsigned getNZCVToSatisfyCondCode(CondCode Code)
Given a condition code, return NZCV flags that would satisfy that condition.
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
Remove the branching code at the end of the specific MBB.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Representation of each machine instruction.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
void initializeAArch64ConditionalComparesPass(PassRegistry &)
iterator_range< df_iterator< T > > depth_first(const T &G)
unsigned getReg() const
getReg - Returns the register number.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
virtual const TargetInstrInfo * getInstrInfo() const
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
unsigned getResourceDepth(bool Bottom) const
Return the resource depth of the top/bottom of the trace center block.
const MachineInstrBuilder & addOperand(const MachineOperand &MO) const
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
StringRef - Represent a constant reference to a string, i.e.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Machine model for scheduling, bundling, and heuristics.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
unsigned pred_size() const
size_t getNumChildren() const