45 #define DEBUG_TYPE "aarch64-ccmp"
51 cl::desc(
"Maximum number of instructions per speculated block."));
// Statistic counters for this pass, one STATISTIC declaration per counter.
// Each declaration pairs a counter variable with the human-readable
// description string given as its second argument. Apart from NumConsidered,
// NumConverted and NumCompBranches, every counter's description names one
// reason a candidate ccmp was rejected.
// NOTE(review): the sites that increment these counters are not visible in
// this listing -- confirm each counter is still bumped somewhere.
57 STATISTIC(NumConsidered,
"Number of ccmps considered");
58 STATISTIC(NumPhiRejs,
"Number of ccmps rejected (PHI)");
59 STATISTIC(NumPhysRejs,
"Number of ccmps rejected (Physregs)");
60 STATISTIC(NumPhi2Rejs,
"Number of ccmps rejected (PHI2)");
61 STATISTIC(NumHeadBranchRejs,
"Number of ccmps rejected (Head branch)");
62 STATISTIC(NumCmpBranchRejs,
"Number of ccmps rejected (CmpBB branch)");
63 STATISTIC(NumCmpTermRejs,
"Number of ccmps rejected (CmpBB is cbz...)");
64 STATISTIC(NumImmRangeRejs,
"Number of ccmps rejected (Imm out of range)");
65 STATISTIC(NumLiveDstRejs,
"Number of ccmps rejected (Cmp dest live)");
66 STATISTIC(NumMultNZCVUses,
"Number of ccmps rejected (NZCV used)");
67 STATISTIC(NumUnknNZCVDefs,
"Number of ccmps rejected (NZCV def unknown)");
// Rejections caused by instructions that cannot be speculated.
69 STATISTIC(NumSpeculateRejs,
"Number of ccmps rejected (Can't speculate)");
// Successful conversions.
71 STATISTIC(NumConverted,
"Number of ccmp instructions created");
72 STATISTIC(NumCompBranches,
"Number of cbz/cbnz branches converted");
/// Walks the instructions in Tail and, per instruction, gathers a HeadReg and
/// a CmpBBReg from its operand pairs, then compares the two.
/// NOTE(review): the return statements are not visible in this listing;
/// presumably returns false when HeadReg and CmpBBReg differ -- confirm
/// against the full definition.
174 bool trivialTailPHIs();
/// For each instruction in Tail, scans its operand pairs from the end and
/// removes the pair whose basic-block operand is CmpBB.
177 void updateTailPHIs();
/// Tests whether DstReg is unused: special-cases AArch64::WZR / AArch64::XZR
/// and otherwise returns MRI->use_nodbg_empty(DstReg).
/// NOTE(review): the value returned for WZR/XZR is not visible here;
/// presumably true -- confirm.
180 bool isDeadDef(
unsigned DstReg);
/// Returns a signed code-size delta for the conversion; the definition
/// inspects HeadCond, and the visible caller takes one branch when the result
/// is negative and logs "Code size is increasing" when it is positive.
/// NOTE(review): the unit of the delta is not visible in this listing.
209 int expectedCodeSizeDelta()
const;
215 bool SSACCmpConv::trivialTailPHIs() {
216 for (
auto &
I : *Tail) {
219 unsigned HeadReg = 0, CmpBBReg = 0;
221 for (
unsigned oi = 1, oe =
I.getNumOperands(); oi != oe; oi += 2) {
223 unsigned Reg =
I.getOperand(oi).getReg();
225 assert((!HeadReg || HeadReg == Reg) &&
"Inconsistent PHI operands");
229 assert((!CmpBBReg || CmpBBReg == Reg) &&
"Inconsistent PHI operands");
233 if (HeadReg != CmpBBReg)
241 void SSACCmpConv::updateTailPHIs() {
242 for (
auto &
I : *Tail) {
246 for (
unsigned oi =
I.getNumOperands(); oi > 2; oi -= 2) {
248 if (
I.getOperand(oi - 1).getMBB() == CmpBB) {
249 I.RemoveOperand(oi - 1);
250 I.RemoveOperand(oi - 2);
258 bool SSACCmpConv::isDeadDef(
unsigned DstReg) {
260 if (DstReg == AArch64::WZR || DstReg == AArch64::XZR)
266 return MRI->use_nodbg_empty(DstReg);
274 if (Cond[0].getImm() != -1) {
275 assert(Cond.
size() == 1 &&
"Unknown Cond array format");
280 switch (Cond[1].getImm()) {
287 assert(Cond.
size() == 3 &&
"Unknown Cond array format");
292 assert(Cond.
size() == 3 &&
"Unknown Cond array format");
304 switch (I->getOpcode()) {
313 DEBUG(
dbgs() <<
"Flags not used by terminator: " << *I);
320 assert(!I->isTerminator() &&
"Spurious terminator");
321 switch (I->getOpcode()) {
323 case AArch64::SUBSWri:
324 case AArch64::SUBSXri:
326 case AArch64::ADDSWri:
327 case AArch64::ADDSXri:
330 if (I->getOperand(3).getImm() || !isUInt<5>(I->getOperand(2).getImm())) {
331 DEBUG(
dbgs() <<
"Immediate out of range for ccmp: " << *I);
336 case AArch64::SUBSWrr:
337 case AArch64::SUBSXrr:
338 case AArch64::ADDSWrr:
339 case AArch64::ADDSXrr:
340 if (isDeadDef(I->getOperand(0).getReg()))
342 DEBUG(
dbgs() <<
"Can't convert compare with live destination: " << *I);
345 case AArch64::FCMPSrr:
346 case AArch64::FCMPDrr:
347 case AArch64::FCMPESrr:
348 case AArch64::FCMPEDrr:
353 MIOperands::PhysRegInfo PRI =
360 DEBUG(
dbgs() <<
"Can't create ccmp with multiple uses: " << *I);
366 DEBUG(
dbgs() <<
"Not convertible compare: " << *I);
389 unsigned InstrCount = 0;
394 if (I.isDebugValue())
413 DEBUG(
dbgs() <<
"Won't speculate load: " << I);
418 bool DontMoveAcrossStore =
true;
419 if (!I.isSafeToMove(
nullptr, DontMoveAcrossStore)) {
420 DEBUG(
dbgs() <<
"Can't speculate: " << I);
426 DEBUG(
dbgs() <<
"Clobbers flags: " << I);
438 Tail = CmpBB =
nullptr;
440 if (Head->succ_size() != 2)
456 if (!CmpBB->isSuccessor(Tail))
460 DEBUG(
dbgs() <<
"\nTriangle: BB#" << Head->getNumber() <<
" -> BB#"
461 << CmpBB->getNumber() <<
" -> BB#" << Tail->getNumber() <<
'\n');
470 if (!trivialTailPHIs()) {
471 DEBUG(
dbgs() <<
"Can't handle phis in Tail.\n");
476 if (!Tail->livein_empty()) {
477 DEBUG(
dbgs() <<
"Can't handle live-in physregs in Tail.\n");
484 if (!CmpBB->empty() && CmpBB->front().isPHI()) {
485 DEBUG(
dbgs() <<
"Can't handle phis in CmpBB.\n");
490 if (!CmpBB->livein_empty()) {
491 DEBUG(
dbgs() <<
"Can't handle live-in physregs in CmpBB.\n");
500 DEBUG(
dbgs() <<
"Head branch not analyzable.\n");
507 if (!TBB || HeadCond.
empty()) {
508 DEBUG(
dbgs() <<
"AnalyzeBranch didn't find conditional branch in Head.\n");
514 DEBUG(
dbgs() <<
"Unsupported branch type on Head\n");
521 assert(TBB == Tail &&
"Unexpected TBB");
528 DEBUG(
dbgs() <<
"CmpBB branch not analyzable.\n");
533 if (!TBB || CmpBBCond.
empty()) {
534 DEBUG(
dbgs() <<
"AnalyzeBranch didn't find conditional branch in CmpBB.\n");
539 if (!
parseCond(CmpBBCond, CmpBBTailCC)) {
540 DEBUG(
dbgs() <<
"Unsupported branch type on CmpBB\n");
552 CmpMI = findConvertibleCompare(CmpBB);
556 if (!canSpeculateInstrs(CmpBB, CmpMI)) {
564 DEBUG(
dbgs() <<
"Merging BB#" << CmpBB->getNumber() <<
" into BB#"
565 << Head->getNumber() <<
":\n" << *CmpBB);
570 Head->removeSuccessor(CmpBB);
571 CmpBB->removeSuccessor(Tail);
572 Head->transferSuccessorsAndUpdatePHIs(CmpBB);
573 DebugLoc TermDL = Head->getFirstTerminator()->getDebugLoc();
578 if (HeadCond[0].getImm() == -1) {
581 switch (HeadCond[1].getImm()) {
584 Opc = AArch64::SUBSWri;
588 Opc = AArch64::SUBSXri;
596 MRI->createVirtualRegister(
TII->getRegClass(MCID, 0, TRI, *MF));
598 BuildMI(*Head, Head->end(), TermDL, MCID)
604 MRI->constrainRegClass(HeadCond[2].
getReg(),
605 TII->getRegClass(MCID, 1, TRI, *MF));
608 Head->splice(Head->end(), CmpBB, CmpBB->begin(), CmpBB->end());
613 unsigned FirstOp = 1;
614 bool isZBranch =
false;
618 case AArch64::SUBSWri: Opc = AArch64::CCMPWi;
break;
619 case AArch64::SUBSWrr: Opc = AArch64::CCMPWr;
break;
620 case AArch64::SUBSXri: Opc = AArch64::CCMPXi;
break;
621 case AArch64::SUBSXrr: Opc = AArch64::CCMPXr;
break;
622 case AArch64::ADDSWri: Opc = AArch64::CCMNWi;
break;
623 case AArch64::ADDSWrr: Opc = AArch64::CCMNWr;
break;
624 case AArch64::ADDSXri: Opc = AArch64::CCMNXi;
break;
625 case AArch64::ADDSXrr: Opc = AArch64::CCMNXr;
break;
626 case AArch64::FCMPSrr: Opc = AArch64::FCCMPSrr; FirstOp = 0;
break;
627 case AArch64::FCMPDrr: Opc = AArch64::FCCMPDrr; FirstOp = 0;
break;
628 case AArch64::FCMPESrr: Opc = AArch64::FCCMPESrr; FirstOp = 0;
break;
629 case AArch64::FCMPEDrr: Opc = AArch64::FCCMPEDrr; FirstOp = 0;
break;
632 Opc = AArch64::CCMPWi;
638 Opc = AArch64::CCMPXi;
652 TII->getRegClass(MCID, 0, TRI, *MF));
655 TII->getRegClass(MCID, 1, TRI, *MF));
668 bool isNZ = CmpMI->
getOpcode() == AArch64::CBNZW ||
675 Head->updateTerminator();
678 CmpBB->eraseFromParent();
683 int SSACCmpConv::expectedCodeSizeDelta()
const {
688 if (HeadCond[0].getImm() == -1) {
689 switch (HeadCond[1].getImm()) {
741 const char *getPassName()
const override {
742 return "AArch64 Conditional Compares";
749 void invalidateTraces();
761 "AArch64 CCMP Pass",
false,
false)
769 return new AArch64ConditionalCompares();
772 void AArch64ConditionalCompares::getAnalysisUsage(
AnalysisUsage &AU)
const {
784 void AArch64ConditionalCompares::updateDomTree(
789 for (
unsigned i = 0, e = Removed.
size(); i != e; ++i) {
791 assert(Node != HeadNode &&
"Cannot erase the head node");
792 assert(Node->
getIDom() == HeadNode &&
"CmpBB should be dominated by Head");
794 DomTree->changeImmediateDominator(Node->
getChildren().back(), HeadNode);
795 DomTree->eraseNode(Removed[i]);
804 for (
unsigned i = 0, e = Removed.
size(); i != e; ++i)
805 Loops->removeBlock(Removed[i]);
809 void AArch64ConditionalCompares::invalidateTraces() {
810 Traces->invalidate(CmpConv.Head);
811 Traces->invalidate(CmpConv.CmpBB);
829 int CodeSizeDelta = CmpConv.expectedCodeSizeDelta();
830 DEBUG(
dbgs() <<
"Code size delta: " << CodeSizeDelta <<
'\n');
833 if (CodeSizeDelta < 0)
835 if (CodeSizeDelta > 0) {
836 DEBUG(
dbgs() <<
"Code size is increasing, give up on this one.\n");
848 unsigned DelayLimit = SchedModel.MispredictPenalty * 3 / 4;
853 unsigned CmpBBDepth =
855 DEBUG(
dbgs() <<
"Head depth: " << HeadDepth
856 <<
"\nCmpBB depth: " << CmpBBDepth <<
'\n');
857 if (CmpBBDepth > HeadDepth + DelayLimit) {
858 DEBUG(
dbgs() <<
"Branch delay would be larger than " << DelayLimit
866 DEBUG(
dbgs() <<
"Resources: " << ResDepth <<
'\n');
871 if (ResDepth > HeadDepth) {
872 DEBUG(
dbgs() <<
"Too many instructions to speculate.\n");
879 bool Changed =
false;
883 CmpConv.convert(RemovedBlocks);
885 updateDomTree(RemovedBlocks);
886 updateLoops(RemovedBlocks);
891 bool AArch64ConditionalCompares::runOnMachineFunction(
MachineFunction &MF) {
892 DEBUG(
dbgs() <<
"********** AArch64 Conditional Compares **********\n"
893 <<
"********** Function: " << MF.
getName() <<
'\n');
898 DomTree = &getAnalysis<MachineDominatorTree>();
899 Loops = getAnalysisIfAvailable<MachineLoopInfo>();
900 Traces = &getAnalysis<MachineTraceMetrics>();
904 bool Changed =
false;
905 CmpConv.runOnMachineFunction(MF);
913 if (tryConvert(I->getBlock()))
Pass interface - Implemented by all 'passes'.
unsigned succ_size() const
void push_back(const T &Elt)
bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
STATISTIC(NumFunctions,"Total number of functions")
int getNumber() const
getNumber - MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a M...
aarch64 AArch64 CCMP false
static CondCode getInvertedCondCode(CondCode Code)
iterator getFirstTerminator()
getFirstTerminator - returns an iterator to the first terminator instruction of this basic block...
Describe properties that are true of each instruction in the target description file.
static bool isVirtualRegister(unsigned Reg)
isVirtualRegister - Return true if the specified register number is in the virtual register namespace...
INITIALIZE_PASS_BEGIN(AArch64ConditionalCompares,"aarch64-ccmp","AArch64 CCMP Pass", false, false) INITIALIZE_PASS_END(AArch64ConditionalCompares
A trace ensemble is a collection of traces selected using the same strategy, for example 'minimum res...
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
FunctionPass * createAArch64ConditionalCompares()
static cl::opt< unsigned > BlockInstrLimit("aarch64-ccmp-limit", cl::init(30), cl::Hidden, cl::desc("Maximum number of instructions per speculated block."))
AnalysisUsage & addRequired()
static cl::opt< bool > Stress("aarch64-stress-ccmp", cl::Hidden, cl::desc("Turn all knobs to 11"))
#define INITIALIZE_PASS_DEPENDENCY(depName)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
Reg
All possible values of the reg field in the ModR/M byte.
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
static const char * getCondCodeName(CondCode Code)
PhysRegInfo analyzePhysReg(unsigned Reg, const TargetRegisterInfo *TRI)
analyzePhysReg - Analyze how the current instruction or bundle uses a physical register.
const MachineInstrBuilder & addImm(int64_t Val) const
addImm - Add a new immediate operand.
Select the trace through a block that has the fewest instructions.
bool livein_empty() const
Base class for the actual dominator tree node.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
size_t size() const
size - Get the array size.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
TargetInstrInfo - Interface to description of machine instruction set.
bundle_iterator< MachineInstr, instr_iterator > iterator
initializer< Ty > init(const Ty &Val)
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
unsigned RemoveBranch(MachineBasicBlock &MBB) const override
const MachineOperand & getOperand(unsigned i) const
Represent the analysis usage information of a pass.
FunctionPass class - This class is used to implement most global optimizations.
const std::vector< DomTreeNodeBase< NodeT > * > & getChildren() const
MachineInstrBuilder BuildMI(MachineFunction &MF, DebugLoc DL, const MCInstrDesc &MCID)
BuildMI - Builder interface.
succ_iterator succ_begin()
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
A trace represents a plausible sequence of executed basic blocks that passes through the current basi...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
DomTreeNodeBase< NodeT > * getIDom() const
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
static bool parseCond(ArrayRef< MachineOperand > Cond, AArch64CC::CondCode &CC)
MIOperands - Iterate over operands of a single instruction.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
static unsigned getNZCVToSatisfyCondCode(CondCode Code)
Given a condition code, return NZCV flags that would satisfy that condition.
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
size_t getNumChildren() const
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
InstrCycles getInstrCycles(const MachineInstr *MI) const
Return the depth and height of MI.
Representation of each machine instruction.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
void initializeAArch64ConditionalComparesPass(PassRegistry &)
iterator_range< df_iterator< T > > depth_first(const T &G)
unsigned getReg() const
getReg - Returns the register number.
virtual const TargetInstrInfo * getInstrInfo() const
unsigned getResourceDepth(bool Bottom) const
Return the resource depth of the top/bottom of the trace center block.
const MachineInstrBuilder & addOperand(const MachineOperand &MO) const
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Machine model for scheduling, bundling, and heuristics.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
unsigned pred_size() const
bool modifiesRegister(unsigned Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr modifies (fully define or partially define) the specified register...
Function must be optimized for size first.