41#define DEBUG_TYPE "aarch64-ccmp"
47 cl::desc(
"Maximum number of instructions per speculated block."));
53STATISTIC(NumConsidered,
"Number of ccmps considered");
54STATISTIC(NumPhiRejs,
"Number of ccmps rejected (PHI)");
55STATISTIC(NumPhysRejs,
"Number of ccmps rejected (Physregs)");
56STATISTIC(NumPhi2Rejs,
"Number of ccmps rejected (PHI2)");
57STATISTIC(NumHeadBranchRejs,
"Number of ccmps rejected (Head branch)");
58STATISTIC(NumCmpBranchRejs,
"Number of ccmps rejected (CmpBB branch)");
59STATISTIC(NumCmpTermRejs,
"Number of ccmps rejected (CmpBB is cbz...)");
60STATISTIC(NumImmRangeRejs,
"Number of ccmps rejected (Imm out of range)");
61STATISTIC(NumLiveDstRejs,
"Number of ccmps rejected (Cmp dest live)");
62STATISTIC(NumMultNZCVUses,
"Number of ccmps rejected (NZCV used)");
63STATISTIC(NumUnknNZCVDefs,
"Number of ccmps rejected (NZCV def unknown)");
65STATISTIC(NumSpeculateRejs,
"Number of ccmps rejected (Can't speculate)");
67STATISTIC(NumConverted,
"Number of ccmp instructions created");
68STATISTIC(NumCompBranches,
"Number of cbz/cbnz branches converted");
171 bool trivialTailPHIs();
174 void updateTailPHIs();
177 bool isDeadDef(
unsigned DstReg);
200 bool canConvert(MachineBasicBlock *
MBB);
204 void convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks);
208 int expectedCodeSizeDelta()
const;
215 MI->getOpcode() == TargetOpcode::COPY) {
216 if (
MI->getOperand(1).getReg().isPhysical())
218 Reg =
MI->getOperand(1).getReg();
225bool SSACCmpConv::trivialTailPHIs() {
226 for (
auto &
I : *
Tail) {
229 unsigned HeadReg = 0, CmpBBReg = 0;
231 for (
unsigned oi = 1, oe =
I.getNumOperands(); oi != oe; oi += 2) {
232 MachineBasicBlock *
MBB =
I.getOperand(oi + 1).getMBB();
235 assert((!HeadReg || HeadReg ==
Reg) &&
"Inconsistent PHI operands");
239 assert((!CmpBBReg || CmpBBReg ==
Reg) &&
"Inconsistent PHI operands");
243 if (HeadReg != CmpBBReg)
251void SSACCmpConv::updateTailPHIs() {
252 for (
auto &
I : *
Tail) {
256 for (
unsigned oi =
I.getNumOperands(); oi > 2; oi -= 2) {
258 if (
I.getOperand(oi - 1).getMBB() == CmpBB) {
259 I.removeOperand(oi - 1);
260 I.removeOperand(oi - 2);
268bool SSACCmpConv::isDeadDef(
unsigned DstReg) {
270 if (DstReg == AArch64::WZR || DstReg == AArch64::XZR)
272 if (!Register::isVirtualRegister(DstReg))
285 assert(
Cond.size() == 1 &&
"Unknown Cond array format");
297 assert(
Cond.size() == 3 &&
"Unknown Cond array format");
302 assert(
Cond.size() == 3 &&
"Unknown Cond array format");
308MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *
MBB) {
313 if (!
I->readsRegister(AArch64::NZCV,
nullptr)) {
314 switch (
I->getOpcode()) {
330 assert(!
I->isTerminator() &&
"Spurious terminator");
331 switch (
I->getOpcode()) {
333 case AArch64::SUBSWri:
334 case AArch64::SUBSXri:
336 case AArch64::ADDSWri:
337 case AArch64::ADDSXri:
340 if (
I->getOperand(3).getImm() || !
isUInt<5>(
I->getOperand(2).getImm())) {
346 case AArch64::SUBSWrr:
347 case AArch64::SUBSXrr:
348 case AArch64::ADDSWrr:
349 case AArch64::ADDSXrr:
350 if (isDeadDef(
I->getOperand(0).getReg()))
352 LLVM_DEBUG(
dbgs() <<
"Can't convert compare with live destination: "
356 case AArch64::FCMPSrr:
357 case AArch64::FCMPDrr:
358 case AArch64::FCMPESrr:
359 case AArch64::FCMPEDrr:
391bool SSACCmpConv::canSpeculateInstrs(MachineBasicBlock *
MBB,
392 const MachineInstr *CmpMI) {
405 if (
I.isDebugInstr())
429 bool DontMoveAcrossStore =
true;
430 if (!
I.isSafeToMove(DontMoveAcrossStore)) {
436 if (&
I != CmpMI &&
I.modifiesRegister(AArch64::NZCV,
TRI)) {
447bool SSACCmpConv::canConvert(MachineBasicBlock *
MBB) {
449 Tail = CmpBB =
nullptr;
453 MachineBasicBlock *Succ0 = Head->
succ_begin()[0];
454 MachineBasicBlock *Succ1 = Head->
succ_begin()[1];
482 if (!trivialTailPHIs()) {
488 if (!
Tail->livein_empty()) {
503 LLVM_DEBUG(
dbgs() <<
"Can't handle live-in physregs in CmpBB.\n");
510 MachineBasicBlock *
TBB =
nullptr, *FBB =
nullptr;
521 dbgs() <<
"analyzeBranch didn't find conditional branch in Head.\n");
548 dbgs() <<
"analyzeBranch didn't find conditional branch in CmpBB.\n");
553 if (!
parseCond(CmpBBCond, CmpBBTailCC)) {
564 <<
", CmpBB->Tail on "
567 CmpMI = findConvertibleCompare(CmpBB);
571 if (!canSpeculateInstrs(CmpBB, CmpMI)) {
578void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
607 Head2Tail + Head2CmpBB * CmpBB2Tail);
626 if (HeadCond[0].
getImm() == -1) {
629 switch (HeadCond[1].
getImm()) {
632 Opc = AArch64::SUBSWri;
636 Opc = AArch64::SUBSXri;
641 const MCInstrDesc &MCID =
TII->get(
Opc);
646 .
addReg(DestReg, RegState::Define | RegState::Dead)
659 unsigned FirstOp = 1;
660 bool isZBranch =
false;
664 case AArch64::SUBSWri:
Opc = AArch64::CCMPWi;
break;
665 case AArch64::SUBSWrr:
Opc = AArch64::CCMPWr;
break;
666 case AArch64::SUBSXri:
Opc = AArch64::CCMPXi;
break;
667 case AArch64::SUBSXrr:
Opc = AArch64::CCMPXr;
break;
668 case AArch64::ADDSWri:
Opc = AArch64::CCMNWi;
break;
669 case AArch64::ADDSWrr:
Opc = AArch64::CCMNWr;
break;
670 case AArch64::ADDSXri:
Opc = AArch64::CCMNXi;
break;
671 case AArch64::ADDSXrr:
Opc = AArch64::CCMNXr;
break;
672 case AArch64::FCMPSrr:
Opc = AArch64::FCCMPSrr; FirstOp = 0;
break;
673 case AArch64::FCMPDrr:
Opc = AArch64::FCCMPDrr; FirstOp = 0;
break;
674 case AArch64::FCMPESrr:
Opc = AArch64::FCCMPESrr; FirstOp = 0;
break;
675 case AArch64::FCMPEDrr:
Opc = AArch64::FCCMPEDrr; FirstOp = 0;
break;
678 Opc = AArch64::CCMPWi;
684 Opc = AArch64::CCMPXi;
696 const MCInstrDesc &MCID =
TII->get(
Opc);
698 TII->getRegClass(MCID, 0));
701 TII->getRegClass(MCID, 1));
713 bool isNZ = CmpMI->
getOpcode() == AArch64::CBNZW ||
727int SSACCmpConv::expectedCodeSizeDelta()
const {
732 if (HeadCond[0].
getImm() == -1) {
733 switch (HeadCond[1].
getImm()) {
767class AArch64ConditionalCompares :
public MachineFunctionPass {
768 const MachineBranchProbabilityInfo *MBPI;
769 const TargetInstrInfo *
TII;
770 const TargetRegisterInfo *
TRI;
771 MCSchedModel SchedModel;
774 MachineRegisterInfo *MRI;
775 MachineDominatorTree *DomTree;
776 MachineLoopInfo *
Loops;
777 MachineTraceMetrics *Traces;
783 AArch64ConditionalCompares() : MachineFunctionPass(
ID) {}
784 void getAnalysisUsage(AnalysisUsage &AU)
const override;
785 bool runOnMachineFunction(MachineFunction &MF)
override;
786 StringRef getPassName()
const override {
787 return "AArch64 Conditional Compares";
791 bool tryConvert(MachineBasicBlock *);
794 void invalidateTraces();
799char AArch64ConditionalCompares::ID = 0;
802 "AArch64 CCMP Pass",
false,
false)
810 return new AArch64ConditionalCompares();
813void AArch64ConditionalCompares::getAnalysisUsage(
AnalysisUsage &AU)
const {
825void AArch64ConditionalCompares::updateDomTree(
830 for (MachineBasicBlock *RemovedMBB : Removed) {
832 assert(Node != HeadNode &&
"Cannot erase the head node");
833 assert(
Node->getIDom() == HeadNode &&
"CmpBB should be dominated by Head");
834 while (!
Node->isLeaf())
845 for (MachineBasicBlock *RemovedMBB : Removed)
846 Loops->removeBlock(RemovedMBB);
850void AArch64ConditionalCompares::invalidateTraces() {
858bool AArch64ConditionalCompares::shouldConvert() {
863 MinInstr = Traces->
getEnsemble(MachineTraceStrategy::TS_MinInstrCount);
870 int CodeSizeDelta = CmpConv.expectedCodeSizeDelta();
871 LLVM_DEBUG(
dbgs() <<
"Code size delta: " << CodeSizeDelta <<
'\n');
874 if (CodeSizeDelta < 0)
876 if (CodeSizeDelta > 0) {
877 LLVM_DEBUG(
dbgs() <<
"Code size is increasing, give up on this one.\n");
894 unsigned CmpBBDepth =
897 <<
"\nCmpBB depth: " << CmpBBDepth <<
'\n');
898 if (CmpBBDepth > HeadDepth + DelayLimit) {
899 LLVM_DEBUG(
dbgs() <<
"Branch delay would be larger than " << DelayLimit
912 if (ResDepth > HeadDepth) {
919bool AArch64ConditionalCompares::tryConvert(MachineBasicBlock *
MBB) {
923 SmallVector<MachineBasicBlock *, 4> RemovedBlocks;
924 CmpConv.convert(RemovedBlocks);
926 updateDomTree(RemovedBlocks);
927 updateLoops(RemovedBlocks);
928 for (MachineBasicBlock *
MBB : RemovedBlocks)
934bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) {
935 LLVM_DEBUG(
dbgs() <<
"********** AArch64 Conditional Compares **********\n"
936 <<
"********** Function: " << MF.
getName() <<
'\n');
944 DomTree = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
945 Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
946 MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
947 Traces = &getAnalysis<MachineTraceMetricsWrapperPass>().getMTM();
952 CmpConv.runOnMachineFunction(MF, MBPI);
960 if (tryConvert(
I->getBlock()))
static Register lookThroughCopies(Register Reg, MachineRegisterInfo *MRI)
static cl::opt< bool > Stress("aarch64-stress-ccmp", cl::Hidden, cl::desc("Turn all knobs to 11"))
static cl::opt< unsigned > BlockInstrLimit("aarch64-ccmp-limit", cl::init(30), cl::Hidden, cl::desc("Maximum number of instructions per speculated block."))
static bool parseCond(ArrayRef< MachineOperand > Cond, AArch64CC::CondCode &CC)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static unsigned InstrCount
This file builds on the ADT/GraphTraits.h file to build a generic depth-first graph iterator.
static cl::opt< bool > Stress("stress-early-ifcvt", cl::Hidden, cl::desc("Turn all knobs to 11"))
static cl::opt< unsigned > BlockInstrLimit("early-ifcvt-limit", cl::init(30), cl::Hidden, cl::desc("Maximum number of instructions per speculated block."))
const HexagonInstrInfo * TII
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
void eraseNode(NodeT *BB)
eraseNode - Removes a node from the dominator tree.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
FunctionPass class - This class is used to implement most global optimizations.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
Remove the branching code at the end of the specific MBB.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
unsigned pred_size() const
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI void updateTerminator(MachineBasicBlock *PreviousLayoutSuccessor)
Update the terminator instructions in block to account for changes to block layout which may have bee...
LLVM_ABI void setSuccProbability(succ_iterator I, BranchProbability Prob)
Set successor probability of a given iterator.
succ_iterator succ_begin()
bool livein_empty() const
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned succ_size() const
LLVM_ABI void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
bool hasSuccessorProbabilities() const
Return true if any of the successors have probabilities attached to them.
LLVM_ABI void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
LLVM_ABI bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
BranchProbability getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const
Analysis pass which computes a MachineDominatorTree.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LLVM_ABI const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
LLVM_ABI MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
Trace getTrace(const MachineBasicBlock *MBB)
Get the trace that passes through MBB.
InstrCycles getInstrCycles(const MachineInstr &MI) const
Return the depth and height of MI.
unsigned getResourceDepth(bool Bottom) const
Return the resource depth of the top/bottom of the trace center block.
Ensemble * getEnsemble(MachineTraceStrategy)
Get the trace ensemble representing the given trace selection strategy.
void invalidate(const MachineBasicBlock *MBB)
Invalidate cached information about MBB.
Wrapper class representing virtual and physical registers.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static const char * getCondCodeName(CondCode Code)
static CondCode getInvertedCondCode(CondCode Code)
static unsigned getNZCVToSatisfyCondCode(CondCode Code)
Given a condition code, return NZCV flags that would satisfy that condition.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
initializer< Ty > init(const Ty &Val)
NodeAddr< NodeBase * > Node
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI PhysRegInfo AnalyzePhysRegInBundle(const MachineInstr &MI, Register Reg, const TargetRegisterInfo *TRI)
AnalyzePhysRegInBundle - Analyze how the current instruction or bundle uses a physical register.
FunctionPass * createAArch64ConditionalCompares()
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
DomTreeNodeBase< MachineBasicBlock > MachineDomTreeNode
ArrayRef(const T &OneElt) -> ArrayRef< T >
iterator_range< df_iterator< T > > depth_first(const T &G)
IterT prev_nodbg(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It, then continue decrementing it while it points to a debug instruction.
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
unsigned MispredictPenalty
unsigned Depth
Earliest issue cycle as determined by data dependencies and instruction latencies from the beginning ...
bool Read
Reg or one of its aliases is read.
bool Defined
Reg or one of its aliases is defined.
bool Clobbered
There is a regmask operand indicating Reg is clobbered.