42#define DEBUG_TYPE "aarch64-ccmp"
48 cl::desc(
"Maximum number of instructions per speculated block."));
54STATISTIC(NumConsidered,
"Number of ccmps considered");
55STATISTIC(NumPhiRejs,
"Number of ccmps rejected (PHI)");
56STATISTIC(NumPhysRejs,
"Number of ccmps rejected (Physregs)");
57STATISTIC(NumPhi2Rejs,
"Number of ccmps rejected (PHI2)");
58STATISTIC(NumHeadBranchRejs,
"Number of ccmps rejected (Head branch)");
59STATISTIC(NumCmpBranchRejs,
"Number of ccmps rejected (CmpBB branch)");
60STATISTIC(NumCmpTermRejs,
"Number of ccmps rejected (CmpBB is cbz...)");
61STATISTIC(NumImmRangeRejs,
"Number of ccmps rejected (Imm out of range)");
62STATISTIC(NumLiveDstRejs,
"Number of ccmps rejected (Cmp dest live)");
63STATISTIC(NumMultNZCVUses,
"Number of ccmps rejected (NZCV used)");
64STATISTIC(NumUnknNZCVDefs,
"Number of ccmps rejected (NZCV def unknown)");
66STATISTIC(NumSpeculateRejs,
"Number of ccmps rejected (Can't speculate)");
68STATISTIC(NumConverted,
"Number of ccmp instructions created");
69STATISTIC(NumCompBranches,
"Number of cbz/cbnz branches converted");
172 bool trivialTailPHIs();
175 void updateTailPHIs();
178 bool isDeadDef(
unsigned DstReg);
209 int expectedCodeSizeDelta()
const;
215bool SSACCmpConv::trivialTailPHIs() {
216 for (
auto &
I : *
Tail) {
219 unsigned HeadReg = 0, CmpBBReg = 0;
221 for (
unsigned oi = 1, oe =
I.getNumOperands(); oi != oe; oi += 2) {
225 assert((!HeadReg || HeadReg == Reg) &&
"Inconsistent PHI operands");
229 assert((!CmpBBReg || CmpBBReg == Reg) &&
"Inconsistent PHI operands");
233 if (HeadReg != CmpBBReg)
241void SSACCmpConv::updateTailPHIs() {
242 for (
auto &
I : *
Tail) {
246 for (
unsigned oi =
I.getNumOperands(); oi > 2; oi -= 2) {
248 if (
I.getOperand(oi - 1).getMBB() == CmpBB) {
249 I.removeOperand(oi - 1);
250 I.removeOperand(oi - 2);
258bool SSACCmpConv::isDeadDef(
unsigned DstReg) {
260 if (DstReg == AArch64::WZR || DstReg == AArch64::XZR)
266 return MRI->use_nodbg_empty(DstReg);
274 if (
Cond[0].getImm() != -1) {
275 assert(
Cond.size() == 1 &&
"Unknown Cond array format");
280 switch (
Cond[1].getImm()) {
287 assert(
Cond.size() == 3 &&
"Unknown Cond array format");
292 assert(
Cond.size() == 3 &&
"Unknown Cond array format");
303 if (!
I->readsRegister(AArch64::NZCV)) {
304 switch (
I->getOpcode()) {
320 assert(!
I->isTerminator() &&
"Spurious terminator");
321 switch (
I->getOpcode()) {
323 case AArch64::SUBSWri:
324 case AArch64::SUBSXri:
326 case AArch64::ADDSWri:
327 case AArch64::ADDSXri:
330 if (
I->getOperand(3).getImm() || !isUInt<5>(
I->getOperand(2).getImm())) {
336 case AArch64::SUBSWrr:
337 case AArch64::SUBSXrr:
338 case AArch64::ADDSWrr:
339 case AArch64::ADDSXrr:
340 if (isDeadDef(
I->getOperand(0).getReg()))
342 LLVM_DEBUG(
dbgs() <<
"Can't convert compare with live destination: "
346 case AArch64::FCMPSrr:
347 case AArch64::FCMPDrr:
348 case AArch64::FCMPESrr:
349 case AArch64::FCMPEDrr:
395 if (
I.isDebugInstr())
419 bool DontMoveAcrossStore =
true;
420 if (!
I.isSafeToMove(
nullptr, DontMoveAcrossStore)) {
426 if (&
I != CmpMI &&
I.modifiesRegister(AArch64::NZCV,
TRI)) {
439 Tail = CmpBB =
nullptr;
472 if (!trivialTailPHIs()) {
478 if (!
Tail->livein_empty()) {
493 LLVM_DEBUG(
dbgs() <<
"Can't handle live-in physregs in CmpBB.\n");
511 dbgs() <<
"analyzeBranch didn't find conditional branch in Head.\n");
538 dbgs() <<
"analyzeBranch didn't find conditional branch in CmpBB.\n");
543 if (!
parseCond(CmpBBCond, CmpBBTailCC)) {
554 <<
", CmpBB->Tail on "
557 CmpMI = findConvertibleCompare(CmpBB);
561 if (!canSpeculateInstrs(CmpBB, CmpMI)) {
597 Head2Tail + Head2CmpBB * CmpBB2Tail);
616 if (HeadCond[0].getImm() == -1) {
619 switch (HeadCond[1].getImm()) {
622 Opc = AArch64::SUBSWri;
626 Opc = AArch64::SUBSXri;
634 MRI->createVirtualRegister(
TII->getRegClass(MCID, 0,
TRI, *MF));
642 MRI->constrainRegClass(HeadCond[2].
getReg(),
643 TII->getRegClass(MCID, 1,
TRI, *MF));
651 unsigned FirstOp = 1;
652 bool isZBranch =
false;
656 case AArch64::SUBSWri: Opc = AArch64::CCMPWi;
break;
657 case AArch64::SUBSWrr: Opc = AArch64::CCMPWr;
break;
658 case AArch64::SUBSXri: Opc = AArch64::CCMPXi;
break;
659 case AArch64::SUBSXrr: Opc = AArch64::CCMPXr;
break;
660 case AArch64::ADDSWri: Opc = AArch64::CCMNWi;
break;
661 case AArch64::ADDSWrr: Opc = AArch64::CCMNWr;
break;
662 case AArch64::ADDSXri: Opc = AArch64::CCMNXi;
break;
663 case AArch64::ADDSXrr: Opc = AArch64::CCMNXr;
break;
664 case AArch64::FCMPSrr: Opc = AArch64::FCCMPSrr; FirstOp = 0;
break;
665 case AArch64::FCMPDrr: Opc = AArch64::FCCMPDrr; FirstOp = 0;
break;
666 case AArch64::FCMPESrr: Opc = AArch64::FCCMPESrr; FirstOp = 0;
break;
667 case AArch64::FCMPEDrr: Opc = AArch64::FCCMPEDrr; FirstOp = 0;
break;
670 Opc = AArch64::CCMPWi;
676 Opc = AArch64::CCMPXi;
690 TII->getRegClass(MCID, 0,
TRI, *MF));
693 TII->getRegClass(MCID, 1,
TRI, *MF));
705 bool isNZ = CmpMI->
getOpcode() == AArch64::CBNZW ||
720int SSACCmpConv::expectedCodeSizeDelta()
const {
725 if (HeadCond[0].getImm() == -1) {
726 switch (HeadCond[1].getImm()) {
782 return "AArch64 Conditional Compares";
789 void invalidateTraces();
794char AArch64ConditionalCompares::ID = 0;
797 "AArch64 CCMP Pass",
false,
false)
805 return new AArch64ConditionalCompares();
808void AArch64ConditionalCompares::getAnalysisUsage(
AnalysisUsage &AU)
const {
820void AArch64ConditionalCompares::updateDomTree(
827 assert(
Node != HeadNode &&
"Cannot erase the head node");
828 assert(
Node->getIDom() == HeadNode &&
"CmpBB should be dominated by Head");
829 while (
Node->getNumChildren())
830 DomTree->changeImmediateDominator(
Node->back(), HeadNode);
831 DomTree->eraseNode(RemovedMBB);
841 Loops->removeBlock(RemovedMBB);
845void AArch64ConditionalCompares::invalidateTraces() {
846 Traces->invalidate(CmpConv.Head);
847 Traces->invalidate(CmpConv.CmpBB);
853bool AArch64ConditionalCompares::shouldConvert() {
858 MinInstr = Traces->getEnsemble(MachineTraceStrategy::TS_MinInstrCount);
865 int CodeSizeDelta = CmpConv.expectedCodeSizeDelta();
866 LLVM_DEBUG(
dbgs() <<
"Code size delta: " << CodeSizeDelta <<
'\n');
869 if (CodeSizeDelta < 0)
871 if (CodeSizeDelta > 0) {
872 LLVM_DEBUG(
dbgs() <<
"Code size is increasing, give up on this one.\n");
884 unsigned DelayLimit = SchedModel.MispredictPenalty * 3 / 4;
888 Trace.getInstrCycles(*CmpConv.Head->getFirstTerminator()).Depth;
889 unsigned CmpBBDepth =
890 Trace.getInstrCycles(*CmpConv.CmpBB->getFirstTerminator()).Depth;
892 <<
"\nCmpBB depth: " << CmpBBDepth <<
'\n');
893 if (CmpBBDepth > HeadDepth + DelayLimit) {
894 LLVM_DEBUG(
dbgs() <<
"Branch delay would be larger than " << DelayLimit
901 unsigned ResDepth =
Trace.getResourceDepth(
true);
907 if (ResDepth > HeadDepth) {
915 bool Changed =
false;
919 CmpConv.convert(RemovedBlocks);
921 updateDomTree(RemovedBlocks);
922 updateLoops(RemovedBlocks);
927bool AArch64ConditionalCompares::runOnMachineFunction(
MachineFunction &MF) {
928 LLVM_DEBUG(
dbgs() <<
"********** AArch64 Conditional Compares **********\n"
929 <<
"********** Function: " << MF.
getName() <<
'\n');
937 DomTree = &getAnalysis<MachineDominatorTree>();
938 Loops = getAnalysisIfAvailable<MachineLoopInfo>();
939 MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
940 Traces = &getAnalysis<MachineTraceMetrics>();
944 bool Changed =
false;
945 CmpConv.runOnMachineFunction(MF, MBPI);
953 if (tryConvert(
I->getBlock()))
unsigned const MachineRegisterInfo * MRI
static cl::opt< bool > Stress("aarch64-stress-ccmp", cl::Hidden, cl::desc("Turn all knobs to 11"))
static cl::opt< unsigned > BlockInstrLimit("aarch64-ccmp-limit", cl::init(30), cl::Hidden, cl::desc("Maximum number of instructions per speculated block."))
static bool parseCond(ArrayRef< MachineOperand > Cond, AArch64CC::CondCode &CC)
SmallVector< MachineOperand, 4 > Cond
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static unsigned InstrCount
This file builds on the ADT/GraphTraits.h file to build a generic depth-first graph iterator.
const HexagonInstrInfo * TII
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallPtrSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Base class for the actual dominator tree node.
FunctionPass class - This class is used to implement most global optimizations.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
Remove the branching code at the end of the specific MBB.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
Describe properties that are true of each instruction in the target description file.
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
unsigned pred_size() const
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void updateTerminator(MachineBasicBlock *PreviousLayoutSuccessor)
Update the terminator instructions in block to account for changes to block layout which may have bee...
void setSuccProbability(succ_iterator I, BranchProbability Prob)
Set successor probability of a given iterator.
succ_iterator succ_begin()
bool livein_empty() const
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned succ_size() const
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
bool hasSuccessorProbabilities() const
Return true if any of the successors have probabilities attached to them.
void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
BranchProbability getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
A trace ensemble is a collection of traces selected using the same strategy, for example 'minimum res...
A trace represents a plausible sequence of executed basic blocks that passes through the current basi...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Wrapper class representing virtual and physical registers.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static const char * getCondCodeName(CondCode Code)
static CondCode getInvertedCondCode(CondCode Code)
static unsigned getNZCVToSatisfyCondCode(CondCode Code)
Given a condition code, return NZCV flags that would satisfy that condition.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
@ Define
Register definition.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
PhysRegInfo AnalyzePhysRegInBundle(const MachineInstr &MI, Register Reg, const TargetRegisterInfo *TRI)
AnalyzePhysRegInBundle - Analyze how the current instruction or bundle uses a physical register.
FunctionPass * createAArch64ConditionalCompares()
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
iterator_range< df_iterator< T > > depth_first(const T &G)
void initializeAArch64ConditionalComparesPass(PassRegistry &)
IterT prev_nodbg(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It, then continue decrementing it while it points to a debug instruction.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Machine model for scheduling, bundling, and heuristics.
Information about how a physical register Reg is used by a set of operands.
bool Read
Reg or one of its aliases is read.
bool Defined
Reg or one of its aliases is defined.
bool Clobbered
There is a regmask operand indicating Reg is clobbered.