36#define DEBUG_TYPE "arm-mve-vpt-opts"
40 cl::desc(
"Enable merging Loop End and Dec instructions."),
45 cl::desc(
"Enable setting lr as a predicate in tail predication regions."),
68 return "ARM MVE TailPred and VPT Optimisation Pass";
88char MVETPAndVPTOptimisations::ID = 0;
93 "ARM MVE TailPred and VPT Optimisations pass",
false,
102 while (
MI &&
MI->getOpcode() == TargetOpcode::COPY &&
103 MI->getOperand(1).getReg().isVirtual())
104 MI =
MRI->getVRegDef(
MI->getOperand(1).getReg());
116 if (!Header || !Latch) {
124 if (
T.getOpcode() == ARM::t2LoopEnd &&
T.getOperand(1).getMBB() == Header) {
128 if (
T.getOpcode() == ARM::t2LoopEndDec &&
129 T.getOperand(2).getMBB() == Header) {
149 if (LoopEnd->
getOpcode() == ARM::t2LoopEndDec)
154 if (!LoopDec || LoopDec->
getOpcode() != ARM::t2LoopDec) {
155 LLVM_DEBUG(
dbgs() <<
" didn't find LoopDec where we expected!\n");
163 if (!LoopPhi || LoopPhi->
getOpcode() != TargetOpcode::PHI ||
176 if (!LoopStart || (LoopStart->
getOpcode() != ARM::t2DoLoopStart &&
177 LoopStart->
getOpcode() != ARM::t2WhileLoopSetup &&
178 LoopStart->
getOpcode() != ARM::t2WhileLoopStartLR)) {
189 assert(
MI->getOpcode() == ARM::t2WhileLoopSetup &&
190 "Only expected a t2WhileLoopSetup in RevertWhileLoopStart!");
195 MIB.
add(
MI->getOperand(0));
196 MIB.
add(
MI->getOperand(1));
199 MIB.
addReg(ARM::NoRegister);
204 if (
I.getOpcode() == ARM::t2WhileLoopStart) {
207 MIB.
add(
MI->getOperand(1));
215 MI->eraseFromParent();
228bool MVETPAndVPTOptimisations::LowerWhileLoopStart(
MachineLoop *
ML) {
230 <<
ML->getHeader()->getName() <<
"\n");
236 if (LoopStart->
getOpcode() != ARM::t2WhileLoopSetup)
240 auto WLSIt =
find_if(
MRI->use_nodbg_instructions(LR), [](
auto &
MI) {
241 return MI.getOpcode() == ARM::t2WhileLoopStart;
251 BuildMI(*WLSIt->getParent(), *WLSIt, WLSIt->getDebugLoc(),
252 TII->get(ARM::t2WhileLoopStartLR), LR)
254 .
add(WLSIt->getOperand(1));
258 WLSIt->eraseFromParent();
273MachineInstr *MVETPAndVPTOptimisations::CheckForLRUseInPredecessors(
280 while (!Worklist.
empty()) {
320bool MVETPAndVPTOptimisations::MergeLoopEnd(
MachineLoop *
ML) {
334 auto *PreHeader =
ML->getLoopPreheader();
335 if (LoopStart->
getOpcode() == ARM::t2WhileLoopStartLR && PreHeader)
336 LoopStart = CheckForLRUseInPredecessors(PreHeader, LoopStart);
342 if (LoopStart->
getOpcode() == ARM::t2DoLoopStart)
366 while (!Worklist.
empty()) {
371 if (
MI.getOpcode() != TargetOpcode::COPY ||
372 !
MI.getOperand(0).getReg().isVirtual()) {
382 if (!CheckUsers(PhiReg, {LoopDec},
MRI) ||
383 !CheckUsers(DecReg, {LoopPhi, LoopEnd},
MRI) ||
384 !CheckUsers(StartReg, {LoopPhi},
MRI)) {
386 if (LoopStart->
getOpcode() == ARM::t2WhileLoopStartLR) {
395 MRI->constrainRegClass(StartReg, &ARM::GPRlrRegClass);
396 MRI->constrainRegClass(PhiReg, &ARM::GPRlrRegClass);
397 MRI->constrainRegClass(DecReg, &ARM::GPRlrRegClass);
421 TII->get(ARM::t2LoopEndDec), DecReg)
430 MI->eraseFromParent();
438bool MVETPAndVPTOptimisations::ConvertTailPredLoop(
MachineLoop *
ML,
441 <<
ML->getHeader()->getName() <<
"\n");
448 if (LoopDec != LoopEnd || (LoopStart->
getOpcode() != ARM::t2DoLoopStart &&
449 LoopStart->
getOpcode() != ARM::t2WhileLoopStartLR))
471 if (VCTP->getOpcode() != FirstVCTP->
getOpcode() ||
494 if (!Phi || Phi->
getOpcode() != TargetOpcode::PHI ||
518 unsigned NewOpc = LoopStart->
getOpcode() == ARM::t2DoLoopStart
519 ? ARM::t2DoLoopStartTP
520 : ARM::t2WhileLoopStartTP;
526 if (NewOpc == ARM::t2WhileLoopStartTP)
530 MRI->constrainRegClass(CountReg, &ARM::rGPRRegClass);
539 MI->getOperand(
Idx + 2).setReg(LR);
556 case ARM::MVE_VCMPf32:
557 case ARM::MVE_VCMPf16:
558 case ARM::MVE_VCMPf32r:
559 case ARM::MVE_VCMPf16r:
560 case ARM::MVE_VCMPi8r:
561 case ARM::MVE_VCMPi16r:
562 case ARM::MVE_VCMPi32r:
563 case ARM::MVE_VCMPu8r:
564 case ARM::MVE_VCMPu16r:
565 case ARM::MVE_VCMPu32r:
566 case ARM::MVE_VCMPs8r:
567 case ARM::MVE_VCMPs16r:
568 case ARM::MVE_VCMPs32r:
596 if (CondOP1.
isIdenticalTo(PrevOP1) && CondOP2.isIdenticalTo(PrevOP2))
603 CondOP2.isIdenticalTo(PrevOP1);
618 return RegClass && (RegClass->
getID() == ARM::VCCRRegClassID);
629MachineInstr &MVETPAndVPTOptimisations::ReplaceRegisterUseWithVPNOT(
641 User.setReg(NewResult);
642 User.setIsKill(
false);
656 assert(Iter->getOpcode() == ARM::MVE_VPNOT &&
"Not a VPNOT!");
658 "The VPNOT cannot be predicated");
666 bool MustMove =
false, HasUser =
false;
668 for (; Iter !=
MBB.
end(); ++Iter) {
670 Iter->findRegisterUseOperand(VPNOTOperand,
true)) {
672 VPNOTOperandKiller = MO;
675 if (Iter->findRegisterUseOperandIdx(Reg) != -1) {
680 if (Iter->findRegisterUseOperandIdx(VPNOTResult) == -1)
693 if (VPNOTOperandKiller)
719 while (Iter !=
End) {
720 Register VCCRValue, OppositeVCCRValue;
724 for (; Iter !=
End; ++Iter) {
729 Register Dst = Iter->getOperand(0).getReg();
733 if (VCCRValue && Iter->getOpcode() == ARM::MVE_VPNOT &&
734 Iter->findRegisterUseOperandIdx(VCCRValue) != -1) {
740 OppositeVCCRValue = Dst;
753 assert(VCCRValue && OppositeVCCRValue &&
754 "VCCRValue and OppositeVCCRValue shouldn't be empty if the loop "
755 "stopped before the end of the block!");
756 assert(VCCRValue != OppositeVCCRValue &&
757 "VCCRValue should not be equal to OppositeVCCRValue!");
760 Register LastVPNOTResult = OppositeVCCRValue;
763 for (; Iter !=
End; ++Iter) {
764 bool IsInteresting =
false;
766 if (
MachineOperand *MO = Iter->findRegisterUseOperand(VCCRValue)) {
767 IsInteresting =
true;
772 if (Iter->getOpcode() == ARM::MVE_VPNOT) {
775 MRI->replaceRegWith(Result, LastVPNOTResult);
780 <<
"Replacing all uses of '" <<
printReg(Result)
781 <<
"' with '" <<
printReg(LastVPNOTResult) <<
"'\n");
784 ReplaceRegisterUseWithVPNOT(
MBB, *Iter, *MO, LastVPNOTResult);
791 <<
"' with '" <<
printReg(LastVPNOTResult)
792 <<
"' in instr: " << *Iter);
798 Iter->findRegisterUseOperand(OppositeVCCRValue)) {
799 IsInteresting =
true;
802 if (LastVPNOTResult != OppositeVCCRValue) {
804 <<
printReg(OppositeVCCRValue) <<
"' with '"
805 <<
printReg(LastVPNOTResult) <<
" for instr: ";
807 MO->setReg(LastVPNOTResult);
811 MO->setIsKill(
false);
816 if (Iter->getOpcode() == ARM::MVE_VPNOT &&
818 Register VPNOTOperand = Iter->getOperand(1).getReg();
819 if (VPNOTOperand == LastVPNOTResult ||
820 VPNOTOperand == OppositeVCCRValue) {
821 IsInteresting =
true;
824 LastVPNOTResult = Iter->getOperand(0).getReg();
836 DeadInstruction->eraseFromParent();
862 PrevVCMPResultKiller = MO;
891 .
addReg(PrevVCMPResultReg);
899 if (PrevVCMPResultKiller)
906 PrevVCMPResultKiller =
nullptr;
910 DeadInstruction->eraseFromParent();
912 return !DeadInstructions.empty();
922 unsigned LastVPTImm = 0;
937 if (!Copy ||
Copy->getOpcode() != TargetOpcode::COPY ||
938 !
Copy->getOperand(1).getReg().isVirtual() ||
939 MRI->getRegClass(
Copy->getOperand(1).getReg()) == &ARM::VCCRRegClass) {
946 auto getImm = [&](
Register GPR) ->
unsigned {
948 if (Def && (
Def->getOpcode() == ARM::t2MOVi ||
949 Def->getOpcode() == ARM::t2MOVi16))
950 return Def->getOperand(1).getImm();
953 unsigned Imm = getImm(GPR);
959 unsigned NotImm = ~Imm & 0xffff;
960 if (LastVPTReg != 0 && LastVPTReg != VPR && LastVPTImm == Imm) {
962 if (
MRI->use_empty(VPR)) {
963 DeadInstructions.
insert(Copy);
964 if (
MRI->hasOneUse(GPR))
965 DeadInstructions.
insert(
MRI->getVRegDef(GPR));
968 }
else if (LastVPTReg != 0 && LastVPTImm == NotImm) {
971 Register NewVPR =
MRI->createVirtualRegister(&ARM::VCCRRegClass);
973 TII->get(ARM::MVE_VPNOT), NewVPR)
979 if (
MRI->use_empty(VPR)) {
980 DeadInstructions.
insert(Copy);
981 if (
MRI->hasOneUse(GPR))
982 DeadInstructions.
insert(
MRI->getVRegDef(GPR));
984 LLVM_DEBUG(
dbgs() <<
"Adding VPNot: " << *VPNot <<
" to replace use at "
994 DI->eraseFromParent();
996 return !DeadInstructions.empty();
1005 bool HasVCTP =
false;
1014 if (!HasVCTP ||
MI.getOpcode() != ARM::MVE_VPSEL)
1019 .
add(
MI.getOperand(0))
1020 .
add(
MI.getOperand(1))
1021 .
add(
MI.getOperand(1))
1023 .
add(
MI.getOperand(4))
1024 .
add(
MI.getOperand(5))
1025 .
add(
MI.getOperand(2));
1034 DeadInstruction->eraseFromParent();
1036 return !DeadInstructions.empty();
1042 bool Changed =
false;
1044 if (
MI.getOpcode() != ARM::t2DoLoopStart)
1054bool MVETPAndVPTOptimisations::runOnMachineFunction(
MachineFunction &Fn) {
1057 if (!STI.
isThumb2() || !STI.hasLOB())
1065 LLVM_DEBUG(
dbgs() <<
"********** ARM MVE VPT Optimisations **********\n"
1066 <<
"********** Function: " << Fn.
getName() <<
'\n');
1083 LLVM_DEBUG(
dbgs() <<
"**************************************\n");
1089 return new MVETPAndVPTOptimisations();
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock MachineBasicBlock::iterator MBBI
SmallVector< MachineOperand, 4 > Cond
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
const HexagonInstrInfo * TII
ARM MVE TailPred and VPT Optimisations static false MachineInstr * LookThroughCOPY(MachineInstr *MI, MachineRegisterInfo *MRI)
static void RevertWhileLoopSetup(MachineInstr *MI, const TargetInstrInfo *TII)
static cl::opt< bool > SetLRPredicate("arm-set-lr-predicate", cl::Hidden, cl::desc("Enable setting lr as a predicate in tail predication regions."), cl::init(true))
static bool findLoopComponents(MachineLoop *ML, MachineRegisterInfo *MRI, MachineInstr *&LoopStart, MachineInstr *&LoopPhi, MachineInstr *&LoopDec, MachineInstr *&LoopEnd)
static bool MoveVPNOTBeforeFirstUser(MachineBasicBlock &MBB, MachineBasicBlock::iterator Iter, Register Reg)
static cl::opt< bool > MergeEndDec("arm-enable-merge-loopenddec", cl::Hidden, cl::desc("Enable merging Loop End and Dec instructions."), cl::init(true))
static ARMCC::CondCodes GetCondCode(MachineInstr &Instr)
static bool IsVPNOTEquivalent(MachineInstr &Cond, MachineInstr &Prev)
static bool IsInvalidTPInstruction(MachineInstr &MI)
static bool IsVCMP(unsigned Opcode)
ARM MVE TailPred and VPT Optimisations pass
static bool IsWritingToVCCR(MachineInstr &Instr)
static bool CanHaveSwappedOperands(unsigned Opcode)
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
const ARMBaseInstrInfo * getInstrInfo() const override
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
FunctionPass class - This class is used to implement most global optimizations.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
iterator_range< iterator > terminators()
iterator_range< pred_iterator > predecessors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
unsigned getNumOperands() const
Returns the total number of operands.
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
MachineOperand * findRegisterUseOperand(Register Reg, bool isKill=false, const TargetRegisterInfo *TRI=nullptr)
Wrapper for findRegisterUseOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
LoopInfoBase< MachineBasicBlock, MachineLoop > & getBase()
MachineOperand class - Representation of each machine instruction operand.
MachineBasicBlock * getMBB() const
void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
void setRegAllocationHint(Register VReg, unsigned Type, Register PrefReg)
setRegAllocationHint - Specify a register allocation hint for the specified virtual register.
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Wrapper class representing virtual and physical registers.
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
TargetInstrInfo - Interface to description of machine instruction set.
unsigned getID() const
Return the register class ID number.
Target - Wrapper for Target specific information.
A Use represents the edge between a Value definition and its users.
self_iterator getIterator()
static CondCodes getOppositeCondition(CondCodes CC)
static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC)
getSwappedCondition - assume the flags are set by MI(a,b), return the condition code if we modify the...
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ Define
Register definition.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
int findFirstVPTPredOperandIdx(const MachineInstr &MI)
FunctionPass * createMVETPAndVPTOptimisationsPass()
createMVETPAndVPTOptimisationsPass
ARMVCC::VPTCodes getVPTInstrPredicate(const MachineInstr &MI, Register &PredReg)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static bool isVCTP(const MachineInstr *MI)
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
static bool isLoopStart(const MachineInstr &MI)
void RevertWhileLoopStartLR(MachineInstr *MI, const TargetInstrInfo *TII, unsigned BrOpc=ARM::t2Bcc, bool UseCmp=false)
void RevertLoopEnd(MachineInstr *MI, const TargetInstrInfo *TII, unsigned BrOpc=ARM::t2Bcc, bool SkipCmp=false)
void RevertLoopDec(MachineInstr *MI, const TargetInstrInfo *TII, bool SetFlags=false)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
static unsigned VCMPOpcodeToVPT(unsigned Opcode)
void RevertDoLoopStart(MachineInstr *MI, const TargetInstrInfo *TII)
void addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB)
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.