35#define DEBUG_TYPE "arm-mve-vpt-opts"
39 cl::desc(
"Enable merging Loop End and Dec instructions."),
44 cl::desc(
"Enable setting lr as a predicate in tail predication regions."),
67 return "ARM MVE TailPred and VPT Optimisation Pass";
87char MVETPAndVPTOptimisations::ID = 0;
92 "ARM MVE TailPred and VPT Optimisations pass",
false,
101 while (
MI &&
MI->getOpcode() == TargetOpcode::COPY &&
102 MI->getOperand(1).getReg().isVirtual())
103 MI =
MRI->getVRegDef(
MI->getOperand(1).getReg());
115 if (!Header || !Latch) {
123 if (
T.getOpcode() == ARM::t2LoopEnd &&
T.getOperand(1).getMBB() == Header) {
127 if (
T.getOpcode() == ARM::t2LoopEndDec &&
128 T.getOperand(2).getMBB() == Header) {
148 if (LoopEnd->
getOpcode() == ARM::t2LoopEndDec)
153 if (!LoopDec || LoopDec->
getOpcode() != ARM::t2LoopDec) {
154 LLVM_DEBUG(
dbgs() <<
" didn't find LoopDec where we expected!\n");
162 if (!LoopPhi || LoopPhi->
getOpcode() != TargetOpcode::PHI ||
175 if (!LoopStart || (LoopStart->
getOpcode() != ARM::t2DoLoopStart &&
176 LoopStart->
getOpcode() != ARM::t2WhileLoopSetup &&
177 LoopStart->
getOpcode() != ARM::t2WhileLoopStartLR)) {
188 assert(
MI->getOpcode() == ARM::t2WhileLoopSetup &&
189 "Only expected a t2WhileLoopSetup in RevertWhileLoopStart!");
194 MIB.
add(
MI->getOperand(0));
195 MIB.
add(
MI->getOperand(1));
198 MIB.
addReg(ARM::NoRegister);
203 if (
I.getOpcode() == ARM::t2WhileLoopStart) {
206 MIB.
add(
MI->getOperand(1));
214 MI->eraseFromParent();
227bool MVETPAndVPTOptimisations::LowerWhileLoopStart(
MachineLoop *
ML) {
229 <<
ML->getHeader()->getName() <<
"\n");
235 if (LoopStart->
getOpcode() != ARM::t2WhileLoopSetup)
239 auto WLSIt =
find_if(
MRI->use_nodbg_instructions(LR), [](
auto &
MI) {
240 return MI.getOpcode() == ARM::t2WhileLoopStart;
250 BuildMI(*WLSIt->getParent(), *WLSIt, WLSIt->getDebugLoc(),
251 TII->get(ARM::t2WhileLoopStartLR), LR)
253 .
add(WLSIt->getOperand(1));
257 WLSIt->eraseFromParent();
272MachineInstr *MVETPAndVPTOptimisations::CheckForLRUseInPredecessors(
279 while (!Worklist.
empty()) {
319bool MVETPAndVPTOptimisations::MergeLoopEnd(
MachineLoop *
ML) {
333 auto *PreHeader =
ML->getLoopPreheader();
334 if (LoopStart->
getOpcode() == ARM::t2WhileLoopStartLR && PreHeader)
335 LoopStart = CheckForLRUseInPredecessors(PreHeader, LoopStart);
341 if (LoopStart->
getOpcode() == ARM::t2DoLoopStart)
365 while (!Worklist.
empty()) {
370 if (
MI.getOpcode() != TargetOpcode::COPY ||
371 !
MI.getOperand(0).getReg().isVirtual()) {
381 if (!CheckUsers(PhiReg, {LoopDec},
MRI) ||
382 !CheckUsers(DecReg, {LoopPhi, LoopEnd},
MRI) ||
383 !CheckUsers(StartReg, {LoopPhi},
MRI)) {
385 if (LoopStart->
getOpcode() == ARM::t2WhileLoopStartLR) {
394 MRI->constrainRegClass(StartReg, &ARM::GPRlrRegClass);
395 MRI->constrainRegClass(PhiReg, &ARM::GPRlrRegClass);
396 MRI->constrainRegClass(DecReg, &ARM::GPRlrRegClass);
420 TII->get(ARM::t2LoopEndDec), DecReg)
429 MI->eraseFromParent();
437bool MVETPAndVPTOptimisations::ConvertTailPredLoop(
MachineLoop *
ML,
440 <<
ML->getHeader()->getName() <<
"\n");
447 if (LoopDec != LoopEnd || (LoopStart->
getOpcode() != ARM::t2DoLoopStart &&
448 LoopStart->
getOpcode() != ARM::t2WhileLoopStartLR))
470 if (VCTP->getOpcode() != FirstVCTP->
getOpcode() ||
493 if (!Phi ||
Phi->getOpcode() != TargetOpcode::PHI ||
494 Phi->getNumOperands() != 5 ||
495 (
Phi->getOperand(2).getMBB() !=
ML->getLoopLatch() &&
496 Phi->getOperand(4).getMBB() !=
ML->getLoopLatch())) {
500 CountReg =
Phi->getOperand(2).getMBB() ==
ML->getLoopLatch()
501 ?
Phi->getOperand(3).getReg()
502 :
Phi->getOperand(1).getReg();
517 unsigned NewOpc = LoopStart->
getOpcode() == ARM::t2DoLoopStart
518 ? ARM::t2DoLoopStartTP
519 : ARM::t2WhileLoopStartTP;
525 if (NewOpc == ARM::t2WhileLoopStartTP)
529 MRI->constrainRegClass(CountReg, &ARM::rGPRRegClass);
538 MI->getOperand(
Idx + 2).setReg(LR);
555 case ARM::MVE_VCMPf32:
556 case ARM::MVE_VCMPf16:
557 case ARM::MVE_VCMPf32r:
558 case ARM::MVE_VCMPf16r:
559 case ARM::MVE_VCMPi8r:
560 case ARM::MVE_VCMPi16r:
561 case ARM::MVE_VCMPi32r:
562 case ARM::MVE_VCMPu8r:
563 case ARM::MVE_VCMPu16r:
564 case ARM::MVE_VCMPu32r:
565 case ARM::MVE_VCMPs8r:
566 case ARM::MVE_VCMPs16r:
567 case ARM::MVE_VCMPs32r:
574 assert(
IsVCMP(Instr.getOpcode()) &&
"Inst must be a VCMP");
595 if (CondOP1.
isIdenticalTo(PrevOP1) && CondOP2.isIdenticalTo(PrevOP2))
602 CondOP2.isIdenticalTo(PrevOP1);
607 if (Instr.getNumOperands() == 0)
617 return RegClass && (RegClass->
getID() == ARM::VCCRRegClassID);
628MachineInstr &MVETPAndVPTOptimisations::ReplaceRegisterUseWithVPNOT(
640 User.setReg(NewResult);
641 User.setIsKill(
false);
655 assert(Iter->getOpcode() == ARM::MVE_VPNOT &&
"Not a VPNOT!");
657 "The VPNOT cannot be predicated");
665 bool MustMove =
false, HasUser =
false;
667 for (; Iter !=
MBB.
end(); ++Iter) {
669 Iter->findRegisterUseOperand(VPNOTOperand,
nullptr,
672 VPNOTOperandKiller = MO;
675 if (Iter->findRegisterUseOperandIdx(Reg,
nullptr) != -1) {
680 if (Iter->findRegisterUseOperandIdx(VPNOTResult,
nullptr) == -1)
693 if (VPNOTOperandKiller)
719 while (Iter !=
End) {
720 Register VCCRValue, OppositeVCCRValue;
724 for (; Iter !=
End; ++Iter) {
729 Register Dst = Iter->getOperand(0).getReg();
733 if (VCCRValue && Iter->getOpcode() == ARM::MVE_VPNOT &&
734 Iter->findRegisterUseOperandIdx(VCCRValue,
nullptr) != -1) {
740 OppositeVCCRValue = Dst;
753 assert(VCCRValue && OppositeVCCRValue &&
754 "VCCRValue and OppositeVCCRValue shouldn't be empty if the loop "
755 "stopped before the end of the block!");
756 assert(VCCRValue != OppositeVCCRValue &&
757 "VCCRValue should not be equal to OppositeVCCRValue!");
760 Register LastVPNOTResult = OppositeVCCRValue;
763 for (; Iter !=
End; ++Iter) {
764 bool IsInteresting =
false;
767 Iter->findRegisterUseOperand(VCCRValue,
nullptr)) {
768 IsInteresting =
true;
773 if (Iter->getOpcode() == ARM::MVE_VPNOT) {
776 MRI->replaceRegWith(Result, LastVPNOTResult);
781 <<
"Replacing all uses of '" <<
printReg(Result)
782 <<
"' with '" <<
printReg(LastVPNOTResult) <<
"'\n");
785 ReplaceRegisterUseWithVPNOT(
MBB, *Iter, *MO, LastVPNOTResult);
792 <<
"' with '" <<
printReg(LastVPNOTResult)
793 <<
"' in instr: " << *Iter);
799 OppositeVCCRValue,
nullptr)) {
800 IsInteresting =
true;
803 if (LastVPNOTResult != OppositeVCCRValue) {
805 <<
printReg(OppositeVCCRValue) <<
"' with '"
806 <<
printReg(LastVPNOTResult) <<
" for instr: ";
808 MO->setReg(LastVPNOTResult);
812 MO->setIsKill(
false);
817 if (Iter->getOpcode() == ARM::MVE_VPNOT &&
819 Register VPNOTOperand = Iter->getOperand(1).getReg();
820 if (VPNOTOperand == LastVPNOTResult ||
821 VPNOTOperand == OppositeVCCRValue) {
822 IsInteresting =
true;
825 LastVPNOTResult = Iter->getOperand(0).getReg();
837 DeadInstruction->eraseFromParent();
864 PrevVCMPResultKiller = MO;
893 .
addReg(PrevVCMPResultReg);
901 if (PrevVCMPResultKiller)
908 PrevVCMPResultKiller =
nullptr;
912 DeadInstruction->eraseFromParent();
914 return !DeadInstructions.empty();
924 unsigned LastVPTImm = 0;
939 if (!Copy ||
Copy->getOpcode() != TargetOpcode::COPY ||
940 !
Copy->getOperand(1).getReg().isVirtual() ||
941 MRI->getRegClass(
Copy->getOperand(1).getReg()) == &ARM::VCCRRegClass) {
948 auto getImm = [&](
Register GPR) ->
unsigned {
950 if (Def && (
Def->getOpcode() == ARM::t2MOVi ||
951 Def->getOpcode() == ARM::t2MOVi16))
952 return Def->getOperand(1).getImm();
955 unsigned Imm = getImm(GPR);
961 unsigned NotImm = ~Imm & 0xffff;
962 if (LastVPTReg != 0 && LastVPTReg != VPR && LastVPTImm == Imm) {
963 MRI->clearKillFlags(LastVPTReg);
964 Instr.getOperand(PIdx + 1).setReg(LastVPTReg);
965 if (
MRI->use_empty(VPR)) {
966 DeadInstructions.
insert(Copy);
967 if (
MRI->hasOneUse(GPR))
968 DeadInstructions.
insert(
MRI->getVRegDef(GPR));
972 }
else if (LastVPTReg != 0 && LastVPTImm == NotImm) {
975 Register NewVPR =
MRI->createVirtualRegister(&ARM::VCCRRegClass);
977 TII->get(ARM::MVE_VPNOT), NewVPR)
982 Instr.getOperand(PIdx + 1).setReg(NewVPR);
983 if (
MRI->use_empty(VPR)) {
984 DeadInstructions.
insert(Copy);
985 if (
MRI->hasOneUse(GPR))
986 DeadInstructions.
insert(
MRI->getVRegDef(GPR));
988 LLVM_DEBUG(
dbgs() <<
"Adding VPNot: " << *VPNot <<
" to replace use at "
998 DI->eraseFromParent();
1000 return !DeadInstructions.empty();
1009 bool HasVCTP =
false;
1018 if (!HasVCTP ||
MI.getOpcode() != ARM::MVE_VPSEL)
1023 .
add(
MI.getOperand(0))
1024 .
add(
MI.getOperand(1))
1025 .
add(
MI.getOperand(1))
1027 .
add(
MI.getOperand(4))
1028 .
add(
MI.getOperand(5))
1029 .
add(
MI.getOperand(2));
1038 DeadInstruction->eraseFromParent();
1040 return !DeadInstructions.empty();
1046 bool Changed =
false;
1048 if (
MI.getOpcode() != ARM::t2DoLoopStart)
1058bool MVETPAndVPTOptimisations::runOnMachineFunction(
MachineFunction &Fn) {
1061 if (!STI.
isThumb2() || !STI.hasLOB())
1066 MachineLoopInfo *MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
1068 &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
1070 LLVM_DEBUG(
dbgs() <<
"********** ARM MVE VPT Optimisations **********\n"
1071 <<
"********** Function: " << Fn.
getName() <<
'\n');
1088 LLVM_DEBUG(
dbgs() <<
"**************************************\n");
1094 return new MVETPAndVPTOptimisations();
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock MachineBasicBlock::iterator MBBI
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
const HexagonInstrInfo * TII
ARM MVE TailPred and VPT Optimisations static false MachineInstr * LookThroughCOPY(MachineInstr *MI, MachineRegisterInfo *MRI)
static void RevertWhileLoopSetup(MachineInstr *MI, const TargetInstrInfo *TII)
static cl::opt< bool > SetLRPredicate("arm-set-lr-predicate", cl::Hidden, cl::desc("Enable setting lr as a predicate in tail predication regions."), cl::init(true))
static bool findLoopComponents(MachineLoop *ML, MachineRegisterInfo *MRI, MachineInstr *&LoopStart, MachineInstr *&LoopPhi, MachineInstr *&LoopDec, MachineInstr *&LoopEnd)
static bool MoveVPNOTBeforeFirstUser(MachineBasicBlock &MBB, MachineBasicBlock::iterator Iter, Register Reg)
static cl::opt< bool > MergeEndDec("arm-enable-merge-loopenddec", cl::Hidden, cl::desc("Enable merging Loop End and Dec instructions."), cl::init(true))
static ARMCC::CondCodes GetCondCode(MachineInstr &Instr)
static bool IsVPNOTEquivalent(MachineInstr &Cond, MachineInstr &Prev)
static bool IsInvalidTPInstruction(MachineInstr &MI)
static bool IsVCMP(unsigned Opcode)
ARM MVE TailPred and VPT Optimisations pass
static bool IsWritingToVCCR(MachineInstr &Instr)
static bool CanHaveSwappedOperands(unsigned Opcode)
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
const ARMBaseInstrInfo * getInstrInfo() const override
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
FunctionPass class - This class is used to implement most global optimizations.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
SmallVector< LoopT *, 4 > getLoopsInPreorder() const
Return all of the loops in the function in preorder across the loop nests, with siblings in forward p...
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
iterator_range< iterator > terminators()
iterator_range< pred_iterator > predecessors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
unsigned getNumOperands() const
Retuns the total number of operands.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
MachineBasicBlock * getMBB() const
void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
void setRegAllocationHint(Register VReg, unsigned Type, Register PrefReg)
setRegAllocationHint - Specify a register allocation hint for the specified virtual register.
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Wrapper class representing virtual and physical registers.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
TargetInstrInfo - Interface to description of machine instruction set.
unsigned getID() const
Return the register class ID number.
Target - Wrapper for Target specific information.
A Use represents the edge between a Value definition and its users.
self_iterator getIterator()
static CondCodes getOppositeCondition(CondCodes CC)
static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC)
getSwappedCondition - assume the flags are set by MI(a,b), return the condition code if we modify the...
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ Define
Register definition.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
NodeAddr< InstrNode * > Instr
NodeAddr< PhiNode * > Phi
NodeAddr< DefNode * > Def
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
int findFirstVPTPredOperandIdx(const MachineInstr &MI)
FunctionPass * createMVETPAndVPTOptimisationsPass()
createMVETPAndVPTOptimisationsPass
ARMVCC::VPTCodes getVPTInstrPredicate(const MachineInstr &MI, Register &PredReg)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static bool isVCTP(const MachineInstr *MI)
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
static bool isLoopStart(const MachineInstr &MI)
void RevertWhileLoopStartLR(MachineInstr *MI, const TargetInstrInfo *TII, unsigned BrOpc=ARM::t2Bcc, bool UseCmp=false)
void RevertLoopEnd(MachineInstr *MI, const TargetInstrInfo *TII, unsigned BrOpc=ARM::t2Bcc, bool SkipCmp=false)
void RevertLoopDec(MachineInstr *MI, const TargetInstrInfo *TII, bool SetFlags=false)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
static unsigned VCMPOpcodeToVPT(unsigned Opcode)
void RevertDoLoopStart(MachineInstr *MI, const TargetInstrInfo *TII)
void addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB)
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.