36#define DEBUG_TYPE "arm-mve-vpt-opts"
40 cl::desc(
"Enable merging Loop End and Dec instructions."),
45 cl::desc(
"Enable setting lr as a predicate in tail predication regions."),
68 return "ARM MVE TailPred and VPT Optimisation Pass";
88char MVETPAndVPTOptimisations::ID = 0;
93 "ARM MVE TailPred and VPT Optimisations pass",
false,
102 while (
MI &&
MI->getOpcode() == TargetOpcode::COPY &&
103 MI->getOperand(1).getReg().isVirtual())
104 MI =
MRI->getVRegDef(
MI->getOperand(1).getReg());
116 if (!Header || !Latch) {
124 if (
T.getOpcode() == ARM::t2LoopEnd &&
T.getOperand(1).getMBB() == Header) {
128 if (
T.getOpcode() == ARM::t2LoopEndDec &&
129 T.getOperand(2).getMBB() == Header) {
149 if (LoopEnd->
getOpcode() == ARM::t2LoopEndDec)
154 if (!LoopDec || LoopDec->
getOpcode() != ARM::t2LoopDec) {
155 LLVM_DEBUG(
dbgs() <<
" didn't find LoopDec where we expected!\n");
163 if (!LoopPhi || LoopPhi->
getOpcode() != TargetOpcode::PHI ||
176 if (!LoopStart || (LoopStart->
getOpcode() != ARM::t2DoLoopStart &&
177 LoopStart->
getOpcode() != ARM::t2WhileLoopSetup &&
178 LoopStart->
getOpcode() != ARM::t2WhileLoopStartLR)) {
189 assert(
MI->getOpcode() == ARM::t2WhileLoopSetup &&
190 "Only expected a t2WhileLoopSetup in RevertWhileLoopStart!");
195 MIB.
add(
MI->getOperand(0));
196 MIB.
add(
MI->getOperand(1));
199 MIB.
addReg(ARM::NoRegister);
204 if (
I.getOpcode() == ARM::t2WhileLoopStart) {
207 MIB.
add(
MI->getOperand(1));
215 MI->eraseFromParent();
228bool MVETPAndVPTOptimisations::LowerWhileLoopStart(
MachineLoop *
ML) {
230 <<
ML->getHeader()->getName() <<
"\n");
236 if (LoopStart->
getOpcode() != ARM::t2WhileLoopSetup)
240 auto WLSIt =
find_if(
MRI->use_nodbg_instructions(LR), [](
auto &
MI) {
241 return MI.getOpcode() == ARM::t2WhileLoopStart;
251 BuildMI(*WLSIt->getParent(), *WLSIt, WLSIt->getDebugLoc(),
252 TII->get(ARM::t2WhileLoopStartLR), LR)
254 .
add(WLSIt->getOperand(1));
258 WLSIt->eraseFromParent();
273MachineInstr *MVETPAndVPTOptimisations::CheckForLRUseInPredecessors(
280 while (!Worklist.
empty()) {
320bool MVETPAndVPTOptimisations::MergeLoopEnd(
MachineLoop *
ML) {
334 auto *PreHeader =
ML->getLoopPreheader();
335 if (LoopStart->
getOpcode() == ARM::t2WhileLoopStartLR && PreHeader)
336 LoopStart = CheckForLRUseInPredecessors(PreHeader, LoopStart);
342 if (LoopStart->
getOpcode() == ARM::t2DoLoopStart)
366 while (!Worklist.
empty()) {
371 if (
MI.getOpcode() != TargetOpcode::COPY ||
372 !
MI.getOperand(0).getReg().isVirtual()) {
382 if (!CheckUsers(PhiReg, {LoopDec},
MRI) ||
383 !CheckUsers(DecReg, {LoopPhi, LoopEnd},
MRI) ||
384 !CheckUsers(StartReg, {LoopPhi},
MRI)) {
386 if (LoopStart->
getOpcode() == ARM::t2WhileLoopStartLR) {
395 MRI->constrainRegClass(StartReg, &ARM::GPRlrRegClass);
396 MRI->constrainRegClass(PhiReg, &ARM::GPRlrRegClass);
397 MRI->constrainRegClass(DecReg, &ARM::GPRlrRegClass);
421 TII->get(ARM::t2LoopEndDec), DecReg)
430 MI->eraseFromParent();
438bool MVETPAndVPTOptimisations::ConvertTailPredLoop(
MachineLoop *
ML,
441 <<
ML->getHeader()->getName() <<
"\n");
448 if (LoopDec != LoopEnd || (LoopStart->
getOpcode() != ARM::t2DoLoopStart &&
449 LoopStart->
getOpcode() != ARM::t2WhileLoopStartLR))
471 if (VCTP->getOpcode() != FirstVCTP->
getOpcode() ||
494 if (!Phi ||
Phi->getOpcode() != TargetOpcode::PHI ||
495 Phi->getNumOperands() != 5 ||
496 (
Phi->getOperand(2).getMBB() !=
ML->getLoopLatch() &&
497 Phi->getOperand(4).getMBB() !=
ML->getLoopLatch())) {
501 CountReg =
Phi->getOperand(2).getMBB() ==
ML->getLoopLatch()
502 ?
Phi->getOperand(3).getReg()
503 :
Phi->getOperand(1).getReg();
518 unsigned NewOpc = LoopStart->
getOpcode() == ARM::t2DoLoopStart
519 ? ARM::t2DoLoopStartTP
520 : ARM::t2WhileLoopStartTP;
526 if (NewOpc == ARM::t2WhileLoopStartTP)
530 MRI->constrainRegClass(CountReg, &ARM::rGPRRegClass);
539 MI->getOperand(
Idx + 2).setReg(LR);
556 case ARM::MVE_VCMPf32:
557 case ARM::MVE_VCMPf16:
558 case ARM::MVE_VCMPf32r:
559 case ARM::MVE_VCMPf16r:
560 case ARM::MVE_VCMPi8r:
561 case ARM::MVE_VCMPi16r:
562 case ARM::MVE_VCMPi32r:
563 case ARM::MVE_VCMPu8r:
564 case ARM::MVE_VCMPu16r:
565 case ARM::MVE_VCMPu32r:
566 case ARM::MVE_VCMPs8r:
567 case ARM::MVE_VCMPs16r:
568 case ARM::MVE_VCMPs32r:
575 assert(
IsVCMP(Instr.getOpcode()) &&
"Inst must be a VCMP");
596 if (CondOP1.
isIdenticalTo(PrevOP1) && CondOP2.isIdenticalTo(PrevOP2))
603 CondOP2.isIdenticalTo(PrevOP1);
608 if (Instr.getNumOperands() == 0)
618 return RegClass && (RegClass->
getID() == ARM::VCCRRegClassID);
629MachineInstr &MVETPAndVPTOptimisations::ReplaceRegisterUseWithVPNOT(
641 User.setReg(NewResult);
642 User.setIsKill(
false);
656 assert(Iter->getOpcode() == ARM::MVE_VPNOT &&
"Not a VPNOT!");
658 "The VPNOT cannot be predicated");
666 bool MustMove =
false, HasUser =
false;
668 for (; Iter !=
MBB.
end(); ++Iter) {
670 Iter->findRegisterUseOperand(VPNOTOperand,
nullptr,
673 VPNOTOperandKiller = MO;
676 if (Iter->findRegisterUseOperandIdx(Reg,
nullptr) != -1) {
681 if (Iter->findRegisterUseOperandIdx(VPNOTResult,
nullptr) == -1)
694 if (VPNOTOperandKiller)
720 while (Iter !=
End) {
721 Register VCCRValue, OppositeVCCRValue;
725 for (; Iter !=
End; ++Iter) {
730 Register Dst = Iter->getOperand(0).getReg();
734 if (VCCRValue && Iter->getOpcode() == ARM::MVE_VPNOT &&
735 Iter->findRegisterUseOperandIdx(VCCRValue,
nullptr) != -1) {
741 OppositeVCCRValue = Dst;
754 assert(VCCRValue && OppositeVCCRValue &&
755 "VCCRValue and OppositeVCCRValue shouldn't be empty if the loop "
756 "stopped before the end of the block!");
757 assert(VCCRValue != OppositeVCCRValue &&
758 "VCCRValue should not be equal to OppositeVCCRValue!");
761 Register LastVPNOTResult = OppositeVCCRValue;
764 for (; Iter !=
End; ++Iter) {
765 bool IsInteresting =
false;
768 Iter->findRegisterUseOperand(VCCRValue,
nullptr)) {
769 IsInteresting =
true;
774 if (Iter->getOpcode() == ARM::MVE_VPNOT) {
777 MRI->replaceRegWith(Result, LastVPNOTResult);
782 <<
"Replacing all uses of '" <<
printReg(Result)
783 <<
"' with '" <<
printReg(LastVPNOTResult) <<
"'\n");
786 ReplaceRegisterUseWithVPNOT(
MBB, *Iter, *MO, LastVPNOTResult);
793 <<
"' with '" <<
printReg(LastVPNOTResult)
794 <<
"' in instr: " << *Iter);
800 OppositeVCCRValue,
nullptr)) {
801 IsInteresting =
true;
804 if (LastVPNOTResult != OppositeVCCRValue) {
806 <<
printReg(OppositeVCCRValue) <<
"' with '"
807 <<
printReg(LastVPNOTResult) <<
" for instr: ";
809 MO->setReg(LastVPNOTResult);
813 MO->setIsKill(
false);
818 if (Iter->getOpcode() == ARM::MVE_VPNOT &&
820 Register VPNOTOperand = Iter->getOperand(1).getReg();
821 if (VPNOTOperand == LastVPNOTResult ||
822 VPNOTOperand == OppositeVCCRValue) {
823 IsInteresting =
true;
826 LastVPNOTResult = Iter->getOperand(0).getReg();
838 DeadInstruction->eraseFromParent();
865 PrevVCMPResultKiller = MO;
894 .
addReg(PrevVCMPResultReg);
902 if (PrevVCMPResultKiller)
909 PrevVCMPResultKiller =
nullptr;
913 DeadInstruction->eraseFromParent();
915 return !DeadInstructions.empty();
925 unsigned LastVPTImm = 0;
940 if (!Copy ||
Copy->getOpcode() != TargetOpcode::COPY ||
941 !
Copy->getOperand(1).getReg().isVirtual() ||
942 MRI->getRegClass(
Copy->getOperand(1).getReg()) == &ARM::VCCRRegClass) {
949 auto getImm = [&](
Register GPR) ->
unsigned {
951 if (Def && (
Def->getOpcode() == ARM::t2MOVi ||
952 Def->getOpcode() == ARM::t2MOVi16))
953 return Def->getOperand(1).getImm();
956 unsigned Imm = getImm(GPR);
962 unsigned NotImm = ~Imm & 0xffff;
963 if (LastVPTReg != 0 && LastVPTReg != VPR && LastVPTImm == Imm) {
964 MRI->clearKillFlags(LastVPTReg);
965 Instr.getOperand(PIdx + 1).setReg(LastVPTReg);
966 if (
MRI->use_empty(VPR)) {
967 DeadInstructions.
insert(Copy);
968 if (
MRI->hasOneUse(GPR))
969 DeadInstructions.
insert(
MRI->getVRegDef(GPR));
973 }
else if (LastVPTReg != 0 && LastVPTImm == NotImm) {
976 Register NewVPR =
MRI->createVirtualRegister(&ARM::VCCRRegClass);
978 TII->get(ARM::MVE_VPNOT), NewVPR)
983 Instr.getOperand(PIdx + 1).setReg(NewVPR);
984 if (
MRI->use_empty(VPR)) {
985 DeadInstructions.
insert(Copy);
986 if (
MRI->hasOneUse(GPR))
987 DeadInstructions.
insert(
MRI->getVRegDef(GPR));
989 LLVM_DEBUG(
dbgs() <<
"Adding VPNot: " << *VPNot <<
" to replace use at "
999 DI->eraseFromParent();
1001 return !DeadInstructions.empty();
1010 bool HasVCTP =
false;
1019 if (!HasVCTP ||
MI.getOpcode() != ARM::MVE_VPSEL)
1024 .
add(
MI.getOperand(0))
1025 .
add(
MI.getOperand(1))
1026 .
add(
MI.getOperand(1))
1028 .
add(
MI.getOperand(4))
1029 .
add(
MI.getOperand(5))
1030 .
add(
MI.getOperand(2));
1039 DeadInstruction->eraseFromParent();
1041 return !DeadInstructions.empty();
1047 bool Changed =
false;
1049 if (
MI.getOpcode() != ARM::t2DoLoopStart)
1059bool MVETPAndVPTOptimisations::runOnMachineFunction(
MachineFunction &Fn) {
1062 if (!STI.
isThumb2() || !STI.hasLOB())
1067 MachineLoopInfo *MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
1069 &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
1071 LLVM_DEBUG(
dbgs() <<
"********** ARM MVE VPT Optimisations **********\n"
1072 <<
"********** Function: " << Fn.
getName() <<
'\n');
1089 LLVM_DEBUG(
dbgs() <<
"**************************************\n");
1095 return new MVETPAndVPTOptimisations();
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock MachineBasicBlock::iterator MBBI
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
const HexagonInstrInfo * TII
ARM MVE TailPred and VPT Optimisations static false MachineInstr * LookThroughCOPY(MachineInstr *MI, MachineRegisterInfo *MRI)
static void RevertWhileLoopSetup(MachineInstr *MI, const TargetInstrInfo *TII)
static cl::opt< bool > SetLRPredicate("arm-set-lr-predicate", cl::Hidden, cl::desc("Enable setting lr as a predicate in tail predication regions."), cl::init(true))
static bool findLoopComponents(MachineLoop *ML, MachineRegisterInfo *MRI, MachineInstr *&LoopStart, MachineInstr *&LoopPhi, MachineInstr *&LoopDec, MachineInstr *&LoopEnd)
static bool MoveVPNOTBeforeFirstUser(MachineBasicBlock &MBB, MachineBasicBlock::iterator Iter, Register Reg)
static cl::opt< bool > MergeEndDec("arm-enable-merge-loopenddec", cl::Hidden, cl::desc("Enable merging Loop End and Dec instructions."), cl::init(true))
static ARMCC::CondCodes GetCondCode(MachineInstr &Instr)
static bool IsVPNOTEquivalent(MachineInstr &Cond, MachineInstr &Prev)
static bool IsInvalidTPInstruction(MachineInstr &MI)
static bool IsVCMP(unsigned Opcode)
ARM MVE TailPred and VPT Optimisations pass
static bool IsWritingToVCCR(MachineInstr &Instr)
static bool CanHaveSwappedOperands(unsigned Opcode)
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
const ARMBaseInstrInfo * getInstrInfo() const override
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
FunctionPass class - This class is used to implement most global optimizations.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
SmallVector< LoopT *, 4 > getLoopsInPreorder() const
Return all of the loops in the function in preorder across the loop nests, with siblings in forward p...
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
iterator_range< iterator > terminators()
iterator_range< pred_iterator > predecessors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
unsigned getNumOperands() const
Returns the total number of operands.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
MachineBasicBlock * getMBB() const
void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
void setRegAllocationHint(Register VReg, unsigned Type, Register PrefReg)
setRegAllocationHint - Specify a register allocation hint for the specified virtual register.
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Wrapper class representing virtual and physical registers.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
TargetInstrInfo - Interface to description of machine instruction set.
unsigned getID() const
Return the register class ID number.
Target - Wrapper for Target specific information.
A Use represents the edge between a Value definition and its users.
self_iterator getIterator()
static CondCodes getOppositeCondition(CondCodes CC)
static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC)
getSwappedCondition - assume the flags are set by MI(a,b), return the condition code if we modify the...
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ Define
Register definition.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
NodeAddr< InstrNode * > Instr
NodeAddr< PhiNode * > Phi
NodeAddr< DefNode * > Def
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
int findFirstVPTPredOperandIdx(const MachineInstr &MI)
FunctionPass * createMVETPAndVPTOptimisationsPass()
createMVETPAndVPTOptimisationsPass
ARMVCC::VPTCodes getVPTInstrPredicate(const MachineInstr &MI, Register &PredReg)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static bool isVCTP(const MachineInstr *MI)
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
static bool isLoopStart(const MachineInstr &MI)
void RevertWhileLoopStartLR(MachineInstr *MI, const TargetInstrInfo *TII, unsigned BrOpc=ARM::t2Bcc, bool UseCmp=false)
void RevertLoopEnd(MachineInstr *MI, const TargetInstrInfo *TII, unsigned BrOpc=ARM::t2Bcc, bool SkipCmp=false)
void RevertLoopDec(MachineInstr *MI, const TargetInstrInfo *TII, bool SetFlags=false)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
static unsigned VCMPOpcodeToVPT(unsigned Opcode)
void RevertDoLoopStart(MachineInstr *MI, const TargetInstrInfo *TII)
void addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB)
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.