Go to the documentation of this file.
36 #define DEBUG_TYPE "arm-mve-vpt-opts"
40 cl::desc(
"Enable merging Loop End and Dec instructions."),
45 cl::desc(
"Enable setting lr as a predicate in tail predication regions."),
68 return "ARM MVE TailPred and VPT Optimisation Pass";
93 "ARM MVE TailPred and VPT Optimisations pass",
false,
102 while (
MI &&
MI->getOpcode() == TargetOpcode::COPY &&
103 MI->getOperand(1).getReg().isVirtual())
116 if (!Header || !Latch) {
124 if (
T.getOpcode() == ARM::t2LoopEnd &&
T.getOperand(1).getMBB() == Header) {
128 if (
T.getOpcode() == ARM::t2LoopEndDec &&
129 T.getOperand(2).getMBB() == Header) {
149 if (LoopEnd->
getOpcode() == ARM::t2LoopEndDec)
154 if (!LoopDec || LoopDec->
getOpcode() != ARM::t2LoopDec) {
155 LLVM_DEBUG(
dbgs() <<
" didn't find LoopDec where we expected!\n");
163 if (!LoopPhi || LoopPhi->
getOpcode() != TargetOpcode::PHI ||
176 if (!LoopStart || (LoopStart->
getOpcode() != ARM::t2DoLoopStart &&
177 LoopStart->
getOpcode() != ARM::t2WhileLoopSetup &&
178 LoopStart->
getOpcode() != ARM::t2WhileLoopStartLR)) {
189 assert(
MI->getOpcode() == ARM::t2WhileLoopSetup &&
190 "Only expected a t2WhileLoopSetup in RevertWhileLoopStart!");
195 MIB.
add(
MI->getOperand(0));
196 MIB.
add(
MI->getOperand(1));
199 MIB.
addReg(ARM::NoRegister);
204 if (
I.getOpcode() == ARM::t2WhileLoopStart) {
207 MIB.
add(
MI->getOperand(1));
215 MI->eraseFromParent();
228 bool MVETPAndVPTOptimisations::LowerWhileLoopStart(
MachineLoop *ML) {
236 if (LoopStart->
getOpcode() != ARM::t2WhileLoopSetup)
241 return MI.getOpcode() == ARM::t2WhileLoopStart;
251 BuildMI(*WLSIt->getParent(), *WLSIt, WLSIt->getDebugLoc(),
252 TII->get(ARM::t2WhileLoopStartLR), LR)
254 .
add(WLSIt->getOperand(1));
258 WLSIt->eraseFromParent();
273 MachineInstr *MVETPAndVPTOptimisations::CheckForLRUseInPredecessors(
277 Worklist.push_back(PreHeader);
280 while (!Worklist.empty()) {
308 Worklist.push_back(Pred);
320 bool MVETPAndVPTOptimisations::MergeLoopEnd(
MachineLoop *ML) {
335 if (LoopStart->
getOpcode() == ARM::t2WhileLoopStartLR && PreHeader)
336 LoopStart = CheckForLRUseInPredecessors(PreHeader, LoopStart);
342 if (LoopStart->
getOpcode() == ARM::t2DoLoopStart)
365 Worklist.push_back(BaseReg);
366 while (!Worklist.empty()) {
371 if (
MI.getOpcode() != TargetOpcode::COPY ||
372 !
MI.getOperand(0).getReg().isVirtual()) {
376 Worklist.push_back(
MI.getOperand(0).getReg());
382 if (!CheckUsers(PhiReg, {LoopDec},
MRI) ||
383 !CheckUsers(DecReg, {LoopPhi, LoopEnd},
MRI) ||
384 !CheckUsers(StartReg, {LoopPhi},
MRI)) {
386 if (LoopStart->
getOpcode() == ARM::t2WhileLoopStartLR) {
410 TII->get(ARM::t2LoopEndDec), DecReg)
419 MI->eraseFromParent();
427 bool MVETPAndVPTOptimisations::ConvertTailPredLoop(
MachineLoop *ML,
437 if (LoopDec != LoopEnd || (LoopStart->
getOpcode() != ARM::t2DoLoopStart &&
438 LoopStart->
getOpcode() != ARM::t2WhileLoopStartLR))
446 VCTPs.push_back(&
MI);
448 MVEInstrs.push_back(&
MI);
460 if (VCTP->getOpcode() != FirstVCTP->
getOpcode() ||
483 if (!Phi || Phi->
getOpcode() != TargetOpcode::PHI ||
507 unsigned NewOpc = LoopStart->
getOpcode() == ARM::t2DoLoopStart
508 ? ARM::t2DoLoopStartTP
509 : ARM::t2WhileLoopStartTP;
515 if (NewOpc == ARM::t2WhileLoopStartTP)
528 MI->getOperand(Idx + 2).setReg(LR);
545 case ARM::MVE_VCMPf32:
546 case ARM::MVE_VCMPf16:
547 case ARM::MVE_VCMPf32r:
548 case ARM::MVE_VCMPf16r:
549 case ARM::MVE_VCMPi8r:
550 case ARM::MVE_VCMPi16r:
551 case ARM::MVE_VCMPi32r:
552 case ARM::MVE_VCMPu8r:
553 case ARM::MVE_VCMPu16r:
554 case ARM::MVE_VCMPu32r:
555 case ARM::MVE_VCMPs8r:
556 case ARM::MVE_VCMPs16r:
557 case ARM::MVE_VCMPs32r:
585 if (CondOP1.
isIdenticalTo(PrevOP1) && CondOP2.isIdenticalTo(PrevOP2))
592 CondOP2.isIdenticalTo(PrevOP1);
607 return RegClass && (RegClass->
getID() == ARM::VCCRRegClassID);
618 MachineInstr &MVETPAndVPTOptimisations::ReplaceRegisterUseWithVPNOT(
630 User.setReg(NewResult);
631 User.setIsKill(
false);
645 assert(Iter->getOpcode() == ARM::MVE_VPNOT &&
"Not a VPNOT!");
647 "The VPNOT cannot be predicated");
655 bool MustMove =
false, HasUser =
false;
657 for (; Iter !=
MBB.
end(); ++Iter) {
659 Iter->findRegisterUseOperand(VPNOTOperand,
true)) {
661 VPNOTOperandKiller = MO;
664 if (Iter->findRegisterUseOperandIdx(
Reg) != -1) {
669 if (Iter->findRegisterUseOperandIdx(VPNOTResult) == -1)
682 if (VPNOTOperandKiller)
708 while (Iter != End) {
709 Register VCCRValue, OppositeVCCRValue;
713 for (; Iter != End; ++Iter) {
718 Register Dst = Iter->getOperand(0).getReg();
722 if (VCCRValue && Iter->getOpcode() == ARM::MVE_VPNOT &&
723 Iter->findRegisterUseOperandIdx(VCCRValue) != -1) {
729 OppositeVCCRValue = Dst;
742 assert(VCCRValue && OppositeVCCRValue &&
743 "VCCRValue and OppositeVCCRValue shouldn't be empty if the loop "
744 "stopped before the end of the block!");
745 assert(VCCRValue != OppositeVCCRValue &&
746 "VCCRValue should not be equal to OppositeVCCRValue!");
749 Register LastVPNOTResult = OppositeVCCRValue;
752 for (; Iter != End; ++Iter) {
753 bool IsInteresting =
false;
755 if (
MachineOperand *MO = Iter->findRegisterUseOperand(VCCRValue)) {
756 IsInteresting =
true;
761 if (Iter->getOpcode() == ARM::MVE_VPNOT) {
765 DeadInstructions.push_back(&*Iter);
769 <<
"Replacing all uses of '" <<
printReg(Result)
770 <<
"' with '" <<
printReg(LastVPNOTResult) <<
"'\n");
773 ReplaceRegisterUseWithVPNOT(
MBB, *Iter, *MO, LastVPNOTResult);
780 <<
"' with '" <<
printReg(LastVPNOTResult)
781 <<
"' in instr: " << *Iter);
787 Iter->findRegisterUseOperand(OppositeVCCRValue)) {
788 IsInteresting =
true;
791 if (LastVPNOTResult != OppositeVCCRValue) {
793 <<
printReg(OppositeVCCRValue) <<
"' with '"
794 <<
printReg(LastVPNOTResult) <<
" for instr: ";
796 MO->setReg(LastVPNOTResult);
800 MO->setIsKill(
false);
805 if (Iter->getOpcode() == ARM::MVE_VPNOT &&
807 Register VPNOTOperand = Iter->getOperand(1).getReg();
808 if (VPNOTOperand == LastVPNOTResult ||
809 VPNOTOperand == OppositeVCCRValue) {
810 IsInteresting =
true;
813 LastVPNOTResult = Iter->getOperand(0).getReg();
825 DeadInstruction->eraseFromParent();
851 PrevVCMPResultKiller = MO;
880 .
addReg(PrevVCMPResultReg);
888 if (PrevVCMPResultKiller)
893 DeadInstructions.push_back(&Instr);
895 PrevVCMPResultKiller =
nullptr;
899 DeadInstruction->eraseFromParent();
901 return !DeadInstructions.empty();
911 unsigned LastVPTImm = 0;
926 if (!Copy ||
Copy->getOpcode() != TargetOpcode::COPY ||
927 !
Copy->getOperand(1).getReg().isVirtual() ||
935 auto getImm = [&](
Register GPR) ->
unsigned {
937 if (
Def && (
Def->getOpcode() == ARM::t2MOVi ||
938 Def->getOpcode() == ARM::t2MOVi16))
939 return Def->getOperand(1).getImm();
942 unsigned Imm = getImm(GPR);
948 unsigned NotImm = ~
Imm & 0xffff;
949 if (LastVPTReg != 0 && LastVPTReg != VPR && LastVPTImm ==
Imm) {
952 DeadInstructions.
insert(Copy);
957 }
else if (LastVPTReg != 0 && LastVPTImm == NotImm) {
962 TII->get(ARM::MVE_VPNOT), NewVPR)
969 DeadInstructions.
insert(Copy);
973 LLVM_DEBUG(
dbgs() <<
"Adding VPNot: " << *VPNot <<
" to replace use at "
983 DI->eraseFromParent();
985 return !DeadInstructions.empty();
994 bool HasVCTP =
false;
1003 if (!HasVCTP ||
MI.getOpcode() != ARM::MVE_VPSEL)
1008 .
add(
MI.getOperand(0))
1009 .
add(
MI.getOperand(1))
1010 .
add(
MI.getOperand(1))
1012 .
add(
MI.getOperand(4))
1013 .
add(
MI.getOperand(5))
1014 .
add(
MI.getOperand(2));
1019 DeadInstructions.push_back(&
MI);
1023 DeadInstruction->eraseFromParent();
1025 return !DeadInstructions.empty();
1031 bool Changed =
false;
1033 if (
MI.getOpcode() != ARM::t2DoLoopStart)
1043 bool MVETPAndVPTOptimisations::runOnMachineFunction(
MachineFunction &Fn) {
1047 if (!STI.
isThumb2() || !STI.hasLOB())
1055 LLVM_DEBUG(
dbgs() <<
"********** ARM MVE VPT Optimisations **********\n"
1056 <<
"********** Function: " << Fn.
getName() <<
'\n');
1060 Modified |= LowerWhileLoopStart(ML);
1062 Modified |= ConvertTailPredLoop(ML, DT);
1073 LLVM_DEBUG(
dbgs() <<
"**************************************\n");
1079 return new MVETPAndVPTOptimisations();
ARM MVE TailPred and VPT Optimisations pass
static bool IsVPNOTEquivalent(MachineInstr &Cond, MachineInstr &Prev)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
unsigned getID() const
Return the register class ID number.
This is an optimization pass for GlobalISel generic memory operations.
static use_instr_nodbg_iterator use_instr_nodbg_end()
@ Define
Register definition.
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const MachineInstrBuilder & add(const MachineOperand &MO) const
static bool IsWritingToVCCR(MachineInstr &Instr)
Target - Wrapper for Target specific information.
void setIsKill(bool Val=true)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Reg
All possible values of the reg field in the ModR/M byte.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
ARM MVE TailPred and VPT Optimisations static false MachineInstr * LookThroughCOPY(MachineInstr *MI, MachineRegisterInfo *MRI)
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
MachineOperand * findRegisterUseOperand(Register Reg, bool isKill=false, const TargetRegisterInfo *TRI=nullptr)
Wrapper for findRegisterUseOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
const ARMBaseInstrInfo * getInstrInfo() const override
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
LLVM_NODISCARD T pop_back_val()
iterator_range< iterator > terminators()
We currently generate a but we really shouldn eax ecx xorl edx divl ecx eax divl ecx movl eax ret A similar code sequence works for division We currently compile i32 v2 eax eax jo LBB1_2 and
static cl::opt< bool > SetLRPredicate("arm-set-lr-predicate", cl::Hidden, cl::desc("Enable setting lr as a predicate in tail predication regions."), cl::init(true))
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
static ARMCC::CondCodes GetCondCode(MachineInstr &Instr)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
TargetInstrInfo - Interface to description of machine instruction set.
const MachineOperand & getOperand(unsigned i) const
void RevertLoopDec(MachineInstr *MI, const TargetInstrInfo *TII, bool SetFlags=false)
int findFirstVPTPredOperandIdx(const MachineInstr &MI)
void RevertLoopEnd(MachineInstr *MI, const TargetInstrInfo *TII, unsigned BrOpc=ARM::t2Bcc, bool SkipCmp=false)
Represent the analysis usage information of a pass.
iterator_range< block_iterator > blocks() const
ARMVCC::VPTCodes getVPTInstrPredicate(const MachineInstr &MI, Register &PredReg)
const HexagonInstrInfo * TII
MachineOperand class - Representation of each machine instruction operand.
static bool findLoopComponents(MachineLoop *ML, MachineRegisterInfo *MRI, MachineInstr *&LoopStart, MachineInstr *&LoopPhi, MachineInstr *&LoopDec, MachineInstr *&LoopEnd)
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
static unsigned VCMPOpcodeToVPT(unsigned Opcode)
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
static bool MoveVPNOTBeforeFirstUser(MachineBasicBlock &MBB, MachineBasicBlock::iterator Iter, Register Reg)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
static void RevertWhileLoopSetup(MachineInstr *MI, const TargetInstrInfo *TII)
INITIALIZE_PASS_BEGIN(MVETPAndVPTOptimisations, DEBUG_TYPE, "ARM MVE TailPred and VPT Optimisations pass", false, false) INITIALIZE_PASS_END(MVETPAndVPTOptimisations
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Representation of each machine instruction.
void RevertDoLoopStart(MachineInstr *MI, const TargetInstrInfo *TII)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
FunctionPass * createMVETPAndVPTOptimisationsPass()
createMVETPAndVPTOptimisationsPass
void addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB)
initializer< Ty > init(const Ty &Val)
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
const TargetRegisterClass * getRegClassOrNull(Register Reg) const
Return the register class of Reg, or null if Reg has not been assigned a register class yet.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isVCTP(const MachineInstr *MI)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Register getReg() const
getReg - Returns the register number.
iterator_range< pred_iterator > predecessors()
static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC)
getSwappedCondition - assume the flags are set by MI(a,b), return the condition code if we modify the...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
MachineBasicBlock * getMBB() const
static bool IsVCMP(unsigned Opcode)
SmallVector< MachineOperand, 4 > Cond
StringRef - Represent a constant reference to a string, i.e.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
void setRegAllocationHint(Register VReg, unsigned Type, Register PrefReg)
setRegAllocationHint - Specify a register allocation hint for the specified virtual register.
void RevertWhileLoopStartLR(MachineInstr *MI, const TargetInstrInfo *TII, unsigned BrOpc=ARM::t2Bcc, bool UseCmp=false)
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
self_iterator getIterator()
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
const MachineBasicBlock * getParent() const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
unsigned const MachineRegisterInfo * MRI
Wrapper class representing virtual and physical registers.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
BlockT * getHeader() const
LoopInfoBase< MachineBasicBlock, MachineLoop > & getBase()
bool hasOneUse(Register RegNo) const
hasOneUse - Return true if there is exactly one instruction using the specified register.
static cl::opt< bool > MergeEndDec("arm-enable-merge-loopenddec", cl::Hidden, cl::desc("Enable merging Loop End and Dec instructions."), cl::init(true))
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static bool CanHaveSwappedOperands(unsigned Opcode)
void setReg(Register Reg)
Change the register this operand corresponds to.
unsigned getNumOperands() const
Returns the total number of operands.
const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
FunctionPass class - This class is used to implement most global optimizations.
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
static CondCodes getOppositeCondition(CondCodes CC)
AnalysisUsage & addRequired()
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
static bool IsInvalidTPInstruction(MachineInstr &MI)
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
A Use represents the edge between a Value definition and its users.
static bool isLoopStart(const MachineInstr &MI)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.