//===-- SIOptimizeExecMasking.cpp -----------------------------------------===//
// ...
#define DEBUG_TYPE "si-optimize-exec-masking"

// ...

  StringRef getPassName() const override {
    return "SI optimize exec mask operations";
  }

// ...

INITIALIZE_PASS_BEGIN(SIOptimizeExecMasking, DEBUG_TYPE,
                      "SI optimize exec mask operations", false, false)
INITIALIZE_PASS_END(SIOptimizeExecMasking, DEBUG_TYPE,
                    "SI optimize exec mask operations", false, false)

char SIOptimizeExecMasking::ID = 0;

char &llvm::SIOptimizeExecMaskingID = SIOptimizeExecMasking::ID;
/// If \p MI is a copy from exec, return the register copied to.
static Register isCopyFromExec(const MachineInstr &MI, const GCNSubtarget &ST) {
  switch (MI.getOpcode()) {
  case AMDGPU::S_MOV_B64:
  case AMDGPU::S_MOV_B64_term:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B32_term: {
    const MachineOperand &Src = MI.getOperand(1);
    if (Src.isReg() &&
        Src.getReg() == (ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC))
      return MI.getOperand(0).getReg();
  }
  }

  return AMDGPU::NoRegister;
}
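// Illustration (not part of the pass): this matcher recognizes plain copies
// of the exec mask, e.g.
//   %sgpr0_sgpr1 = S_MOV_B64 $exec     ; wave64
//   %sgpr0       = S_MOV_B32 $exec_lo  ; wave32
// and returns the destination register of the copy.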
/// If \p MI is a copy to exec, return the register copied from.
static Register isCopyToExec(const MachineInstr &MI, const GCNSubtarget &ST) {
  switch (MI.getOpcode()) {
  case AMDGPU::S_MOV_B64:
  case AMDGPU::S_MOV_B32: {
    const MachineOperand &Dst = MI.getOperand(0);
    if (Dst.isReg() &&
        Dst.getReg() == (ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC) &&
        MI.getOperand(1).isReg())
      return MI.getOperand(1).getReg();
    break;
  }
  case AMDGPU::S_MOV_B64_term:
  case AMDGPU::S_MOV_B32_term:
    llvm_unreachable("should have been replaced");
  }

  return AMDGPU::NoRegister;
}
/// If \p MI is a logical operation on an exec value,
/// return the register copied to.
static Register isLogicalOpOnExec(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case AMDGPU::S_AND_B64:
  case AMDGPU::S_OR_B64:
  case AMDGPU::S_XOR_B64:
  case AMDGPU::S_ANDN2_B64:
  case AMDGPU::S_ORN2_B64:
  case AMDGPU::S_NAND_B64:
  case AMDGPU::S_NOR_B64:
  case AMDGPU::S_XNOR_B64: {
    const MachineOperand &Src1 = MI.getOperand(1);
    if (Src1.isReg() && Src1.getReg() == AMDGPU::EXEC)
      return MI.getOperand(0).getReg();
    const MachineOperand &Src2 = MI.getOperand(2);
    if (Src2.isReg() && Src2.getReg() == AMDGPU::EXEC)
      return MI.getOperand(0).getReg();
    break;
  }
  case AMDGPU::S_AND_B32:
  case AMDGPU::S_OR_B32:
  case AMDGPU::S_XOR_B32:
  case AMDGPU::S_ANDN2_B32:
  case AMDGPU::S_ORN2_B32:
  case AMDGPU::S_NAND_B32:
  case AMDGPU::S_NOR_B32:
  case AMDGPU::S_XNOR_B32: {
    const MachineOperand &Src1 = MI.getOperand(1);
    if (Src1.isReg() && Src1.getReg() == AMDGPU::EXEC_LO)
      return MI.getOperand(0).getReg();
    const MachineOperand &Src2 = MI.getOperand(2);
    if (Src2.isReg() && Src2.getReg() == AMDGPU::EXEC_LO)
      return MI.getOperand(0).getReg();
    break;
  }
  }

  return AMDGPU::NoRegister;
}
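// Illustration (not part of the pass): control-flow lowering emits logical
// ops whose first or second source is exec, e.g.
//   %sgpr2_sgpr3 = S_AND_B64 $exec, %sgpr0_sgpr1
// The matcher returns the destination (%sgpr2_sgpr3) so callers can check
// whether that value is later copied back into exec.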
static unsigned getSaveExecOp(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::S_AND_B64:
    return AMDGPU::S_AND_SAVEEXEC_B64;
  case AMDGPU::S_OR_B64:
    return AMDGPU::S_OR_SAVEEXEC_B64;
  case AMDGPU::S_XOR_B64:
    return AMDGPU::S_XOR_SAVEEXEC_B64;
  case AMDGPU::S_ANDN2_B64:
    return AMDGPU::S_ANDN2_SAVEEXEC_B64;
  case AMDGPU::S_ORN2_B64:
    return AMDGPU::S_ORN2_SAVEEXEC_B64;
  case AMDGPU::S_NAND_B64:
    return AMDGPU::S_NAND_SAVEEXEC_B64;
  case AMDGPU::S_NOR_B64:
    return AMDGPU::S_NOR_SAVEEXEC_B64;
  case AMDGPU::S_XNOR_B64:
    return AMDGPU::S_XNOR_SAVEEXEC_B64;
  case AMDGPU::S_AND_B32:
    return AMDGPU::S_AND_SAVEEXEC_B32;
  case AMDGPU::S_OR_B32:
    return AMDGPU::S_OR_SAVEEXEC_B32;
  case AMDGPU::S_XOR_B32:
    return AMDGPU::S_XOR_SAVEEXEC_B32;
  case AMDGPU::S_ANDN2_B32:
    return AMDGPU::S_ANDN2_SAVEEXEC_B32;
  case AMDGPU::S_ORN2_B32:
    return AMDGPU::S_ORN2_SAVEEXEC_B32;
  case AMDGPU::S_NAND_B32:
    return AMDGPU::S_NAND_SAVEEXEC_B32;
  case AMDGPU::S_NOR_B32:
    return AMDGPU::S_NOR_SAVEEXEC_B32;
  case AMDGPU::S_XNOR_B32:
    return AMDGPU::S_XNOR_SAVEEXEC_B32;
  default:
    return AMDGPU::INSTRUCTION_LIST_END;
  }
}
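// Illustration (assumed MIR, wave64): this opcode mapping is what lets the
// pass collapse the three-instruction sequence produced by control-flow
// lowering into a single saveexec op:
//
//   %sgpr0_sgpr1 = COPY $exec
//   %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, %sgpr4_sgpr5
//   $exec        = COPY %sgpr2_sgpr3
// =>
//   %sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 %sgpr4_sgpr5
//
// S_AND_SAVEEXEC_B64 writes the old exec value to its destination and then
// ANDs exec with the source, so the final exec value is identical.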
static bool removeTerminatorBit(const SIInstrInfo &TII, MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case AMDGPU::S_MOV_B32_term: {
    bool RegSrc = MI.getOperand(1).isReg();
    MI.setDesc(TII.get(RegSrc ? AMDGPU::COPY : AMDGPU::S_MOV_B32));
    return true;
  }
  case AMDGPU::S_MOV_B64_term: {
    bool RegSrc = MI.getOperand(1).isReg();
    MI.setDesc(TII.get(RegSrc ? AMDGPU::COPY : AMDGPU::S_MOV_B64));
    return true;
  }
  case AMDGPU::S_XOR_B64_term: {
    MI.setDesc(TII.get(AMDGPU::S_XOR_B64));
    return true;
  }
  case AMDGPU::S_XOR_B32_term: {
    MI.setDesc(TII.get(AMDGPU::S_XOR_B32));
    return true;
  }
  case AMDGPU::S_OR_B64_term: {
    MI.setDesc(TII.get(AMDGPU::S_OR_B64));
    return true;
  }
  case AMDGPU::S_OR_B32_term: {
    MI.setDesc(TII.get(AMDGPU::S_OR_B32));
    return true;
  }
  case AMDGPU::S_ANDN2_B64_term: {
    MI.setDesc(TII.get(AMDGPU::S_ANDN2_B64));
    return true;
  }
  case AMDGPU::S_ANDN2_B32_term: {
    MI.setDesc(TII.get(AMDGPU::S_ANDN2_B32));
    return true;
  }
  case AMDGPU::S_AND_B64_term: {
    MI.setDesc(TII.get(AMDGPU::S_AND_B64));
    return true;
  }
  case AMDGPU::S_AND_B32_term: {
    MI.setDesc(TII.get(AMDGPU::S_AND_B32));
    return true;
  }
  default:
    return false;
  }
}
/// Turn all pseudoterminators in the block into their equivalent
/// non-terminator instructions. Returns the reverse iterator to the first
/// non-pseudo instruction in the block.
static MachineBasicBlock::reverse_iterator
fixTerminators(const SIInstrInfo &TII, MachineBasicBlock &MBB) {
  MachineBasicBlock::reverse_iterator I = MBB.rbegin(), E = MBB.rend();
  bool Seen = false;
  MachineBasicBlock::reverse_iterator FirstNonTerm = I;
  for (; I != E; ++I) {
    if (!I->isTerminator())
      return Seen ? FirstNonTerm : I;
    // ... (rewrite via removeTerminatorBit, remembering the first rewrite)
  }
  return FirstNonTerm;
}
static MachineBasicBlock::reverse_iterator
findExecCopy(const SIInstrInfo &TII, const GCNSubtarget &ST,
             MachineBasicBlock &MBB, MachineBasicBlock::reverse_iterator I,
             unsigned CopyToExec) {
  const unsigned InstLimit = 25;

  auto E = MBB.rend();
  for (unsigned N = 0; N <= InstLimit && I != E; ++I, ++N) {
    Register CopyFromExec = isCopyFromExec(*I, ST);
    if (CopyFromExec.isValid())
      return I;
  }

  return E;
}
static bool isLiveOut(const MachineBasicBlock &MBB, unsigned Reg) {
  for (const MachineBasicBlock *Succ : MBB.successors())
    if (Succ->isLiveIn(Reg))
      return true;
  return false;
}
// Walk backwards from Origin for at most MaxInstructions, returning the first
// instruction for which Pred is true, or nullptr if one of the registers in
// NonModifiableRegs is clobbered first.
static MachineInstr *findInstrBackwards(MachineInstr &Origin,
                                        std::function<bool(MachineInstr *)> Pred,
                                        ArrayRef<MCRegister> NonModifiableRegs,
                                        const SIRegisterInfo *TRI,
                                        unsigned MaxInstructions = 20) {
  auto A = Origin.getReverseIterator(), E = Origin.getParent()->rend();
  unsigned CurrentIteration = 0;
  for (++A; CurrentIteration < MaxInstructions && A != E; ++A) {
    if (A->isDebugInstr())
      continue;
    if (Pred(&*A))
      return &*A;
    for (MCRegister Reg : NonModifiableRegs)
      if (A->modifiesRegister(Reg, TRI))
        return nullptr;
    ++CurrentIteration;
  }
  return nullptr;
}
// Determine whether Reg is still in use, and not re-defined, in the range
// (Stop..Start], by computing liveness backwards from the end of the block.
static bool isRegisterInUseBetween(MachineInstr &Stop, MachineInstr &Start,
                                   MCRegister Reg, const SIRegisterInfo *TRI,
                                   MachineRegisterInfo &MRI,
                                   bool useLiveOuts = false,
                                   bool ignoreStart = false) {
  LivePhysRegs LR(*TRI);
  if (useLiveOuts)
    LR.addLiveOuts(*Stop.getParent());

  MachineBasicBlock::reverse_iterator A(Start);
  if (ignoreStart)
    ++A;

  for (; A != Stop.getParent()->rend() && A != Stop; ++A)
    LR.stepBackward(*A);

  return !LR.available(MRI, Reg);
}
// Scan backwards from SaveExec for a V_CMP that can be folded with it into a
// V_CMPX; returns the V_CMP on success, nullptr otherwise.
static MachineInstr *findPossibleVCMPVCMPXOptimization(
    MachineInstr &SaveExec, MCRegister Exec, const SIRegisterInfo *TRI,
    const SIInstrInfo *TII, MachineRegisterInfo &MRI) {
  Register SaveExecDest = SaveExec.getOperand(0).getReg();
  if (!TRI->isSGPRReg(MRI, SaveExecDest))
    return nullptr;

  MachineOperand *SaveExecSrc0 =
      TII->getNamedOperand(SaveExec, AMDGPU::OpName::src0);
  if (!SaveExecSrc0->isReg())
    return nullptr;
  // ... (locate the V_CMP that defines the saveexec source)
  assert(VCmpDest && "Should have an sdst operand!");

  // ... (collect the registers that must not be clobbered in between)
  if (Src0->isReg())
    NonDefRegs.push_back(Src0->getReg());

  if (Src1->isReg())
    NonDefRegs.push_back(Src1->getReg());
  // ...
}
// Inserts the optimized s_mov / v_cmpx sequence for a
// "v_cmp ...; s_and_saveexec ..." pair found by the scan above.
static bool optimizeVCMPSaveExecSequence(MachineInstr &SaveExecInstr,
                                         MachineInstr &VCmp, MCRegister Exec,
                                         const SIInstrInfo *TII,
                                         const SIRegisterInfo *TRI,
                                         MachineRegisterInfo &MRI) {
  // ...
  Register MoveDest = SaveExecInstr.getOperand(0).getReg();

  // If the result of the saveexec is still used, keep the old exec value
  // around with a plain move.
  if (!SaveExecInstr.uses().empty()) {
    bool isSGPR32 = TRI->getRegSizeInBits(MoveDest, MRI) == 32;
    unsigned MovOpcode = isSGPR32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    // ... (BuildMI: MovOpcode MoveDest, Exec)
  }

  // ... (start building the replacement V_CMPX from the V_CMP's operands)
  auto TryAddImmediateValueFromNamedOperand =
      [&](unsigned OperandName) -> void {
    if (auto *Mod = TII->getNamedOperand(VCmp, OperandName))
      Builder.addImm(Mod->getImm());
  };

  TryAddImmediateValueFromNamedOperand(AMDGPU::OpName::src0_modifiers);
  // ... (add src0)
  TryAddImmediateValueFromNamedOperand(AMDGPU::OpName::src1_modifiers);
  // ... (add src1, clamp)
  // ...
}
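// Illustration (assumed MIR, wave64): the rewrite turns
//   %sgpr0_sgpr1 = V_CMP_LT_F32_e64 %vgpr0, %vgpr1
//   %sgpr2_sgpr3 = S_AND_SAVEEXEC_B64 %sgpr0_sgpr1
// into
//   %sgpr2_sgpr3 = S_MOV_B64 $exec
//   V_CMPX_LT_F32_e64 %vgpr0, %vgpr1   ; writes $exec implicitly
// The v_cmpx form writes exec directly, so the intermediate SGPR pair and
// the explicit saveexec op are no longer needed.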
bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
  // ... (bail on skipFunction; fetch ST/TRI/TII/MRI; pick EXEC vs EXEC_LO)

  // Optimize sequences emitted for control flow lowering. They are originally
  // emitted as the separate operations because spill code may need to be
  // inserted for the saved copy of exec.
  //
  //     x = copy exec
  //     z = s_<op>_b64 x, y
  //     exec = copy z
  // =>
  //     x = s_<op>_saveexec_b64 y
  //
  bool Changed = false;
  for (MachineBasicBlock &MBB : MF) {
    // ... (fixTerminators; set up reverse iterators I and E)
    Register CopyToExec;

    unsigned SearchCount = 0;
    const unsigned SearchLimit = 5;
    while (I != E && SearchCount++ < SearchLimit) {
      CopyToExec = isCopyToExec(*I, ST);
      if (CopyToExec)
        break;
      ++I;
    }
    if (!CopyToExec)
      continue;

    // Scan backwards to find the def.
    auto CopyToExecInst = &*I;
    auto CopyFromExecInst = findExecCopy(*TII, ST, MBB, I, CopyToExec);
    if (CopyFromExecInst == E) {
      auto PrepareExecInst = std::next(I);
      if (PrepareExecInst == E)
        continue;
      // Fold exec = COPY (S_AND_B64 reg, exec) -> exec = S_AND_B64 reg, exec
      if (CopyToExecInst->getOperand(1).isKill() &&
          isLogicalOpOnExec(*PrepareExecInst) == CopyToExec) {
        PrepareExecInst->getOperand(0).setReg(Exec);
        CopyToExecInst->eraseFromParent();
        Changed = true;
      }
      continue;
    }
    // ... (give up if CopyToExec is live out of the block)

    Register CopyFromExec = CopyFromExecInst->getOperand(0).getReg();
    MachineInstr *SaveExecInst = nullptr;
    SmallVector<MachineInstr *, 4> OtherUseInsts;

    for (MachineBasicBlock::iterator
             J = std::next(CopyFromExecInst->getIterator()),
             JE = I->getIterator();
         J != JE; ++J) {
      if (SaveExecInst && J->readsRegister(Exec, TRI)) {
        LLVM_DEBUG(dbgs() << "exec read prevents saveexec: " << *J << '\n');
        SaveExecInst = nullptr;
        break;
      }

      bool ReadsCopyFromExec = J->readsRegister(CopyFromExec, TRI);

      if (J->modifiesRegister(CopyToExec, TRI)) {
        if (SaveExecInst) {
          // ... (a second def of the copy: give up)
          SaveExecInst = nullptr;
          break;
        }

        unsigned SaveExecOp = getSaveExecOp(J->getOpcode());
        if (SaveExecOp == AMDGPU::INSTRUCTION_LIST_END)
          break;

        if (ReadsCopyFromExec) {
          SaveExecInst = &*J;
          LLVM_DEBUG(dbgs() << "Found save exec op: " << *SaveExecInst << '\n');
          continue;
        } else {
          LLVM_DEBUG(dbgs()
                     << "Instruction does not read exec copy: " << *J << '\n');
          break;
        }
      } else if (ReadsCopyFromExec && !SaveExecInst) {
        // The copy has another use (e.g. an inserted spill) before the
        // candidate op, so it cannot simply be rewritten.
        LLVM_DEBUG(dbgs() << "Found second use of save inst candidate: " << *J
                          << '\n');
        break;
      }

      if (SaveExecInst && J->readsRegister(CopyToExec, TRI)) {
        assert(SaveExecInst != &*J);
        OtherUseInsts.push_back(&*J);
      }
    }
    if (!SaveExecInst)
      continue;

    LLVM_DEBUG(dbgs() << "Insert save exec op: " << *SaveExecInst << '\n');

    MachineOperand &Src0 = SaveExecInst->getOperand(1);
    MachineOperand &Src1 = SaveExecInst->getOperand(2);
    MachineOperand *OtherOp = nullptr;

    if (Src0.isReg() && Src0.getReg() == CopyFromExec) {
      OtherOp = &Src1;
    } else if (Src1.isReg() && Src1.getReg() == CopyFromExec) {
      if (!SaveExecInst->isCommutable())
        break;
      OtherOp = &Src0;
    } else
      llvm_unreachable("unexpected");

    CopyFromExecInst->eraseFromParent();
    // ... (BuildMI the *_SAVEEXEC form with OtherOp; erase SaveExecInst)
    CopyToExecInst->eraseFromParent();

    for (MachineInstr *OtherInst : OtherUseInsts) {
      OtherInst->substituteRegister(CopyToExec, Exec,
                                    AMDGPU::NoSubRegister, *TRI);
    }
    Changed = true;
  }
  // On GFX10.3+, fold "v_cmp ...; s_and_saveexec ..." pairs into
  // "s_mov (save exec); v_cmpx ..." (see optimizeVCMPSaveExecSequence).
  if (ST.hasGFX10_3Insts()) {
    // ...
    const unsigned AndSaveExecOpcode =
        ST.isWave32() ? AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64;

    for (MachineBasicBlock &MBB : MF) {
      for (MachineInstr &MI : MBB) {
        if (MI.getOpcode() != AndSaveExecOpcode)
          continue;
        // ... (findPossibleVCMPVCMPXOptimization; record the candidate pair)
        SaveExecVCmpMapping[&MI] = VCmp;
      }
    }

    for (const auto &Entry : SaveExecVCmpMapping) {
      // ... (run optimizeVCMPSaveExecSequence; erase the old pair on success)
    }
  }

  return Changed;
}