21 #define DEBUG_TYPE "si-optimize-exec-masking"
37 return "SI optimize exec mask operations";
49 "SI optimize exec mask operations",
false,
false)
54 char SIOptimizeExecMasking::
ID = 0;
60 switch (MI.getOpcode()) {
62 case AMDGPU::S_MOV_B64:
63 case AMDGPU::S_MOV_B64_term: {
66 return MI.getOperand(0).
getReg();
70 return AMDGPU::NoRegister;
77 case AMDGPU::S_MOV_B64: {
83 case AMDGPU::S_MOV_B64_term:
87 return AMDGPU::NoRegister;
92 case AMDGPU::S_AND_B64:
93 return AMDGPU::S_AND_SAVEEXEC_B64;
94 case AMDGPU::S_OR_B64:
95 return AMDGPU::S_OR_SAVEEXEC_B64;
96 case AMDGPU::S_XOR_B64:
97 return AMDGPU::S_XOR_SAVEEXEC_B64;
98 case AMDGPU::S_ANDN2_B64:
99 return AMDGPU::S_ANDN2_SAVEEXEC_B64;
100 case AMDGPU::S_ORN2_B64:
101 return AMDGPU::S_ORN2_SAVEEXEC_B64;
102 case AMDGPU::S_NAND_B64:
103 return AMDGPU::S_NAND_SAVEEXEC_B64;
104 case AMDGPU::S_NOR_B64:
105 return AMDGPU::S_NOR_SAVEEXEC_B64;
106 case AMDGPU::S_XNOR_B64:
107 return AMDGPU::S_XNOR_SAVEEXEC_B64;
109 return AMDGPU::INSTRUCTION_LIST_END;
118 case AMDGPU::S_MOV_B64_term: {
119 MI.
setDesc(TII.get(AMDGPU::COPY));
122 case AMDGPU::S_XOR_B64_term: {
125 MI.
setDesc(TII.get(AMDGPU::S_XOR_B64));
128 case AMDGPU::S_ANDN2_B64_term: {
131 MI.
setDesc(TII.get(AMDGPU::S_ANDN2_B64));
143 for (; I !=
E; ++
I) {
144 if (!I->isTerminator())
158 unsigned CopyToExec) {
159 const unsigned InstLimit = 25;
162 for (
unsigned N = 0;
N <= InstLimit && I !=
E; ++
I, ++
N) {
164 if (CopyFromExec != AMDGPU::NoRegister)
176 if (Succ->isLiveIn(Reg))
183 bool SIOptimizeExecMasking::runOnMachineFunction(
MachineFunction &MF) {
206 if (CopyToExec == AMDGPU::NoRegister)
210 auto CopyToExecInst = &*
I;
212 if (CopyFromExecInst == E)
217 DEBUG(
dbgs() <<
"Exec copy source register is live out\n");
221 unsigned CopyFromExec = CopyFromExecInst->getOperand(0).getReg();
226 = std::next(CopyFromExecInst->getIterator()), JE = I->getIterator();
229 DEBUG(
dbgs() <<
"exec read prevents saveexec: " << *J <<
'\n');
232 SaveExecInst =
nullptr;
236 if (J->modifiesRegister(CopyToExec, TRI)) {
238 DEBUG(
dbgs() <<
"Multiple instructions modify "
239 <<
PrintReg(CopyToExec, TRI) <<
'\n');
240 SaveExecInst =
nullptr;
245 if (SaveExecOp == AMDGPU::INSTRUCTION_LIST_END)
248 if (J->readsRegister(CopyFromExec, TRI)) {
250 DEBUG(
dbgs() <<
"Found save exec op: " << *SaveExecInst <<
'\n');
253 DEBUG(
dbgs() <<
"Instruction does not read exec copy: " << *J <<
'\n');
259 assert(SaveExecInst != &*J);
267 DEBUG(
dbgs() <<
"Insert save exec op: " << *SaveExecInst <<
'\n');
274 if (Src0.
isReg() && Src0.
getReg() == CopyFromExec) {
276 }
else if (Src1.
isReg() && Src1.
getReg() == CopyFromExec) {
284 CopyFromExecInst->eraseFromParent();
291 .addReg(OtherOp->
getReg());
294 CopyToExecInst->eraseFromParent();
297 OtherInst->substituteRegister(CopyToExec, AMDGPU::EXEC,
298 AMDGPU::NoSubRegister, *TRI);
void push_back(const T &Elt)
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at application launch and destroyed by llvm_shutdown.
AMDGPU specific subclass of TargetSubtarget.
INITIALIZE_PASS_BEGIN(SIOptimizeExecMasking, DEBUG_TYPE,"SI optimize exec mask operations", false, false) INITIALIZE_PASS_END(SIOptimizeExecMasking
const SIInstrInfo * getInstrInfo() const override
static bool removeTerminatorBit(const SIInstrInfo &TII, MachineInstr &MI)
static MachineBasicBlock::reverse_iterator fixTerminators(const SIInstrInfo &TII, MachineBasicBlock &MBB)
iterator_range< succ_iterator > successors()
#define INITIALIZE_PASS_DEPENDENCY(depName)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.
const HexagonInstrInfo * TII
MachineBasicBlock iterator that automatically skips over MIs that are inside bundles (i.e. walks top-level MIs only).
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
Reg
All possible values of the reg field in the ModR/M byte.
static unsigned isCopyFromExec(const MachineInstr &MI)
If MI is a copy from exec, return the register copied to.
void initializeSIOptimizeExecMaskingPass(PassRegistry &)
SI optimize exec mask operations
Printable PrintReg(unsigned Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubRegIdx=0)
Prints virtual and physical registers with or without a TRI instance.
reverse_iterator rbegin()
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
static GCRegistry::Add< CoreCLRGC > E("coreclr","CoreCLR-compatible GC")
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
SI optimize exec mask false
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const MachineOperand & getOperand(unsigned i) const
Represent the analysis usage information of a pass.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,"Assign register bank of generic virtual registers", false, false) RegBankSelect
self_iterator getIterator()
static MachineBasicBlock::reverse_iterator findExecCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::reverse_iterator I, unsigned CopyToExec)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
char & SIOptimizeExecMaskingID
static unsigned isCopyToExec(const MachineInstr &MI)
If MI is a copy to exec, return the register copied from.
bool readsRegister(unsigned Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr reads the specified register.
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
const SIRegisterInfo * getRegisterInfo() const override
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
void setPreservesCFG()
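This function should be called by the pass, iff they do not: (1) add or remove basic blocks from the function, or (2) modify terminator instructions in any way.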
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Representation of each machine instruction.
Interface definition for SIInstrInfo.
unsigned getReg() const
getReg - Returns the register number.
bool isCommutable(QueryType Type=IgnoreBundle) const
Return true if this may be a 2- or 3-address instruction (of the form "X = op Y, Z, ..."), which produces the same result if Y and Z are exchanged.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static unsigned getSaveExecOp(unsigned Opc)
static bool isLiveOut(const MachineBasicBlock &MBB, unsigned Reg)
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.