LLVM  4.0.0
SIOptimizeExecMasking.cpp
Go to the documentation of this file.
1 //===-- SIOptimizeExecMasking.cpp -----------------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
18 
19 using namespace llvm;
20 
21 #define DEBUG_TYPE "si-optimize-exec-masking"
22 
23 namespace {
24 
25 class SIOptimizeExecMasking : public MachineFunctionPass {
26 public:
27  static char ID;
28 
29 public:
30  SIOptimizeExecMasking() : MachineFunctionPass(ID) {
32  }
33 
34  bool runOnMachineFunction(MachineFunction &MF) override;
35 
36  StringRef getPassName() const override {
37  return "SI optimize exec mask operations";
38  }
39 
40  void getAnalysisUsage(AnalysisUsage &AU) const override {
41  AU.setPreservesCFG();
43  }
44 };
45 
46 } // End anonymous namespace.
47 
// Register the pass with LLVM's pass registry under DEBUG_TYPE.
INITIALIZE_PASS_BEGIN(SIOptimizeExecMasking, DEBUG_TYPE,
                      "SI optimize exec mask operations", false, false)
INITIALIZE_PASS_END(SIOptimizeExecMasking, DEBUG_TYPE,
                    "SI optimize exec mask operations", false, false)

// Pass identification: the address of ID serves as the unique pass ID.
char SIOptimizeExecMasking::ID = 0;

// Handle exported to the rest of the AMDGPU target for pipeline construction.
char &llvm::SIOptimizeExecMaskingID = SIOptimizeExecMasking::ID;
57 
58 /// If \p MI is a copy from exec, return the register copied to.
59 static unsigned isCopyFromExec(const MachineInstr &MI) {
60  switch (MI.getOpcode()) {
61  case AMDGPU::COPY:
62  case AMDGPU::S_MOV_B64:
63  case AMDGPU::S_MOV_B64_term: {
64  const MachineOperand &Src = MI.getOperand(1);
65  if (Src.isReg() && Src.getReg() == AMDGPU::EXEC)
66  return MI.getOperand(0).getReg();
67  }
68  }
69 
70  return AMDGPU::NoRegister;
71 }
72 
73 /// If \p MI is a copy to exec, return the register copied from.
74 static unsigned isCopyToExec(const MachineInstr &MI) {
75  switch (MI.getOpcode()) {
76  case AMDGPU::COPY:
77  case AMDGPU::S_MOV_B64: {
78  const MachineOperand &Dst = MI.getOperand(0);
79  if (Dst.isReg() && Dst.getReg() == AMDGPU::EXEC)
80  return MI.getOperand(1).getReg();
81  break;
82  }
83  case AMDGPU::S_MOV_B64_term:
84  llvm_unreachable("should have been replaced");
85  }
86 
87  return AMDGPU::NoRegister;
88 }
89 
90 static unsigned getSaveExecOp(unsigned Opc) {
91  switch (Opc) {
92  case AMDGPU::S_AND_B64:
93  return AMDGPU::S_AND_SAVEEXEC_B64;
94  case AMDGPU::S_OR_B64:
95  return AMDGPU::S_OR_SAVEEXEC_B64;
96  case AMDGPU::S_XOR_B64:
97  return AMDGPU::S_XOR_SAVEEXEC_B64;
98  case AMDGPU::S_ANDN2_B64:
99  return AMDGPU::S_ANDN2_SAVEEXEC_B64;
100  case AMDGPU::S_ORN2_B64:
101  return AMDGPU::S_ORN2_SAVEEXEC_B64;
102  case AMDGPU::S_NAND_B64:
103  return AMDGPU::S_NAND_SAVEEXEC_B64;
104  case AMDGPU::S_NOR_B64:
105  return AMDGPU::S_NOR_SAVEEXEC_B64;
106  case AMDGPU::S_XNOR_B64:
107  return AMDGPU::S_XNOR_SAVEEXEC_B64;
108  default:
109  return AMDGPU::INSTRUCTION_LIST_END;
110  }
111 }
112 
113 // These are only terminators to get correct spill code placement during
114 // register allocation, so turn them back into normal instructions. Only one of
115 // these is expected per block.
117  switch (MI.getOpcode()) {
118  case AMDGPU::S_MOV_B64_term: {
119  MI.setDesc(TII.get(AMDGPU::COPY));
120  return true;
121  }
122  case AMDGPU::S_XOR_B64_term: {
123  // This is only a terminator to get the correct spill code placement during
124  // register allocation.
125  MI.setDesc(TII.get(AMDGPU::S_XOR_B64));
126  return true;
127  }
128  case AMDGPU::S_ANDN2_B64_term: {
129  // This is only a terminator to get the correct spill code placement during
130  // register allocation.
131  MI.setDesc(TII.get(AMDGPU::S_ANDN2_B64));
132  return true;
133  }
134  default:
135  return false;
136  }
137 }
138 
140  const SIInstrInfo &TII,
143  for (; I != E; ++I) {
144  if (!I->isTerminator())
145  return I;
146 
147  if (removeTerminatorBit(TII, *I))
148  return I;
149  }
150 
151  return E;
152 }
153 
155  const SIInstrInfo &TII,
158  unsigned CopyToExec) {
159  const unsigned InstLimit = 25;
160 
161  auto E = MBB.rend();
162  for (unsigned N = 0; N <= InstLimit && I != E; ++I, ++N) {
163  unsigned CopyFromExec = isCopyFromExec(*I);
164  if (CopyFromExec != AMDGPU::NoRegister)
165  return I;
166  }
167 
168  return E;
169 }
170 
171 // XXX - Seems LivePhysRegs doesn't work correctly since it will incorrectly
172 // repor tthe register as unavailable because a super-register with a lane mask
173 // as unavailable.
174 static bool isLiveOut(const MachineBasicBlock &MBB, unsigned Reg) {
175  for (MachineBasicBlock *Succ : MBB.successors()) {
176  if (Succ->isLiveIn(Reg))
177  return true;
178  }
179 
180  return false;
181 }
182 
183 bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
184  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
185  const SIRegisterInfo *TRI = ST.getRegisterInfo();
186  const SIInstrInfo *TII = ST.getInstrInfo();
187 
188  // Optimize sequences emitted for control flow lowering. They are originally
189  // emitted as the separate operations because spill code may need to be
190  // inserted for the saved copy of exec.
191  //
192  // x = copy exec
193  // z = s_<op>_b64 x, y
194  // exec = copy z
195  // =>
196  // x = s_<op>_saveexec_b64 y
197  //
198 
199  for (MachineBasicBlock &MBB : MF) {
202  if (I == E)
203  continue;
204 
205  unsigned CopyToExec = isCopyToExec(*I);
206  if (CopyToExec == AMDGPU::NoRegister)
207  continue;
208 
209  // Scan backwards to find the def.
210  auto CopyToExecInst = &*I;
211  auto CopyFromExecInst = findExecCopy(*TII, MBB, I, CopyToExec);
212  if (CopyFromExecInst == E)
213  continue;
214 
215  if (isLiveOut(MBB, CopyToExec)) {
216  // The copied register is live out and has a second use in another block.
217  DEBUG(dbgs() << "Exec copy source register is live out\n");
218  continue;
219  }
220 
221  unsigned CopyFromExec = CopyFromExecInst->getOperand(0).getReg();
222  MachineInstr *SaveExecInst = nullptr;
223  SmallVector<MachineInstr *, 4> OtherUseInsts;
224 
226  = std::next(CopyFromExecInst->getIterator()), JE = I->getIterator();
227  J != JE; ++J) {
228  if (SaveExecInst && J->readsRegister(AMDGPU::EXEC, TRI)) {
229  DEBUG(dbgs() << "exec read prevents saveexec: " << *J << '\n');
230  // Make sure this is inserted after any VALU ops that may have been
231  // scheduled in between.
232  SaveExecInst = nullptr;
233  break;
234  }
235 
236  if (J->modifiesRegister(CopyToExec, TRI)) {
237  if (SaveExecInst) {
238  DEBUG(dbgs() << "Multiple instructions modify "
239  << PrintReg(CopyToExec, TRI) << '\n');
240  SaveExecInst = nullptr;
241  break;
242  }
243 
244  unsigned SaveExecOp = getSaveExecOp(J->getOpcode());
245  if (SaveExecOp == AMDGPU::INSTRUCTION_LIST_END)
246  break;
247 
248  if (J->readsRegister(CopyFromExec, TRI)) {
249  SaveExecInst = &*J;
250  DEBUG(dbgs() << "Found save exec op: " << *SaveExecInst << '\n');
251  continue;
252  } else {
253  DEBUG(dbgs() << "Instruction does not read exec copy: " << *J << '\n');
254  break;
255  }
256  }
257 
258  if (SaveExecInst && J->readsRegister(CopyToExec, TRI)) {
259  assert(SaveExecInst != &*J);
260  OtherUseInsts.push_back(&*J);
261  }
262  }
263 
264  if (!SaveExecInst)
265  continue;
266 
267  DEBUG(dbgs() << "Insert save exec op: " << *SaveExecInst << '\n');
268 
269  MachineOperand &Src0 = SaveExecInst->getOperand(1);
270  MachineOperand &Src1 = SaveExecInst->getOperand(2);
271 
272  MachineOperand *OtherOp = nullptr;
273 
274  if (Src0.isReg() && Src0.getReg() == CopyFromExec) {
275  OtherOp = &Src1;
276  } else if (Src1.isReg() && Src1.getReg() == CopyFromExec) {
277  if (!SaveExecInst->isCommutable())
278  break;
279 
280  OtherOp = &Src0;
281  } else
282  llvm_unreachable("unexpected");
283 
284  CopyFromExecInst->eraseFromParent();
285 
286  auto InsPt = SaveExecInst->getIterator();
287  const DebugLoc &DL = SaveExecInst->getDebugLoc();
288 
289  BuildMI(MBB, InsPt, DL, TII->get(getSaveExecOp(SaveExecInst->getOpcode())),
290  CopyFromExec)
291  .addReg(OtherOp->getReg());
292  SaveExecInst->eraseFromParent();
293 
294  CopyToExecInst->eraseFromParent();
295 
296  for (MachineInstr *OtherInst : OtherUseInsts) {
297  OtherInst->substituteRegister(CopyToExec, AMDGPU::EXEC,
298  AMDGPU::NoSubRegister, *TRI);
299  }
300  }
301 
302  return true;
303 
304 }
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
AMDGPU specific subclass of TargetSubtarget.
INITIALIZE_PASS_BEGIN(SIOptimizeExecMasking, DEBUG_TYPE,"SI optimize exec mask operations", false, false) INITIALIZE_PASS_END(SIOptimizeExecMasking
const SIInstrInfo * getInstrInfo() const override
A debug info location.
Definition: DebugLoc.h:34
static bool removeTerminatorBit(const SIInstrInfo &TII, MachineInstr &MI)
static MachineBasicBlock::reverse_iterator fixTerminators(const SIInstrInfo &TII, MachineBasicBlock &MBB)
iterator_range< succ_iterator > successors()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:53
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
MachineBasicBlock iterator that automatically skips over MIs that are inside bundles (i...
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
Reg
All possible values of the reg field in the ModR/M byte.
static unsigned isCopyFromExec(const MachineInstr &MI)
If MI is a copy from exec, return the register copied to.
MachineBasicBlock * MBB
void initializeSIOptimizeExecMaskingPass(PassRegistry &)
SI optimize exec mask operations
#define DEBUG_TYPE
Printable PrintReg(unsigned Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubRegIdx=0)
Prints virtual and physical registers with or without a TRI instance.
reverse_iterator rend()
reverse_iterator rbegin()
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:273
static GCRegistry::Add< CoreCLRGC > E("coreclr","CoreCLR-compatible GC")
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
SI optimize exec mask false
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:279
Represent the analysis usage information of a pass.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,"Assign register bank of generic virtual registers", false, false) RegBankSelect
self_iterator getIterator()
Definition: ilist_node.h:81
static MachineBasicBlock::reverse_iterator findExecCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::reverse_iterator I, unsigned CopyToExec)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
char & SIOptimizeExecMaskingID
static unsigned isCopyToExec(const MachineInstr &MI)
If MI is a copy to exec, return the register copied from.
bool readsRegister(unsigned Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr reads the specified register.
Definition: MachineInstr.h:865
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
const SIRegisterInfo * getRegisterInfo() const override
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:843
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:276
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:250
Representation of each machine instruction.
Definition: MachineInstr.h:52
Interface definition for SIInstrInfo.
#define I(x, y, z)
Definition: MD5.cpp:54
#define N
unsigned getReg() const
getReg - Returns the register number.
bool isCommutable(QueryType Type=IgnoreBundle) const
Return true if this may be a 2- or 3-address instruction (of the form "X = op Y, Z, ..."), which produces the same result if Y and Z are exchanged.
Definition: MachineInstr.h:633
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
aarch64 promote const
static unsigned getSaveExecOp(unsigned Opc)
#define DEBUG(X)
Definition: Debug.h:100
static bool isLiveOut(const MachineBasicBlock &MBB, unsigned Reg)
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:47