LLVM  4.0.0
SIInsertSkips.cpp
//===-- SIInsertSkips.cpp - Use predicates for control flow ----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief This pass inserts branches on the 0 exec mask over divergent
/// branches when it's expected that jumping over the untaken control flow will
/// be cheaper than having every workitem no-op through it.
//
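//
// For example (an illustrative sketch, not output from a real compilation):
// once SILowerControlFlow has guarded a divergent region with a
// SI_MASK_BRANCH pseudo, this pass adds a real skip when the region is at
// least SkipThreshold instructions long, giving code roughly like:
//
//   s_and_saveexec_b64 s[0:1], vcc
//   s_cbranch_execz BB0_2        ; inserted by SIInsertSkips
//   ...                          ; large divergent region
// BB0_2:
//   s_or_b64 exec, exec, s[0:1]
//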
15 
16 #include "AMDGPU.h"
17 #include "AMDGPUSubtarget.h"
18 #include "SIInstrInfo.h"
19 #include "SIMachineFunctionInfo.h"
24 #include "llvm/MC/MCAsmInfo.h"
25 
using namespace llvm;

#define DEBUG_TYPE "si-insert-skips"

namespace {

static cl::opt<unsigned> SkipThresholdFlag(
  "amdgpu-skip-threshold",
  cl::desc("Number of instructions before jumping over divergent control flow"),
  cl::init(12), cl::Hidden);
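
// Note: being a cl::opt, the threshold above can be overridden on the llc
// command line (e.g. -amdgpu-skip-threshold=<N>); cl::Hidden only keeps the
// option out of the default -help listing.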

class SIInsertSkips : public MachineFunctionPass {
private:
  const SIRegisterInfo *TRI;
  const SIInstrInfo *TII;
  unsigned SkipThreshold;

  bool shouldSkip(const MachineBasicBlock &From,
                  const MachineBasicBlock &To) const;

  bool skipIfDead(MachineInstr &MI, MachineBasicBlock &NextBB);

  void kill(MachineInstr &MI);

  MachineBasicBlock *insertSkipBlock(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator I) const;

  bool skipMaskBranch(MachineInstr &MI, MachineBasicBlock &MBB);

public:
  static char ID;

  SIInsertSkips() :
    MachineFunctionPass(ID), TRI(nullptr), TII(nullptr), SkipThreshold(0) { }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return "SI insert s_cbranch_execz instructions";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace

char SIInsertSkips::ID = 0;

INITIALIZE_PASS(SIInsertSkips, DEBUG_TYPE,
                "SI insert s_cbranch_execz instructions", false, false)

char &llvm::SIInsertSkipsPassID = SIInsertSkips::ID;

static bool opcodeEmitsNoInsts(unsigned Opc) {
  switch (Opc) {
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
  case TargetOpcode::BUNDLE:
  case TargetOpcode::CFI_INSTRUCTION:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::GC_LABEL:
  case TargetOpcode::DBG_VALUE:
    return true;
  default:
    return false;
  }
}

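// Estimate how many instructions would execute between From and To; return
// true as soon as the count reaches SkipThreshold, or when a VCC-based branch
// is found (which must always be skipped when EXEC is zero).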
bool SIInsertSkips::shouldSkip(const MachineBasicBlock &From,
                               const MachineBasicBlock &To) const {
  if (From.succ_empty())
    return false;

  unsigned NumInstr = 0;
  const MachineFunction *MF = From.getParent();

  for (MachineFunction::const_iterator MBBI(&From), ToI(&To), End = MF->end();
       MBBI != End && MBBI != ToI; ++MBBI) {
    const MachineBasicBlock &MBB = *MBBI;

    for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
         NumInstr < SkipThreshold && I != E; ++I) {
      if (opcodeEmitsNoInsts(I->getOpcode()))
        continue;

      // FIXME: Since this is required for correctness, this should be inserted
      // during SILowerControlFlow.

      // When a uniform loop is inside non-uniform control flow, the branch
      // leaving the loop might be an S_CBRANCH_VCCNZ, which is never taken
      // when EXEC = 0. We should skip the loop lest it becomes infinite.
      if (I->getOpcode() == AMDGPU::S_CBRANCH_VCCNZ ||
          I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ)
        return true;

      if (I->isInlineAsm()) {
        const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
        const char *AsmStr = I->getOperand(0).getSymbolName();

        // inlineasm length estimate is number of bytes assuming the longest
        // instruction.
        uint64_t MaxAsmSize = TII->getInlineAsmLength(AsmStr, *MAI);
        NumInstr += MaxAsmSize / MAI->getMaxInstLength();
      } else {
        ++NumInstr;
      }

      if (NumInstr >= SkipThreshold)
        return true;
    }
  }

  return false;
}

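// Insert a block after MI that, when EXEC is zero, does a null export and
// terminates the wavefront; live waves branch over it to NextBB. Returns true
// if the block was inserted.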
bool SIInsertSkips::skipIfDead(MachineInstr &MI, MachineBasicBlock &NextBB) {
  MachineBasicBlock &MBB = *MI.getParent();
  MachineFunction *MF = MBB.getParent();

  if (MF->getFunction()->getCallingConv() != CallingConv::AMDGPU_PS ||
      !shouldSkip(MBB, MBB.getParent()->back()))
    return false;

  MachineBasicBlock *SkipBB = insertSkipBlock(MBB, MI.getIterator());

  const DebugLoc &DL = MI.getDebugLoc();

  // If the exec mask is non-zero, skip the next two instructions
  BuildMI(&MBB, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
    .addMBB(&NextBB);

  MachineBasicBlock::iterator Insert = SkipBB->begin();

  // Exec mask is zero: Export to NULL target...
  BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::EXP_DONE))
    .addImm(0x09) // V_008DFC_SQ_EXP_NULL
    .addReg(AMDGPU::VGPR0, RegState::Undef)
    .addReg(AMDGPU::VGPR0, RegState::Undef)
    .addReg(AMDGPU::VGPR0, RegState::Undef)
    .addReg(AMDGPU::VGPR0, RegState::Undef)
    .addImm(1)  // vm
    .addImm(0)  // compr
    .addImm(0); // en

  // ... and terminate wavefront.
  BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM));

  return true;
}

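// Lower SI_KILL_TERMINATOR: for a negative constant operand clear EXEC
// outright (or do nothing if it is non-negative); otherwise use V_CMPX to turn
// off only the lanes whose operand value is negative.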
void SIInsertSkips::kill(MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();
  DebugLoc DL = MI.getDebugLoc();
  const MachineOperand &Op = MI.getOperand(0);

#ifndef NDEBUG
  CallingConv::ID CallConv = MBB.getParent()->getFunction()->getCallingConv();
  // Kill is only allowed in pixel / geometry shaders.
  assert(CallConv == CallingConv::AMDGPU_PS ||
         CallConv == CallingConv::AMDGPU_GS);
#endif
  // Clear this thread from the exec mask if the operand is negative.
  if (Op.isImm()) {
    // Constant operand: Set exec mask to 0 or do nothing
    if (Op.getImm() & 0x80000000) {
      BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
        .addImm(0);
    }
  } else {
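    // V_CMPX_LE_F32 writes the per-lane result of (0 <= Op) back into EXEC,
    // so lanes where the operand is negative are disabled.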
    BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32))
      .addImm(0)
      .addOperand(Op);
  }
}

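// Create an empty block immediately after MBB and register it as a successor;
// the caller fills it with the early-exit sequence.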
MachineBasicBlock *SIInsertSkips::insertSkipBlock(
  MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const {
  MachineFunction *MF = MBB.getParent();

  MachineBasicBlock *SkipBB = MF->CreateMachineBasicBlock();
  MachineFunction::iterator MBBI(MBB);
  ++MBBI;

  MF->insert(MBBI, SkipBB);
  MBB.addSuccessor(SkipBB);

  return SkipBB;
}

// Returns true if a branch over the block was inserted.
bool SIInsertSkips::skipMaskBranch(MachineInstr &MI,
                                   MachineBasicBlock &SrcMBB) {
  MachineBasicBlock *DestBB = MI.getOperand(0).getMBB();

  if (!shouldSkip(**SrcMBB.succ_begin(), *DestBB))
    return false;

  const DebugLoc &DL = MI.getDebugLoc();
  MachineBasicBlock::iterator InsPt = std::next(MI.getIterator());

  BuildMI(SrcMBB, InsPt, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
    .addMBB(DestBB);

  return true;
}

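// Walk every block: turn SI_MASK_BRANCH into an explicit s_cbranch_execz skip
// when profitable, lower SI_KILL_TERMINATOR, drop S_BRANCHes to the layout
// successor, and redirect SI_RETURNs that are not last to a trailing empty
// block.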
bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  TII = ST.getInstrInfo();
  TRI = &TII->getRegisterInfo();
  SkipThreshold = SkipThresholdFlag;

  bool HaveKill = false;
  bool MadeChange = false;

  // Track depth of exec mask, divergent branches.
  SmallVector<MachineBasicBlock *, 16> ExecBranchStack;

  MachineFunction::iterator NextBB;

  MachineBasicBlock *EmptyMBBAtEnd = nullptr;

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; BI = NextBB) {
    NextBB = std::next(BI);
    MachineBasicBlock &MBB = *BI;

    if (!ExecBranchStack.empty() && ExecBranchStack.back() == &MBB) {
      // Reached convergence point for last divergent branch.
      ExecBranchStack.pop_back();
    }

    if (HaveKill && ExecBranchStack.empty()) {
      HaveKill = false;

      // TODO: Insert skip if exec is 0?
    }

    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);

      MachineInstr &MI = *I;

      switch (MI.getOpcode()) {
      case AMDGPU::SI_MASK_BRANCH: {
        ExecBranchStack.push_back(MI.getOperand(0).getMBB());
        MadeChange |= skipMaskBranch(MI, MBB);
        break;
      }
      case AMDGPU::S_BRANCH: {
        // Optimize out branches to the next block.
        // FIXME: Shouldn't this be handled by BranchFolding?
        if (MBB.isLayoutSuccessor(MI.getOperand(0).getMBB()))
          MI.eraseFromParent();
        break;
      }
      case AMDGPU::SI_KILL_TERMINATOR: {
        MadeChange = true;
        kill(MI);

        if (ExecBranchStack.empty()) {
          if (skipIfDead(MI, *NextBB)) {
            NextBB = std::next(BI);
            BE = MF.end();
            Next = MBB.end();
          }
        } else {
          HaveKill = true;
        }

        MI.eraseFromParent();
        break;
      }
      case AMDGPU::SI_RETURN: {
        // FIXME: Should move somewhere else
        assert(!MF.getInfo<SIMachineFunctionInfo>()->returnsVoid());

        // Graphics shaders returning non-void shouldn't contain S_ENDPGM,
        // because external bytecode will be appended at the end.
        if (BI != --MF.end() || I != MBB.getFirstTerminator()) {
          // SI_RETURN is not the last instruction. Add an empty block at
          // the end and jump there.
          if (!EmptyMBBAtEnd) {
            EmptyMBBAtEnd = MF.CreateMachineBasicBlock();
            MF.insert(MF.end(), EmptyMBBAtEnd);
          }

          MBB.addSuccessor(EmptyMBBAtEnd);
          BuildMI(*BI, I, MI.getDebugLoc(), TII->get(AMDGPU::S_BRANCH))
            .addMBB(EmptyMBBAtEnd);
          I->eraseFromParent();
        }
      }
      default:
        break;
      }
    }
  }

  return MadeChange;
}