LLVM  3.7.0
SIInsertWaits.cpp
Go to the documentation of this file.
1 //===-- SIInsertWaits.cpp - Insert wait instructions ----------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief Insert wait instructions for memory reads and writes.
12 ///
13 /// Memory reads and writes are issued asynchronously, so we need to insert
14 /// S_WAITCNT instructions when we want to access any of their results or
15 /// overwrite any register that's used asynchronously.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "AMDGPU.h"
20 #include "AMDGPUSubtarget.h"
21 #include "SIDefines.h"
22 #include "SIInstrInfo.h"
23 #include "SIMachineFunctionInfo.h"
28 
29 using namespace llvm;
30 
31 namespace {
32 
/// \brief One variable for each of the hardware counters.
///
/// The three values can be addressed either by name (\c Named) or by index
/// (\c Array) so that code can loop over all counters uniformly.  The code in
/// this file treats index 0 as VM, index 1 as EXP and index 2 as LGKM, i.e.
/// the declaration order of the named fields.
union Counters {
  struct {
    unsigned VM;
    unsigned EXP;
    unsigned LGKM;
  } Named;
  unsigned Array[3];
};
43 
/// \brief Coarse classification of an issued instruction.
///
/// Used to remember what kind of instruction was issued last, so that two
/// consecutive SMEM or two consecutive VMEM instructions can be detected.
enum InstType {
  OTHER,
  SMEM,
  VMEM
};
49 
50 typedef Counters RegCounters[512];
51 typedef std::pair<unsigned, unsigned> RegInterval;
52 
53 class SIInsertWaits : public MachineFunctionPass {
54 
55 private:
56  static char ID;
57  const SIInstrInfo *TII;
58  const SIRegisterInfo *TRI;
59  const MachineRegisterInfo *MRI;
60 
61  /// \brief Constant hardware limits
62  static const Counters WaitCounts;
63 
64  /// \brief Constant zero value
65  static const Counters ZeroCounts;
66 
67  /// \brief Counter values we have already waited on.
68  Counters WaitedOn;
69 
70  /// \brief Counter values for last instruction issued.
71  Counters LastIssued;
72 
73  /// \brief Registers used by async instructions.
74  RegCounters UsedRegs;
75 
76  /// \brief Registers defined by async instructions.
77  RegCounters DefinedRegs;
78 
79  /// \brief Different export instruction types seen since last wait.
80  unsigned ExpInstrTypesSeen;
81 
82  /// \brief Type of the last opcode.
83  InstType LastOpcodeType;
84 
85  bool LastInstWritesM0;
86 
87  /// \brief Get increment/decrement amount for this instruction.
88  Counters getHwCounts(MachineInstr &MI);
89 
90  /// \brief Is operand relevant for async execution?
91  bool isOpRelevant(MachineOperand &Op);
92 
93  /// \brief Get register interval an operand affects.
94  RegInterval getRegInterval(MachineOperand &Op);
95 
96  /// \brief Handle instructions async components
97  void pushInstruction(MachineBasicBlock &MBB,
99 
100  /// \brief Insert the actual wait instruction
101  bool insertWait(MachineBasicBlock &MBB,
103  const Counters &Counts);
104 
105  /// \brief Do we need def2def checks?
106  bool unorderedDefines(MachineInstr &MI);
107 
108  /// \brief Resolve all operand dependencies to counter requirements
109  Counters handleOperands(MachineInstr &MI);
110 
111  /// \brief Insert S_NOP between an instruction writing M0 and S_SENDMSG.
112  void handleSendMsg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
113 
114 public:
115  SIInsertWaits(TargetMachine &tm) :
117  TII(nullptr),
118  TRI(nullptr),
119  ExpInstrTypesSeen(0) { }
120 
121  bool runOnMachineFunction(MachineFunction &MF) override;
122 
123  const char *getPassName() const override {
124  return "SI insert wait instructions";
125  }
126 
127 };
128 
129 } // End anonymous namespace
130 
131 char SIInsertWaits::ID = 0;
132 
133 const Counters SIInsertWaits::WaitCounts = { { 15, 7, 7 } };
134 const Counters SIInsertWaits::ZeroCounts = { { 0, 0, 0 } };
135 
137  return new SIInsertWaits(tm);
138 }
139 
140 Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
141 
142  uint64_t TSFlags = TII->get(MI.getOpcode()).TSFlags;
143  Counters Result;
144 
145  Result.Named.VM = !!(TSFlags & SIInstrFlags::VM_CNT);
146 
147  // Only consider stores or EXP for EXP_CNT
148  Result.Named.EXP = !!(TSFlags & SIInstrFlags::EXP_CNT &&
149  (MI.getOpcode() == AMDGPU::EXP || MI.getDesc().mayStore()));
150 
151  // LGKM may uses larger values
152  if (TSFlags & SIInstrFlags::LGKM_CNT) {
153 
154  if (TII->isSMRD(MI.getOpcode())) {
155 
156  MachineOperand &Op = MI.getOperand(0);
157  assert(Op.isReg() && "First LGKM operand must be a register!");
158 
159  unsigned Reg = Op.getReg();
160  unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize();
161  Result.Named.LGKM = Size > 4 ? 2 : 1;
162 
163  } else {
164  // DS
165  Result.Named.LGKM = 1;
166  }
167 
168  } else {
169  Result.Named.LGKM = 0;
170  }
171 
172  return Result;
173 }
174 
175 bool SIInsertWaits::isOpRelevant(MachineOperand &Op) {
176 
177  // Constants are always irrelevant
178  if (!Op.isReg())
179  return false;
180 
181  // Defines are always relevant
182  if (Op.isDef())
183  return true;
184 
185  // For exports all registers are relevant
186  MachineInstr &MI = *Op.getParent();
187  if (MI.getOpcode() == AMDGPU::EXP)
188  return true;
189 
190  // For stores the stored value is also relevant
191  if (!MI.getDesc().mayStore())
192  return false;
193 
194  // Check if this operand is the value being stored.
195  // Special case for DS instructions, since the address
196  // operand comes before the value operand and it may have
197  // multiple data operands.
198 
199  if (TII->isDS(MI.getOpcode())) {
200  MachineOperand *Data = TII->getNamedOperand(MI, AMDGPU::OpName::data);
201  if (Data && Op.isIdenticalTo(*Data))
202  return true;
203 
204  MachineOperand *Data0 = TII->getNamedOperand(MI, AMDGPU::OpName::data0);
205  if (Data0 && Op.isIdenticalTo(*Data0))
206  return true;
207 
208  MachineOperand *Data1 = TII->getNamedOperand(MI, AMDGPU::OpName::data1);
209  if (Data1 && Op.isIdenticalTo(*Data1))
210  return true;
211 
212  return false;
213  }
214 
215  // NOTE: This assumes that the value operand is before the
216  // address operand, and that there is only one value operand.
218  E = MI.operands_end(); I != E; ++I) {
219 
220  if (I->isReg() && I->isUse())
221  return Op.isIdenticalTo(*I);
222  }
223 
224  return false;
225 }
226 
227 RegInterval SIInsertWaits::getRegInterval(MachineOperand &Op) {
228 
229  if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()))
230  return std::make_pair(0, 0);
231 
232  unsigned Reg = Op.getReg();
233  unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize();
234 
235  assert(Size >= 4);
236 
237  RegInterval Result;
238  Result.first = TRI->getEncodingValue(Reg);
239  Result.second = Result.first + Size / 4;
240 
241  return Result;
242 }
243 
244 void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
246 
247  // Get the hardware counter increments and sum them up
248  Counters Increment = getHwCounts(*I);
249  unsigned Sum = 0;
250 
251  for (unsigned i = 0; i < 3; ++i) {
252  LastIssued.Array[i] += Increment.Array[i];
253  Sum += Increment.Array[i];
254  }
255 
256  // If we don't increase anything then that's it
257  if (Sum == 0) {
258  LastOpcodeType = OTHER;
259  return;
260  }
261 
262  if (MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() >=
264  // Any occurence of consecutive VMEM or SMEM instructions forms a VMEM
265  // or SMEM clause, respectively.
266  //
267  // The temporary workaround is to break the clauses with S_NOP.
268  //
269  // The proper solution would be to allocate registers such that all source
270  // and destination registers don't overlap, e.g. this is illegal:
271  // r0 = load r2
272  // r2 = load r0
273  if ((LastOpcodeType == SMEM && TII->isSMRD(I->getOpcode())) ||
274  (LastOpcodeType == VMEM && Increment.Named.VM)) {
275  // Insert a NOP to break the clause.
276  BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP))
277  .addImm(0);
278  LastInstWritesM0 = false;
279  }
280 
281  if (TII->isSMRD(I->getOpcode()))
282  LastOpcodeType = SMEM;
283  else if (Increment.Named.VM)
284  LastOpcodeType = VMEM;
285  }
286 
287  // Remember which export instructions we have seen
288  if (Increment.Named.EXP) {
289  ExpInstrTypesSeen |= I->getOpcode() == AMDGPU::EXP ? 1 : 2;
290  }
291 
292  for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
293 
294  MachineOperand &Op = I->getOperand(i);
295  if (!isOpRelevant(Op))
296  continue;
297 
298  RegInterval Interval = getRegInterval(Op);
299  for (unsigned j = Interval.first; j < Interval.second; ++j) {
300 
301  // Remember which registers we define
302  if (Op.isDef())
303  DefinedRegs[j] = LastIssued;
304 
305  // and which one we are using
306  if (Op.isUse())
307  UsedRegs[j] = LastIssued;
308  }
309  }
310 }
311 
312 bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
314  const Counters &Required) {
315 
316  // End of program? No need to wait on anything
317  if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM)
318  return false;
319 
320  // Figure out if the async instructions execute in order
321  bool Ordered[3];
322 
323  // VM_CNT is always ordered
324  Ordered[0] = true;
325 
326  // EXP_CNT is unordered if we have both EXP & VM-writes
327  Ordered[1] = ExpInstrTypesSeen == 3;
328 
329  // LGKM_CNT is handled as always unordered. TODO: Handle LDS and GDS
330  Ordered[2] = false;
331 
332  // The values we are going to put into the S_WAITCNT instruction
333  Counters Counts = WaitCounts;
334 
335  // Do we really need to wait?
336  bool NeedWait = false;
337 
338  for (unsigned i = 0; i < 3; ++i) {
339 
340  if (Required.Array[i] <= WaitedOn.Array[i])
341  continue;
342 
343  NeedWait = true;
344 
345  if (Ordered[i]) {
346  unsigned Value = LastIssued.Array[i] - Required.Array[i];
347 
348  // Adjust the value to the real hardware possibilities.
349  Counts.Array[i] = std::min(Value, WaitCounts.Array[i]);
350 
351  } else
352  Counts.Array[i] = 0;
353 
354  // Remember on what we have waited on.
355  WaitedOn.Array[i] = LastIssued.Array[i] - Counts.Array[i];
356  }
357 
358  if (!NeedWait)
359  return false;
360 
361  // Reset EXP_CNT instruction types
362  if (Counts.Named.EXP == 0)
363  ExpInstrTypesSeen = 0;
364 
365  // Build the wait instruction
366  BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
367  .addImm((Counts.Named.VM & 0xF) |
368  ((Counts.Named.EXP & 0x7) << 4) |
369  ((Counts.Named.LGKM & 0x7) << 8));
370 
371  LastOpcodeType = OTHER;
372  LastInstWritesM0 = false;
373  return true;
374 }
375 
376 /// \brief helper function for handleOperands
377 static void increaseCounters(Counters &Dst, const Counters &Src) {
378 
379  for (unsigned i = 0; i < 3; ++i)
380  Dst.Array[i] = std::max(Dst.Array[i], Src.Array[i]);
381 }
382 
383 Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
384 
385  Counters Result = ZeroCounts;
386 
387  // S_SENDMSG implicitly waits for all outstanding LGKM transfers to finish,
388  // but we also want to wait for any other outstanding transfers before
389  // signalling other hardware blocks
390  if (MI.getOpcode() == AMDGPU::S_SENDMSG)
391  return LastIssued;
392 
393  // For each register affected by this
394  // instruction increase the result sequence
395  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
396 
397  MachineOperand &Op = MI.getOperand(i);
398  RegInterval Interval = getRegInterval(Op);
399  for (unsigned j = Interval.first; j < Interval.second; ++j) {
400 
401  if (Op.isDef()) {
402  increaseCounters(Result, UsedRegs[j]);
403  increaseCounters(Result, DefinedRegs[j]);
404  }
405 
406  if (Op.isUse())
407  increaseCounters(Result, DefinedRegs[j]);
408  }
409  }
410 
411  return Result;
412 }
413 
414 void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB,
416  if (MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() <
418  return;
419 
420  // There must be "S_NOP 0" between an instruction writing M0 and S_SENDMSG.
421  if (LastInstWritesM0 && I->getOpcode() == AMDGPU::S_SENDMSG) {
422  BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP)).addImm(0);
423  LastInstWritesM0 = false;
424  return;
425  }
426 
427  // Set whether this instruction sets M0
428  LastInstWritesM0 = false;
429 
430  unsigned NumOperands = I->getNumOperands();
431  for (unsigned i = 0; i < NumOperands; i++) {
432  const MachineOperand &Op = I->getOperand(i);
433 
434  if (Op.isReg() && Op.isDef() && Op.getReg() == AMDGPU::M0)
435  LastInstWritesM0 = true;
436  }
437 }
438 
439 // FIXME: Insert waits listed in Table 4.2 "Required User-Inserted Wait States"
440 // around other non-memory instructions.
441 bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
442  bool Changes = false;
443 
444  TII = static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
445  TRI =
446  static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
447 
448  MRI = &MF.getRegInfo();
449 
450  WaitedOn = ZeroCounts;
451  LastIssued = ZeroCounts;
452  LastOpcodeType = OTHER;
453  LastInstWritesM0 = false;
454 
455  memset(&UsedRegs, 0, sizeof(UsedRegs));
456  memset(&DefinedRegs, 0, sizeof(DefinedRegs));
457 
458  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
459  BI != BE; ++BI) {
460 
461  MachineBasicBlock &MBB = *BI;
462  for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
463  I != E; ++I) {
464 
465  // Wait for everything before a barrier.
466  if (I->getOpcode() == AMDGPU::S_BARRIER)
467  Changes |= insertWait(MBB, I, LastIssued);
468  else
469  Changes |= insertWait(MBB, I, handleOperands(*I));
470 
471  pushInstruction(MBB, I);
472  handleSendMsg(MBB, I);
473  }
474 
475  // Wait for everything at the end of the MBB
476  Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
477  }
478 
479  return Changes;
480 }
const MachineFunction * getParent() const
getParent - Return the MachineFunction containing this basic block.
mop_iterator operands_end()
Definition: MachineInstr.h:290
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
AMDGPU specific subclass of TargetSubtarget.
Interval Class - An Interval is a set of nodes defined such that every node in the interval has all o...
Definition: Interval.h:37
bool mayStore() const
Return true if this instruction could possibly modify memory.
Definition: MCInstrDesc.h:356
iterator getFirstTerminator()
getFirstTerminator - returns an iterator to the first terminator instruction of this basic block...
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:264
A debug info location.
Definition: DebugLoc.h:34
FunctionPass * createSIInsertWaits(TargetMachine &tm)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Reg
All possible values of the reg field in the ModR/M byte.
Number of individual test Apply this number of consecutive mutations to each input exit after the first new interesting input is found the minimized corpus is saved into the first input directory Number of jobs to run If min(jobs, NumberOfCpuCores()/2)\" is used.") FUZZER_FLAG_INT(reload
unsigned getNumOperands() const
Access to explicit operands of the instruction.
Definition: MachineInstr.h:271
static void increaseCounters(Counters &Dst, const Counters &Src)
helper function for handleOperands
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:267
bundle_iterator< MachineInstr, instr_iterator > iterator
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:273
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:294
MachineInstrBuilder BuildMI(MachineFunction &MF, DebugLoc DL, const MCInstrDesc &MCID)
BuildMI - Builder interface.
MachineOperand class - Representation of each machine instruction operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Representation of each machine instruction.
Definition: MachineInstr.h:51
Interface definition for SIInstrInfo.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
#define I(x, y, z)
Definition: MD5.cpp:54
unsigned getReg() const
getReg - Returns the register number.
virtual const TargetInstrInfo * getInstrInfo() const
LLVM Value Representation.
Definition: Value.h:69
mop_iterator operands_begin()
Definition: MachineInstr.h:289
BasicBlockListType::iterator iterator
Primary interface to the complete machine description for the target machine.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
bool isIdenticalTo(const MachineOperand &Other) const
isIdenticalTo - Return true if this operand is identical to the specified operand.
InstType