//===-- GCNHazardRecognizers.cpp - GCN Hazard Recognizer Impls ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements hazard recognizers for scheduling on GCN processors.
//
//===----------------------------------------------------------------------===//

#include "GCNHazardRecognizer.h"
#include "AMDGPUSubtarget.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <cassert>
#include <limits>
#include <set>
#include <vector>

using namespace llvm;

//===----------------------------------------------------------------------===//
// Hazard Recognizer Implementation
//===----------------------------------------------------------------------===//

GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
  IsHazardRecognizerMode(false),
  CurrCycleInstr(nullptr),
  MF(MF),
  ST(MF.getSubtarget<GCNSubtarget>()),
  TII(*ST.getInstrInfo()),
  TRI(TII.getRegisterInfo()),
  ClauseUses(TRI.getNumRegUnits()),
  ClauseDefs(TRI.getNumRegUnits()) {
  MaxLookAhead = MF.getRegInfo().isPhysRegUsed(AMDGPU::AGPR0) ? 18 : 5;
  TSchedModel.init(&ST);
}

void GCNHazardRecognizer::EmitInstruction(SUnit *SU) {
  EmitInstruction(SU->getInstr());
}

void GCNHazardRecognizer::EmitInstruction(MachineInstr *MI) {
  CurrCycleInstr = MI;
}

static bool isDivFMas(unsigned Opcode) {
  return Opcode == AMDGPU::V_DIV_FMAS_F32 || Opcode == AMDGPU::V_DIV_FMAS_F64;
}

static bool isSGetReg(unsigned Opcode) {
  return Opcode == AMDGPU::S_GETREG_B32;
}

static bool isSSetReg(unsigned Opcode) {
  return Opcode == AMDGPU::S_SETREG_B32 || Opcode == AMDGPU::S_SETREG_IMM32_B32;
}

static bool isRWLane(unsigned Opcode) {
  return Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32;
}

static bool isRFE(unsigned Opcode) {
  return Opcode == AMDGPU::S_RFE_B64;
}

static bool isSMovRel(unsigned Opcode) {
  switch (Opcode) {
  case AMDGPU::S_MOVRELS_B32:
  case AMDGPU::S_MOVRELS_B64:
  case AMDGPU::S_MOVRELD_B32:
  case AMDGPU::S_MOVRELD_B64:
    return true;
  default:
    return false;
  }
}

static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII,
                                    const MachineInstr &MI) {
  if (TII.isAlwaysGDS(MI.getOpcode()))
    return true;

  switch (MI.getOpcode()) {
  case AMDGPU::S_SENDMSG:
  case AMDGPU::S_SENDMSGHALT:
  case AMDGPU::S_TTRACEDATA:
    return true;
  // These DS opcodes don't support GDS.
  case AMDGPU::DS_NOP:
  case AMDGPU::DS_PERMUTE_B32:
  case AMDGPU::DS_BPERMUTE_B32:
    return false;
  default:
    if (TII.isDS(MI.getOpcode())) {
      int GDS = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::gds);
      if (MI.getOperand(GDS).getImm())
        return true;
    }
    return false;
  }
}

static bool isPermlane(const MachineInstr &MI) {
  unsigned Opcode = MI.getOpcode();
  return Opcode == AMDGPU::V_PERMLANE16_B32 ||
         Opcode == AMDGPU::V_PERMLANEX16_B32;
}

static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
  const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
                                                     AMDGPU::OpName::simm16);
  return RegOp->getImm() & AMDGPU::Hwreg::ID_MASK_;
}

ScheduleHazardRecognizer::HazardType
GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
  MachineInstr *MI = SU->getInstr();
  if (MI->isBundle())
    return NoHazard;

  if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0)
    return NoopHazard;

  // FIXME: Should flat be considered vmem?
  if ((SIInstrInfo::isVMEM(*MI) ||
       SIInstrInfo::isFLAT(*MI))
      && checkVMEMHazards(MI) > 0)
    return NoopHazard;

  if (ST.hasNSAtoVMEMBug() && checkNSAtoVMEMHazard(MI) > 0)
    return NoopHazard;

  if (checkFPAtomicToDenormModeHazard(MI) > 0)
    return NoopHazard;

  if (ST.hasNoDataDepHazard())
    return NoHazard;

  if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0)
    return NoopHazard;

  if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0)
    return NoopHazard;

  if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0)
    return NoopHazard;

  if (isRWLane(MI->getOpcode()) && checkRWLaneHazards(MI) > 0)
    return NoopHazard;

  if (isSGetReg(MI->getOpcode()) && checkGetRegHazards(MI) > 0)
    return NoopHazard;

  if (isSSetReg(MI->getOpcode()) && checkSetRegHazards(MI) > 0)
    return NoopHazard;

  if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0)
    return NoopHazard;

  if (ST.hasReadM0MovRelInterpHazard() &&
      (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) &&
      checkReadM0Hazards(MI) > 0)
    return NoopHazard;

  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI) &&
      checkReadM0Hazards(MI) > 0)
    return NoopHazard;

  if (SIInstrInfo::isMAI(*MI) && checkMAIHazards(MI) > 0)
    return NoopHazard;

  if ((MI->mayLoad() || MI->mayStore()) && checkMAILdStHazards(MI) > 0)
    return NoopHazard;

  if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0)
    return NoopHazard;

  if (checkAnyInstHazards(MI) > 0)
    return NoopHazard;

  return NoHazard;
}

static void insertNoopInBundle(MachineInstr *MI, const SIInstrInfo &TII) {
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::S_NOP))
      .addImm(0);
}

void GCNHazardRecognizer::processBundle() {
  MachineBasicBlock::instr_iterator MI = std::next(CurrCycleInstr->getIterator());
  MachineBasicBlock::instr_iterator E = CurrCycleInstr->getParent()->instr_end();
  // Check bundled MachineInstr's for hazards.
  for (; MI != E && MI->isInsideBundle(); ++MI) {
    CurrCycleInstr = &*MI;
    unsigned WaitStates = PreEmitNoopsCommon(CurrCycleInstr);

    if (IsHazardRecognizerMode)
      fixHazards(CurrCycleInstr);

    for (unsigned i = 0; i < WaitStates; ++i)
      insertNoopInBundle(CurrCycleInstr, TII);

    // It's unnecessary to track more than MaxLookAhead instructions. Since we
    // include the bundled MI directly after, only add a maximum of
    // (MaxLookAhead - 1) noops to EmittedInstrs.
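    // E.g. (illustrative): with MaxLookAhead == 5 and WaitStates == 7, the
    // loop below adds only four nullptr entries before the bundled MI itself
    // is pushed.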
    for (unsigned i = 0, e = std::min(WaitStates, MaxLookAhead - 1); i < e; ++i)
      EmittedInstrs.push_front(nullptr);

    EmittedInstrs.push_front(CurrCycleInstr);
    EmittedInstrs.resize(MaxLookAhead);
  }
  CurrCycleInstr = nullptr;
}

unsigned GCNHazardRecognizer::PreEmitNoops(SUnit *SU) {
  IsHazardRecognizerMode = false;
  return PreEmitNoopsCommon(SU->getInstr());
}

unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
  IsHazardRecognizerMode = true;
  CurrCycleInstr = MI;
  unsigned W = PreEmitNoopsCommon(MI);
  fixHazards(MI);
  CurrCycleInstr = nullptr;
  return W;
}

unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
  if (MI->isBundle())
    return 0;

  int WaitStates = std::max(0, checkAnyInstHazards(MI));

  if (SIInstrInfo::isSMRD(*MI))
    return std::max(WaitStates, checkSMRDHazards(MI));

  if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI))
    WaitStates = std::max(WaitStates, checkVMEMHazards(MI));

  if (ST.hasNSAtoVMEMBug())
    WaitStates = std::max(WaitStates, checkNSAtoVMEMHazard(MI));

  WaitStates = std::max(WaitStates, checkFPAtomicToDenormModeHazard(MI));

  if (ST.hasNoDataDepHazard())
    return WaitStates;

  if (SIInstrInfo::isVALU(*MI))
    WaitStates = std::max(WaitStates, checkVALUHazards(MI));

  if (SIInstrInfo::isDPP(*MI))
    WaitStates = std::max(WaitStates, checkDPPHazards(MI));

  if (isDivFMas(MI->getOpcode()))
    WaitStates = std::max(WaitStates, checkDivFMasHazards(MI));

  if (isRWLane(MI->getOpcode()))
    WaitStates = std::max(WaitStates, checkRWLaneHazards(MI));

  if (MI->isInlineAsm())
    return std::max(WaitStates, checkInlineAsmHazards(MI));

  if (isSGetReg(MI->getOpcode()))
    return std::max(WaitStates, checkGetRegHazards(MI));

  if (isSSetReg(MI->getOpcode()))
    return std::max(WaitStates, checkSetRegHazards(MI));

  if (isRFE(MI->getOpcode()))
    return std::max(WaitStates, checkRFEHazards(MI));

  if (ST.hasReadM0MovRelInterpHazard() && (TII.isVINTRP(*MI) ||
                                           isSMovRel(MI->getOpcode())))
    return std::max(WaitStates, checkReadM0Hazards(MI));

  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI))
    return std::max(WaitStates, checkReadM0Hazards(MI));

  if (SIInstrInfo::isMAI(*MI))
    return std::max(WaitStates, checkMAIHazards(MI));

  if (MI->mayLoad() || MI->mayStore())
    return std::max(WaitStates, checkMAILdStHazards(MI));

  return WaitStates;
}

void GCNHazardRecognizer::EmitNoop() {
  EmittedInstrs.push_front(nullptr);
}

void GCNHazardRecognizer::AdvanceCycle() {
  // When the scheduler detects a stall, it will call AdvanceCycle() without
  // emitting any instructions.
  if (!CurrCycleInstr)
    return;

  // Do not track non-instructions which do not affect the wait states.
  // If included, these instructions can lead to buffer overflow such that
  // detectable hazards are missed.
  if (CurrCycleInstr->isImplicitDef() || CurrCycleInstr->isDebugInstr() ||
      CurrCycleInstr->isKill())
    return;

  if (CurrCycleInstr->isBundle()) {
    processBundle();
    return;
  }

  unsigned NumWaitStates = TII.getNumWaitStates(*CurrCycleInstr);

  // Keep track of emitted instructions.
  EmittedInstrs.push_front(CurrCycleInstr);

  // Add a nullptr for each additional wait state after the first. Make sure
  // not to add more than getMaxLookAhead() items to the list, since we
  // truncate the list to that size right after this loop.
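  // E.g. (illustrative): for NumWaitStates == 3, two nullptr slots end up in
  // front of MI, i.e. EmittedInstrs == [nullptr, nullptr, MI, ...], modelling
  // the extra cycles MI occupies.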
  for (unsigned i = 1, e = std::min(NumWaitStates, getMaxLookAhead());
       i < e; ++i) {
    EmittedInstrs.push_front(nullptr);
  }

  // getMaxLookAhead() is the largest number of wait states we will ever need
  // to insert, so there is no point in keeping track of more than that many
  // wait states.
  EmittedInstrs.resize(getMaxLookAhead());

  CurrCycleInstr = nullptr;
}

void GCNHazardRecognizer::RecedeCycle() {
  llvm_unreachable("hazard recognizer does not support bottom-up scheduling.");
}

//===----------------------------------------------------------------------===//
// Helper Functions
//===----------------------------------------------------------------------===//

typedef function_ref<bool(MachineInstr *, int WaitStates)> IsExpiredFn;

// Returns the minimum number of wait states since \p I, walking all
// predecessors. Scans only until \p IsExpired returns true.
// Can only be run in hazard recognizer mode.
static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
                              MachineBasicBlock *MBB,
                              MachineBasicBlock::reverse_instr_iterator I,
                              int WaitStates,
                              IsExpiredFn IsExpired,
                              DenseSet<const MachineBasicBlock *> &Visited) {
  for (auto E = MBB->instr_rend(); I != E; ++I) {
    // Don't add WaitStates for parent BUNDLE instructions.
    if (I->isBundle())
      continue;

    if (IsHazard(&*I))
      return WaitStates;

    if (I->isInlineAsm() || I->isImplicitDef() || I->isDebugInstr())
      continue;

    WaitStates += SIInstrInfo::getNumWaitStates(*I);

    if (IsExpired(&*I, WaitStates))
      return std::numeric_limits<int>::max();
  }

  int MinWaitStates = WaitStates;
  bool Found = false;
  for (MachineBasicBlock *Pred : MBB->predecessors()) {
    if (!Visited.insert(Pred).second)
      continue;

    int W = getWaitStatesSince(IsHazard, Pred, Pred->instr_rbegin(),
                               WaitStates, IsExpired, Visited);

    if (W == std::numeric_limits<int>::max())
      continue;

    MinWaitStates = Found ? std::min(MinWaitStates, W) : W;
    if (IsExpired(nullptr, MinWaitStates))
      return MinWaitStates;

    Found = true;
  }

  if (Found)
    return MinWaitStates;

  return std::numeric_limits<int>::max();
}

static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
                              MachineInstr *MI,
                              IsExpiredFn IsExpired) {
  DenseSet<const MachineBasicBlock *> Visited;
  return getWaitStatesSince(IsHazard, MI->getParent(),
                            std::next(MI->getReverseIterator()),
                            0, IsExpired, Visited);
}

int GCNHazardRecognizer::getWaitStatesSince(IsHazardFn IsHazard, int Limit) {
  if (IsHazardRecognizerMode) {
    auto IsExpiredFn = [Limit] (MachineInstr *, int WaitStates) {
      return WaitStates >= Limit;
    };
    return ::getWaitStatesSince(IsHazard, CurrCycleInstr, IsExpiredFn);
  }

  int WaitStates = 0;
  for (MachineInstr *MI : EmittedInstrs) {
    if (MI) {
      if (IsHazard(MI))
        return WaitStates;

      if (MI->isInlineAsm())
        continue;
    }
    ++WaitStates;

    if (WaitStates >= Limit)
      break;
  }
  return std::numeric_limits<int>::max();
}

int GCNHazardRecognizer::getWaitStatesSinceDef(unsigned Reg,
                                               IsHazardFn IsHazardDef,
                                               int Limit) {
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  auto IsHazardFn = [IsHazardDef, TRI, Reg] (MachineInstr *MI) {
    return IsHazardDef(MI) && MI->modifiesRegister(Reg, TRI);
  };

  return getWaitStatesSince(IsHazardFn, Limit);
}

int GCNHazardRecognizer::getWaitStatesSinceSetReg(IsHazardFn IsHazard,
                                                  int Limit) {
  auto IsHazardFn = [IsHazard] (MachineInstr *MI) {
    return isSSetReg(MI->getOpcode()) && IsHazard(MI);
  };

  return getWaitStatesSince(IsHazardFn, Limit);
}

//===----------------------------------------------------------------------===//
// No-op Hazard Detection
//===----------------------------------------------------------------------===//

static void addRegUnits(const SIRegisterInfo &TRI,
                        BitVector &BV, unsigned Reg) {
  for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI)
    BV.set(*RUI);
}

static void addRegsToSet(const SIRegisterInfo &TRI,
                         iterator_range<MachineInstr::const_mop_iterator> Ops,
                         BitVector &Set) {
  for (const MachineOperand &Op : Ops) {
    if (Op.isReg())
      addRegUnits(TRI, Set, Op.getReg());
  }
}

void GCNHazardRecognizer::addClauseInst(const MachineInstr &MI) {
  // XXX: Do we need to worry about implicit operands?
  addRegsToSet(TRI, MI.defs(), ClauseDefs);
  addRegsToSet(TRI, MI.uses(), ClauseUses);
}

int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) {
  // SMEM soft clauses are only present on VI+, and only matter if xnack is
  // enabled.
  if (!ST.isXNACKEnabled())
    return 0;

  bool IsSMRD = TII.isSMRD(*MEM);

  resetClause();

  // A soft clause is any group of consecutive SMEM instructions. The
  // instructions in this group may return out of order and/or may be
  // replayed (i.e. the same instruction issued more than once).
  //
  // In order to handle these situations correctly we need to make sure that
  // when a clause has more than one instruction, no instruction in the clause
  // writes to a register that is read by another instruction in the clause
  // (including itself). If we encounter this situation, we need to break the
  // clause by inserting a non-SMEM instruction.
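  //
  // For example (an illustrative sequence, not taken from the ISA docs): the
  // pair below would need to be broken up, since the second load reads SGPRs
  // written by the first while both would sit in one soft clause:
  //   s_load_dwordx2 s[0:1], s[4:5], 0x0
  //   s_load_dword   s2,     s[0:1], 0x0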

  for (MachineInstr *MI : EmittedInstrs) {
    // When we hit a non-SMEM instruction then we have passed the start of the
    // clause and we can stop.
    if (!MI)
      break;

    if (IsSMRD != SIInstrInfo::isSMRD(*MI))
      break;

    addClauseInst(*MI);
  }

  if (ClauseDefs.none())
    return 0;

  // We need to make sure not to put loads and stores in the same clause if
  // they use the same address. For now, just start a new clause whenever we
  // see a store.
  if (MEM->mayStore())
    return 1;

  addClauseInst(*MEM);

  // If the sets of defs and uses intersect then we cannot add this instruction
  // to the clause, so we have a hazard.
  return ClauseDefs.anyCommon(ClauseUses) ? 1 : 0;
}

int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
  int WaitStatesNeeded = 0;

  WaitStatesNeeded = checkSoftClauseHazards(SMRD);

  // This SMRD hazard only affects SI.
  if (!ST.hasSMRDReadVALUDefHazard())
    return WaitStatesNeeded;

  // A read of an SGPR by an SMRD instruction requires 4 wait states when the
  // SGPR was written by a VALU instruction.
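  // An illustrative sequence (assumed, not from the hardware docs):
  //   v_readfirstlane_b32 s0, v0     ; VALU writes an SGPR
  //   s_nop 3                        ; the 4 required wait states
  //   s_load_dword s4, s[0:1], 0x0   ; SMRD reads the SGPR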
  int SmrdSgprWaitStates = 4;
  auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); };
  auto IsBufferHazardDefFn = [this] (MachineInstr *MI) { return TII.isSALU(*MI); };

  bool IsBufferSMRD = TII.isBufferSMRD(*SMRD);

  for (const MachineOperand &Use : SMRD->uses()) {
    if (!Use.isReg())
      continue;
    int WaitStatesNeededForUse =
        SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn,
                                                   SmrdSgprWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

    // This fixes what appears to be undocumented hardware behavior in SI where
    // an s_mov writing a descriptor and an s_buffer_load_dword reading the
    // descriptor need some number of nops in between. We don't know how many
    // we need, but let's use 4. This wasn't discovered before probably because
    // the only case when this happens is when we expand a 64-bit pointer into
    // a full descriptor and use s_buffer_load_dword instead of s_load_dword,
    // which was probably never encountered in closed-source land.
    if (IsBufferSMRD) {
      int WaitStatesNeededForUse =
          SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(),
                                                     IsBufferHazardDefFn,
                                                     SmrdSgprWaitStates);
      WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
    }
  }

  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) {
  if (!ST.hasVMEMReadSGPRVALUDefHazard())
    return 0;

  int WaitStatesNeeded = checkSoftClauseHazards(VMEM);

  // A read of an SGPR by a VMEM instruction requires 5 wait states when the
  // SGPR was written by a VALU instruction.
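  // An illustrative sequence (assumed):
  //   v_readfirstlane_b32 s4, v1                    ; VALU writes s4
  //   buffer_load_dword v0, v2, s[8:11], s4 offen   ; VMEM reads s4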
  const int VmemSgprWaitStates = 5;
  auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); };
  for (const MachineOperand &Use : VMEM->uses()) {
    if (!Use.isReg() || TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
      continue;

    int WaitStatesNeededForUse =
        VmemSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn,
                                                   VmemSgprWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  }
  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) {
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();

  // Check for DPP VGPR read after VALU VGPR write and EXEC write.
  int DppVgprWaitStates = 2;
  int DppExecWaitStates = 5;
  int WaitStatesNeeded = 0;
  auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };

  for (const MachineOperand &Use : DPP->uses()) {
    if (!Use.isReg() || !TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
      continue;
    int WaitStatesNeededForUse =
        DppVgprWaitStates - getWaitStatesSinceDef(Use.getReg(),
                              [](MachineInstr *) { return true; },
                              DppVgprWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  }

  WaitStatesNeeded = std::max(
      WaitStatesNeeded,
      DppExecWaitStates - getWaitStatesSinceDef(AMDGPU::EXEC, IsHazardDefFn,
                                                DppExecWaitStates));

  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkDivFMasHazards(MachineInstr *DivFMas) {
  const SIInstrInfo *TII = ST.getInstrInfo();

  // v_div_fmas requires 4 wait states after a write to vcc from a VALU
  // instruction.
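  // An illustrative sequence (assumed):
  //   v_cmp_eq_f32 vcc, v0, v1        ; VALU write to vcc
  //   s_nop 3                         ; the 4 required wait states
  //   v_div_fmas_f32 v2, v3, v4, v5   ; reads vcc implicitly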
  const int DivFMasWaitStates = 4;
  auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
  int WaitStatesNeeded = getWaitStatesSinceDef(AMDGPU::VCC, IsHazardDefFn,
                                               DivFMasWaitStates);

  return DivFMasWaitStates - WaitStatesNeeded;
}

int GCNHazardRecognizer::checkGetRegHazards(MachineInstr *GetRegInstr) {
  const SIInstrInfo *TII = ST.getInstrInfo();
  unsigned GetRegHWReg = getHWReg(TII, *GetRegInstr);

  const int GetRegWaitStates = 2;
  auto IsHazardFn = [TII, GetRegHWReg] (MachineInstr *MI) {
    return GetRegHWReg == getHWReg(TII, *MI);
  };
  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, GetRegWaitStates);

  return GetRegWaitStates - WaitStatesNeeded;
}

int GCNHazardRecognizer::checkSetRegHazards(MachineInstr *SetRegInstr) {
  const SIInstrInfo *TII = ST.getInstrInfo();
  unsigned HWReg = getHWReg(TII, *SetRegInstr);

  const int SetRegWaitStates = ST.getSetRegWaitStates();
  auto IsHazardFn = [TII, HWReg] (MachineInstr *MI) {
    return HWReg == getHWReg(TII, *MI);
  };
  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, SetRegWaitStates);
  return SetRegWaitStates - WaitStatesNeeded;
}

int GCNHazardRecognizer::createsVALUHazard(const MachineInstr &MI) {
  if (!MI.mayStore())
    return -1;

  const SIInstrInfo *TII = ST.getInstrInfo();
  unsigned Opcode = MI.getOpcode();
  const MCInstrDesc &Desc = MI.getDesc();

  int VDataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
  int VDataRCID = -1;
  if (VDataIdx != -1)
    VDataRCID = Desc.OpInfo[VDataIdx].RegClass;

  if (TII->isMUBUF(MI) || TII->isMTBUF(MI)) {
    // There is no hazard if the instruction does not use vector regs
    // (like wbinvl1)
    if (VDataIdx == -1)
      return -1;
    // For MUBUF/MTBUF instructions this hazard only exists if the
    // instruction is not using a register in the soffset field.
    const MachineOperand *SOffset =
        TII->getNamedOperand(MI, AMDGPU::OpName::soffset);
    // If we have no soffset operand, then assume this field has been
    // hardcoded to zero.
    if (AMDGPU::getRegBitWidth(VDataRCID) > 64 &&
        (!SOffset || !SOffset->isReg()))
      return VDataIdx;
  }

  // MIMG instructions create a hazard if they don't use a 256-bit T# and
  // the store size is greater than 8 bytes and they have more than two bits
  // of their dmask set.
  // All our MIMG definitions use a 256-bit T#, so we can skip checking for them.
  if (TII->isMIMG(MI)) {
    int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::srsrc);
    assert(SRsrcIdx != -1 &&
           AMDGPU::getRegBitWidth(Desc.OpInfo[SRsrcIdx].RegClass) == 256);
    (void)SRsrcIdx;
  }

  if (TII->isFLAT(MI)) {
    int DataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
    if (AMDGPU::getRegBitWidth(Desc.OpInfo[DataIdx].RegClass) > 64)
      return DataIdx;
  }

  return -1;
}

int GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def,
                                                const MachineRegisterInfo &MRI) {
  // Helper to check for the hazard where VMEM instructions that store more
  // than 8 bytes can have their store data overwritten by the next instruction.
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  const int VALUWaitStates = 1;
  int WaitStatesNeeded = 0;

  if (!TRI->isVGPR(MRI, Def.getReg()))
    return WaitStatesNeeded;
  Register Reg = Def.getReg();
  auto IsHazardFn = [this, Reg, TRI] (MachineInstr *MI) {
    int DataIdx = createsVALUHazard(*MI);
    return DataIdx >= 0 &&
           TRI->regsOverlap(MI->getOperand(DataIdx).getReg(), Reg);
  };
  int WaitStatesNeededForDef =
      VALUWaitStates - getWaitStatesSince(IsHazardFn, VALUWaitStates);
  WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);

  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) {
  // This checks for the hazard where VMEM instructions that store more than
  // 8 bytes can have their store data overwritten by the next instruction.
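  //
  // An illustrative sequence (assumed): the v_mov_b32 below would clobber
  // part of the store data before the >8-byte store has consumed it:
  //   buffer_store_dwordx4 v[0:3], v4, s[0:3], 0 offen
  //   v_mov_b32 v1, 0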
  if (!ST.has12DWordStoreHazard())
    return 0;

  const MachineRegisterInfo &MRI = MF.getRegInfo();
  int WaitStatesNeeded = 0;

  for (const MachineOperand &Def : VALU->defs()) {
    WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Def, MRI));
  }

  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkInlineAsmHazards(MachineInstr *IA) {
  // This checks for hazards associated with inline asm statements.
  // Since inline asms can contain just about anything, we use this
  // to call/leverage other check*Hazard routines. Note that
  // this function doesn't attempt to address all possible inline asm
  // hazards (good luck), but is a collection of what has been
  // problematic thus far.

  // see checkVALUHazards()
  if (!ST.has12DWordStoreHazard())
    return 0;

  const MachineRegisterInfo &MRI = MF.getRegInfo();
  int WaitStatesNeeded = 0;

  for (unsigned I = InlineAsm::MIOp_FirstOperand, E = IA->getNumOperands();
       I != E; ++I) {
    const MachineOperand &Op = IA->getOperand(I);
    if (Op.isReg() && Op.isDef()) {
      WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Op, MRI));
    }
  }

  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkRWLaneHazards(MachineInstr *RWLane) {
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  const MachineOperand *LaneSelectOp =
      TII->getNamedOperand(*RWLane, AMDGPU::OpName::src1);

  if (!LaneSelectOp->isReg() || !TRI->isSGPRReg(MRI, LaneSelectOp->getReg()))
    return 0;

  Register LaneSelectReg = LaneSelectOp->getReg();
  auto IsHazardFn = [TII] (MachineInstr *MI) {
    return TII->isVALU(*MI);
  };

  const int RWLaneWaitStates = 4;
  int WaitStatesSince = getWaitStatesSinceDef(LaneSelectReg, IsHazardFn,
                                              RWLaneWaitStates);
  return RWLaneWaitStates - WaitStatesSince;
}

int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) {
  if (!ST.hasRFEHazards())
    return 0;

  const SIInstrInfo *TII = ST.getInstrInfo();

  const int RFEWaitStates = 1;

  auto IsHazardFn = [TII] (MachineInstr *MI) {
    return getHWReg(TII, *MI) == AMDGPU::Hwreg::ID_TRAPSTS;
  };
  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, RFEWaitStates);
  return RFEWaitStates - WaitStatesNeeded;
}

int GCNHazardRecognizer::checkAnyInstHazards(MachineInstr *MI) {
  if (MI->isDebugInstr())
    return 0;

  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  if (!ST.hasSMovFedHazard())
    return 0;

  // Check for any instruction reading an SGPR after a write from
  // s_mov_fed_b32.
  int MovFedWaitStates = 1;
  int WaitStatesNeeded = 0;

  for (const MachineOperand &Use : MI->uses()) {
    if (!Use.isReg() || TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
      continue;
    auto IsHazardFn = [] (MachineInstr *MI) {
      return MI->getOpcode() == AMDGPU::S_MOV_FED_B32;
    };
    int WaitStatesNeededForUse =
        MovFedWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardFn,
                                                 MovFedWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  }

  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) {
  const SIInstrInfo *TII = ST.getInstrInfo();
  const int SMovRelWaitStates = 1;
  auto IsHazardFn = [TII] (MachineInstr *MI) {
    return TII->isSALU(*MI);
  };
  return SMovRelWaitStates - getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn,
                                                   SMovRelWaitStates);
}

void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
  fixVMEMtoScalarWriteHazards(MI);
  fixVcmpxPermlaneHazards(MI);
  fixSMEMtoVectorWriteHazards(MI);
  fixVcmpxExecWARHazard(MI);
  fixLdsBranchVmemWARHazard(MI);
}

bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) {
  if (!ST.hasVcmpxPermlaneHazard() || !isPermlane(*MI))
    return false;

  const SIInstrInfo *TII = ST.getInstrInfo();
  auto IsHazardFn = [TII] (MachineInstr *MI) {
    return TII->isVOPC(*MI);
  };

  auto IsExpiredFn = [] (MachineInstr *MI, int) {
    if (!MI)
      return false;
    unsigned Opc = MI->getOpcode();
    return SIInstrInfo::isVALU(*MI) &&
           Opc != AMDGPU::V_NOP_e32 &&
           Opc != AMDGPU::V_NOP_e64 &&
           Opc != AMDGPU::V_NOP_sdwa;
  };

  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false;

  // V_NOP will be discarded by SQ.
  // Use V_MOV_B32 v?, v?. Register must be alive so use src0 of V_PERMLANE*
  // which is always a VGPR and available.
  auto *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0);
  Register Reg = Src0->getReg();
  bool IsUndef = Src0->isUndef();
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII->get(AMDGPU::V_MOV_B32_e32))
      .addReg(Reg, RegState::Define | (IsUndef ? RegState::Dead : 0))
      .addReg(Reg, IsUndef ? RegState::Undef : RegState::Kill);

  return true;
}

bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) {
  if (!ST.hasVMEMtoScalarWriteHazard())
    return false;

  if (!SIInstrInfo::isSALU(*MI) && !SIInstrInfo::isSMRD(*MI))
    return false;

  if (MI->getNumDefs() == 0)
    return false;

  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  auto IsHazardFn = [TRI, MI] (MachineInstr *I) {
    if (!SIInstrInfo::isVMEM(*I) && !SIInstrInfo::isDS(*I) &&
        !SIInstrInfo::isFLAT(*I))
      return false;

    for (const MachineOperand &Def : MI->defs()) {
      MachineOperand *Op = I->findRegisterUseOperand(Def.getReg(), false, TRI);
      if (!Op)
        continue;
      return true;
    }
    return false;
  };

  auto IsExpiredFn = [] (MachineInstr *MI, int) {
    return MI && (SIInstrInfo::isVALU(*MI) ||
                  (MI->getOpcode() == AMDGPU::S_WAITCNT &&
                   !MI->getOperand(0).getImm()));
  };

  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false;

  const SIInstrInfo *TII = ST.getInstrInfo();
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::V_NOP_e32));
  return true;
}

bool GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) {
  if (!ST.hasSMEMtoVectorWriteHazard())
    return false;

  if (!SIInstrInfo::isVALU(*MI))
    return false;

  unsigned SDSTName;
  switch (MI->getOpcode()) {
  case AMDGPU::V_READLANE_B32:
  case AMDGPU::V_READFIRSTLANE_B32:
    SDSTName = AMDGPU::OpName::vdst;
    break;
  default:
    SDSTName = AMDGPU::OpName::sdst;
    break;
  }

  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(ST.getCPU());
  const MachineOperand *SDST = TII->getNamedOperand(*MI, SDSTName);
  if (!SDST) {
    for (const auto &MO : MI->implicit_operands()) {
      if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg()))) {
        SDST = &MO;
        break;
      }
    }
  }

  if (!SDST)
    return false;

  const Register SDSTReg = SDST->getReg();
  auto IsHazardFn = [SDSTReg, TRI] (MachineInstr *I) {
    return SIInstrInfo::isSMRD(*I) && I->readsRegister(SDSTReg, TRI);
  };

  auto IsExpiredFn = [TII, IV] (MachineInstr *MI, int) {
    if (MI) {
      if (TII->isSALU(*MI)) {
        switch (MI->getOpcode()) {
        case AMDGPU::S_SETVSKIP:
        case AMDGPU::S_VERSION:
        case AMDGPU::S_WAITCNT_VSCNT:
        case AMDGPU::S_WAITCNT_VMCNT:
        case AMDGPU::S_WAITCNT_EXPCNT:
          // These instructions cannot mitigate the hazard.
          return false;
        case AMDGPU::S_WAITCNT_LGKMCNT:
          // Reducing lgkmcnt count to 0 always mitigates the hazard.
          return (MI->getOperand(1).getImm() == 0) &&
                 (MI->getOperand(0).getReg() == AMDGPU::SGPR_NULL);
        case AMDGPU::S_WAITCNT: {
          const int64_t Imm = MI->getOperand(0).getImm();
          AMDGPU::Waitcnt Decoded = AMDGPU::decodeWaitcnt(IV, Imm);
          return (Decoded.LgkmCnt == 0);
        }
        default:
          // SOPP instructions cannot mitigate the hazard.
          if (TII->isSOPP(*MI))
            return false;
          // At this point the SALU can be assumed to mitigate the hazard
          // because either:
          // (a) it is independent of the at risk SMEM (breaking chain),
          // or
          // (b) it is dependent on the SMEM, in which case an appropriate
          //     s_waitcnt lgkmcnt _must_ exist between it and the at risk
          //     SMEM instruction.
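          // E.g. (illustrative): an s_add_u32 here either touches SGPRs
          // unrelated to the at risk SMEM load (case a), or consumes its
          // result and thus already sits behind an s_waitcnt lgkmcnt(0)
          // (case b).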
          return true;
        }
      }
    }
    return false;
  };

  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false;

  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII->get(AMDGPU::S_MOV_B32), AMDGPU::SGPR_NULL)
      .addImm(0);
  return true;
}

bool GCNHazardRecognizer::fixVcmpxExecWARHazard(MachineInstr *MI) {
  if (!ST.hasVcmpxExecWARHazard() || !SIInstrInfo::isVALU(*MI))
    return false;

  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  if (!MI->modifiesRegister(AMDGPU::EXEC, TRI))
    return false;

  auto IsHazardFn = [TRI] (MachineInstr *I) {
    if (SIInstrInfo::isVALU(*I))
      return false;
    return I->readsRegister(AMDGPU::EXEC, TRI);
  };

  const SIInstrInfo *TII = ST.getInstrInfo();
  auto IsExpiredFn = [TII, TRI] (MachineInstr *MI, int) {
    if (!MI)
      return false;
    if (SIInstrInfo::isVALU(*MI)) {
      if (TII->getNamedOperand(*MI, AMDGPU::OpName::sdst))
        return true;
      for (auto MO : MI->implicit_operands())
        if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg())))
          return true;
    }
    if (MI->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
        (MI->getOperand(0).getImm() & 0xfffe) == 0xfffe)
      return true;
    return false;
  };

  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false;

  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII->get(AMDGPU::S_WAITCNT_DEPCTR))
      .addImm(0xfffe);
  return true;
}

bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) {
  if (!ST.hasLdsBranchVmemWARHazard())
    return false;

  auto IsHazardInst = [] (const MachineInstr *MI) {
    if (SIInstrInfo::isDS(*MI))
      return 1;
    if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isSegmentSpecificFLAT(*MI))
      return 2;
    return 0;
  };

  auto InstType = IsHazardInst(MI);
  if (!InstType)
    return false;

  auto IsExpiredFn = [&IsHazardInst] (MachineInstr *I, int) {
    return I && (IsHazardInst(I) ||
                 (I->getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
                  I->getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
                  !I->getOperand(1).getImm()));
  };

  auto IsHazardFn = [InstType, &IsHazardInst] (MachineInstr *I) {
    if (!I->isBranch())
      return false;

    auto IsHazardFn = [InstType, IsHazardInst] (MachineInstr *I) {
      auto InstType2 = IsHazardInst(I);
      return InstType2 && InstType != InstType2;
    };

    auto IsExpiredFn = [InstType, &IsHazardInst] (MachineInstr *I, int) {
      if (!I)
        return false;

      auto InstType2 = IsHazardInst(I);
      if (InstType == InstType2)
        return true;

      return I->getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
             I->getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
             !I->getOperand(1).getImm();
    };

    return ::getWaitStatesSince(IsHazardFn, I, IsExpiredFn) !=
           std::numeric_limits<int>::max();
  };

  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false;

  const SIInstrInfo *TII = ST.getInstrInfo();
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII->get(AMDGPU::S_WAITCNT_VSCNT))
      .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
      .addImm(0);

  return true;
}

int GCNHazardRecognizer::checkNSAtoVMEMHazard(MachineInstr *MI) {
  int NSAtoVMEMWaitStates = 1;

  if (!ST.hasNSAtoVMEMBug())
    return 0;

  if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isMTBUF(*MI))
    return 0;

  const SIInstrInfo *TII = ST.getInstrInfo();
  const auto *Offset = TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
  if (!Offset || (Offset->getImm() & 6) == 0)
    return 0;

  auto IsHazardFn = [TII] (MachineInstr *I) {
    if (!SIInstrInfo::isMIMG(*I))
      return false;
    const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(I->getOpcode());
    return Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA &&
           TII->getInstSizeInBytes(*I) >= 16;
  };

  return NSAtoVMEMWaitStates - getWaitStatesSince(IsHazardFn, 1);
}

int GCNHazardRecognizer::checkFPAtomicToDenormModeHazard(MachineInstr *MI) {
  int FPAtomicToDenormModeWaitStates = 3;

  if (MI->getOpcode() != AMDGPU::S_DENORM_MODE)
    return 0;

  auto IsHazardFn = [] (MachineInstr *I) {
    if (!SIInstrInfo::isVMEM(*I) && !SIInstrInfo::isFLAT(*I))
      return false;
    return SIInstrInfo::isFPAtomic(*I);
  };

  auto IsExpiredFn = [] (MachineInstr *MI, int WaitStates) {
    if (WaitStates >= 3 || SIInstrInfo::isVALU(*MI))
      return true;

    switch (MI->getOpcode()) {
    case AMDGPU::S_WAITCNT:
    case AMDGPU::S_WAITCNT_VSCNT:
    case AMDGPU::S_WAITCNT_VMCNT:
    case AMDGPU::S_WAITCNT_EXPCNT:
    case AMDGPU::S_WAITCNT_LGKMCNT:
    case AMDGPU::S_WAITCNT_IDLE:
      return true;
    default:
      break;
    }

    return false;
  };

  return FPAtomicToDenormModeWaitStates -
         ::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn);
}

int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) {
  assert(SIInstrInfo::isMAI(*MI));

  int WaitStatesNeeded = 0;
  unsigned Opc = MI->getOpcode();

  auto IsVALUFn = [] (MachineInstr *MI) {
    return SIInstrInfo::isVALU(*MI);
  };

  if (Opc != AMDGPU::V_ACCVGPR_READ_B32) { // MFMA or v_accvgpr_write
    const int LegacyVALUWritesVGPRWaitStates = 2;
    const int VALUWritesExecWaitStates = 4;
    const int MaxWaitStates = 4;

    int WaitStatesNeededForUse = VALUWritesExecWaitStates -
      getWaitStatesSinceDef(AMDGPU::EXEC, IsVALUFn, MaxWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

    if (WaitStatesNeeded < MaxWaitStates) {
      for (const MachineOperand &Use : MI->explicit_uses()) {
        const int MaxWaitStates = 2;

        if (!Use.isReg() || !TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
          continue;

        int WaitStatesNeededForUse = LegacyVALUWritesVGPRWaitStates -
          getWaitStatesSinceDef(Use.getReg(), IsVALUFn, MaxWaitStates);
        WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

        if (WaitStatesNeeded == MaxWaitStates)
          break;
      }
    }
  }

  auto IsMFMAFn = [] (MachineInstr *MI) {
    return SIInstrInfo::isMAI(*MI) &&
           MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32 &&
           MI->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32;
  };

  for (const MachineOperand &Op : MI->explicit_operands()) {
    if (!Op.isReg() || !TRI.isAGPR(MF.getRegInfo(), Op.getReg()))
      continue;

    if (Op.isDef() && Opc != AMDGPU::V_ACCVGPR_WRITE_B32)
      continue;

    const int MFMAWritesAGPROverlappedSrcABWaitStates = 4;
    const int MFMAWritesAGPROverlappedSrcCWaitStates = 2;
    const int MFMA4x4WritesAGPRAccVgprReadWaitStates = 4;
    const int MFMA16x16WritesAGPRAccVgprReadWaitStates = 10;
    const int MFMA32x32WritesAGPRAccVgprReadWaitStates = 18;
    const int MFMA4x4WritesAGPRAccVgprWriteWaitStates = 1;
    const int MFMA16x16WritesAGPRAccVgprWriteWaitStates = 7;
    const int MFMA32x32WritesAGPRAccVgprWriteWaitStates = 15;
    const int MaxWaitStates = 18;
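    // The HazardDefLatency buckets used below (2, 8 and 16 cycles) are
    // assumed to correspond to the 4x4, 16x16 and 32x32 MFMA shapes named in
    // the constants above.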
    Register Reg = Op.getReg();
    unsigned HazardDefLatency = 0;

    auto IsOverlappedMFMAFn = [Reg, &IsMFMAFn, &HazardDefLatency, this]
                              (MachineInstr *MI) {
      if (!IsMFMAFn(MI))
        return false;
      Register DstReg = MI->getOperand(0).getReg();
      if (DstReg == Reg)
        return false;
      HazardDefLatency = std::max(HazardDefLatency,
                                  TSchedModel.computeInstrLatency(MI));
      return TRI.regsOverlap(DstReg, Reg);
    };

    int WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsOverlappedMFMAFn,
                                                   MaxWaitStates);
    int NeedWaitStates = MFMAWritesAGPROverlappedSrcABWaitStates;
    int SrcCIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
    int OpNo = MI->getOperandNo(&Op);
    if (OpNo == SrcCIdx) {
      NeedWaitStates = MFMAWritesAGPROverlappedSrcCWaitStates;
    } else if (Opc == AMDGPU::V_ACCVGPR_READ_B32) {
      switch (HazardDefLatency) {
      case 2:  NeedWaitStates = MFMA4x4WritesAGPRAccVgprReadWaitStates;
               break;
      case 8:  NeedWaitStates = MFMA16x16WritesAGPRAccVgprReadWaitStates;
               break;
      case 16: LLVM_FALLTHROUGH;
      default: NeedWaitStates = MFMA32x32WritesAGPRAccVgprReadWaitStates;
               break;
      }
    } else if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32) {
      switch (HazardDefLatency) {
      case 2:  NeedWaitStates = MFMA4x4WritesAGPRAccVgprWriteWaitStates;
               break;
      case 8:  NeedWaitStates = MFMA16x16WritesAGPRAccVgprWriteWaitStates;
               break;
      case 16: LLVM_FALLTHROUGH;
      default: NeedWaitStates = MFMA32x32WritesAGPRAccVgprWriteWaitStates;
               break;
      }
    }

    int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef;
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

    if (WaitStatesNeeded == MaxWaitStates)
      return WaitStatesNeeded; // Early exit.

    auto IsAccVgprWriteFn = [Reg, this] (MachineInstr *MI) {
      if (MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32)
        return false;
      Register DstReg = MI->getOperand(0).getReg();
      return TRI.regsOverlap(Reg, DstReg);
    };

    const int AccVGPRWriteMFMAReadSrcCWaitStates = 1;
    const int AccVGPRWriteMFMAReadSrcABWaitStates = 3;
    const int AccVGPRWriteAccVgprReadWaitStates = 3;
    NeedWaitStates = AccVGPRWriteMFMAReadSrcABWaitStates;
    if (OpNo == SrcCIdx)
      NeedWaitStates = AccVGPRWriteMFMAReadSrcCWaitStates;
    else if (Opc == AMDGPU::V_ACCVGPR_READ_B32)
      NeedWaitStates = AccVGPRWriteAccVgprReadWaitStates;

    WaitStatesNeededForUse = NeedWaitStates -
      getWaitStatesSinceDef(Reg, IsAccVgprWriteFn, MaxWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

    if (WaitStatesNeeded == MaxWaitStates)
      return WaitStatesNeeded; // Early exit.
  }

  if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32) {
    const int MFMA4x4ReadSrcCAccVgprWriteWaitStates = 0;
    const int MFMA16x16ReadSrcCAccVgprWriteWaitStates = 5;
    const int MFMA32x32ReadSrcCAccVgprWriteWaitStates = 13;
    const int MaxWaitStates = 13;
    Register DstReg = MI->getOperand(0).getReg();
    unsigned HazardDefLatency = 0;

    auto IsSrcCMFMAFn = [DstReg, &IsMFMAFn, &HazardDefLatency, this]
                        (MachineInstr *MI) {
      if (!IsMFMAFn(MI))
        return false;
      Register Reg = TII.getNamedOperand(*MI, AMDGPU::OpName::src2)->getReg();
      HazardDefLatency = std::max(HazardDefLatency,
                                  TSchedModel.computeInstrLatency(MI));
      return TRI.regsOverlap(Reg, DstReg);
    };

    int WaitStatesSince = getWaitStatesSince(IsSrcCMFMAFn, MaxWaitStates);
    int NeedWaitStates;
    switch (HazardDefLatency) {
    case 2:  NeedWaitStates = MFMA4x4ReadSrcCAccVgprWriteWaitStates;
             break;
    case 8:  NeedWaitStates = MFMA16x16ReadSrcCAccVgprWriteWaitStates;
             break;
    case 16: LLVM_FALLTHROUGH;
    default: NeedWaitStates = MFMA32x32ReadSrcCAccVgprWriteWaitStates;
             break;
    }

    int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSince;
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  }

  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) {
  if (!ST.hasMAIInsts())
    return 0;

  int WaitStatesNeeded = 0;

  auto IsAccVgprReadFn = [] (MachineInstr *MI) {
    return MI->getOpcode() == AMDGPU::V_ACCVGPR_READ_B32;
  };

  for (const MachineOperand &Op : MI->explicit_uses()) {
    if (!Op.isReg() || !TRI.isVGPR(MF.getRegInfo(), Op.getReg()))
      continue;

    Register Reg = Op.getReg();

    const int AccVgprReadLdStWaitStates = 2;
    const int VALUWriteAccVgprReadLdStDepVALUWaitStates = 1;
    const int MaxWaitStates = 2;

    int WaitStatesNeededForUse = AccVgprReadLdStWaitStates -
      getWaitStatesSinceDef(Reg, IsAccVgprReadFn, MaxWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

    if (WaitStatesNeeded == MaxWaitStates)
      return WaitStatesNeeded; // Early exit.

    auto IsVALUAccVgprReadCheckFn = [Reg, this] (MachineInstr *MI) {
      if (MI->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32)
        return false;
      auto IsVALUFn = [] (MachineInstr *MI) {
        return SIInstrInfo::isVALU(*MI) && !SIInstrInfo::isMAI(*MI);
      };
      return getWaitStatesSinceDef(Reg, IsVALUFn, 2 /*MaxWaitStates*/) <
             VALUWriteAccVgprReadLdStDepVALUWaitStates;
    };

    WaitStatesNeededForUse = VALUWriteAccVgprReadLdStDepVALUWaitStates -
      getWaitStatesSince(IsVALUAccVgprReadCheckFn, MaxWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  }

  return WaitStatesNeeded;
}