//===-- GCNHazardRecognizers.cpp - GCN Hazard Recognizer Impls ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements hazard recognizers for scheduling on GCN processors.
//
//===----------------------------------------------------------------------===//

#include "GCNHazardRecognizer.h"
#include "AMDGPUSubtarget.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <cassert>
#include <limits>
#include <set>
#include <vector>

using namespace llvm;

//===----------------------------------------------------------------------===//
// Hazard Recognizer Implementation
//===----------------------------------------------------------------------===//

GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
  IsHazardRecognizerMode(false),
  CurrCycleInstr(nullptr),
  MF(MF),
  ST(MF.getSubtarget<GCNSubtarget>()),
  TII(*ST.getInstrInfo()),
  TRI(TII.getRegisterInfo()),
  ClauseUses(TRI.getNumRegUnits()),
  ClauseDefs(TRI.getNumRegUnits()) {
  MaxLookAhead = MF.getRegInfo().isPhysRegUsed(AMDGPU::AGPR0) ? 18 : 5;
  TSchedModel.init(&ST);
}
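
// A note on the MaxLookAhead values chosen above: they appear to mirror the
// worst-case hazard distances handled later in this file. 18 matches the
// largest MFMA distance in checkMAIHazards()
// (MFMA32x32WritesAGPRAccVgprReadWaitStates), and 5 covers the longest
// non-MAI hazards (e.g. the 5-wait-state VMEM SGPR-read and DPP EXEC-read
// hazards).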

void GCNHazardRecognizer::EmitInstruction(SUnit *SU) {
  EmitInstruction(SU->getInstr());
}

void GCNHazardRecognizer::EmitInstruction(MachineInstr *MI) {
  CurrCycleInstr = MI;
}

static bool isDivFMas(unsigned Opcode) {
  return Opcode == AMDGPU::V_DIV_FMAS_F32 || Opcode == AMDGPU::V_DIV_FMAS_F64;
}

static bool isSGetReg(unsigned Opcode) {
  return Opcode == AMDGPU::S_GETREG_B32;
}

static bool isSSetReg(unsigned Opcode) {
  return Opcode == AMDGPU::S_SETREG_B32 || Opcode == AMDGPU::S_SETREG_IMM32_B32;
}

static bool isRWLane(unsigned Opcode) {
  return Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32;
}

static bool isRFE(unsigned Opcode) {
  return Opcode == AMDGPU::S_RFE_B64;
}

static bool isSMovRel(unsigned Opcode) {
  switch (Opcode) {
  case AMDGPU::S_MOVRELS_B32:
  case AMDGPU::S_MOVRELS_B64:
  case AMDGPU::S_MOVRELD_B32:
  case AMDGPU::S_MOVRELD_B64:
    return true;
  default:
    return false;
  }
}

static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII,
                                    const MachineInstr &MI) {
  if (TII.isAlwaysGDS(MI.getOpcode()))
    return true;

  switch (MI.getOpcode()) {
  case AMDGPU::S_SENDMSG:
  case AMDGPU::S_SENDMSGHALT:
  case AMDGPU::S_TTRACEDATA:
    return true;
  // These DS opcodes don't support GDS.
  case AMDGPU::DS_NOP:
  case AMDGPU::DS_PERMUTE_B32:
  case AMDGPU::DS_BPERMUTE_B32:
    return false;
  default:
    if (TII.isDS(MI.getOpcode())) {
      int GDS = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::gds);
      if (MI.getOperand(GDS).getImm())
        return true;
    }
    return false;
  }
}

static bool isPermlane(const MachineInstr &MI) {
  unsigned Opcode = MI.getOpcode();
  return Opcode == AMDGPU::V_PERMLANE16_B32 ||
         Opcode == AMDGPU::V_PERMLANEX16_B32;
}

static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
  const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
                                                     AMDGPU::OpName::simm16);
  return RegOp->getImm() & AMDGPU::Hwreg::ID_MASK_;
}
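
// The simm16 immediate of s_setreg/s_getreg encodes hwreg(id, offset, size).
// Masking with ID_MASK_ keeps only the hardware register ID, so the hazard
// checks below compare instructions by the register they touch, regardless
// of which bit-field within it is accessed.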

ScheduleHazardRecognizer::HazardType
GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
  MachineInstr *MI = SU->getInstr();
  if (MI->isBundle())
    return NoHazard;

  if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0)
    return NoopHazard;

  // FIXME: Should flat be considered vmem?
  if ((SIInstrInfo::isVMEM(*MI) ||
       SIInstrInfo::isFLAT(*MI))
      && checkVMEMHazards(MI) > 0)
    return NoopHazard;

  if (ST.hasNSAtoVMEMBug() && checkNSAtoVMEMHazard(MI) > 0)
    return NoopHazard;

  if (checkFPAtomicToDenormModeHazard(MI) > 0)
    return NoopHazard;

  if (ST.hasNoDataDepHazard())
    return NoHazard;

  if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0)
    return NoopHazard;

  if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0)
    return NoopHazard;

  if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0)
    return NoopHazard;

  if (isRWLane(MI->getOpcode()) && checkRWLaneHazards(MI) > 0)
    return NoopHazard;

  if (isSGetReg(MI->getOpcode()) && checkGetRegHazards(MI) > 0)
    return NoopHazard;

  if (isSSetReg(MI->getOpcode()) && checkSetRegHazards(MI) > 0)
    return NoopHazard;

  if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0)
    return NoopHazard;

  if (ST.hasReadM0MovRelInterpHazard() &&
      (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) &&
      checkReadM0Hazards(MI) > 0)
    return NoopHazard;

  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI) &&
      checkReadM0Hazards(MI) > 0)
    return NoopHazard;

  if (SIInstrInfo::isMAI(*MI) && checkMAIHazards(MI) > 0)
    return NoopHazard;

  if ((MI->mayLoad() || MI->mayStore()) && checkMAILdStHazards(MI) > 0)
    return NoopHazard;

  if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0)
    return NoopHazard;

  if (checkAnyInstHazards(MI) > 0)
    return NoopHazard;

  return NoHazard;
}

static void insertNoopInBundle(MachineInstr *MI, const SIInstrInfo &TII) {
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::S_NOP))
      .addImm(0);
}

void GCNHazardRecognizer::processBundle() {
  MachineBasicBlock::instr_iterator MI = std::next(CurrCycleInstr->getIterator());
  MachineBasicBlock::instr_iterator E = CurrCycleInstr->getParent()->instr_end();
  // Check bundled MachineInstr's for hazards.
  for (; MI != E && MI->isInsideBundle(); ++MI) {
    CurrCycleInstr = &*MI;
    unsigned WaitStates = PreEmitNoopsCommon(CurrCycleInstr);

    if (IsHazardRecognizerMode)
      fixHazards(CurrCycleInstr);

    for (unsigned i = 0; i < WaitStates; ++i)
      insertNoopInBundle(CurrCycleInstr, TII);

    // It's unnecessary to track more than MaxLookAhead instructions. Since we
    // include the bundled MI directly after, only add a maximum of
    // (MaxLookAhead - 1) noops to EmittedInstrs.
    for (unsigned i = 0, e = std::min(WaitStates, MaxLookAhead - 1); i < e; ++i)
      EmittedInstrs.push_front(nullptr);

    EmittedInstrs.push_front(CurrCycleInstr);
    EmittedInstrs.resize(MaxLookAhead);
  }
  CurrCycleInstr = nullptr;
}
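
// Note that the scheduler treats a BUNDLE as a single unit (getHazardType()
// returns NoHazard for bundle headers and PreEmitNoopsCommon() returns 0 for
// them), so any noops required by a bundled instruction are materialized
// here, inside the bundle, rather than in front of it.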

unsigned GCNHazardRecognizer::PreEmitNoops(SUnit *SU) {
  IsHazardRecognizerMode = false;
  return PreEmitNoopsCommon(SU->getInstr());
}

unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
  IsHazardRecognizerMode = true;
  CurrCycleInstr = MI;
  unsigned W = PreEmitNoopsCommon(MI);
  fixHazards(MI);
  CurrCycleInstr = nullptr;
  return W;
}

unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
  if (MI->isBundle())
    return 0;

  int WaitStates = std::max(0, checkAnyInstHazards(MI));

  if (SIInstrInfo::isSMRD(*MI))
    return std::max(WaitStates, checkSMRDHazards(MI));

  if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI))
    WaitStates = std::max(WaitStates, checkVMEMHazards(MI));

  if (ST.hasNSAtoVMEMBug())
    WaitStates = std::max(WaitStates, checkNSAtoVMEMHazard(MI));

  WaitStates = std::max(WaitStates, checkFPAtomicToDenormModeHazard(MI));

  if (ST.hasNoDataDepHazard())
    return WaitStates;

  if (SIInstrInfo::isVALU(*MI))
    WaitStates = std::max(WaitStates, checkVALUHazards(MI));

  if (SIInstrInfo::isDPP(*MI))
    WaitStates = std::max(WaitStates, checkDPPHazards(MI));

  if (isDivFMas(MI->getOpcode()))
    WaitStates = std::max(WaitStates, checkDivFMasHazards(MI));

  if (isRWLane(MI->getOpcode()))
    WaitStates = std::max(WaitStates, checkRWLaneHazards(MI));

  if (MI->isInlineAsm())
    return std::max(WaitStates, checkInlineAsmHazards(MI));

  if (isSGetReg(MI->getOpcode()))
    return std::max(WaitStates, checkGetRegHazards(MI));

  if (isSSetReg(MI->getOpcode()))
    return std::max(WaitStates, checkSetRegHazards(MI));

  if (isRFE(MI->getOpcode()))
    return std::max(WaitStates, checkRFEHazards(MI));

  if (ST.hasReadM0MovRelInterpHazard() && (TII.isVINTRP(*MI) ||
                                           isSMovRel(MI->getOpcode())))
    return std::max(WaitStates, checkReadM0Hazards(MI));

  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI))
    return std::max(WaitStates, checkReadM0Hazards(MI));

  if (SIInstrInfo::isMAI(*MI))
    return std::max(WaitStates, checkMAIHazards(MI));

  if (MI->mayLoad() || MI->mayStore())
    return std::max(WaitStates, checkMAILdStHazards(MI));

  return WaitStates;
}

void GCNHazardRecognizer::EmitNoop() {
  EmittedInstrs.push_front(nullptr);
}

void GCNHazardRecognizer::AdvanceCycle() {
  // When the scheduler detects a stall, it will call AdvanceCycle() without
  // emitting any instructions.
  if (!CurrCycleInstr)
    return;

  // Do not track non-instructions which do not affect the wait states.
  // If included, these instructions can lead to buffer overflow such that
  // detectable hazards are missed.
  if (CurrCycleInstr->isImplicitDef() || CurrCycleInstr->isDebugInstr() ||
      CurrCycleInstr->isKill())
    return;

  if (CurrCycleInstr->isBundle()) {
    processBundle();
    return;
  }

  unsigned NumWaitStates = TII.getNumWaitStates(*CurrCycleInstr);

  // Keep track of emitted instructions
  EmittedInstrs.push_front(CurrCycleInstr);

  // Add a nullptr for each additional wait state after the first. Make sure
  // not to add more than getMaxLookAhead() items to the list, since we
  // truncate the list to that size right after this loop.
  for (unsigned i = 1, e = std::min(NumWaitStates, getMaxLookAhead());
       i < e; ++i) {
    EmittedInstrs.push_front(nullptr);
  }

  // getMaxLookahead() is the largest number of wait states we will ever need
  // to insert, so there is no point in keeping track of more than that many
  // wait states.
  EmittedInstrs.resize(getMaxLookAhead());

  CurrCycleInstr = nullptr;
}

void GCNHazardRecognizer::RecedeCycle() {
  llvm_unreachable("hazard recognizer does not support bottom-up scheduling.");
}

//===----------------------------------------------------------------------===//
// Helper Functions
//===----------------------------------------------------------------------===//

typedef function_ref<bool(MachineInstr *, int WaitStates)> IsExpiredFn;

// Returns the minimum number of wait states since \p I, walking all
// predecessors. Scanning stops once \p IsExpired returns true.
// Can only be run in a hazard recognizer mode.
static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
                              MachineBasicBlock *MBB,
                              MachineBasicBlock::reverse_instr_iterator I,
                              int WaitStates,
                              IsExpiredFn IsExpired,
                              DenseSet<const MachineBasicBlock *> &Visited) {
  for (auto E = MBB->instr_rend(); I != E; ++I) {
    // Don't add WaitStates for parent BUNDLE instructions.
    if (I->isBundle())
      continue;

    if (IsHazard(&*I))
      return WaitStates;

    if (I->isInlineAsm() || I->isImplicitDef() || I->isDebugInstr())
      continue;

    WaitStates += SIInstrInfo::getNumWaitStates(*I);

    if (IsExpired(&*I, WaitStates))
      return std::numeric_limits<int>::max();
  }

  int MinWaitStates = WaitStates;
  bool Found = false;
  for (MachineBasicBlock *Pred : MBB->predecessors()) {
    if (!Visited.insert(Pred).second)
      continue;

    int W = getWaitStatesSince(IsHazard, Pred, Pred->instr_rbegin(),
                               WaitStates, IsExpired, Visited);

    if (W == std::numeric_limits<int>::max())
      continue;

    MinWaitStates = Found ? std::min(MinWaitStates, W) : W;
    if (IsExpired(nullptr, MinWaitStates))
      return MinWaitStates;

    Found = true;
  }

  if (Found)
    return MinWaitStates;

  return std::numeric_limits<int>::max();
}
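
// The routine above is a backwards depth-first search over the CFG: it scans
// from \p I to the top of the current block, then recurses into every
// predecessor, with \p Visited ensuring each block is entered at most once.
// Since a hazard may exist on one incoming path but not another, the minimum
// wait-state count over all predecessors is the conservative answer;
// std::numeric_limits<int>::max() acts as the "expired before a hazard was
// found" sentinel.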

static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
                              MachineInstr *MI,
                              IsExpiredFn IsExpired) {
  DenseSet<const MachineBasicBlock *> Visited;
  return getWaitStatesSince(IsHazard, MI->getParent(),
                            std::next(MI->getReverseIterator()),
                            0, IsExpired, Visited);
}

int GCNHazardRecognizer::getWaitStatesSince(IsHazardFn IsHazard, int Limit) {
  if (IsHazardRecognizerMode) {
    auto IsExpiredFn = [Limit] (MachineInstr *, int WaitStates) {
      return WaitStates >= Limit;
    };
    return ::getWaitStatesSince(IsHazard, CurrCycleInstr, IsExpiredFn);
  }

  int WaitStates = 0;
  for (MachineInstr *MI : EmittedInstrs) {
    if (MI) {
      if (IsHazard(MI))
        return WaitStates;

      if (MI->isInlineAsm())
        continue;
    }
    ++WaitStates;

    if (WaitStates >= Limit)
      break;
  }
  return std::numeric_limits<int>::max();
}

int GCNHazardRecognizer::getWaitStatesSinceDef(unsigned Reg,
                                               IsHazardFn IsHazardDef,
                                               int Limit) {
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  auto IsHazardFn = [IsHazardDef, TRI, Reg] (MachineInstr *MI) {
    return IsHazardDef(MI) && MI->modifiesRegister(Reg, TRI);
  };

  return getWaitStatesSince(IsHazardFn, Limit);
}

int GCNHazardRecognizer::getWaitStatesSinceSetReg(IsHazardFn IsHazard,
                                                  int Limit) {
  auto IsHazardFn = [IsHazard] (MachineInstr *MI) {
    return isSSetReg(MI->getOpcode()) && IsHazard(MI);
  };

  return getWaitStatesSince(IsHazardFn, Limit);
}

//===----------------------------------------------------------------------===//
// No-op Hazard Detection
//===----------------------------------------------------------------------===//

static void addRegUnits(const SIRegisterInfo &TRI,
                        BitVector &BV, unsigned Reg) {
  for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI)
    BV.set(*RUI);
}

static void addRegsToSet(const SIRegisterInfo &TRI,
                         iterator_range<MachineInstr::const_mop_iterator> Ops,
                         BitVector &Set) {
  for (const MachineOperand &Op : Ops) {
    if (Op.isReg())
      addRegUnits(TRI, Set, Op.getReg());
  }
}

void GCNHazardRecognizer::addClauseInst(const MachineInstr &MI) {
  // XXX: Do we need to worry about implicit operands
  addRegsToSet(TRI, MI.defs(), ClauseDefs);
  addRegsToSet(TRI, MI.uses(), ClauseUses);
}

int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) {
  // SMEM soft clauses are only present on VI+, and only matter if xnack is
  // enabled.
  if (!ST.isXNACKEnabled())
    return 0;

  bool IsSMRD = TII.isSMRD(*MEM);

  resetClause();

  // A soft-clause is any group of consecutive SMEM instructions. The
  // instructions in this group may return out of order and/or may be
  // replayed (i.e. the same instruction issued more than once).
  //
  // In order to handle these situations correctly we need to make sure that
  // when a clause has more than one instruction, no instruction in the clause
  // writes to a register that is read by another instruction in the clause
  // (including itself). If we encounter this situation, we need to break the
  // clause by inserting a non SMEM instruction.

  for (MachineInstr *MI : EmittedInstrs) {
    // When we hit a non-SMEM instruction then we have passed the start of the
    // clause and we can stop.
    if (!MI)
      break;

    if (IsSMRD != SIInstrInfo::isSMRD(*MI))
      break;

    addClauseInst(*MI);
  }

  if (ClauseDefs.none())
    return 0;

  // We need to make sure not to put loads and stores in the same clause if they
  // use the same address. For now, just start a new clause whenever we see a
  // store.
  if (MEM->mayStore())
    return 1;

  addClauseInst(*MEM);

  // If the set of defs and uses intersect then we cannot add this instruction
  // to the clause, so we have a hazard.
  return ClauseDefs.anyCommon(ClauseUses) ? 1 : 0;
}
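
// Illustrative soft-clause hazard (hypothetical registers):
//   s_load_dwordx2 s[0:1], s[2:3], 0x0
//   s_load_dword   s4,     s[0:1], 0x0   ; reads what the first load writes
// Under XNACK the loads may return out of order or be replayed, so the
// second load's read of s[0:1] is unsafe within the same clause; the check
// above reports a hazard so a non-SMEM instruction can break the clause.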

int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
  int WaitStatesNeeded = 0;

  WaitStatesNeeded = checkSoftClauseHazards(SMRD);

  // This SMRD hazard only affects SI.
  if (!ST.hasSMRDReadVALUDefHazard())
    return WaitStatesNeeded;

  // A read of an SGPR by SMRD instruction requires 4 wait states when the
  // SGPR was written by a VALU instruction.
  int SmrdSgprWaitStates = 4;
  auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); };
  auto IsBufferHazardDefFn = [this] (MachineInstr *MI) { return TII.isSALU(*MI); };

  bool IsBufferSMRD = TII.isBufferSMRD(*SMRD);

  for (const MachineOperand &Use : SMRD->uses()) {
    if (!Use.isReg())
      continue;
    int WaitStatesNeededForUse =
        SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn,
                                                   SmrdSgprWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

    // This fixes what appears to be undocumented hardware behavior in SI where
    // s_mov writing a descriptor and s_buffer_load_dword reading the descriptor
    // needs some number of nops in between. We don't know how many we need, but
    // let's use 4. This wasn't discovered before probably because the only
    // case when this happens is when we expand a 64-bit pointer into a full
    // descriptor and use s_buffer_load_dword instead of s_load_dword, which was
    // probably never encountered in the closed-source land.
    if (IsBufferSMRD) {
      int WaitStatesNeededForUse =
        SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(),
                                                   IsBufferHazardDefFn,
                                                   SmrdSgprWaitStates);
      WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
    }
  }

  return WaitStatesNeeded;
}
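
// Illustrative SI-only sequence for the hazard above (hypothetical
// registers):
//   v_readfirstlane_b32 s0, v2     ; VALU writing an SGPR
//   s_nop 3                        ; s_nop N yields N+1 wait states
//   s_load_dword s4, s[0:1], 0x0   ; SMRD reading that SGPR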

int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) {
  if (!ST.hasVMEMReadSGPRVALUDefHazard())
    return 0;

  int WaitStatesNeeded = checkSoftClauseHazards(VMEM);

  // A read of an SGPR by a VMEM instruction requires 5 wait states when the
  // SGPR was written by a VALU Instruction.
  const int VmemSgprWaitStates = 5;
  auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); };
  for (const MachineOperand &Use : VMEM->uses()) {
    if (!Use.isReg() || TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
      continue;

    int WaitStatesNeededForUse =
        VmemSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn,
                                                   VmemSgprWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  }
  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) {
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();

  // Check for DPP VGPR read after VALU VGPR write and EXEC write.
  int DppVgprWaitStates = 2;
  int DppExecWaitStates = 5;
  int WaitStatesNeeded = 0;
  auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };

  for (const MachineOperand &Use : DPP->uses()) {
    if (!Use.isReg() || !TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
      continue;
    int WaitStatesNeededForUse =
        DppVgprWaitStates - getWaitStatesSinceDef(Use.getReg(),
                              [](MachineInstr *) { return true; },
                              DppVgprWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  }

  WaitStatesNeeded = std::max(
      WaitStatesNeeded,
      DppExecWaitStates - getWaitStatesSinceDef(AMDGPU::EXEC, IsHazardDefFn,
                                                DppExecWaitStates));

  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkDivFMasHazards(MachineInstr *DivFMas) {
  const SIInstrInfo *TII = ST.getInstrInfo();

  // v_div_fmas requires 4 wait states after a write to vcc from a VALU
  // instruction.
  const int DivFMasWaitStates = 4;
  auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
  int WaitStatesNeeded = getWaitStatesSinceDef(AMDGPU::VCC, IsHazardDefFn,
                                               DivFMasWaitStates);

  return DivFMasWaitStates - WaitStatesNeeded;
}
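
// Illustrative sequence (hypothetical registers):
//   v_cmp_eq_f32 vcc, v0, v1        ; VALU write to VCC
//   s_nop 3                         ; the four required wait states
//   v_div_fmas_f32 v2, v3, v4, v5   ; implicitly reads VCC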

int GCNHazardRecognizer::checkGetRegHazards(MachineInstr *GetRegInstr) {
  const SIInstrInfo *TII = ST.getInstrInfo();
  unsigned GetRegHWReg = getHWReg(TII, *GetRegInstr);

  const int GetRegWaitStates = 2;
  auto IsHazardFn = [TII, GetRegHWReg] (MachineInstr *MI) {
    return GetRegHWReg == getHWReg(TII, *MI);
  };
  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, GetRegWaitStates);

  return GetRegWaitStates - WaitStatesNeeded;
}

int GCNHazardRecognizer::checkSetRegHazards(MachineInstr *SetRegInstr) {
  const SIInstrInfo *TII = ST.getInstrInfo();
  unsigned HWReg = getHWReg(TII, *SetRegInstr);

  const int SetRegWaitStates = ST.getSetRegWaitStates();
  auto IsHazardFn = [TII, HWReg] (MachineInstr *MI) {
    return HWReg == getHWReg(TII, *MI);
  };
  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, SetRegWaitStates);
  return SetRegWaitStates - WaitStatesNeeded;
}

int GCNHazardRecognizer::createsVALUHazard(const MachineInstr &MI) {
  if (!MI.mayStore())
    return -1;

  const SIInstrInfo *TII = ST.getInstrInfo();
  unsigned Opcode = MI.getOpcode();
  const MCInstrDesc &Desc = MI.getDesc();

  int VDataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
  int VDataRCID = -1;
  if (VDataIdx != -1)
    VDataRCID = Desc.OpInfo[VDataIdx].RegClass;

  if (TII->isMUBUF(MI) || TII->isMTBUF(MI)) {
    // There is no hazard if the instruction does not use vector regs
    // (like wbinvl1)
    if (VDataIdx == -1)
      return -1;
    // For MUBUF/MTBUF instructions this hazard only exists if the
    // instruction is not using a register in the soffset field.
    const MachineOperand *SOffset =
        TII->getNamedOperand(MI, AMDGPU::OpName::soffset);
    // If we have no soffset operand, then assume this field has been
    // hardcoded to zero.
    if (AMDGPU::getRegBitWidth(VDataRCID) > 64 &&
        (!SOffset || !SOffset->isReg()))
      return VDataIdx;
  }

  // MIMG instructions create a hazard if they don't use a 256-bit T# and
  // the store size is greater than 8 bytes and they have more than two bits
  // of their dmask set.
  // All our MIMG definitions use a 256-bit T#, so we can skip checking for them.
  if (TII->isMIMG(MI)) {
    int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::srsrc);
    assert(SRsrcIdx != -1 &&
           AMDGPU::getRegBitWidth(Desc.OpInfo[SRsrcIdx].RegClass) == 256);
    (void)SRsrcIdx;
  }

  if (TII->isFLAT(MI)) {
    int DataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
    if (AMDGPU::getRegBitWidth(Desc.OpInfo[DataIdx].RegClass) > 64)
      return DataIdx;
  }

  return -1;
}
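
// To summarize the contract: createsVALUHazard() returns the operand index
// of the store data when \p MI is a MUBUF/MTBUF/FLAT store whose data
// operand is wider than 64 bits (the "more than 8 bytes" case from
// checkVALUHazards()), and -1 when no hazard is possible. The caller uses
// the index to test whether a later VALU def overlaps that exact register.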

int GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def,
                                                const MachineRegisterInfo &MRI) {
  // Helper to check for the hazard where VMEM instructions that store more than
  // 8 bytes can have their store data overwritten by the next instruction.
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  const int VALUWaitStates = 1;
  int WaitStatesNeeded = 0;

  if (!TRI->isVGPR(MRI, Def.getReg()))
    return WaitStatesNeeded;
  Register Reg = Def.getReg();
  auto IsHazardFn = [this, Reg, TRI] (MachineInstr *MI) {
    int DataIdx = createsVALUHazard(*MI);
    return DataIdx >= 0 &&
           TRI->regsOverlap(MI->getOperand(DataIdx).getReg(), Reg);
  };
  int WaitStatesNeededForDef =
    VALUWaitStates - getWaitStatesSince(IsHazardFn, VALUWaitStates);
  WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);

  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) {
  // This checks for the hazard where VMEM instructions that store more than
  // 8 bytes can have their store data overwritten by the next instruction.
  if (!ST.has12DWordStoreHazard())
    return 0;

  const MachineRegisterInfo &MRI = MF.getRegInfo();
  int WaitStatesNeeded = 0;

  for (const MachineOperand &Def : VALU->defs()) {
    WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Def, MRI));
  }

  return WaitStatesNeeded;
}
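
// Illustrative sequence (hypothetical registers):
//   buffer_store_dwordx4 v[0:3], v4, s[8:11], 0 offen   ; 16-byte store
//   v_mov_b32 v1, 0                                     ; defs part of v[0:3]
// One wait state is required before the v_mov_b32 so it cannot overwrite the
// store data while the store may still be reading it.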

int GCNHazardRecognizer::checkInlineAsmHazards(MachineInstr *IA) {
  // This checks for hazards associated with inline asm statements.
  // Since inline asms can contain just about anything, we use this
  // to call/leverage other check*Hazard routines. Note that
  // this function doesn't attempt to address all possible inline asm
  // hazards (good luck), but is a collection of what has been
  // problematic thus far.

  // see checkVALUHazards()
  if (!ST.has12DWordStoreHazard())
    return 0;

  const MachineRegisterInfo &MRI = MF.getRegInfo();
  int WaitStatesNeeded = 0;

  for (unsigned I = InlineAsm::MIOp_FirstOperand, E = IA->getNumOperands();
       I != E; ++I) {
    const MachineOperand &Op = IA->getOperand(I);
    if (Op.isReg() && Op.isDef()) {
      WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Op, MRI));
    }
  }

  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkRWLaneHazards(MachineInstr *RWLane) {
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  const MachineOperand *LaneSelectOp =
      TII->getNamedOperand(*RWLane, AMDGPU::OpName::src1);

  if (!LaneSelectOp->isReg() || !TRI->isSGPRReg(MRI, LaneSelectOp->getReg()))
    return 0;

  Register LaneSelectReg = LaneSelectOp->getReg();
  auto IsHazardFn = [TII] (MachineInstr *MI) {
    return TII->isVALU(*MI);
  };

  const int RWLaneWaitStates = 4;
  int WaitStatesSince = getWaitStatesSinceDef(LaneSelectReg, IsHazardFn,
                                              RWLaneWaitStates);
  return RWLaneWaitStates - WaitStatesSince;
}

int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) {
  if (!ST.hasRFEHazards())
    return 0;

  const SIInstrInfo *TII = ST.getInstrInfo();

  const int RFEWaitStates = 1;

  auto IsHazardFn = [TII] (MachineInstr *MI) {
    return getHWReg(TII, *MI) == AMDGPU::Hwreg::ID_TRAPSTS;
  };
  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, RFEWaitStates);
  return RFEWaitStates - WaitStatesNeeded;
}

int GCNHazardRecognizer::checkAnyInstHazards(MachineInstr *MI) {
  if (MI->isDebugInstr())
    return 0;

  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  if (!ST.hasSMovFedHazard())
    return 0;

  // Check for any instruction reading an SGPR after a write from
  // s_mov_fed_b32.
  int MovFedWaitStates = 1;
  int WaitStatesNeeded = 0;

  for (const MachineOperand &Use : MI->uses()) {
    if (!Use.isReg() || TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
      continue;
    auto IsHazardFn = [] (MachineInstr *MI) {
      return MI->getOpcode() == AMDGPU::S_MOV_FED_B32;
    };
    int WaitStatesNeededForUse =
        MovFedWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardFn,
                                                 MovFedWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  }

  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) {
  const SIInstrInfo *TII = ST.getInstrInfo();
  const int SMovRelWaitStates = 1;
  auto IsHazardFn = [TII] (MachineInstr *MI) {
    return TII->isSALU(*MI);
  };
  return SMovRelWaitStates - getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn,
                                                   SMovRelWaitStates);
}

void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
  fixVMEMtoScalarWriteHazards(MI);
  fixVcmpxPermlaneHazards(MI);
  fixSMEMtoVectorWriteHazards(MI);
  fixVcmpxExecWARHazard(MI);
  fixLdsBranchVmemWARHazard(MI);
}

bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) {
  if (!ST.hasVcmpxPermlaneHazard() || !isPermlane(*MI))
    return false;

  const SIInstrInfo *TII = ST.getInstrInfo();
  auto IsHazardFn = [TII] (MachineInstr *MI) {
    return TII->isVOPC(*MI);
  };

  auto IsExpiredFn = [] (MachineInstr *MI, int) {
    if (!MI)
      return false;
    unsigned Opc = MI->getOpcode();
    return SIInstrInfo::isVALU(*MI) &&
           Opc != AMDGPU::V_NOP_e32 &&
           Opc != AMDGPU::V_NOP_e64 &&
           Opc != AMDGPU::V_NOP_sdwa;
  };

  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false;

  // V_NOP will be discarded by SQ.
  // Use V_MOV_B32 v?, v?. Register must be alive so use src0 of V_PERMLANE*
  // which is always a VGPR and available.
  auto *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0);
  Register Reg = Src0->getReg();
  bool IsUndef = Src0->isUndef();
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII->get(AMDGPU::V_MOV_B32_e32))
    .addReg(Reg, RegState::Define | (IsUndef ? RegState::Dead : 0))
    .addReg(Reg, IsUndef ? RegState::Undef : RegState::Kill);

  return true;
}
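
// Illustrative rewrite performed above (hypothetical registers): for
//   v_cmpx_le_f32 vcc, v0, v1            ; VOPC writing exec
//   v_permlane16_b32 v2, v3, s0, s1
// a "v_mov_b32 v3, v3" is inserted between the two, reusing src0 of the
// permlane (guaranteed to be a live VGPR), since a plain v_nop would be
// discarded by SQ and not break the hazard.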

bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) {
  if (!ST.hasVMEMtoScalarWriteHazard())
    return false;

  if (!SIInstrInfo::isSALU(*MI) && !SIInstrInfo::isSMRD(*MI))
    return false;

  if (MI->getNumDefs() == 0)
    return false;

  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  auto IsHazardFn = [TRI, MI] (MachineInstr *I) {
    if (!SIInstrInfo::isVMEM(*I) && !SIInstrInfo::isDS(*I) &&
        !SIInstrInfo::isFLAT(*I))
      return false;

    for (const MachineOperand &Def : MI->defs()) {
      MachineOperand *Op = I->findRegisterUseOperand(Def.getReg(), false, TRI);
      if (!Op)
        continue;
      return true;
    }
    return false;
  };

  auto IsExpiredFn = [] (MachineInstr *MI, int) {
    return MI && (SIInstrInfo::isVALU(*MI) ||
                  (MI->getOpcode() == AMDGPU::S_WAITCNT &&
                   !MI->getOperand(0).getImm()));
  };

  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false;

  const SIInstrInfo *TII = ST.getInstrInfo();
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::V_NOP_e32));
  return true;
}

bool GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) {
  if (!ST.hasSMEMtoVectorWriteHazard())
    return false;

  if (!SIInstrInfo::isVALU(*MI))
    return false;

  unsigned SDSTName;
  switch (MI->getOpcode()) {
  case AMDGPU::V_READLANE_B32:
  case AMDGPU::V_READLANE_B32_gfx10:
  case AMDGPU::V_READFIRSTLANE_B32:
    SDSTName = AMDGPU::OpName::vdst;
    break;
  default:
    SDSTName = AMDGPU::OpName::sdst;
    break;
  }

  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(ST.getCPU());
  const MachineOperand *SDST = TII->getNamedOperand(*MI, SDSTName);
  if (!SDST) {
    for (const auto &MO : MI->implicit_operands()) {
      if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg()))) {
        SDST = &MO;
        break;
      }
    }
  }

  if (!SDST)
    return false;

  const Register SDSTReg = SDST->getReg();
  auto IsHazardFn = [SDSTReg, TRI] (MachineInstr *I) {
    return SIInstrInfo::isSMRD(*I) && I->readsRegister(SDSTReg, TRI);
  };

  auto IsExpiredFn = [TII, IV] (MachineInstr *MI, int) {
    if (MI) {
      if (TII->isSALU(*MI)) {
        switch (MI->getOpcode()) {
        case AMDGPU::S_SETVSKIP:
        case AMDGPU::S_VERSION:
        case AMDGPU::S_WAITCNT_VSCNT:
        case AMDGPU::S_WAITCNT_VMCNT:
        case AMDGPU::S_WAITCNT_EXPCNT:
          // These instructions cannot mitigate the hazard.
          return false;
        case AMDGPU::S_WAITCNT_LGKMCNT:
          // Reducing lgkmcnt count to 0 always mitigates the hazard.
          return (MI->getOperand(1).getImm() == 0) &&
                 (MI->getOperand(0).getReg() == AMDGPU::SGPR_NULL);
        case AMDGPU::S_WAITCNT: {
          const int64_t Imm = MI->getOperand(0).getImm();
          AMDGPU::Waitcnt Decoded = AMDGPU::decodeWaitcnt(IV, Imm);
          return (Decoded.LgkmCnt == 0);
        }
        default:
          // SOPP instructions cannot mitigate the hazard.
          if (TII->isSOPP(*MI))
            return false;
          // At this point the SALU can be assumed to mitigate the hazard
          // because either:
          // (a) it is independent of the at risk SMEM (breaking chain),
          // or
          // (b) it is dependent on the SMEM, in which case an appropriate
          //     s_waitcnt lgkmcnt _must_ exist between it and the at risk
          //     SMEM instruction.
          return true;
        }
      }
    }
    return false;
  };

  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false;

  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII->get(AMDGPU::S_MOV_B32), AMDGPU::SGPR_NULL)
    .addImm(0);
  return true;
}
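
// The inserted "s_mov_b32 null, 0" is an SALU write that is independent of
// any in-flight SMEM load, i.e. exactly case (a) in IsExpiredFn above: it
// breaks the SMEM-to-VALU chain without having to wait for lgkmcnt to reach
// zero.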

bool GCNHazardRecognizer::fixVcmpxExecWARHazard(MachineInstr *MI) {
  if (!ST.hasVcmpxExecWARHazard() || !SIInstrInfo::isVALU(*MI))
    return false;

  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  if (!MI->modifiesRegister(AMDGPU::EXEC, TRI))
    return false;

  auto IsHazardFn = [TRI] (MachineInstr *I) {
    if (SIInstrInfo::isVALU(*I))
      return false;
    return I->readsRegister(AMDGPU::EXEC, TRI);
  };

  const SIInstrInfo *TII = ST.getInstrInfo();
  auto IsExpiredFn = [TII, TRI] (MachineInstr *MI, int) {
    if (!MI)
      return false;
    if (SIInstrInfo::isVALU(*MI)) {
      if (TII->getNamedOperand(*MI, AMDGPU::OpName::sdst))
        return true;
      for (auto MO : MI->implicit_operands())
        if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg())))
          return true;
    }
    if (MI->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
        (MI->getOperand(0).getImm() & 0xfffe) == 0xfffe)
      return true;
    return false;
  };

  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false;

  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII->get(AMDGPU::S_WAITCNT_DEPCTR))
    .addImm(0xfffe);
  return true;
}

bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) {
  if (!ST.hasLdsBranchVmemWARHazard())
    return false;

  auto IsHazardInst = [] (const MachineInstr *MI) {
    if (SIInstrInfo::isDS(*MI))
      return 1;
    if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI))
      return 2;
    return 0;
  };

  auto InstType = IsHazardInst(MI);
  if (!InstType)
    return false;

  auto IsExpiredFn = [&IsHazardInst] (MachineInstr *I, int) {
    return I && (IsHazardInst(I) ||
                 (I->getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
                  I->getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
                  !I->getOperand(1).getImm()));
  };

  auto IsHazardFn = [InstType, &IsHazardInst] (MachineInstr *I) {
    if (!I->isBranch())
      return false;

    auto IsHazardFn = [InstType, IsHazardInst] (MachineInstr *I) {
      auto InstType2 = IsHazardInst(I);
      return InstType2 && InstType != InstType2;
    };

    auto IsExpiredFn = [InstType, &IsHazardInst] (MachineInstr *I, int) {
      if (!I)
        return false;

      auto InstType2 = IsHazardInst(I);
      if (InstType == InstType2)
        return true;

      return I->getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
             I->getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
             !I->getOperand(1).getImm();
    };

    return ::getWaitStatesSince(IsHazardFn, I, IsExpiredFn) !=
           std::numeric_limits<int>::max();
  };

  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false;

  const SIInstrInfo *TII = ST.getInstrInfo();
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII->get(AMDGPU::S_WAITCNT_VSCNT))
    .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
    .addImm(0);

  return true;
}
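
// Illustrative shape of the hazard fixed above (hypothetical code):
//   ds_write_b32 v0, v1        ; LDS access (type 1)
//   s_cbranch_scc1 BB1         ; intervening branch
// BB1:
//   buffer_load_dword v2, ...  ; VMEM access (type 2)
// An "s_waitcnt_vscnt null, 0x0" is inserted before the second access to
// clear the cross-pipe WAR dependency.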

int GCNHazardRecognizer::checkNSAtoVMEMHazard(MachineInstr *MI) {
  int NSAtoVMEMWaitStates = 1;

  if (!ST.hasNSAtoVMEMBug())
    return 0;

  if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isMTBUF(*MI))
    return 0;

  const SIInstrInfo *TII = ST.getInstrInfo();
  const auto *Offset = TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
  if (!Offset || (Offset->getImm() & 6) == 0)
    return 0;

  auto IsHazardFn = [TII] (MachineInstr *I) {
    if (!SIInstrInfo::isMIMG(*I))
      return false;
    const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(I->getOpcode());
    return Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA &&
           TII->getInstSizeInBytes(*I) >= 16;
  };

  return NSAtoVMEMWaitStates - getWaitStatesSince(IsHazardFn, 1);
}

int GCNHazardRecognizer::checkFPAtomicToDenormModeHazard(MachineInstr *MI) {
  int FPAtomicToDenormModeWaitStates = 3;

  if (MI->getOpcode() != AMDGPU::S_DENORM_MODE)
    return 0;

  auto IsHazardFn = [] (MachineInstr *I) {
    if (!SIInstrInfo::isVMEM(*I) && !SIInstrInfo::isFLAT(*I))
      return false;
    return SIInstrInfo::isFPAtomic(*I);
  };

  auto IsExpiredFn = [] (MachineInstr *MI, int WaitStates) {
    if (WaitStates >= 3 || SIInstrInfo::isVALU(*MI))
      return true;

    switch (MI->getOpcode()) {
    case AMDGPU::S_WAITCNT:
    case AMDGPU::S_WAITCNT_VSCNT:
    case AMDGPU::S_WAITCNT_VMCNT:
    case AMDGPU::S_WAITCNT_EXPCNT:
    case AMDGPU::S_WAITCNT_LGKMCNT:
    case AMDGPU::S_WAITCNT_IDLE:
      return true;
    default:
      break;
    }

    return false;
  };

  return FPAtomicToDenormModeWaitStates -
         ::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn);
}

int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) {
  assert(SIInstrInfo::isMAI(*MI));

  int WaitStatesNeeded = 0;
  unsigned Opc = MI->getOpcode();

  auto IsVALUFn = [] (MachineInstr *MI) {
    return SIInstrInfo::isVALU(*MI);
  };

  if (Opc != AMDGPU::V_ACCVGPR_READ_B32) { // MFMA or v_accvgpr_write
    const int LegacyVALUWritesVGPRWaitStates = 2;
    const int VALUWritesExecWaitStates = 4;
    const int MaxWaitStates = 4;

    int WaitStatesNeededForUse = VALUWritesExecWaitStates -
      getWaitStatesSinceDef(AMDGPU::EXEC, IsVALUFn, MaxWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

    if (WaitStatesNeeded < MaxWaitStates) {
      for (const MachineOperand &Use : MI->explicit_uses()) {
        const int MaxWaitStates = 2;

        if (!Use.isReg() || !TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
          continue;

        int WaitStatesNeededForUse = LegacyVALUWritesVGPRWaitStates -
          getWaitStatesSinceDef(Use.getReg(), IsVALUFn, MaxWaitStates);
        WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

        if (WaitStatesNeeded == MaxWaitStates)
          break;
      }
    }
  }

  auto IsMFMAFn = [] (MachineInstr *MI) {
    return SIInstrInfo::isMAI(*MI) &&
           MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32 &&
           MI->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32;
  };

  for (const MachineOperand &Op : MI->explicit_operands()) {
    if (!Op.isReg() || !TRI.isAGPR(MF.getRegInfo(), Op.getReg()))
      continue;

    if (Op.isDef() && Opc != AMDGPU::V_ACCVGPR_WRITE_B32)
      continue;

    const int MFMAWritesAGPROverlappedSrcABWaitStates = 4;
    const int MFMAWritesAGPROverlappedSrcCWaitStates = 2;
    const int MFMA4x4WritesAGPRAccVgprReadWaitStates = 4;
    const int MFMA16x16WritesAGPRAccVgprReadWaitStates = 10;
    const int MFMA32x32WritesAGPRAccVgprReadWaitStates = 18;
    const int MFMA4x4WritesAGPRAccVgprWriteWaitStates = 1;
    const int MFMA16x16WritesAGPRAccVgprWriteWaitStates = 7;
    const int MFMA32x32WritesAGPRAccVgprWriteWaitStates = 15;
    const int MaxWaitStates = 18;
    Register Reg = Op.getReg();
    unsigned HazardDefLatency = 0;

    auto IsOverlappedMFMAFn = [Reg, &IsMFMAFn, &HazardDefLatency, this]
                              (MachineInstr *MI) {
      if (!IsMFMAFn(MI))
        return false;
      Register DstReg = MI->getOperand(0).getReg();
      if (DstReg == Reg)
        return false;
      HazardDefLatency = std::max(HazardDefLatency,
                                  TSchedModel.computeInstrLatency(MI));
      return TRI.regsOverlap(DstReg, Reg);
    };

    int WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsOverlappedMFMAFn,
                                                   MaxWaitStates);
    int NeedWaitStates = MFMAWritesAGPROverlappedSrcABWaitStates;
    int SrcCIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
    int OpNo = MI->getOperandNo(&Op);
    if (OpNo == SrcCIdx) {
      NeedWaitStates = MFMAWritesAGPROverlappedSrcCWaitStates;
    } else if (Opc == AMDGPU::V_ACCVGPR_READ_B32) {
      switch (HazardDefLatency) {
      case 2:  NeedWaitStates = MFMA4x4WritesAGPRAccVgprReadWaitStates;
               break;
      case 8:  NeedWaitStates = MFMA16x16WritesAGPRAccVgprReadWaitStates;
               break;
      case 16: LLVM_FALLTHROUGH;
      default: NeedWaitStates = MFMA32x32WritesAGPRAccVgprReadWaitStates;
               break;
      }
    } else if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32) {
      switch (HazardDefLatency) {
      case 2:  NeedWaitStates = MFMA4x4WritesAGPRAccVgprWriteWaitStates;
               break;
      case 8:  NeedWaitStates = MFMA16x16WritesAGPRAccVgprWriteWaitStates;
               break;
      case 16: LLVM_FALLTHROUGH;
      default: NeedWaitStates = MFMA32x32WritesAGPRAccVgprWriteWaitStates;
               break;
      }
    }

    int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef;
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

    if (WaitStatesNeeded == MaxWaitStates)
      return WaitStatesNeeded; // Early exit.

    auto IsAccVgprWriteFn = [Reg, this] (MachineInstr *MI) {
      if (MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32)
        return false;
      Register DstReg = MI->getOperand(0).getReg();
      return TRI.regsOverlap(Reg, DstReg);
    };

    const int AccVGPRWriteMFMAReadSrcCWaitStates = 1;
    const int AccVGPRWriteMFMAReadSrcABWaitStates = 3;
    const int AccVGPRWriteAccVgprReadWaitStates = 3;
    NeedWaitStates = AccVGPRWriteMFMAReadSrcABWaitStates;
    if (OpNo == SrcCIdx)
      NeedWaitStates = AccVGPRWriteMFMAReadSrcCWaitStates;
    else if (Opc == AMDGPU::V_ACCVGPR_READ_B32)
      NeedWaitStates = AccVGPRWriteAccVgprReadWaitStates;

    WaitStatesNeededForUse = NeedWaitStates -
      getWaitStatesSinceDef(Reg, IsAccVgprWriteFn, MaxWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

    if (WaitStatesNeeded == MaxWaitStates)
      return WaitStatesNeeded; // Early exit.
  }

  if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32) {
    const int MFMA4x4ReadSrcCAccVgprWriteWaitStates = 0;
    const int MFMA16x16ReadSrcCAccVgprWriteWaitStates = 5;
    const int MFMA32x32ReadSrcCAccVgprWriteWaitStates = 13;
    const int MaxWaitStates = 13;
    Register DstReg = MI->getOperand(0).getReg();
    unsigned HazardDefLatency = 0;

    auto IsSrcCMFMAFn = [DstReg, &IsMFMAFn, &HazardDefLatency, this]
                        (MachineInstr *MI) {
      if (!IsMFMAFn(MI))
        return false;
      Register Reg = TII.getNamedOperand(*MI, AMDGPU::OpName::src2)->getReg();
      HazardDefLatency = std::max(HazardDefLatency,
                                  TSchedModel.computeInstrLatency(MI));
      return TRI.regsOverlap(Reg, DstReg);
    };

    int WaitStatesSince = getWaitStatesSince(IsSrcCMFMAFn, MaxWaitStates);
    int NeedWaitStates;
    switch (HazardDefLatency) {
    case 2:  NeedWaitStates = MFMA4x4ReadSrcCAccVgprWriteWaitStates;
             break;
    case 8:  NeedWaitStates = MFMA16x16ReadSrcCAccVgprWriteWaitStates;
             break;
    case 16: LLVM_FALLTHROUGH;
    default: NeedWaitStates = MFMA32x32ReadSrcCAccVgprWriteWaitStates;
             break;
    }

    int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSince;
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  }

  return WaitStatesNeeded;
}
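
// The HazardDefLatency switches above infer the MFMA shape from its
// scheduling latency: a latency of 2 appears to correspond to the 4x4
// variants, 8 to 16x16, and 16 to 32x32, which is why each case picks the
// matching MFMA*WaitStates constant.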

int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) {
  if (!ST.hasMAIInsts())
    return 0;

  int WaitStatesNeeded = 0;

  auto IsAccVgprReadFn = [] (MachineInstr *MI) {
    return MI->getOpcode() == AMDGPU::V_ACCVGPR_READ_B32;
  };

  for (const MachineOperand &Op : MI->explicit_uses()) {
    if (!Op.isReg() || !TRI.isVGPR(MF.getRegInfo(), Op.getReg()))
      continue;

    Register Reg = Op.getReg();

    const int AccVgprReadLdStWaitStates = 2;
    const int VALUWriteAccVgprReadLdStDepVALUWaitStates = 1;
    const int MaxWaitStates = 2;

    int WaitStatesNeededForUse = AccVgprReadLdStWaitStates -
      getWaitStatesSinceDef(Reg, IsAccVgprReadFn, MaxWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

    if (WaitStatesNeeded == MaxWaitStates)
      return WaitStatesNeeded; // Early exit.

    auto IsVALUAccVgprReadCheckFn = [Reg, this] (MachineInstr *MI) {
      if (MI->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32)
        return false;
      auto IsVALUFn = [] (MachineInstr *MI) {
        return SIInstrInfo::isVALU(*MI) && !SIInstrInfo::isMAI(*MI);
      };
      return getWaitStatesSinceDef(Reg, IsVALUFn, 2 /*MaxWaitStates*/) <
             std::numeric_limits<int>::max();
    };

    WaitStatesNeededForUse = VALUWriteAccVgprReadLdStDepVALUWaitStates -
      getWaitStatesSince(IsVALUAccVgprReadCheckFn, MaxWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  }

  return WaitStatesNeeded;
}