GCNHazardRecognizer.cpp
1 //===-- GCNHazardRecognizers.cpp - GCN Hazard Recognizer Impls ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements hazard recognizers for scheduling on GCN processors.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "GCNHazardRecognizer.h"
14 #include "GCNSubtarget.h"
19 
20 using namespace llvm;
21 
22 //===----------------------------------------------------------------------===//
23 // Hazard Recognizer Implementation
24 //===----------------------------------------------------------------------===//
25 
26 static bool shouldRunLdsBranchVmemWARHazardFixup(const MachineFunction &MF,
27  const GCNSubtarget &ST);
28 
29 GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
30  IsHazardRecognizerMode(false),
31  CurrCycleInstr(nullptr),
32  MF(MF),
33  ST(MF.getSubtarget<GCNSubtarget>()),
34  TII(*ST.getInstrInfo()),
35  TRI(TII.getRegisterInfo()),
36  ClauseUses(TRI.getNumRegUnits()),
37  ClauseDefs(TRI.getNumRegUnits()) {
38  MaxLookAhead = MF.getRegInfo().isPhysRegUsed(AMDGPU::AGPR0) ? 19 : 5;
39  TSchedModel.init(&ST);
40  RunLdsBranchVmemWARHazardFixup = shouldRunLdsBranchVmemWARHazardFixup(MF, ST);
41 }
42 
43 void GCNHazardRecognizer::Reset() {
44  EmittedInstrs.clear();
45 }
46 
47 void GCNHazardRecognizer::EmitInstruction(SUnit *SU) {
48  EmitInstruction(SU->getInstr());
49 }
50 
51 void GCNHazardRecognizer::EmitInstruction(MachineInstr *MI) {
52  CurrCycleInstr = MI;
53 }
54 
55 static bool isDivFMas(unsigned Opcode) {
56  return Opcode == AMDGPU::V_DIV_FMAS_F32_e64 || Opcode == AMDGPU::V_DIV_FMAS_F64_e64;
57 }
58 
59 static bool isSGetReg(unsigned Opcode) {
60  return Opcode == AMDGPU::S_GETREG_B32;
61 }
62 
63 static bool isSSetReg(unsigned Opcode) {
64  switch (Opcode) {
65  case AMDGPU::S_SETREG_B32:
66  case AMDGPU::S_SETREG_B32_mode:
67  case AMDGPU::S_SETREG_IMM32_B32:
68  case AMDGPU::S_SETREG_IMM32_B32_mode:
69  return true;
70  }
71  return false;
72 }
73 
74 static bool isRWLane(unsigned Opcode) {
75  return Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32;
76 }
77 
78 static bool isRFE(unsigned Opcode) {
79  return Opcode == AMDGPU::S_RFE_B64;
80 }
81 
82 static bool isSMovRel(unsigned Opcode) {
83  switch (Opcode) {
84  case AMDGPU::S_MOVRELS_B32:
85  case AMDGPU::S_MOVRELS_B64:
86  case AMDGPU::S_MOVRELD_B32:
87  case AMDGPU::S_MOVRELD_B64:
88  return true;
89  default:
90  return false;
91  }
92 }
93 
94 static bool isDGEMM(unsigned Opcode) {
95  return Opcode == AMDGPU::V_MFMA_F64_4X4X4F64_e64 ||
96  Opcode == AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64 ||
97  Opcode == AMDGPU::V_MFMA_F64_16X16X4F64_e64 ||
98  Opcode == AMDGPU::V_MFMA_F64_16X16X4F64_vgprcd_e64;
99 }
100 
101 static bool isXDL(const GCNSubtarget &ST, const MachineInstr &MI) {
102  unsigned Opcode = MI.getOpcode();
103 
104  if (!SIInstrInfo::isMAI(MI) ||
105  isDGEMM(Opcode) ||
106  Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
107  Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64)
108  return false;
109 
110  return true;
111 }
112 
113 static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII,
114  const MachineInstr &MI) {
115  if (TII.isAlwaysGDS(MI.getOpcode()))
116  return true;
117 
118  switch (MI.getOpcode()) {
119  case AMDGPU::S_SENDMSG:
120  case AMDGPU::S_SENDMSGHALT:
121  case AMDGPU::S_TTRACEDATA:
122  return true;
123  // These DS opcodes don't support GDS.
124  case AMDGPU::DS_NOP:
125  case AMDGPU::DS_PERMUTE_B32:
126  case AMDGPU::DS_BPERMUTE_B32:
127  return false;
128  default:
129  if (TII.isDS(MI.getOpcode())) {
130  int GDS = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
131  AMDGPU::OpName::gds);
132  if (MI.getOperand(GDS).getImm())
133  return true;
134  }
135  return false;
136  }
137 }
138 
139 static bool isPermlane(const MachineInstr &MI) {
140  unsigned Opcode = MI.getOpcode();
141  return Opcode == AMDGPU::V_PERMLANE16_B32_e64 ||
142  Opcode == AMDGPU::V_PERMLANEX16_B32_e64;
143 }
144 
145 static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
146  const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
147  AMDGPU::OpName::simm16);
148  return RegOp->getImm() & AMDGPU::Hwreg::ID_MASK_;
149 }
150 
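// Note: getHazardType() below is the entry point used by the scheduler. It
// dispatches to the individual check* routines; any routine reporting a
// positive wait-state count is surfaced as a hazard (NoopHazard when running
// in hazard recognizer mode, Hazard otherwise).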
151 ScheduleHazardRecognizer::HazardType
152 GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
153  MachineInstr *MI = SU->getInstr();
154  // If we are not in "HazardRecognizerMode" and therefore not being run from
155  // the scheduler, track possible stalls from hazards but don't insert noops.
156  auto HazardType = IsHazardRecognizerMode ? NoopHazard : Hazard;
157 
158  if (MI->isBundle())
159  return NoHazard;
160 
161  if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0)
162  return HazardType;
163 
164  if (ST.hasNSAtoVMEMBug() && checkNSAtoVMEMHazard(MI) > 0)
165  return HazardType;
166 
167  if (checkFPAtomicToDenormModeHazard(MI) > 0)
168  return HazardType;
169 
170  if (ST.hasNoDataDepHazard())
171  return NoHazard;
172 
173  // FIXME: Should flat be considered vmem?
174  if ((SIInstrInfo::isVMEM(*MI) ||
175  SIInstrInfo::isFLAT(*MI))
176  && checkVMEMHazards(MI) > 0)
177  return HazardType;
178 
179  if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0)
180  return HazardType;
181 
182  if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0)
183  return HazardType;
184 
185  if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0)
186  return HazardType;
187 
188  if (isRWLane(MI->getOpcode()) && checkRWLaneHazards(MI) > 0)
189  return HazardType;
190 
191  if ((SIInstrInfo::isVALU(*MI) || SIInstrInfo::isVMEM(*MI) ||
192  SIInstrInfo::isFLAT(*MI) || SIInstrInfo::isDS(*MI) ||
193  SIInstrInfo::isEXP(*MI)) && checkMAIVALUHazards(MI) > 0)
194  return HazardType;
195 
196  if (isSGetReg(MI->getOpcode()) && checkGetRegHazards(MI) > 0)
197  return HazardType;
198 
199  if (isSSetReg(MI->getOpcode()) && checkSetRegHazards(MI) > 0)
200  return HazardType;
201 
202  if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0)
203  return HazardType;
204 
205  if (ST.hasReadM0MovRelInterpHazard() &&
206  (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) &&
207  checkReadM0Hazards(MI) > 0)
208  return HazardType;
209 
210  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI) &&
211  checkReadM0Hazards(MI) > 0)
212  return HazardType;
213 
214  if (SIInstrInfo::isMAI(*MI) && checkMAIHazards(MI) > 0)
215  return HazardType;
216 
217  if ((SIInstrInfo::isVMEM(*MI) ||
218  SIInstrInfo::isFLAT(*MI) ||
219  SIInstrInfo::isDS(*MI)) && checkMAILdStHazards(MI) > 0)
220  return HazardType;
221 
222  if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0)
223  return HazardType;
224 
225  return NoHazard;
226 }
227 
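// insertNoopsInBundle() materializes the required wait states as s_nop
// instructions. s_nop takes an immediate in [0, 7] and stalls for imm + 1
// wait states, so larger counts are emitted as a chain. For example, a
// requirement of 10 wait states becomes:
//   s_nop 7   ; 8 wait states
//   s_nop 1   ; 2 wait states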
228 static void insertNoopsInBundle(MachineInstr *MI, const SIInstrInfo &TII,
229  unsigned Quantity) {
230  while (Quantity > 0) {
231  unsigned Arg = std::min(Quantity, 8u);
232  Quantity -= Arg;
233  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::S_NOP))
234  .addImm(Arg - 1);
235  }
236 }
237 
238 void GCNHazardRecognizer::processBundle() {
239  MachineBasicBlock::instr_iterator MI = std::next(CurrCycleInstr->getIterator());
240  MachineBasicBlock::instr_iterator E = CurrCycleInstr->getParent()->instr_end();
241  // Check bundled MachineInstr's for hazards.
242  for (; MI != E && MI->isInsideBundle(); ++MI) {
243  CurrCycleInstr = &*MI;
244  unsigned WaitStates = PreEmitNoopsCommon(CurrCycleInstr);
245 
246  if (IsHazardRecognizerMode) {
247  fixHazards(CurrCycleInstr);
248 
249  insertNoopsInBundle(CurrCycleInstr, TII, WaitStates);
250  }
251 
252  // It’s unnecessary to track more than MaxLookAhead instructions. Since we
253  // include the bundled MI directly after, only add a maximum of
254  // (MaxLookAhead - 1) noops to EmittedInstrs.
255  for (unsigned i = 0, e = std::min(WaitStates, MaxLookAhead - 1); i < e; ++i)
256  EmittedInstrs.push_front(nullptr);
257 
258  EmittedInstrs.push_front(CurrCycleInstr);
259  EmittedInstrs.resize(MaxLookAhead);
260  }
261  CurrCycleInstr = nullptr;
262 }
263 
264 unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
265  IsHazardRecognizerMode = true;
266  CurrCycleInstr = MI;
267  unsigned W = PreEmitNoopsCommon(MI);
268  fixHazards(MI);
269  CurrCycleInstr = nullptr;
270  return W;
271 }
272 
273 unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
274  if (MI->isBundle())
275  return 0;
276 
277  int WaitStates = 0;
278 
279  if (SIInstrInfo::isSMRD(*MI))
280  return std::max(WaitStates, checkSMRDHazards(MI));
281 
282  if (ST.hasNSAtoVMEMBug())
283  WaitStates = std::max(WaitStates, checkNSAtoVMEMHazard(MI));
284 
285  WaitStates = std::max(WaitStates, checkFPAtomicToDenormModeHazard(MI));
286 
287  if (ST.hasNoDataDepHazard())
288  return WaitStates;
289 
290  if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI))
291  WaitStates = std::max(WaitStates, checkVMEMHazards(MI));
292 
293  if (SIInstrInfo::isVALU(*MI))
294  WaitStates = std::max(WaitStates, checkVALUHazards(MI));
295 
296  if (SIInstrInfo::isDPP(*MI))
297  WaitStates = std::max(WaitStates, checkDPPHazards(MI));
298 
299  if (isDivFMas(MI->getOpcode()))
300  WaitStates = std::max(WaitStates, checkDivFMasHazards(MI));
301 
302  if (isRWLane(MI->getOpcode()))
303  WaitStates = std::max(WaitStates, checkRWLaneHazards(MI));
304 
305  if ((SIInstrInfo::isVALU(*MI) || SIInstrInfo::isVMEM(*MI) ||
306  SIInstrInfo::isFLAT(*MI) || SIInstrInfo::isDS(*MI) ||
307  SIInstrInfo::isEXP(*MI)) && checkMAIVALUHazards(MI) > 0)
308  WaitStates = std::max(WaitStates, checkMAIVALUHazards(MI));
309 
310  if (MI->isInlineAsm())
311  return std::max(WaitStates, checkInlineAsmHazards(MI));
312 
313  if (isSGetReg(MI->getOpcode()))
314  return std::max(WaitStates, checkGetRegHazards(MI));
315 
316  if (isSSetReg(MI->getOpcode()))
317  return std::max(WaitStates, checkSetRegHazards(MI));
318 
319  if (isRFE(MI->getOpcode()))
320  return std::max(WaitStates, checkRFEHazards(MI));
321 
322  if (ST.hasReadM0MovRelInterpHazard() && (TII.isVINTRP(*MI) ||
323  isSMovRel(MI->getOpcode())))
324  return std::max(WaitStates, checkReadM0Hazards(MI));
325 
326  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI))
327  return std::max(WaitStates, checkReadM0Hazards(MI));
328 
329  if (SIInstrInfo::isMAI(*MI))
330  return std::max(WaitStates, checkMAIHazards(MI));
331 
332  if (SIInstrInfo::isVMEM(*MI) ||
333  SIInstrInfo::isFLAT(*MI) ||
334  SIInstrInfo::isDS(*MI))
335  return std::max(WaitStates, checkMAILdStHazards(MI));
336 
337  return WaitStates;
338 }
339 
340 void GCNHazardRecognizer::EmitNoop() {
341  EmittedInstrs.push_front(nullptr);
342 }
343 
344 void GCNHazardRecognizer::AdvanceCycle() {
345  // When the scheduler detects a stall, it will call AdvanceCycle() without
346  // emitting any instructions.
347  if (!CurrCycleInstr) {
348  EmittedInstrs.push_front(nullptr);
349  return;
350  }
351 
352  if (CurrCycleInstr->isBundle()) {
353  processBundle();
354  return;
355  }
356 
357  unsigned NumWaitStates = TII.getNumWaitStates(*CurrCycleInstr);
358  if (!NumWaitStates) {
359  CurrCycleInstr = nullptr;
360  return;
361  }
362 
363  // Keep track of emitted instructions
364  EmittedInstrs.push_front(CurrCycleInstr);
365 
366  // Add a nullptr for each additional wait state after the first. Make sure
367  // not to add more than getMaxLookAhead() items to the list, since we
368  // truncate the list to that size right after this loop.
369  for (unsigned i = 1, e = std::min(NumWaitStates, getMaxLookAhead());
370  i < e; ++i) {
371  EmittedInstrs.push_front(nullptr);
372  }
373 
374  // getMaxLookahead() is the largest number of wait states we will ever need
375  // to insert, so there is no point in keeping track of more than that many
376  // wait states.
377  EmittedInstrs.resize(getMaxLookAhead());
378 
379  CurrCycleInstr = nullptr;
380 }
381 
382 void GCNHazardRecognizer::RecedeCycle() {
383  llvm_unreachable("hazard recognizer does not support bottom-up scheduling.");
384 }
385 
386 //===----------------------------------------------------------------------===//
387 // Helper Functions
388 //===----------------------------------------------------------------------===//
389 
390 typedef function_ref<bool(const MachineInstr &, int WaitStates)> IsExpiredFn;
391 
392 // Returns the minimum number of wait states since \p I, walking back through
393 // all predecessors. Scanning stops once \p IsExpired returns true.
394 // Can only be run in hazard recognizer mode.
395 static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
396  const MachineBasicBlock *MBB,
397  MachineBasicBlock::const_reverse_instr_iterator I,
398  int WaitStates, IsExpiredFn IsExpired,
399  DenseSet<const MachineBasicBlock *> &Visited) {
400  for (auto E = MBB->instr_rend(); I != E; ++I) {
401  // Don't add WaitStates for parent BUNDLE instructions.
402  if (I->isBundle())
403  continue;
404 
405  if (IsHazard(*I))
406  return WaitStates;
407 
408  if (I->isInlineAsm())
409  continue;
410 
411  WaitStates += SIInstrInfo::getNumWaitStates(*I);
412 
413  if (IsExpired(*I, WaitStates))
414  return std::numeric_limits<int>::max();
415  }
416 
417  int MinWaitStates = std::numeric_limits<int>::max();
418  for (MachineBasicBlock *Pred : MBB->predecessors()) {
419  if (!Visited.insert(Pred).second)
420  continue;
421 
422  int W = getWaitStatesSince(IsHazard, Pred, Pred->instr_rbegin(),
423  WaitStates, IsExpired, Visited);
424 
425  MinWaitStates = std::min(MinWaitStates, W);
426  }
427 
428  return MinWaitStates;
429 }
430 
431 static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
432  const MachineInstr *MI, IsExpiredFn IsExpired) {
433  DenseSet<const MachineBasicBlock *> Visited;
434  return getWaitStatesSince(IsHazard, MI->getParent(),
435  std::next(MI->getReverseIterator()),
436  0, IsExpired, Visited);
437 }
438 
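// The member getWaitStatesSince() below has two search modes: in hazard
// recognizer mode it walks backwards from CurrCycleInstr through the function
// (including predecessor blocks) via the static helper above, while in
// scheduler mode it only consults the EmittedInstrs history maintained by
// AdvanceCycle().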
439 int GCNHazardRecognizer::getWaitStatesSince(IsHazardFn IsHazard, int Limit) {
440  if (IsHazardRecognizerMode) {
441  auto IsExpiredFn = [Limit](const MachineInstr &, int WaitStates) {
442  return WaitStates >= Limit;
443  };
444  return ::getWaitStatesSince(IsHazard, CurrCycleInstr, IsExpiredFn);
445  }
446 
447  int WaitStates = 0;
448  for (MachineInstr *MI : EmittedInstrs) {
449  if (MI) {
450  if (IsHazard(*MI))
451  return WaitStates;
452 
453  if (MI->isInlineAsm())
454  continue;
455  }
456  ++WaitStates;
457 
458  if (WaitStates >= Limit)
459  break;
460  }
461  return std::numeric_limits<int>::max();
462 }
463 
464 int GCNHazardRecognizer::getWaitStatesSinceDef(unsigned Reg,
465  IsHazardFn IsHazardDef,
466  int Limit) {
467  const SIRegisterInfo *TRI = ST.getRegisterInfo();
468 
469  auto IsHazardFn = [IsHazardDef, TRI, Reg](const MachineInstr &MI) {
470  return IsHazardDef(MI) && MI.modifiesRegister(Reg, TRI);
471  };
472 
473  return getWaitStatesSince(IsHazardFn, Limit);
474 }
475 
476 int GCNHazardRecognizer::getWaitStatesSinceSetReg(IsHazardFn IsHazard,
477  int Limit) {
478  auto IsHazardFn = [IsHazard](const MachineInstr &MI) {
479  return isSSetReg(MI.getOpcode()) && IsHazard(MI);
480  };
481 
482  return getWaitStatesSince(IsHazardFn, Limit);
483 }
484 
485 //===----------------------------------------------------------------------===//
486 // No-op Hazard Detection
487 //===----------------------------------------------------------------------===//
488 
489 static void addRegUnits(const SIRegisterInfo &TRI, BitVector &BV,
490  MCRegister Reg) {
491  for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI)
492  BV.set(*RUI);
493 }
494 
495 static void addRegsToSet(const SIRegisterInfo &TRI,
496  iterator_range<MachineInstr::const_mop_iterator> Ops,
497  BitVector &Set) {
498  for (const MachineOperand &Op : Ops) {
499  if (Op.isReg())
500  addRegUnits(TRI, Set, Op.getReg().asMCReg());
501  }
502 }
503 
504 void GCNHazardRecognizer::addClauseInst(const MachineInstr &MI) {
505  // XXX: Do we need to worry about implicit operands
506  addRegsToSet(TRI, MI.defs(), ClauseDefs);
507  addRegsToSet(TRI, MI.uses(), ClauseUses);
508 }
509 
510 static bool breaksSMEMSoftClause(MachineInstr *MI) {
511  return !SIInstrInfo::isSMRD(*MI);
512 }
513 
514 static bool breaksVMEMSoftClause(MachineInstr *MI) {
515  return !SIInstrInfo::isVMEM(*MI) && !SIInstrInfo::isFLAT(*MI);
516 }
517 
518 int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) {
519  // SMEM soft clauses are only present on VI+, and only matter if xnack is
520  // enabled.
521  if (!ST.isXNACKEnabled())
522  return 0;
523 
524  bool IsSMRD = TII.isSMRD(*MEM);
525 
526  resetClause();
527 
528  // A soft-clause is any group of consecutive SMEM instructions. The
529  // instructions in this group may return out of order and/or may be
530  // replayed (i.e. the same instruction issued more than once).
531  //
532  // In order to handle these situations correctly we need to make sure that
533  // when a clause has more than one instruction, no instruction in the clause
534  // writes to a register that is read by another instruction in the clause
535  // (including itself). If we encounter this situation, we need to break the
536  // clause by inserting a non SMEM instruction.
537 
538  for (MachineInstr *MI : EmittedInstrs) {
539  // When we hit a non-SMEM instruction then we have passed the start of the
540  // clause and we can stop.
541  if (!MI)
542  break;
543 
544  if (IsSMRD ? breaksSMEMSoftClause(MI) : breaksVMEMSoftClause(MI))
545  break;
546 
547  addClauseInst(*MI);
548  }
549 
550  if (ClauseDefs.none())
551  return 0;
552 
553  // We need to make sure not to put loads and stores in the same clause if they
554  // use the same address. For now, just start a new clause whenever we see a
555  // store.
556  if (MEM->mayStore())
557  return 1;
558 
559  addClauseInst(*MEM);
560 
561  // If the set of defs and uses intersect then we cannot add this instruction
562  // to the clause, so we have a hazard.
563  return ClauseDefs.anyCommon(ClauseUses) ? 1 : 0;
564 }
565 
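// The check* routines below all follow the same pattern: subtract the wait
// states already observed (via getWaitStatesSince*) from the count required by
// the hazard, so a non-positive result means no extra noops are needed.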
566 int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
567  int WaitStatesNeeded = 0;
568 
569  WaitStatesNeeded = checkSoftClauseHazards(SMRD);
570 
571  // This SMRD hazard only affects SI.
572  if (!ST.hasSMRDReadVALUDefHazard())
573  return WaitStatesNeeded;
574 
575  // A read of an SGPR by SMRD instruction requires 4 wait states when the
576  // SGPR was written by a VALU instruction.
577  int SmrdSgprWaitStates = 4;
578  auto IsHazardDefFn = [this](const MachineInstr &MI) {
579  return TII.isVALU(MI);
580  };
581  auto IsBufferHazardDefFn = [this](const MachineInstr &MI) {
582  return TII.isSALU(MI);
583  };
584 
585  bool IsBufferSMRD = TII.isBufferSMRD(*SMRD);
586 
587  for (const MachineOperand &Use : SMRD->uses()) {
588  if (!Use.isReg())
589  continue;
590  int WaitStatesNeededForUse =
591  SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn,
592  SmrdSgprWaitStates);
593  WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
594 
595  // This fixes what appears to be undocumented hardware behavior in SI where
596  // s_mov writing a descriptor and s_buffer_load_dword reading the descriptor
597  // needs some number of nops in between. We don't know how many we need, but
598  // let's use 4. This wasn't discovered before probably because the only
599  // case when this happens is when we expand a 64-bit pointer into a full
600  // descriptor and use s_buffer_load_dword instead of s_load_dword, which was
601  // probably never encountered in the closed-source land.
602  if (IsBufferSMRD) {
603  int WaitStatesNeededForUse =
604  SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(),
605  IsBufferHazardDefFn,
606  SmrdSgprWaitStates);
607  WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
608  }
609  }
610 
611  return WaitStatesNeeded;
612 }
613 
614 int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) {
615  if (!ST.hasVMEMReadSGPRVALUDefHazard())
616  return 0;
617 
618  int WaitStatesNeeded = checkSoftClauseHazards(VMEM);
619 
620  // A read of an SGPR by a VMEM instruction requires 5 wait states when the
621  // SGPR was written by a VALU Instruction.
622  const int VmemSgprWaitStates = 5;
623  auto IsHazardDefFn = [this](const MachineInstr &MI) {
624  return TII.isVALU(MI);
625  };
626  for (const MachineOperand &Use : VMEM->uses()) {
627  if (!Use.isReg() || TRI.isVectorRegister(MF.getRegInfo(), Use.getReg()))
628  continue;
629 
630  int WaitStatesNeededForUse =
631  VmemSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn,
632  VmemSgprWaitStates);
633  WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
634  }
635  return WaitStatesNeeded;
636 }
637 
638 int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) {
639  const SIRegisterInfo *TRI = ST.getRegisterInfo();
640  const SIInstrInfo *TII = ST.getInstrInfo();
641 
642  // Check for DPP VGPR read after VALU VGPR write and EXEC write.
643  int DppVgprWaitStates = 2;
644  int DppExecWaitStates = 5;
645  int WaitStatesNeeded = 0;
646  auto IsHazardDefFn = [TII](const MachineInstr &MI) {
647  return TII->isVALU(MI);
648  };
649 
650  for (const MachineOperand &Use : DPP->uses()) {
651  if (!Use.isReg() || !TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
652  continue;
653  int WaitStatesNeededForUse =
654  DppVgprWaitStates - getWaitStatesSinceDef(
655  Use.getReg(),
656  [](const MachineInstr &) { return true; },
657  DppVgprWaitStates);
658  WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
659  }
660 
661  WaitStatesNeeded = std::max(
662  WaitStatesNeeded,
663  DppExecWaitStates - getWaitStatesSinceDef(AMDGPU::EXEC, IsHazardDefFn,
664  DppExecWaitStates));
665 
666  return WaitStatesNeeded;
667 }
668 
669 int GCNHazardRecognizer::checkDivFMasHazards(MachineInstr *DivFMas) {
670  const SIInstrInfo *TII = ST.getInstrInfo();
671 
672  // v_div_fmas requires 4 wait states after a write to vcc from a VALU
673  // instruction.
674  const int DivFMasWaitStates = 4;
675  auto IsHazardDefFn = [TII](const MachineInstr &MI) {
676  return TII->isVALU(MI);
677  };
678  int WaitStatesNeeded = getWaitStatesSinceDef(AMDGPU::VCC, IsHazardDefFn,
679  DivFMasWaitStates);
680 
681  return DivFMasWaitStates - WaitStatesNeeded;
682 }
683 
684 int GCNHazardRecognizer::checkGetRegHazards(MachineInstr *GetRegInstr) {
685  const SIInstrInfo *TII = ST.getInstrInfo();
686  unsigned GetRegHWReg = getHWReg(TII, *GetRegInstr);
687 
688  const int GetRegWaitStates = 2;
689  auto IsHazardFn = [TII, GetRegHWReg](const MachineInstr &MI) {
690  return GetRegHWReg == getHWReg(TII, MI);
691  };
692  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, GetRegWaitStates);
693 
694  return GetRegWaitStates - WaitStatesNeeded;
695 }
696 
697 int GCNHazardRecognizer::checkSetRegHazards(MachineInstr *SetRegInstr) {
698  const SIInstrInfo *TII = ST.getInstrInfo();
699  unsigned HWReg = getHWReg(TII, *SetRegInstr);
700 
701  const int SetRegWaitStates = ST.getSetRegWaitStates();
702  auto IsHazardFn = [TII, HWReg](const MachineInstr &MI) {
703  return HWReg == getHWReg(TII, MI);
704  };
705  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, SetRegWaitStates);
706  return SetRegWaitStates - WaitStatesNeeded;
707 }
708 
709 int GCNHazardRecognizer::createsVALUHazard(const MachineInstr &MI) {
710  if (!MI.mayStore())
711  return -1;
712 
713  const SIInstrInfo *TII = ST.getInstrInfo();
714  unsigned Opcode = MI.getOpcode();
715  const MCInstrDesc &Desc = MI.getDesc();
716 
717  int VDataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
718  int VDataRCID = -1;
719  if (VDataIdx != -1)
720  VDataRCID = Desc.OpInfo[VDataIdx].RegClass;
721 
722  if (TII->isMUBUF(MI) || TII->isMTBUF(MI)) {
723  // There is no hazard if the instruction does not use vector regs
724  // (like wbinvl1)
725  if (VDataIdx == -1)
726  return -1;
727  // For MUBUF/MTBUF instructions this hazard only exists if the
728  // instruction is not using a register in the soffset field.
729  const MachineOperand *SOffset =
730  TII->getNamedOperand(MI, AMDGPU::OpName::soffset);
731  // If we have no soffset operand, then assume this field has been
732  // hardcoded to zero.
733  if (AMDGPU::getRegBitWidth(VDataRCID) > 64 &&
734  (!SOffset || !SOffset->isReg()))
735  return VDataIdx;
736  }
737 
738  // MIMG instructions create a hazard if they don't use a 256-bit T# and
739  // the store size is greater than 8 bytes and they have more than two bits
740  // of their dmask set.
741  // All our MIMG definitions use a 256-bit T#, so we can skip checking for them.
742  if (TII->isMIMG(MI)) {
743  int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::srsrc);
744  assert(SRsrcIdx != -1 &&
745  AMDGPU::getRegBitWidth(Desc.OpInfo[SRsrcIdx].RegClass) == 256);
746  (void)SRsrcIdx;
747  }
748 
749  if (TII->isFLAT(MI)) {
750  int DataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
751  if (AMDGPU::getRegBitWidth(Desc.OpInfo[DataIdx].RegClass) > 64)
752  return DataIdx;
753  }
754 
755  return -1;
756 }
757 
758 int
759 GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def,
760  const MachineRegisterInfo &MRI) {
761  // Helper to check for the hazard where VMEM instructions that store more than
762  // 8 bytes can have their store data overwritten by the next instruction.
763  const SIRegisterInfo *TRI = ST.getRegisterInfo();
764 
765  const int VALUWaitStates = 1;
766  int WaitStatesNeeded = 0;
767 
768  if (!TRI->isVectorRegister(MRI, Def.getReg()))
769  return WaitStatesNeeded;
770  Register Reg = Def.getReg();
771  auto IsHazardFn = [this, Reg, TRI](const MachineInstr &MI) {
772  int DataIdx = createsVALUHazard(MI);
773  return DataIdx >= 0 &&
774  TRI->regsOverlap(MI.getOperand(DataIdx).getReg(), Reg);
775  };
776  int WaitStatesNeededForDef =
777  VALUWaitStates - getWaitStatesSince(IsHazardFn, VALUWaitStates);
778  WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
779 
780  return WaitStatesNeeded;
781 }
782 
783 int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) {
784  // This checks for the hazard where VMEM instructions that store more than
785  // 8 bytes can have their store data overwritten by the next instruction.
786  if (!ST.has12DWordStoreHazard())
787  return 0;
788 
789  const MachineRegisterInfo &MRI = MF.getRegInfo();
790  int WaitStatesNeeded = 0;
791 
792  for (const MachineOperand &Def : VALU->defs()) {
793  WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Def, MRI));
794  }
795 
796  return WaitStatesNeeded;
797 }
798 
799 int GCNHazardRecognizer::checkInlineAsmHazards(MachineInstr *IA) {
800  // This checks for hazards associated with inline asm statements.
801  // Since inline asms can contain just about anything, we use this
802  // to call/leverage other check*Hazard routines. Note that
803  // this function doesn't attempt to address all possible inline asm
804  // hazards (good luck), but is a collection of what has been
805  // problematic thus far.
806 
807  // see checkVALUHazards()
808  if (!ST.has12DWordStoreHazard())
809  return 0;
810 
811  const MachineRegisterInfo &MRI = MF.getRegInfo();
812  int WaitStatesNeeded = 0;
813 
814  for (unsigned I = InlineAsm::MIOp_FirstOperand, E = IA->getNumOperands();
815  I != E; ++I) {
816  const MachineOperand &Op = IA->getOperand(I);
817  if (Op.isReg() && Op.isDef()) {
818  WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Op, MRI));
819  }
820  }
821 
822  return WaitStatesNeeded;
823 }
824 
825 int GCNHazardRecognizer::checkRWLaneHazards(MachineInstr *RWLane) {
826  const SIInstrInfo *TII = ST.getInstrInfo();
827  const SIRegisterInfo *TRI = ST.getRegisterInfo();
828  const MachineRegisterInfo &MRI = MF.getRegInfo();
829 
830  const MachineOperand *LaneSelectOp =
831  TII->getNamedOperand(*RWLane, AMDGPU::OpName::src1);
832 
833  if (!LaneSelectOp->isReg() || !TRI->isSGPRReg(MRI, LaneSelectOp->getReg()))
834  return 0;
835 
836  Register LaneSelectReg = LaneSelectOp->getReg();
837  auto IsHazardFn = [TII](const MachineInstr &MI) { return TII->isVALU(MI); };
838 
839  const int RWLaneWaitStates = 4;
840  int WaitStatesSince = getWaitStatesSinceDef(LaneSelectReg, IsHazardFn,
841  RWLaneWaitStates);
842  return RWLaneWaitStates - WaitStatesSince;
843 }
844 
845 int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) {
846  if (!ST.hasRFEHazards())
847  return 0;
848 
849  const SIInstrInfo *TII = ST.getInstrInfo();
850 
851  const int RFEWaitStates = 1;
852 
853  auto IsHazardFn = [TII](const MachineInstr &MI) {
854  return getHWReg(TII, MI) == AMDGPU::Hwreg::ID_TRAPSTS;
855  };
856  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, RFEWaitStates);
857  return RFEWaitStates - WaitStatesNeeded;
858 }
859 
860 int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) {
861  const SIInstrInfo *TII = ST.getInstrInfo();
862  const int SMovRelWaitStates = 1;
863  auto IsHazardFn = [TII](const MachineInstr &MI) { return TII->isSALU(MI); };
864  return SMovRelWaitStates - getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn,
865  SMovRelWaitStates);
866 }
867 
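// Unlike the check* routines, the fix* routines below do not report wait
// states; when invoked from PreEmitNoops they rewrite the code directly,
// e.g. by inserting s_waitcnt_depctr, a dummy v_mov_b32, an s_mov_b32 to
// null, or s_waitcnt_vscnt to break the detected hazard.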
868 void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
869  fixVMEMtoScalarWriteHazards(MI);
870  fixVcmpxPermlaneHazards(MI);
871  fixSMEMtoVectorWriteHazards(MI);
872  fixVcmpxExecWARHazard(MI);
873  fixLdsBranchVmemWARHazard(MI);
874 }
875 
876 bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) {
877  if (!ST.hasVcmpxPermlaneHazard() || !isPermlane(*MI))
878  return false;
879 
880  const SIInstrInfo *TII = ST.getInstrInfo();
881  auto IsHazardFn = [TII](const MachineInstr &MI) { return TII->isVOPC(MI); };
882 
883  auto IsExpiredFn = [](const MachineInstr &MI, int) {
884  unsigned Opc = MI.getOpcode();
885  return SIInstrInfo::isVALU(MI) && Opc != AMDGPU::V_NOP_e32 &&
886  Opc != AMDGPU::V_NOP_e64 && Opc != AMDGPU::V_NOP_sdwa;
887  };
888 
889  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
890  std::numeric_limits<int>::max())
891  return false;
892 
893  // V_NOP will be discarded by SQ.
894  // Use V_MOV_B32 v?, v?. Register must be alive so use src0 of V_PERMLANE*
895  // which is always a VGPR and available.
896  auto *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0);
897  Register Reg = Src0->getReg();
898  bool IsUndef = Src0->isUndef();
899  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
900  TII->get(AMDGPU::V_MOV_B32_e32))
901  .addReg(Reg, RegState::Define | (IsUndef ? RegState::Dead : 0))
902  .addReg(Reg, IsUndef ? RegState::Undef : RegState::Kill);
903 
904  return true;
905 }
906 
907 bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) {
908  if (!ST.hasVMEMtoScalarWriteHazard())
909  return false;
910 
911  if (!SIInstrInfo::isSALU(*MI) && !SIInstrInfo::isSMRD(*MI))
912  return false;
913 
914  if (MI->getNumDefs() == 0)
915  return false;
916 
917  const SIRegisterInfo *TRI = ST.getRegisterInfo();
918 
919  auto IsHazardFn = [TRI, MI](const MachineInstr &I) {
920  if (!SIInstrInfo::isVMEM(I) && !SIInstrInfo::isDS(I) &&
921  !SIInstrInfo::isFLAT(I))
922  return false;
923 
924  for (const MachineOperand &Def : MI->defs()) {
925  const MachineOperand *Op =
926  I.findRegisterUseOperand(Def.getReg(), false, TRI);
927  if (!Op)
928  continue;
929  return true;
930  }
931  return false;
932  };
933 
934  auto IsExpiredFn = [](const MachineInstr &MI, int) {
935  return SIInstrInfo::isVALU(MI) ||
936  (MI.getOpcode() == AMDGPU::S_WAITCNT &&
937  !MI.getOperand(0).getImm()) ||
938  (MI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
939  MI.getOperand(0).getImm() == 0xffe3);
940  };
941 
942  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
943  std::numeric_limits<int>::max())
944  return false;
945 
946  const SIInstrInfo *TII = ST.getInstrInfo();
947  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
948  TII->get(AMDGPU::S_WAITCNT_DEPCTR))
949  .addImm(0xffe3);
950  return true;
951 }
952 
953 bool GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) {
954  if (!ST.hasSMEMtoVectorWriteHazard())
955  return false;
956 
957  if (!SIInstrInfo::isVALU(*MI))
958  return false;
959 
960  unsigned SDSTName;
961  switch (MI->getOpcode()) {
962  case AMDGPU::V_READLANE_B32:
963  case AMDGPU::V_READFIRSTLANE_B32:
964  SDSTName = AMDGPU::OpName::vdst;
965  break;
966  default:
967  SDSTName = AMDGPU::OpName::sdst;
968  break;
969  }
970 
971  const SIInstrInfo *TII = ST.getInstrInfo();
972  const SIRegisterInfo *TRI = ST.getRegisterInfo();
973  const AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(ST.getCPU());
974  const MachineOperand *SDST = TII->getNamedOperand(*MI, SDSTName);
975  if (!SDST) {
976  for (const auto &MO : MI->implicit_operands()) {
977  if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg()))) {
978  SDST = &MO;
979  break;
980  }
981  }
982  }
983 
984  if (!SDST)
985  return false;
986 
987  const Register SDSTReg = SDST->getReg();
988  auto IsHazardFn = [SDSTReg, TRI](const MachineInstr &I) {
989  return SIInstrInfo::isSMRD(I) && I.readsRegister(SDSTReg, TRI);
990  };
991 
992  auto IsExpiredFn = [TII, IV](const MachineInstr &MI, int) {
993  if (TII->isSALU(MI)) {
994  switch (MI.getOpcode()) {
995  case AMDGPU::S_SETVSKIP:
996  case AMDGPU::S_VERSION:
997  case AMDGPU::S_WAITCNT_VSCNT:
998  case AMDGPU::S_WAITCNT_VMCNT:
999  case AMDGPU::S_WAITCNT_EXPCNT:
1000  // These instructions cannot mitigate the hazard.
1001  return false;
1002  case AMDGPU::S_WAITCNT_LGKMCNT:
1003  // Reducing lgkmcnt count to 0 always mitigates the hazard.
1004  return (MI.getOperand(1).getImm() == 0) &&
1005  (MI.getOperand(0).getReg() == AMDGPU::SGPR_NULL);
1006  case AMDGPU::S_WAITCNT: {
1007  const int64_t Imm = MI.getOperand(0).getImm();
1008  AMDGPU::Waitcnt Decoded = AMDGPU::decodeWaitcnt(IV, Imm);
1009  return (Decoded.LgkmCnt == 0);
1010  }
1011  default:
1012  // SOPP instructions cannot mitigate the hazard.
1013  if (TII->isSOPP(MI))
1014  return false;
1015  // At this point the SALU can be assumed to mitigate the hazard
1016  // because either:
1017  // (a) it is independent of the at risk SMEM (breaking chain),
1018  // or
1019  // (b) it is dependent on the SMEM, in which case an appropriate
1020  // s_waitcnt lgkmcnt _must_ exist between it and the at risk
1021  // SMEM instruction.
1022  return true;
1023  }
1024  }
1025  return false;
1026  };
1027 
1028  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
1029  std::numeric_limits<int>::max())
1030  return false;
1031 
1032  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
1033  TII->get(AMDGPU::S_MOV_B32), AMDGPU::SGPR_NULL)
1034  .addImm(0);
1035  return true;
1036 }
1037 
1038 bool GCNHazardRecognizer::fixVcmpxExecWARHazard(MachineInstr *MI) {
1039  if (!ST.hasVcmpxExecWARHazard() || !SIInstrInfo::isVALU(*MI))
1040  return false;
1041 
1042  const SIRegisterInfo *TRI = ST.getRegisterInfo();
1043  if (!MI->modifiesRegister(AMDGPU::EXEC, TRI))
1044  return false;
1045 
1046  auto IsHazardFn = [TRI](const MachineInstr &I) {
1047  if (SIInstrInfo::isVALU(I))
1048  return false;
1049  return I.readsRegister(AMDGPU::EXEC, TRI);
1050  };
1051 
1052  const SIInstrInfo *TII = ST.getInstrInfo();
1053  auto IsExpiredFn = [TII, TRI](const MachineInstr &MI, int) {
1054  if (SIInstrInfo::isVALU(MI)) {
1055  if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst))
1056  return true;
1057  for (auto MO : MI.implicit_operands())
1058  if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg())))
1059  return true;
1060  }
1061  if (MI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
1062  (MI.getOperand(0).getImm() & 0xfffe) == 0xfffe)
1063  return true;
1064  return false;
1065  };
1066 
1067  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
1068  std::numeric_limits<int>::max())
1069  return false;
1070 
1071  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
1072  TII->get(AMDGPU::S_WAITCNT_DEPCTR))
1073  .addImm(0xfffe);
1074  return true;
1075 }
1076 
1077 static bool shouldRunLdsBranchVmemWARHazardFixup(const MachineFunction &MF,
1078  const GCNSubtarget &ST) {
1079  if (!ST.hasLdsBranchVmemWARHazard())
1080  return false;
1081 
1082  // Check if the necessary condition for the hazard is met: both LDS and VMEM
1083  // instructions need to appear in the same function.
1084  bool HasLds = false;
1085  bool HasVmem = false;
1086  for (auto &MBB : MF) {
1087  for (auto &MI : MBB) {
1088  HasLds |= SIInstrInfo::isDS(MI);
1089  HasVmem |=
1090  SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI);
1091  if (HasLds && HasVmem)
1092  return true;
1093  }
1094  }
1095  return false;
1096 }
1097 
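// The LDS-branch-VMEM WAR hazard needs both an LDS and a VMEM access
// separated by a branch; the fixup below breaks it by inserting
// "s_waitcnt_vscnt null, 0" when no equivalent waitcnt is already present.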
1098 bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) {
1099  if (!RunLdsBranchVmemWARHazardFixup)
1100  return false;
1101 
1102  assert(ST.hasLdsBranchVmemWARHazard());
1103 
1104  auto IsHazardInst = [](const MachineInstr &MI) {
1105  if (SIInstrInfo::isDS(MI))
1106  return 1;
1107  if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI))
1108  return 2;
1109  return 0;
1110  };
1111 
1112  auto InstType = IsHazardInst(*MI);
1113  if (!InstType)
1114  return false;
1115 
1116  auto IsExpiredFn = [&IsHazardInst](const MachineInstr &I, int) {
1117  return IsHazardInst(I) || (I.getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
1118  I.getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
1119  !I.getOperand(1).getImm());
1120  };
1121 
1122  auto IsHazardFn = [InstType, &IsHazardInst](const MachineInstr &I) {
1123  if (!I.isBranch())
1124  return false;
1125 
1126  auto IsHazardFn = [InstType, IsHazardInst](const MachineInstr &I) {
1127  auto InstType2 = IsHazardInst(I);
1128  return InstType2 && InstType != InstType2;
1129  };
1130 
1131  auto IsExpiredFn = [InstType, &IsHazardInst](const MachineInstr &I, int) {
1132  auto InstType2 = IsHazardInst(I);
1133  if (InstType == InstType2)
1134  return true;
1135 
1136  return I.getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
1137  I.getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
1138  !I.getOperand(1).getImm();
1139  };
1140 
1141  return ::getWaitStatesSince(IsHazardFn, &I, IsExpiredFn) !=
1142  std::numeric_limits<int>::max();
1143  };
1144 
1145  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
1146  std::numeric_limits<int>::max())
1147  return false;
1148 
1149  const SIInstrInfo *TII = ST.getInstrInfo();
1150  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
1151  TII->get(AMDGPU::S_WAITCNT_VSCNT))
1152  .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
1153  .addImm(0);
1154 
1155  return true;
1156 }
1157 
1158 int GCNHazardRecognizer::checkNSAtoVMEMHazard(MachineInstr *MI) {
1159  int NSAtoVMEMWaitStates = 1;
1160 
1161  if (!ST.hasNSAtoVMEMBug())
1162  return 0;
1163 
1164  if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isMTBUF(*MI))
1165  return 0;
1166 
1167  const SIInstrInfo *TII = ST.getInstrInfo();
1168  const auto *Offset = TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
1169  if (!Offset || (Offset->getImm() & 6) == 0)
1170  return 0;
1171 
1172  auto IsHazardFn = [TII](const MachineInstr &I) {
1173  if (!SIInstrInfo::isMIMG(I))
1174  return false;
1175  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(I.getOpcode());
1176  return Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA &&
1177  TII->getInstSizeInBytes(I) >= 16;
1178  };
1179 
1180  return NSAtoVMEMWaitStates - getWaitStatesSince(IsHazardFn, 1);
1181 }
1182 
1183 int GCNHazardRecognizer::checkFPAtomicToDenormModeHazard(MachineInstr *MI) {
1184  int FPAtomicToDenormModeWaitStates = 3;
1185 
1186  if (MI->getOpcode() != AMDGPU::S_DENORM_MODE)
1187  return 0;
1188 
1189  auto IsHazardFn = [](const MachineInstr &I) {
1190  if (!SIInstrInfo::isVMEM(I) && !SIInstrInfo::isFLAT(I))
1191  return false;
1192  return SIInstrInfo::isFPAtomic(I);
1193  };
1194 
1195  auto IsExpiredFn = [](const MachineInstr &MI, int WaitStates) {
1196  if (WaitStates >= 3 || SIInstrInfo::isVALU(MI))
1197  return true;
1198 
1199  switch (MI.getOpcode()) {
1200  case AMDGPU::S_WAITCNT:
1201  case AMDGPU::S_WAITCNT_VSCNT:
1202  case AMDGPU::S_WAITCNT_VMCNT:
1203  case AMDGPU::S_WAITCNT_EXPCNT:
1204  case AMDGPU::S_WAITCNT_LGKMCNT:
1205  case AMDGPU::S_WAIT_IDLE:
1206  return true;
1207  default:
1208  break;
1209  }
1210 
1211  return false;
1212  };
1213 
1214  return FPAtomicToDenormModeWaitStates -
1215  ::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn);
1216 }
1217 
1218 int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) {
1219  assert(SIInstrInfo::isMAI(*MI));
1220 
1221  return ST.hasGFX90AInsts() ? checkMAIHazards90A(MI) : checkMAIHazards908(MI);
1222 }
1223 
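// In the MFMA hazard checks below, the scheduling-model latency of the
// defining MFMA is used as a proxy for its size: latency 2 corresponds to the
// 4x4 variants, 8 to 16x16, and 16 (or more) to 32x32, selecting the matching
// *WaitStates constant.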
1224 int GCNHazardRecognizer::checkMAIHazards908(MachineInstr *MI) {
1225  int WaitStatesNeeded = 0;
1226  unsigned Opc = MI->getOpcode();
1227 
1228  auto IsVALUFn = [](const MachineInstr &MI) {
1229  return SIInstrInfo::isVALU(MI);
1230  };
1231 
1232  if (Opc != AMDGPU::V_ACCVGPR_READ_B32_e64) { // MFMA or v_accvgpr_write
1233  const int LegacyVALUWritesVGPRWaitStates = 2;
1234  const int VALUWritesExecWaitStates = 4;
1235  const int MaxWaitStates = 4;
1236 
1237  int WaitStatesNeededForUse = VALUWritesExecWaitStates -
1238  getWaitStatesSinceDef(AMDGPU::EXEC, IsVALUFn, MaxWaitStates);
1239  WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
1240 
1241  if (WaitStatesNeeded < MaxWaitStates) {
1242  for (const MachineOperand &Use : MI->explicit_uses()) {
1243  const int MaxWaitStates = 2;
1244 
1245  if (!Use.isReg() || !TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
1246  continue;
1247 
1248  int WaitStatesNeededForUse = LegacyVALUWritesVGPRWaitStates -
1249  getWaitStatesSinceDef(Use.getReg(), IsVALUFn, MaxWaitStates);
1250  WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
1251 
1252  if (WaitStatesNeeded == MaxWaitStates)
1253  break;
1254  }
1255  }
1256  }
1257 
1258  auto IsMFMAFn = [](const MachineInstr &MI) {
1259  return SIInstrInfo::isMAI(MI) &&
1260  MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
1261  MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64;
1262  };
1263 
1264  for (const MachineOperand &Op : MI->explicit_operands()) {
1265  if (!Op.isReg() || !TRI.isAGPR(MF.getRegInfo(), Op.getReg()))
1266  continue;
1267 
1268  if (Op.isDef() && Opc != AMDGPU::V_ACCVGPR_WRITE_B32_e64)
1269  continue;
1270 
1271  const int MFMAWritesAGPROverlappedSrcABWaitStates = 4;
1272  const int MFMAWritesAGPROverlappedSrcCWaitStates = 2;
1273  const int MFMA4x4WritesAGPRAccVgprReadWaitStates = 4;
1274  const int MFMA16x16WritesAGPRAccVgprReadWaitStates = 10;
1275  const int MFMA32x32WritesAGPRAccVgprReadWaitStates = 18;
1276  const int MFMA4x4WritesAGPRAccVgprWriteWaitStates = 1;
1277  const int MFMA16x16WritesAGPRAccVgprWriteWaitStates = 7;
1278  const int MFMA32x32WritesAGPRAccVgprWriteWaitStates = 15;
1279  const int MaxWaitStates = 18;
1280  Register Reg = Op.getReg();
1281  unsigned HazardDefLatency = 0;
1282 
1283  auto IsOverlappedMFMAFn = [Reg, &IsMFMAFn, &HazardDefLatency,
1284  this](const MachineInstr &MI) {
1285  if (!IsMFMAFn(MI))
1286  return false;
1287  Register DstReg = MI.getOperand(0).getReg();
1288  if (DstReg == Reg)
1289  return false;
1290  HazardDefLatency =
1291  std::max(HazardDefLatency, TSchedModel.computeInstrLatency(&MI));
1292  return TRI.regsOverlap(DstReg, Reg);
1293  };
1294 
1295  int WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsOverlappedMFMAFn,
1296  MaxWaitStates);
1297  int NeedWaitStates = MFMAWritesAGPROverlappedSrcABWaitStates;
1298  int SrcCIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
1299  int OpNo = MI->getOperandNo(&Op);
1300  if (OpNo == SrcCIdx) {
1301  NeedWaitStates = MFMAWritesAGPROverlappedSrcCWaitStates;
1302  } else if (Opc == AMDGPU::V_ACCVGPR_READ_B32_e64) {
1303  switch (HazardDefLatency) {
1304  case 2: NeedWaitStates = MFMA4x4WritesAGPRAccVgprReadWaitStates;
1305  break;
1306  case 8: NeedWaitStates = MFMA16x16WritesAGPRAccVgprReadWaitStates;
1307  break;
1308  case 16: LLVM_FALLTHROUGH;
1309  default: NeedWaitStates = MFMA32x32WritesAGPRAccVgprReadWaitStates;
1310  break;
1311  }
1312  } else if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64) {
1313  switch (HazardDefLatency) {
1314  case 2: NeedWaitStates = MFMA4x4WritesAGPRAccVgprWriteWaitStates;
1315  break;
1316  case 8: NeedWaitStates = MFMA16x16WritesAGPRAccVgprWriteWaitStates;
1317  break;
1318  case 16: LLVM_FALLTHROUGH;
1319  default: NeedWaitStates = MFMA32x32WritesAGPRAccVgprWriteWaitStates;
1320  break;
1321  }
1322  }
1323 
1324  int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef;
1325  WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
1326 
1327  if (WaitStatesNeeded == MaxWaitStates)
1328  return WaitStatesNeeded; // Early exit.
1329 
1330  auto IsAccVgprWriteFn = [Reg, this](const MachineInstr &MI) {
1331  if (MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64)
1332  return false;
1333  Register DstReg = MI.getOperand(0).getReg();
1334  return TRI.regsOverlap(Reg, DstReg);
1335  };
1336 
1337  const int AccVGPRWriteMFMAReadSrcCWaitStates = 1;
1338  const int AccVGPRWriteMFMAReadSrcABWaitStates = 3;
1339  const int AccVGPRWriteAccVgprReadWaitStates = 3;
1340  NeedWaitStates = AccVGPRWriteMFMAReadSrcABWaitStates;
1341  if (OpNo == SrcCIdx)
1342  NeedWaitStates = AccVGPRWriteMFMAReadSrcCWaitStates;
1343  else if (Opc == AMDGPU::V_ACCVGPR_READ_B32_e64)
1344  NeedWaitStates = AccVGPRWriteAccVgprReadWaitStates;
1345 
1346  WaitStatesNeededForUse = NeedWaitStates -
1347  getWaitStatesSinceDef(Reg, IsAccVgprWriteFn, MaxWaitStates);
1348  WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
1349 
1350  if (WaitStatesNeeded == MaxWaitStates)
1351  return WaitStatesNeeded; // Early exit.
1352  }
1353 
1354  if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64) {
1355  const int MFMA4x4ReadSrcCAccVgprWriteWaitStates = 0;
1356  const int MFMA16x16ReadSrcCAccVgprWriteWaitStates = 5;
1357  const int MFMA32x32ReadSrcCAccVgprWriteWaitStates = 13;
1358  const int MaxWaitStates = 13;
1359  Register DstReg = MI->getOperand(0).getReg();
1360  unsigned HazardDefLatency = 0;
1361 
1362  auto IsSrcCMFMAFn = [DstReg, &IsMFMAFn, &HazardDefLatency,
1363  this](const MachineInstr &MI) {
1364  if (!IsMFMAFn(MI))
1365  return false;
1366  Register Reg = TII.getNamedOperand(MI, AMDGPU::OpName::src2)->getReg();
1367  HazardDefLatency =
1368  std::max(HazardDefLatency, TSchedModel.computeInstrLatency(&MI));
1369  return TRI.regsOverlap(Reg, DstReg);
1370  };
1371 
1372  int WaitStatesSince = getWaitStatesSince(IsSrcCMFMAFn, MaxWaitStates);
1373  int NeedWaitStates;
1374  switch (HazardDefLatency) {
1375  case 2: NeedWaitStates = MFMA4x4ReadSrcCAccVgprWriteWaitStates;
1376  break;
1377  case 8: NeedWaitStates = MFMA16x16ReadSrcCAccVgprWriteWaitStates;
1378  break;
1379  case 16: LLVM_FALLTHROUGH;
1380  default: NeedWaitStates = MFMA32x32ReadSrcCAccVgprWriteWaitStates;
1381  break;
1382  }
1383 
1384  int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSince;
1385  WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
1386  }
1387 
1388  return WaitStatesNeeded;
1389 }
1390 
1391 int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
1392  int WaitStatesNeeded = 0;
1393  unsigned Opc = MI->getOpcode();
1394 
1395  auto IsMFMAFn = [](const MachineInstr &MI) {
1396  return SIInstrInfo::isMAI(MI) &&
1397  MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
1398  MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64;
1399  };
1400 
1401  auto IsLegacyVALUFn = [&IsMFMAFn](const MachineInstr &MI) {
1402  return SIInstrInfo::isVALU(MI) && !IsMFMAFn(MI);
1403  };
1404 
1405  auto IsLegacyVALUNotDotFn = [&IsMFMAFn](const MachineInstr &MI) {
1406  return SIInstrInfo::isVALU(MI) && !IsMFMAFn(MI) && !SIInstrInfo::isDOT(MI);
1407  };
1408 
1409  if (!IsMFMAFn(*MI))
1410  return WaitStatesNeeded;
1411 
1412  const int VALUWritesExecWaitStates = 4;
1413  int WaitStatesNeededForUse = VALUWritesExecWaitStates -
1414  getWaitStatesSinceDef(AMDGPU::EXEC, IsLegacyVALUFn,
1415  VALUWritesExecWaitStates);
1416  WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
1417 
1418  int SrcCIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
1419 
1420  // Loop for both DGEMM and S/HGEMM 2nd instruction.
1421  for (const MachineOperand &Use : MI->explicit_uses()) {
1422  const int LegacyVALUNotDotWritesVGPRWaitStates = 2;
1423  const int SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates = 2;
1424  const int SMFMA16x16WritesVGPROverlappedSMFMASrcCWaitStates = 8;
1425  const int SMFMA32x32WritesVGPROverlappedSMFMASrcCWaitStates = 16;
1426  const int SMFMA4x4WritesVGPROverlappedDMFMASrcCWaitStates = 3;
1427  const int SMFMA16x16WritesVGPROverlappedDMFMASrcCWaitStates = 9;
1428  const int SMFMA32x32WritesVGPROverlappedDMFMASrcCWaitStates = 17;
1429  const int DMFMA16x16WritesVGPROverlappedSrcCWaitStates = 9;
1430  const int DMFMA4x4WritesVGPROverlappedSrcCWaitStates = 4;
1431  const int SMFMA4x4WritesVGPROverlappedSrcABWaitStates = 5;
1432  const int SMFMA16x16WritesVGPROverlappedSrcABWaitStates = 11;
1433  const int SMFMA32x32WritesVGPROverlappedSrcABWaitStates = 19;
1434  const int DMFMA4x4WritesVGPROverlappedMFMASrcABWaitStates = 6;
1435  const int DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates = 11;
1436  const int DMFMA4x4WritesVGPRFullSrcCWaitStates = 4;
1437  const int MaxWaitStates = 19;
1438 
1439  if (!Use.isReg())
1440  continue;
1441  unsigned Reg = Use.getReg();
1442  bool FullReg;
1443  const MachineInstr *MI1;
1444 
1445  auto IsOverlappedDGEMMorXDLFn = [Reg, &IsMFMAFn, &FullReg, &MI1,
1446  this](const MachineInstr &MI) {
1447  if (!IsMFMAFn(MI))
1448  return false;
1449  if (!isDGEMM(MI.getOpcode()) && !isXDL(ST, MI))
1450  return false;
1451  Register DstReg = MI.getOperand(0).getReg();
1452  FullReg = (DstReg == Reg);
1453  MI1 = &MI;
1454  return TRI.regsOverlap(DstReg, Reg);
1455  };
1456 
1457  WaitStatesNeededForUse = LegacyVALUNotDotWritesVGPRWaitStates -
1458  getWaitStatesSinceDef(Reg, IsLegacyVALUNotDotFn, MaxWaitStates);
1459  WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
1460 
1461  int NumWaitStates = getWaitStatesSinceDef(Reg, IsOverlappedDGEMMorXDLFn,
1462  MaxWaitStates);
1463  if (NumWaitStates == std::numeric_limits<int>::max())
1464  continue;
1465 
1466  int OpNo = MI->getOperandNo(&Use);
1467  unsigned Opc1 = MI1->getOpcode();
1468  int NeedWaitStates = 0;
1469  if (OpNo == SrcCIdx) {
1470  if (!isDGEMM(Opc) && isDGEMM(Opc1)) {
1471  NeedWaitStates = 0;
1472  } else if (FullReg) {
1473  if ((Opc == AMDGPU::V_MFMA_F64_4X4X4F64_e64 ||
1474  Opc == AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64) &&
1475  (Opc1 == AMDGPU::V_MFMA_F64_4X4X4F64_e64 ||
1476  Opc1 == AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64))
1477  NeedWaitStates = DMFMA4x4WritesVGPRFullSrcCWaitStates;
1478  } else {
1479  switch (Opc1) {
1480  case AMDGPU::V_MFMA_F64_16X16X4F64_e64:
1481  case AMDGPU::V_MFMA_F64_16X16X4F64_vgprcd_e64:
1482  if (!isXDL(ST, *MI))
1483  NeedWaitStates = DMFMA16x16WritesVGPROverlappedSrcCWaitStates;
1484  break;
1485  case AMDGPU::V_MFMA_F64_4X4X4F64_e64:
1486  case AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64:
1487  if (!isXDL(ST, *MI))
1488  NeedWaitStates = DMFMA4x4WritesVGPROverlappedSrcCWaitStates;
1489  break;
1490  default:
1491  switch (TSchedModel.computeInstrLatency(MI1)) {
1492  case 2:
1493  NeedWaitStates = isDGEMM(Opc)
1494  ? SMFMA4x4WritesVGPROverlappedDMFMASrcCWaitStates
1495  : SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates;
1496  break;
1497  case 8:
1498  NeedWaitStates = isDGEMM(Opc)
1499  ? SMFMA16x16WritesVGPROverlappedDMFMASrcCWaitStates
1500  : SMFMA16x16WritesVGPROverlappedSMFMASrcCWaitStates;
1501  break;
1502  case 16: LLVM_FALLTHROUGH;
1503  default:
1504  NeedWaitStates = isDGEMM(Opc)
1505  ? SMFMA32x32WritesVGPROverlappedDMFMASrcCWaitStates
1506  : SMFMA32x32WritesVGPROverlappedSMFMASrcCWaitStates;
1507  }
1508  }
1509  }
1510  } else {
1511  switch (Opc1) {
1512  case AMDGPU::V_MFMA_F64_16X16X4F64_e64:
1513  case AMDGPU::V_MFMA_F64_16X16X4F64_vgprcd_e64:
1514  NeedWaitStates = DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates;
1515  break;
1516  case AMDGPU::V_MFMA_F64_4X4X4F64_e64:
1517  case AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64:
1518  NeedWaitStates = DMFMA4x4WritesVGPROverlappedMFMASrcABWaitStates;
1519  break;
1520  default:
1521  switch (TSchedModel.computeInstrLatency(MI1)) {
1522  case 2:
1523  NeedWaitStates = SMFMA4x4WritesVGPROverlappedSrcABWaitStates;
1524  break;
1525  case 8:
1526  NeedWaitStates = SMFMA16x16WritesVGPROverlappedSrcABWaitStates;
1527  break;
1528  case 16: LLVM_FALLTHROUGH;
1529  default:
1530  NeedWaitStates = SMFMA32x32WritesVGPROverlappedSrcABWaitStates;
1531  }
1532  }
1533  }
1534  if (WaitStatesNeeded >= NeedWaitStates)
1535  continue;
1536 
1537  WaitStatesNeededForUse = NeedWaitStates - NumWaitStates;
1538  WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
1539 
1540  if (WaitStatesNeeded == MaxWaitStates)
1541  break;
1542  }
1543 
1544  return WaitStatesNeeded;
1545 }
1546 
1547 int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) {
1548  // On gfx90a+ relevant hazards are checked in checkMAIVALUHazards()
1549  if (!ST.hasMAIInsts() || ST.hasGFX90AInsts())
1550  return 0;
1551 
1552  int WaitStatesNeeded = 0;
1553 
1554  auto IsAccVgprReadFn = [](const MachineInstr &MI) {
1555  return MI.getOpcode() == AMDGPU::V_ACCVGPR_READ_B32_e64;
1556  };
1557 
1558  for (const MachineOperand &Op : MI->explicit_uses()) {
1559  if (!Op.isReg() || !TRI.isVGPR(MF.getRegInfo(), Op.getReg()))
1560  continue;
1561 
1562  Register Reg = Op.getReg();
1563 
1564  const int AccVgprReadLdStWaitStates = 2;
1565  const int VALUWriteAccVgprRdWrLdStDepVALUWaitStates = 1;
1566  const int MaxWaitStates = 2;
1567 
1568  int WaitStatesNeededForUse = AccVgprReadLdStWaitStates -
1569  getWaitStatesSinceDef(Reg, IsAccVgprReadFn, MaxWaitStates);
1570  WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
1571 
1572  if (WaitStatesNeeded == MaxWaitStates)
1573  return WaitStatesNeeded; // Early exit.
1574 
1575  auto IsVALUAccVgprRdWrCheckFn = [Reg, this](const MachineInstr &MI) {
1576  if (MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64 &&
1577  MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64)
1578  return false;
1579  auto IsVALUFn = [](const MachineInstr &MI) {
1580  return SIInstrInfo::isVALU(MI);
1581  };
1582  return getWaitStatesSinceDef(Reg, IsVALUFn, 2 /*MaxWaitStates*/) <
1583  std::numeric_limits<int>::max();
1584  };
1585 
1586  WaitStatesNeededForUse = VALUWriteAccVgprRdWrLdStDepVALUWaitStates -
1587  getWaitStatesSince(IsVALUAccVgprRdWrCheckFn, MaxWaitStates);
1588  WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
1589  }
1590 
1591  return WaitStatesNeeded;
1592 }
1593 
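// checkMAIVALUHazards() covers the gfx90a+ cases where a VALU, VMEM, DS, FLAT
// or EXP instruction reads or writes a VGPR recently produced by a DGEMM/XDL
// MFMA or a DOT instruction, including the WAR case where an SMFMA still
// reads the register as src2.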
1594 int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
1595  if (!ST.hasGFX90AInsts())
1596  return 0;
1597 
1598  auto IsMFMAFn = [](const MachineInstr &MI) -> bool {
1599  return SIInstrInfo::isMAI(MI) &&
1600  MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
1601  MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64;
1602  };
1603 
1604  auto IsDGEMMFn = [](const MachineInstr &MI) -> bool {
1605  return isDGEMM(MI.getOpcode());
1606  };
1607 
1608  // This is checked in checkMAIHazards90A()
1609  if (IsMFMAFn(*MI))
1610  return 0;
1611 
1612  int WaitStatesNeeded = 0;
1613 
1614  bool IsMemOrExport = SIInstrInfo::isVMEM(*MI) ||
1615  SIInstrInfo::isFLAT(*MI) ||
1616  SIInstrInfo::isDS(*MI) ||
1617  SIInstrInfo::isEXP(*MI);
1618  bool IsVALU = SIInstrInfo::isVALU(*MI);
1619 
1620  const MachineInstr *MFMA = nullptr;
1621  unsigned Reg;
1622  auto IsDGEMMorXDLWriteFn = [&Reg, &IsMFMAFn, &MFMA,
1623  this](const MachineInstr &MI) {
1624  if (!IsMFMAFn(MI) || !TRI.regsOverlap(MI.getOperand(0).getReg(), Reg))
1625  return false;
1626  if (!isDGEMM(MI.getOpcode()) && !isXDL(ST, MI))
1627  return false;
1628  MFMA = &MI;
1629  return true;
1630  };
1631 
1632  const MachineInstr *DOT = nullptr;
1633  auto IsDotWriteFn = [&Reg, &DOT, this](const MachineInstr &MI) {
1634  if (!SIInstrInfo::isDOT(MI) ||
1635  !TRI.regsOverlap(MI.getOperand(0).getReg(), Reg))
1636  return false;
1637  DOT = &MI;
1638  return true;
1639  };
1640 
1641  int SrcCIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
1642  AMDGPU::OpName::src2);
1643 
1644  if (IsMemOrExport || IsVALU) {
1645  const int SMFMA4x4WriteVgprVALUMemExpReadWaitStates = 5;
1646  const int SMFMA16x16WriteVgprVALUMemExpReadWaitStates = 11;
1647  const int SMFMA32x32WriteVgprVALUMemExpReadWaitStates = 19;
1648  const int DMFMA4x4WriteVgprMemExpReadWaitStates = 9;
1649  const int DMFMA16x16WriteVgprMemExpReadWaitStates = 18;
1650  const int DMFMA4x4WriteVgprVALUReadWaitStates = 6;
1651  const int DMFMA16x16WriteVgprVALUReadWaitStates = 11;
1652  const int DotWriteSameDotReadSrcAB = 3;
1653  const int DotWriteDifferentVALURead = 3;
1654  const int MaxWaitStates = 19;
1655 
1656  for (const MachineOperand &Use : MI->explicit_uses()) {
1657  if (!Use.isReg())
1658  continue;
1659  Reg = Use.getReg();
1660 
1661  DOT = nullptr;
1662  int WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsDotWriteFn,
1663  MaxWaitStates);
1664  if (DOT) {
1665  int NeedWaitStates = 0;
1666  if (DOT->getOpcode() == MI->getOpcode()) {
1667  if (&Use - &MI->getOperand(0) != SrcCIdx)
1668  NeedWaitStates = DotWriteSameDotReadSrcAB;
1669  } else {
1670  NeedWaitStates = DotWriteDifferentVALURead;
1671  }
1672 
1673  int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef;
1674  WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
1675  }
1676 
1677  MFMA = nullptr;
1678  WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsDGEMMorXDLWriteFn,
1679  MaxWaitStates);
1680  if (!MFMA)
1681  continue;
1682 
1683  unsigned HazardDefLatency = TSchedModel.computeInstrLatency(MFMA);
1684  int NeedWaitStates = MaxWaitStates;
1685  switch (HazardDefLatency) {
1686  case 2:
1687  NeedWaitStates = SMFMA4x4WriteVgprVALUMemExpReadWaitStates;
1688  break;
1689  case 4:
1690  assert(isDGEMM(MFMA->getOpcode()));
1691  NeedWaitStates =
1692  IsMemOrExport ? DMFMA4x4WriteVgprMemExpReadWaitStates
1693  : DMFMA4x4WriteVgprVALUReadWaitStates;
1694  break;
1695  case 8:
1696  NeedWaitStates = SMFMA16x16WriteVgprVALUMemExpReadWaitStates;
1697  break;
1698  case 16: LLVM_FALLTHROUGH;
1699  default:
1700  NeedWaitStates =
1701  isDGEMM(MFMA->getOpcode())
1702  ? IsMemOrExport ? DMFMA16x16WriteVgprMemExpReadWaitStates
1703  : DMFMA16x16WriteVgprVALUReadWaitStates
1704  : SMFMA32x32WriteVgprVALUMemExpReadWaitStates;
1705  break;
1706  }
1707 
1708  int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef;
1709  WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
1710 
1711  if (WaitStatesNeeded == MaxWaitStates)
1712  break;
1713  }
1714  }
1715 
1716  unsigned Opc = MI->getOpcode();
1717  const int DMFMAToFMA64WaitStates = 2;
1718  if ((Opc == AMDGPU::V_FMA_F64_e64 ||
1719  Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64 ||
1720  Opc == AMDGPU::V_FMAC_F64_dpp) &&
1721  WaitStatesNeeded < DMFMAToFMA64WaitStates) {
1722  int WaitStatesNeededForUse = DMFMAToFMA64WaitStates -
1723  getWaitStatesSince(IsDGEMMFn, DMFMAToFMA64WaitStates);
1724  WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
1725  }
1726 
1727  if (!IsVALU && !IsMemOrExport)
1728  return WaitStatesNeeded;
1729 
1730  for (const MachineOperand &Def : MI->defs()) {
1731  const int SMFMA4x4WriteVgprVALUWawWaitStates = 5;
1732  const int SMFMA16x16WriteVgprVALUWawWaitStates = 11;
1733  const int SMFMA32x32WriteVgprVALUWawWaitStates = 19;
1734  const int SMFMA4x4ReadVgprVALUWarWaitStates = 1;
1735  const int SMFMA16x16ReadVgprVALUWarWaitStates = 7;
1736  const int SMFMA32x32ReadVgprVALUWarWaitStates = 15;
1737  const int DMFMA4x4WriteVgprVALUWriteWaitStates = 6;
1738  const int DMFMA16x16WriteVgprVALUWriteWaitStates = 11;
1739  const int DotWriteDifferentVALUWrite = 3;
1740  const int MaxWaitStates = 19;
1741  const int MaxWarWaitStates = 15;
1742 
1743  Reg = Def.getReg();
1744 
1745  DOT = nullptr;
1746  int WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsDotWriteFn,
1747  MaxWaitStates);
1748  if (DOT && DOT->getOpcode() != MI->getOpcode())
1749  WaitStatesNeeded = std::max(WaitStatesNeeded, DotWriteDifferentVALUWrite -
1750  WaitStatesSinceDef);
1751 
1752  MFMA = nullptr;
1753  WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsDGEMMorXDLWriteFn,
1754  MaxWaitStates);
1755  if (MFMA) {
1756  int NeedWaitStates = MaxWaitStates;
1757  switch (TSchedModel.computeInstrLatency(MFMA)) {
1758  case 2:
1759  NeedWaitStates = SMFMA4x4WriteVgprVALUWawWaitStates;
1760  break;
1761  case 4:
1762  assert(isDGEMM(MFMA->getOpcode()));
1763  NeedWaitStates = DMFMA4x4WriteVgprVALUWriteWaitStates;
1764  break;
1765  case 8:
1766  NeedWaitStates = SMFMA16x16WriteVgprVALUWawWaitStates;
1767  break;
1768  case 16: LLVM_FALLTHROUGH;
1769  default:
1770  NeedWaitStates = isDGEMM(MFMA->getOpcode())
1771  ? DMFMA16x16WriteVgprVALUWriteWaitStates
1772  : SMFMA32x32WriteVgprVALUWawWaitStates;
1773  break;
1774  }
1775 
1776  int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef;
1777  WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
1778 
1779  if (WaitStatesNeeded == MaxWaitStates)
1780  break;
1781  }
1782 
1783  auto IsSMFMAReadAsCFn = [&Reg, &IsMFMAFn, &MFMA,
1784  this](const MachineInstr &MI) {
1785  if (!IsMFMAFn(MI) || isDGEMM(MI.getOpcode()) ||
1786  !MI.readsRegister(Reg, &TRI))
1787  return false;
1788 
1789  const MachineOperand *SrcC =
1790  TII.getNamedOperand(MI, AMDGPU::OpName::src2);
1791  assert(SrcC);
1792  if (!SrcC->isReg() || !TRI.regsOverlap(SrcC->getReg(), Reg))
1793  return false;
1794 
1795  MFMA = &MI;
1796  return true;
1797  };
1798 
1799  MFMA = nullptr;
1800  int WaitStatesSinceUse = getWaitStatesSince(IsSMFMAReadAsCFn,
1801  MaxWarWaitStates);
1802  if (!MFMA)
1803  continue;
1804 
1805  unsigned HazardDefLatency = TSchedModel.computeInstrLatency(MFMA);
1806  int NeedWaitStates = MaxWaitStates;
1807  switch (HazardDefLatency) {
1808  case 2: NeedWaitStates = SMFMA4x4ReadVgprVALUWarWaitStates;
1809  break;
1810  case 8: NeedWaitStates = SMFMA16x16ReadVgprVALUWarWaitStates;
1811  break;
1812  case 16: LLVM_FALLTHROUGH;
1813  default: NeedWaitStates = SMFMA32x32ReadVgprVALUWarWaitStates;
1814  break;
1815  }
1816 
1817  int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceUse;
1818  WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
1819  }
1820 
1821  return WaitStatesNeeded;
1822 }
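The same bookkeeping pattern recurs throughout the hazard checks above: each rule supplies a required distance in wait states, the recognizer measures how many wait states have already elapsed since the hazardous def (or use), and the shortfall is maximized over all operands. The snippet below is a standalone sketch of that arithmetic only; the names (OperandHazard, waitStatesNeeded) are invented for the example and are not part of the LLVM sources.

    // Standalone sketch, illustrative names only: "required distance minus
    // distance already covered, clamped at zero, maximized over all operands".
    #include <algorithm>
    #include <vector>

    struct OperandHazard {
      int NeedWaitStates;      // distance demanded by the hazard rule
      int WaitStatesSinceDef;  // slots already seen since the hazardous def/use
    };

    // How many wait states (NOPs) must still be inserted before the candidate
    // instruction can issue safely.
    int waitStatesNeeded(const std::vector<OperandHazard> &Hazards) {
      int Needed = 0;
      for (const OperandHazard &H : Hazards)
        Needed = std::max(Needed, H.NeedWaitStates - H.WaitStatesSinceDef);
      return std::max(Needed, 0);  // never report a negative requirement
    }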
1823 
1824 bool GCNHazardRecognizer::ShouldPreferAnother(SUnit *SU) {
1825  if (!SU->isInstr())
1826  return false;
1827 
1828  const MachineInstr *MAI = nullptr;
1829  auto IsMFMAFn = [&MAI](const MachineInstr &MI) {
1830  MAI = nullptr;
1831  if (SIInstrInfo::isMAI(MI) &&
1832  MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
1833  MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64)
1834  MAI = &MI;
1835  return MAI != nullptr;
1836  };
1837 
1838  MachineInstr *MI = SU->getInstr();
1839  if (IsMFMAFn(*MI)) {
1840  int W = getWaitStatesSince(IsMFMAFn, 16);
1841  if (MAI)
1842  return W < (int)TSchedModel.computeInstrLatency(MAI);
1843  }
1844 
1845  return false;
1846 }
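ShouldPreferAnother above asks the scheduler to defer an MFMA candidate while a previously issued MFMA is still inside its latency window. A minimal sketch of that comparison follows; the names and numeric values are made up for the example and do not come from the LLVM sources.

    // Hypothetical values for illustration only.
    unsigned PrevMFMALatency = 8;     // latency of the most recently seen MFMA
    int WaitStatesSincePrevMFMA = 3;  // instructions issued since that MFMA

    // Mirrors the test `W < (int)TSchedModel.computeInstrLatency(MAI)`:
    // while the previous MFMA has not fully drained, prefer another candidate.
    bool preferAnother() {
      return WaitStatesSincePrevMFMA < (int)PrevMFMALatency;
    }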