//===-- GCNHazardRecognizers.cpp - GCN Hazard Recognizer Impls ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements hazard recognizers for scheduling on GCN processors.
//
//===----------------------------------------------------------------------===//

#include "GCNHazardRecognizer.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/ScheduleDAG.h"

using namespace llvm;

//===----------------------------------------------------------------------===//
// Hazard Recognizer Implementation
//===----------------------------------------------------------------------===//

static bool shouldRunLdsBranchVmemWARHazardFixup(const MachineFunction &MF,
                                                 const GCNSubtarget &ST);

GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
  IsHazardRecognizerMode(false),
  CurrCycleInstr(nullptr),
  MF(MF),
  ST(MF.getSubtarget<GCNSubtarget>()),
  TII(*ST.getInstrInfo()),
  TRI(TII.getRegisterInfo()),
  ClauseUses(TRI.getNumRegUnits()),
  ClauseDefs(TRI.getNumRegUnits()) {
  MaxLookAhead = MF.getRegInfo().isPhysRegUsed(AMDGPU::AGPR0) ? 19 : 5;
  TSchedModel.init(&ST);
  RunLdsBranchVmemWARHazardFixup = shouldRunLdsBranchVmemWARHazardFixup(MF, ST);
}

void GCNHazardRecognizer::Reset() {
  EmittedInstrs.clear();
}

void GCNHazardRecognizer::EmitInstruction(SUnit *SU) {
  EmitInstruction(SU->getInstr());
}

void GCNHazardRecognizer::EmitInstruction(MachineInstr *MI) {
  CurrCycleInstr = MI;
}

static bool isDivFMas(unsigned Opcode) {
  return Opcode == AMDGPU::V_DIV_FMAS_F32_e64 || Opcode == AMDGPU::V_DIV_FMAS_F64_e64;
}

static bool isSGetReg(unsigned Opcode) {
  return Opcode == AMDGPU::S_GETREG_B32;
}

static bool isSSetReg(unsigned Opcode) {
  switch (Opcode) {
  case AMDGPU::S_SETREG_B32:
  case AMDGPU::S_SETREG_B32_mode:
  case AMDGPU::S_SETREG_IMM32_B32:
  case AMDGPU::S_SETREG_IMM32_B32_mode:
    return true;
  }
  return false;
}

static bool isRWLane(unsigned Opcode) {
  return Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32;
}

static bool isRFE(unsigned Opcode) {
  return Opcode == AMDGPU::S_RFE_B64;
}

static bool isSMovRel(unsigned Opcode) {
  switch (Opcode) {
  case AMDGPU::S_MOVRELS_B32:
  case AMDGPU::S_MOVRELS_B64:
  case AMDGPU::S_MOVRELD_B32:
  case AMDGPU::S_MOVRELD_B64:
    return true;
  default:
    return false;
  }
}

static bool isDGEMM(unsigned Opcode) {
  return Opcode == AMDGPU::V_MFMA_F64_4X4X4F64_e64 ||
         Opcode == AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64 ||
         Opcode == AMDGPU::V_MFMA_F64_16X16X4F64_e64 ||
         Opcode == AMDGPU::V_MFMA_F64_16X16X4F64_vgprcd_e64;
}

static bool isXDL(const GCNSubtarget &ST, const MachineInstr &MI) {
  unsigned Opcode = MI.getOpcode();

  if (!SIInstrInfo::isMAI(MI) ||
      isDGEMM(Opcode) ||
      Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
      Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64)
    return false;

  return true;
}

static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII,
                                    const MachineInstr &MI) {
  if (TII.isAlwaysGDS(MI.getOpcode()))
    return true;

  switch (MI.getOpcode()) {
  case AMDGPU::S_SENDMSG:
  case AMDGPU::S_SENDMSGHALT:
  case AMDGPU::S_TTRACEDATA:
    return true;
  // These DS opcodes don't support GDS.
  case AMDGPU::DS_NOP:
  case AMDGPU::DS_PERMUTE_B32:
  case AMDGPU::DS_BPERMUTE_B32:
    return false;
  default:
    if (TII.isDS(MI.getOpcode())) {
      int GDS = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::gds);
      if (MI.getOperand(GDS).getImm())
        return true;
    }
    return false;
  }
}

static bool isPermlane(const MachineInstr &MI) {
  unsigned Opcode = MI.getOpcode();
  return Opcode == AMDGPU::V_PERMLANE16_B32_e64 ||
         Opcode == AMDGPU::V_PERMLANEX16_B32_e64;
}

static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
  const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
                                                     AMDGPU::OpName::simm16);
  return RegOp->getImm() & AMDGPU::Hwreg::ID_MASK_;
}

ScheduleHazardRecognizer::HazardType
GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
  MachineInstr *MI = SU->getInstr();
  // If we are not in "HazardRecognizerMode" and therefore not being run from
  // the scheduler, track possible stalls from hazards but don't insert noops.
  auto HazardType = IsHazardRecognizerMode ? NoopHazard : Hazard;

  if (MI->isBundle())
    return NoHazard;

  if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0)
    return HazardType;

  if (ST.hasNSAtoVMEMBug() && checkNSAtoVMEMHazard(MI) > 0)
    return HazardType;

  if (checkFPAtomicToDenormModeHazard(MI) > 0)
    return HazardType;

  if (ST.hasNoDataDepHazard())
    return NoHazard;

  // FIXME: Should flat be considered vmem?
  if ((SIInstrInfo::isVMEM(*MI) ||
       SIInstrInfo::isFLAT(*MI))
      && checkVMEMHazards(MI) > 0)
    return HazardType;

  if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0)
    return HazardType;

  if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0)
    return HazardType;

  if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0)
    return HazardType;

  if (isRWLane(MI->getOpcode()) && checkRWLaneHazards(MI) > 0)
    return HazardType;

  if ((SIInstrInfo::isVALU(*MI) || SIInstrInfo::isVMEM(*MI) ||
       SIInstrInfo::isFLAT(*MI) || SIInstrInfo::isDS(*MI) ||
       SIInstrInfo::isEXP(*MI)) && checkMAIVALUHazards(MI) > 0)
    return HazardType;

  if (isSGetReg(MI->getOpcode()) && checkGetRegHazards(MI) > 0)
    return HazardType;

  if (isSSetReg(MI->getOpcode()) && checkSetRegHazards(MI) > 0)
    return HazardType;

  if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0)
    return HazardType;

  if (ST.hasReadM0MovRelInterpHazard() &&
      (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) &&
      checkReadM0Hazards(MI) > 0)
    return HazardType;

  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI) &&
      checkReadM0Hazards(MI) > 0)
    return HazardType;

  if (SIInstrInfo::isMAI(*MI) && checkMAIHazards(MI) > 0)
    return HazardType;

  if ((SIInstrInfo::isVMEM(*MI) ||
       SIInstrInfo::isFLAT(*MI) ||
       SIInstrInfo::isDS(*MI)) && checkMAILdStHazards(MI) > 0)
    return HazardType;

  if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0)
    return HazardType;

  return NoHazard;
}

static void insertNoopsInBundle(MachineInstr *MI, const SIInstrInfo &TII,
                                unsigned Quantity) {
  while (Quantity > 0) {
    unsigned Arg = std::min(Quantity, 8u);
    Quantity -= Arg;
    BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::S_NOP))
        .addImm(Arg - 1);
  }
}
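
// For example, a request for 10 wait states becomes "s_nop 7" followed by
// "s_nop 1": the s_nop immediate encodes (count - 1), so one s_nop covers at
// most 8 wait states.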

void GCNHazardRecognizer::processBundle() {
  MachineBasicBlock::instr_iterator MI = std::next(CurrCycleInstr->getIterator());
  MachineBasicBlock::instr_iterator E = CurrCycleInstr->getParent()->instr_end();
  // Check bundled MachineInstr's for hazards.
  for (; MI != E && MI->isInsideBundle(); ++MI) {
    CurrCycleInstr = &*MI;
    unsigned WaitStates = PreEmitNoopsCommon(CurrCycleInstr);

    if (IsHazardRecognizerMode) {
      fixHazards(CurrCycleInstr);

      insertNoopsInBundle(CurrCycleInstr, TII, WaitStates);
    }

    // It's unnecessary to track more than MaxLookAhead instructions. Since we
    // include the bundled MI directly after, only add a maximum of
    // (MaxLookAhead - 1) noops to EmittedInstrs.
    for (unsigned i = 0, e = std::min(WaitStates, MaxLookAhead - 1); i < e; ++i)
      EmittedInstrs.push_front(nullptr);

    EmittedInstrs.push_front(CurrCycleInstr);
    EmittedInstrs.resize(MaxLookAhead);
  }
  CurrCycleInstr = nullptr;
}
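
// Bundled instructions are processed as if they were emitted one by one, so
// hazards between instructions of the same bundle are detected and fixed as
// well.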

unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
  IsHazardRecognizerMode = true;
  CurrCycleInstr = MI;
  unsigned W = PreEmitNoopsCommon(MI);
  fixHazards(MI);
  CurrCycleInstr = nullptr;
  return W;
}

unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
  if (MI->isBundle())
    return 0;

  int WaitStates = 0;

  if (SIInstrInfo::isSMRD(*MI))
    return std::max(WaitStates, checkSMRDHazards(MI));

  if (ST.hasNSAtoVMEMBug())
    WaitStates = std::max(WaitStates, checkNSAtoVMEMHazard(MI));

  WaitStates = std::max(WaitStates, checkFPAtomicToDenormModeHazard(MI));

  if (ST.hasNoDataDepHazard())
    return WaitStates;

  if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI))
    WaitStates = std::max(WaitStates, checkVMEMHazards(MI));

  if (SIInstrInfo::isVALU(*MI))
    WaitStates = std::max(WaitStates, checkVALUHazards(MI));

  if (SIInstrInfo::isDPP(*MI))
    WaitStates = std::max(WaitStates, checkDPPHazards(MI));

  if (isDivFMas(MI->getOpcode()))
    WaitStates = std::max(WaitStates, checkDivFMasHazards(MI));

  if (isRWLane(MI->getOpcode()))
    WaitStates = std::max(WaitStates, checkRWLaneHazards(MI));

  if ((SIInstrInfo::isVALU(*MI) || SIInstrInfo::isVMEM(*MI) ||
       SIInstrInfo::isFLAT(*MI) || SIInstrInfo::isDS(*MI) ||
       SIInstrInfo::isEXP(*MI)) && checkMAIVALUHazards(MI) > 0)
    WaitStates = std::max(WaitStates, checkMAIVALUHazards(MI));

  if (MI->isInlineAsm())
    return std::max(WaitStates, checkInlineAsmHazards(MI));

  if (isSGetReg(MI->getOpcode()))
    return std::max(WaitStates, checkGetRegHazards(MI));

  if (isSSetReg(MI->getOpcode()))
    return std::max(WaitStates, checkSetRegHazards(MI));

  if (isRFE(MI->getOpcode()))
    return std::max(WaitStates, checkRFEHazards(MI));

  if (ST.hasReadM0MovRelInterpHazard() && (TII.isVINTRP(*MI) ||
                                           isSMovRel(MI->getOpcode())))
    return std::max(WaitStates, checkReadM0Hazards(MI));

  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI))
    return std::max(WaitStates, checkReadM0Hazards(MI));

  if (SIInstrInfo::isMAI(*MI))
    return std::max(WaitStates, checkMAIHazards(MI));

  if (SIInstrInfo::isVMEM(*MI) ||
      SIInstrInfo::isFLAT(*MI) ||
      SIInstrInfo::isDS(*MI))
    return std::max(WaitStates, checkMAILdStHazards(MI));

  return WaitStates;
}

void GCNHazardRecognizer::EmitNoop() {
  EmittedInstrs.push_front(nullptr);
}

void GCNHazardRecognizer::AdvanceCycle() {
  // When the scheduler detects a stall, it will call AdvanceCycle() without
  // emitting any instructions.
  if (!CurrCycleInstr) {
    EmittedInstrs.push_front(nullptr);
    return;
  }

  if (CurrCycleInstr->isBundle()) {
    processBundle();
    return;
  }

  unsigned NumWaitStates = TII.getNumWaitStates(*CurrCycleInstr);
  if (!NumWaitStates) {
    CurrCycleInstr = nullptr;
    return;
  }

  // Keep track of emitted instructions.
  EmittedInstrs.push_front(CurrCycleInstr);

  // Add a nullptr for each additional wait state after the first. Make sure
  // not to add more than getMaxLookAhead() items to the list, since we
  // truncate the list to that size right after this loop.
  for (unsigned i = 1, e = std::min(NumWaitStates, getMaxLookAhead());
       i < e; ++i) {
    EmittedInstrs.push_front(nullptr);
  }

  // getMaxLookAhead() is the largest number of wait states we will ever need
  // to insert, so there is no point in keeping track of more than that many
  // wait states.
  EmittedInstrs.resize(getMaxLookAhead());

  CurrCycleInstr = nullptr;
}
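
// After AdvanceCycle(), EmittedInstrs models the last getMaxLookAhead() wait
// states: the front is the most recent cycle, and a nullptr entry stands for
// one wait state with no tracked instruction.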

void GCNHazardRecognizer::RecedeCycle() {
  llvm_unreachable("hazard recognizer does not support bottom-up scheduling.");
}

//===----------------------------------------------------------------------===//
// Helper Functions
//===----------------------------------------------------------------------===//

typedef function_ref<bool(const MachineInstr &, int WaitStates)> IsExpiredFn;

// Returns the minimum number of wait states since \p I, walking all
// predecessors. Only scans until \p IsExpired returns true.
// Can only be run in hazard recognizer mode.
static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
                              const MachineBasicBlock *MBB,
                              MachineBasicBlock::const_reverse_instr_iterator I,
                              int WaitStates, IsExpiredFn IsExpired,
                              DenseSet<const MachineBasicBlock *> &Visited) {
  for (auto E = MBB->instr_rend(); I != E; ++I) {
    // Don't add WaitStates for parent BUNDLE instructions.
    if (I->isBundle())
      continue;

    if (IsHazard(*I))
      return WaitStates;

    if (I->isInlineAsm())
      continue;

    WaitStates += SIInstrInfo::getNumWaitStates(*I);

    if (IsExpired(*I, WaitStates))
      return std::numeric_limits<int>::max();
  }

  int MinWaitStates = std::numeric_limits<int>::max();
  for (MachineBasicBlock *Pred : MBB->predecessors()) {
    if (!Visited.insert(Pred).second)
      continue;

    int W = getWaitStatesSince(IsHazard, Pred, Pred->instr_rbegin(),
                               WaitStates, IsExpired, Visited);

    MinWaitStates = std::min(MinWaitStates, W);
  }

  return MinWaitStates;
}

static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
                              const MachineInstr *MI, IsExpiredFn IsExpired) {
  DenseSet<const MachineBasicBlock *> Visited;
  return getWaitStatesSince(IsHazard, MI->getParent(),
                            std::next(MI->getReverseIterator()),
                            0, IsExpired, Visited);
}

int GCNHazardRecognizer::getWaitStatesSince(IsHazardFn IsHazard, int Limit) {
  if (IsHazardRecognizerMode) {
    auto IsExpiredFn = [Limit](const MachineInstr &, int WaitStates) {
      return WaitStates >= Limit;
    };
    return ::getWaitStatesSince(IsHazard, CurrCycleInstr, IsExpiredFn);
  }

  int WaitStates = 0;
  for (MachineInstr *MI : EmittedInstrs) {
    if (MI) {
      if (IsHazard(*MI))
        return WaitStates;

      if (MI->isInlineAsm())
        continue;
    }
    ++WaitStates;

    if (WaitStates >= Limit)
      break;
  }
  return std::numeric_limits<int>::max();
}

int GCNHazardRecognizer::getWaitStatesSinceDef(unsigned Reg,
                                               IsHazardFn IsHazardDef,
                                               int Limit) {
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  auto IsHazardFn = [IsHazardDef, TRI, Reg](const MachineInstr &MI) {
    return IsHazardDef(MI) && MI.modifiesRegister(Reg, TRI);
  };

  return getWaitStatesSince(IsHazardFn, Limit);
}

int GCNHazardRecognizer::getWaitStatesSinceSetReg(IsHazardFn IsHazard,
                                                  int Limit) {
  auto IsHazardFn = [IsHazard](const MachineInstr &MI) {
    return isSSetReg(MI.getOpcode()) && IsHazard(MI);
  };

  return getWaitStatesSince(IsHazardFn, Limit);
}

//===----------------------------------------------------------------------===//
// No-op Hazard Detection
//===----------------------------------------------------------------------===//

static void addRegUnits(const SIRegisterInfo &TRI, BitVector &BV,
                        MCRegister Reg) {
  for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI)
    BV.set(*RUI);
}

static void addRegsToSet(const SIRegisterInfo &TRI,
                         iterator_range<MachineInstr::const_mop_iterator> Ops,
                         BitVector &Set) {
  for (const MachineOperand &Op : Ops) {
    if (Op.isReg())
      addRegUnits(TRI, Set, Op.getReg().asMCReg());
  }
}

void GCNHazardRecognizer::addClauseInst(const MachineInstr &MI) {
  // XXX: Do we need to worry about implicit operands?
  addRegsToSet(TRI, MI.defs(), ClauseDefs);
  addRegsToSet(TRI, MI.uses(), ClauseUses);
}

static bool breaksSMEMSoftClause(MachineInstr *MI) {
  return !SIInstrInfo::isSMRD(*MI);
}

static bool breaksVMEMSoftClause(MachineInstr *MI) {
  return !SIInstrInfo::isVMEM(*MI) && !SIInstrInfo::isFLAT(*MI);
}

int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) {
  // SMEM soft clauses are only present on VI+, and only matter if xnack is
  // enabled.
  if (!ST.isXNACKEnabled())
    return 0;

  bool IsSMRD = TII.isSMRD(*MEM);

  resetClause();

  // A soft-clause is any group of consecutive SMEM instructions. The
  // instructions in this group may return out of order and/or may be
  // replayed (i.e. the same instruction issued more than once).
  //
  // In order to handle these situations correctly we need to make sure that
  // when a clause has more than one instruction, no instruction in the clause
  // writes to a register that is read by another instruction in the clause
  // (including itself). If we encounter this situation, we need to break the
  // clause by inserting a non SMEM instruction.

  for (MachineInstr *MI : EmittedInstrs) {
    // When we hit a non-SMEM instruction then we have passed the start of the
    // clause and we can stop.
    if (!MI)
      break;

    if (IsSMRD ? breaksSMEMSoftClause(MI) : breaksVMEMSoftClause(MI))
      break;

    addClauseInst(*MI);
  }

  if (ClauseDefs.none())
    return 0;

  // We need to make sure not to put loads and stores in the same clause if they
  // use the same address. For now, just start a new clause whenever we see a
  // store.
  if (MEM->mayStore())
    return 1;

  addClauseInst(*MEM);

  // If the set of defs and uses intersect then we cannot add this instruction
  // to the clause, so we have a hazard.
  return ClauseDefs.anyCommon(ClauseUses) ? 1 : 0;
}
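
// A sketch of the pattern this breaks (registers are hypothetical):
//   s_load_dwordx2 s[0:1], s[4:5], 0x0
//   s_load_dword   s2, s[0:1], 0x4    ; reads s[0:1], written by the previous
//                                     ; SMEM in the same soft clause
// With XNACK the first load may be replayed after the second has executed, so
// a non-SMEM instruction must be inserted between them.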

int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
  int WaitStatesNeeded = checkSoftClauseHazards(SMRD);

  // This SMRD hazard only affects SI.
  if (!ST.hasSMRDReadVALUDefHazard())
    return WaitStatesNeeded;

  // A read of an SGPR by an SMRD instruction requires 4 wait states when the
  // SGPR was written by a VALU instruction.
  int SmrdSgprWaitStates = 4;
  auto IsHazardDefFn = [this](const MachineInstr &MI) {
    return TII.isVALU(MI);
  };
  auto IsBufferHazardDefFn = [this](const MachineInstr &MI) {
    return TII.isSALU(MI);
  };

  bool IsBufferSMRD = TII.isBufferSMRD(*SMRD);

  for (const MachineOperand &Use : SMRD->uses()) {
    if (!Use.isReg())
      continue;
    int WaitStatesNeededForUse =
        SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn,
                                                   SmrdSgprWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

    // This fixes what appears to be undocumented hardware behavior in SI where
    // s_mov writing a descriptor and s_buffer_load_dword reading the descriptor
    // needs some number of nops in between. We don't know how many we need, but
    // let's use 4. This wasn't discovered before probably because the only
    // case when this happens is when we expand a 64-bit pointer into a full
    // descriptor and use s_buffer_load_dword instead of s_load_dword, which was
    // probably never encountered in the closed-source land.
    if (IsBufferSMRD) {
      int WaitStatesNeededForUse =
          SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(),
                                                     IsBufferHazardDefFn,
                                                     SmrdSgprWaitStates);
      WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
    }
  }

  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkVMEMHazards(MachineInstr *VMEM) {
  if (!ST.hasVMEMReadSGPRVALUDefHazard())
    return 0;

  int WaitStatesNeeded = checkSoftClauseHazards(VMEM);

  // A read of an SGPR by a VMEM instruction requires 5 wait states when the
  // SGPR was written by a VALU instruction.
  const int VmemSgprWaitStates = 5;
  auto IsHazardDefFn = [this](const MachineInstr &MI) {
    return TII.isVALU(MI);
  };
  for (const MachineOperand &Use : VMEM->uses()) {
    if (!Use.isReg() || TRI.isVectorRegister(MF.getRegInfo(), Use.getReg()))
      continue;

    int WaitStatesNeededForUse =
        VmemSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn,
                                                   VmemSgprWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  }
  return WaitStatesNeeded;
}
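
// The "required minus elapsed" pattern above recurs in most checks below:
// each computes how many wait states the hazard requires, subtracts how many
// have already passed, and a result <= 0 means no noops are needed.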

int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) {
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();

  // Check for DPP VGPR read after VALU VGPR write and EXEC write.
  int DppVgprWaitStates = 2;
  int DppExecWaitStates = 5;
  int WaitStatesNeeded = 0;
  auto IsHazardDefFn = [TII](const MachineInstr &MI) {
    return TII->isVALU(MI);
  };

  for (const MachineOperand &Use : DPP->uses()) {
    if (!Use.isReg() || !TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
      continue;
    int WaitStatesNeededForUse =
        DppVgprWaitStates - getWaitStatesSinceDef(
                                Use.getReg(),
                                [](const MachineInstr &) { return true; },
                                DppVgprWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  }

  WaitStatesNeeded = std::max(
      WaitStatesNeeded,
      DppExecWaitStates - getWaitStatesSinceDef(AMDGPU::EXEC, IsHazardDefFn,
                                                DppExecWaitStates));

  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkDivFMasHazards(MachineInstr *DivFMas) {
  const SIInstrInfo *TII = ST.getInstrInfo();

  // v_div_fmas requires 4 wait states after a write to vcc from a VALU
  // instruction.
  const int DivFMasWaitStates = 4;
  auto IsHazardDefFn = [TII](const MachineInstr &MI) {
    return TII->isVALU(MI);
  };
  int WaitStatesNeeded = getWaitStatesSinceDef(AMDGPU::VCC, IsHazardDefFn,
                                               DivFMasWaitStates);

  return DivFMasWaitStates - WaitStatesNeeded;
}

int GCNHazardRecognizer::checkGetRegHazards(MachineInstr *GetRegInstr) {
  const SIInstrInfo *TII = ST.getInstrInfo();
  unsigned GetRegHWReg = getHWReg(TII, *GetRegInstr);

  const int GetRegWaitStates = 2;
  auto IsHazardFn = [TII, GetRegHWReg](const MachineInstr &MI) {
    return GetRegHWReg == getHWReg(TII, MI);
  };
  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, GetRegWaitStates);

  return GetRegWaitStates - WaitStatesNeeded;
}

int GCNHazardRecognizer::checkSetRegHazards(MachineInstr *SetRegInstr) {
  const SIInstrInfo *TII = ST.getInstrInfo();
  unsigned HWReg = getHWReg(TII, *SetRegInstr);

  const int SetRegWaitStates = ST.getSetRegWaitStates();
  auto IsHazardFn = [TII, HWReg](const MachineInstr &MI) {
    return HWReg == getHWReg(TII, MI);
  };
  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, SetRegWaitStates);
  return SetRegWaitStates - WaitStatesNeeded;
}

int GCNHazardRecognizer::createsVALUHazard(const MachineInstr &MI) {
  if (!MI.mayStore())
    return -1;

  const SIInstrInfo *TII = ST.getInstrInfo();
  unsigned Opcode = MI.getOpcode();
  const MCInstrDesc &Desc = MI.getDesc();

  int VDataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
  int VDataRCID = -1;
  if (VDataIdx != -1)
    VDataRCID = Desc.OpInfo[VDataIdx].RegClass;

  if (TII->isMUBUF(MI) || TII->isMTBUF(MI)) {
    // There is no hazard if the instruction does not use vector regs
    // (like wbinvl1).
    if (VDataIdx == -1)
      return -1;
    // For MUBUF/MTBUF instructions this hazard only exists if the
    // instruction is not using a register in the soffset field.
    const MachineOperand *SOffset =
        TII->getNamedOperand(MI, AMDGPU::OpName::soffset);
    // If we have no soffset operand, then assume this field has been
    // hardcoded to zero.
    if (AMDGPU::getRegBitWidth(VDataRCID) > 64 &&
        (!SOffset || !SOffset->isReg()))
      return VDataIdx;
  }

  // MIMG instructions create a hazard if they don't use a 256-bit T# and
  // the store size is greater than 8 bytes and they have more than two bits
  // of their dmask set.
  // All our MIMG definitions use a 256-bit T#, so we can skip checking for them.
  if (TII->isMIMG(MI)) {
    int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::srsrc);
    assert(SRsrcIdx != -1 &&
           AMDGPU::getRegBitWidth(Desc.OpInfo[SRsrcIdx].RegClass) == 256);
    (void)SRsrcIdx;
  }

  if (TII->isFLAT(MI)) {
    int DataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
    if (AMDGPU::getRegBitWidth(Desc.OpInfo[DataIdx].RegClass) > 64)
      return DataIdx;
  }

  return -1;
}

int
GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def,
                                            const MachineRegisterInfo &MRI) {
  // Helper to check for the hazard where VMEM instructions that store more
  // than 8 bytes can have their store data overwritten by the next
  // instruction.
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  const int VALUWaitStates = 1;
  int WaitStatesNeeded = 0;

  if (!TRI->isVectorRegister(MRI, Def.getReg()))
    return WaitStatesNeeded;
  Register Reg = Def.getReg();
  auto IsHazardFn = [this, Reg, TRI](const MachineInstr &MI) {
    int DataIdx = createsVALUHazard(MI);
    return DataIdx >= 0 &&
           TRI->regsOverlap(MI.getOperand(DataIdx).getReg(), Reg);
  };
  int WaitStatesNeededForDef =
      VALUWaitStates - getWaitStatesSince(IsHazardFn, VALUWaitStates);
  WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);

  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) {
  // This checks for the hazard where VMEM instructions that store more than
  // 8 bytes can have their store data overwritten by the next instruction.
  if (!ST.has12DWordStoreHazard())
    return 0;

  const MachineRegisterInfo &MRI = MF.getRegInfo();
  int WaitStatesNeeded = 0;

  for (const MachineOperand &Def : VALU->defs()) {
    WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Def, MRI));
  }

  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkInlineAsmHazards(MachineInstr *IA) {
  // This checks for hazards associated with inline asm statements.
  // Since inline asms can contain just about anything, we use this
  // to call/leverage other check*Hazard routines. Note that
  // this function doesn't attempt to address all possible inline asm
  // hazards (good luck), but is a collection of what has been
  // problematic thus far.

  // See checkVALUHazards().
  if (!ST.has12DWordStoreHazard())
    return 0;

  const MachineRegisterInfo &MRI = MF.getRegInfo();
  int WaitStatesNeeded = 0;

  for (unsigned I = InlineAsm::MIOp_FirstOperand, E = IA->getNumOperands();
       I != E; ++I) {
    const MachineOperand &Op = IA->getOperand(I);
    if (Op.isReg() && Op.isDef()) {
      WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Op, MRI));
    }
  }

  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkRWLaneHazards(MachineInstr *RWLane) {
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  const MachineOperand *LaneSelectOp =
      TII->getNamedOperand(*RWLane, AMDGPU::OpName::src1);

  if (!LaneSelectOp->isReg() || !TRI->isSGPRReg(MRI, LaneSelectOp->getReg()))
    return 0;

  Register LaneSelectReg = LaneSelectOp->getReg();
  auto IsHazardFn = [TII](const MachineInstr &MI) { return TII->isVALU(MI); };

  const int RWLaneWaitStates = 4;
  int WaitStatesSince = getWaitStatesSinceDef(LaneSelectReg, IsHazardFn,
                                              RWLaneWaitStates);
  return RWLaneWaitStates - WaitStatesSince;
}

int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) {
  if (!ST.hasRFEHazards())
    return 0;

  const SIInstrInfo *TII = ST.getInstrInfo();

  const int RFEWaitStates = 1;

  auto IsHazardFn = [TII](const MachineInstr &MI) {
    return getHWReg(TII, MI) == AMDGPU::Hwreg::ID_TRAPSTS;
  };
  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, RFEWaitStates);
  return RFEWaitStates - WaitStatesNeeded;
}

int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) {
  const SIInstrInfo *TII = ST.getInstrInfo();
  const int SMovRelWaitStates = 1;
  auto IsHazardFn = [TII](const MachineInstr &MI) { return TII->isSALU(MI); };
  return SMovRelWaitStates - getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn,
                                                   SMovRelWaitStates);
}

void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
  fixVMEMtoScalarWriteHazards(MI);
  fixVcmpxPermlaneHazards(MI);
  fixSMEMtoVectorWriteHazards(MI);
  fixVcmpxExecWARHazard(MI);
  fixLdsBranchVmemWARHazard(MI);
}

bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) {
  if (!ST.hasVcmpxPermlaneHazard() || !isPermlane(*MI))
    return false;

  const SIInstrInfo *TII = ST.getInstrInfo();
  auto IsHazardFn = [TII](const MachineInstr &MI) { return TII->isVOPC(MI); };

  auto IsExpiredFn = [](const MachineInstr &MI, int) {
    unsigned Opc = MI.getOpcode();
    return SIInstrInfo::isVALU(MI) && Opc != AMDGPU::V_NOP_e32 &&
           Opc != AMDGPU::V_NOP_e64 && Opc != AMDGPU::V_NOP_sdwa;
  };

  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false;

  // V_NOP will be discarded by SQ.
  // Use V_MOV_B32 v?, v?. Register must be alive so use src0 of V_PERMLANE*
  // which is always a VGPR and available.
  auto *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0);
  Register Reg = Src0->getReg();
  bool IsUndef = Src0->isUndef();
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII->get(AMDGPU::V_MOV_B32_e32))
      .addReg(Reg, RegState::Define | (IsUndef ? RegState::Dead : 0))
      .addReg(Reg, IsUndef ? RegState::Undef : RegState::Kill);

  return true;
}

bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) {
  if (!ST.hasVMEMtoScalarWriteHazard())
    return false;

  if (!SIInstrInfo::isVMEM(*MI) && !SIInstrInfo::isDS(*MI))
    return false;

  if (MI->getNumDefs() == 0)
    return false;

  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  auto IsHazardFn = [TRI, MI](const MachineInstr &I) {
    if (!SIInstrInfo::isVMEM(I) && !SIInstrInfo::isDS(I) &&
        !SIInstrInfo::isFLAT(I))
      return false;

    for (const MachineOperand &Def : MI->defs()) {
      const MachineOperand *Op =
          I.findRegisterUseOperand(Def.getReg(), false, TRI);
      if (!Op)
        continue;
      return true;
    }
    return false;
  };

  auto IsExpiredFn = [](const MachineInstr &MI, int) {
    return SIInstrInfo::isVALU(MI) ||
           (MI.getOpcode() == AMDGPU::S_WAITCNT &&
            !MI.getOperand(0).getImm()) ||
           (MI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
            MI.getOperand(0).getImm() == 0xffe3);
  };

  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false;

  const SIInstrInfo *TII = ST.getInstrInfo();
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII->get(AMDGPU::S_WAITCNT_DEPCTR))
      .addImm(0xffe3);
  return true;
}

bool GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) {
  if (!ST.hasSMEMtoVectorWriteHazard())
    return false;

  if (!SIInstrInfo::isVALU(*MI))
    return false;

  unsigned SDSTName;
  switch (MI->getOpcode()) {
  case AMDGPU::V_READLANE_B32:
  case AMDGPU::V_READFIRSTLANE_B32:
    SDSTName = AMDGPU::OpName::vdst;
    break;
  default:
    SDSTName = AMDGPU::OpName::sdst;
    break;
  }

  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(ST.getCPU());
  const MachineOperand *SDST = TII->getNamedOperand(*MI, SDSTName);
  if (!SDST) {
    for (const auto &MO : MI->implicit_operands()) {
      if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg()))) {
        SDST = &MO;
        break;
      }
    }
  }

  if (!SDST)
    return false;

  const Register SDSTReg = SDST->getReg();
  auto IsHazardFn = [SDSTReg, TRI](const MachineInstr &I) {
    return SIInstrInfo::isSMRD(I) && I.readsRegister(SDSTReg, TRI);
  };

  auto IsExpiredFn = [TII, IV](const MachineInstr &MI, int) {
    if (TII->isSALU(MI)) {
      switch (MI.getOpcode()) {
      case AMDGPU::S_SETVSKIP:
      case AMDGPU::S_VERSION:
      case AMDGPU::S_WAITCNT_VSCNT:
      case AMDGPU::S_WAITCNT_VMCNT:
      case AMDGPU::S_WAITCNT_EXPCNT:
        // These instructions cannot mitigate the hazard.
        return false;
      case AMDGPU::S_WAITCNT_LGKMCNT:
        // Reducing lgkmcnt count to 0 always mitigates the hazard.
        return (MI.getOperand(1).getImm() == 0) &&
               (MI.getOperand(0).getReg() == AMDGPU::SGPR_NULL);
      case AMDGPU::S_WAITCNT: {
        const int64_t Imm = MI.getOperand(0).getImm();
        AMDGPU::Waitcnt Decoded = AMDGPU::decodeWaitcnt(IV, Imm);
        return (Decoded.LgkmCnt == 0);
      }
      default:
        // SOPP instructions cannot mitigate the hazard.
        if (TII->isSOPP(MI))
          return false;
        // At this point the SALU can be assumed to mitigate the hazard
        // because either:
        // (a) it is independent of the at risk SMEM (breaking chain),
        // or
        // (b) it is dependent on the SMEM, in which case an appropriate
        //     s_waitcnt lgkmcnt _must_ exist between it and the at risk
        //     SMEM instruction.
        return true;
      }
    }
    return false;
  };

  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false;

  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII->get(AMDGPU::S_MOV_B32), AMDGPU::SGPR_NULL)
      .addImm(0);
  return true;
}
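
// The inserted "s_mov_b32 null, 0" is a side-effect-free SALU; per the
// reasoning in IsExpiredFn above, any non-SOPP SALU between the SMEM and the
// VALU's scalar write is sufficient to mitigate the hazard.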

bool GCNHazardRecognizer::fixVcmpxExecWARHazard(MachineInstr *MI) {
  if (!ST.hasVcmpxExecWARHazard() || !SIInstrInfo::isVALU(*MI))
    return false;

  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  if (!MI->modifiesRegister(AMDGPU::EXEC, TRI))
    return false;

  auto IsHazardFn = [TRI](const MachineInstr &I) {
    if (SIInstrInfo::isVALU(I))
      return false;
    return I.readsRegister(AMDGPU::EXEC, TRI);
  };

  const SIInstrInfo *TII = ST.getInstrInfo();
  auto IsExpiredFn = [TII, TRI](const MachineInstr &MI, int) {
    if (SIInstrInfo::isVALU(MI)) {
      if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst))
        return true;
      for (auto MO : MI.implicit_operands())
        if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg())))
          return true;
    }
    if (MI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
        (MI.getOperand(0).getImm() & 0xfffe) == 0xfffe)
      return true;
    return false;
  };

  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false;

  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII->get(AMDGPU::S_WAITCNT_DEPCTR))
      .addImm(0xfffe);
  return true;
}

static bool shouldRunLdsBranchVmemWARHazardFixup(const MachineFunction &MF,
                                                 const GCNSubtarget &ST) {
  if (!ST.hasLdsBranchVmemWARHazard())
    return false;

  // Check if the necessary condition for the hazard is met: both LDS and VMEM
  // instructions need to appear in the same function.
  bool HasLds = false;
  bool HasVmem = false;
  for (auto &MBB : MF) {
    for (auto &MI : MBB) {
      HasLds |= SIInstrInfo::isDS(MI);
      HasVmem |=
          SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI);
      if (HasLds && HasVmem)
        return true;
    }
  }
  return false;
}

bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) {
  if (!RunLdsBranchVmemWARHazardFixup)
    return false;

  assert(ST.hasLdsBranchVmemWARHazard());

  auto IsHazardInst = [](const MachineInstr &MI) {
    if (SIInstrInfo::isDS(MI))
      return 1;
    if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI))
      return 2;
    return 0;
  };

  auto InstType = IsHazardInst(*MI);
  if (!InstType)
    return false;

  auto IsExpiredFn = [&IsHazardInst](const MachineInstr &I, int) {
    return IsHazardInst(I) || (I.getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
                               I.getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
                               !I.getOperand(1).getImm());
  };

  auto IsHazardFn = [InstType, &IsHazardInst](const MachineInstr &I) {
    if (!I.isBranch())
      return false;

    auto IsHazardFn = [InstType, IsHazardInst](const MachineInstr &I) {
      auto InstType2 = IsHazardInst(I);
      return InstType2 && InstType != InstType2;
    };

    auto IsExpiredFn = [InstType, &IsHazardInst](const MachineInstr &I, int) {
      auto InstType2 = IsHazardInst(I);
      if (InstType == InstType2)
        return true;

      return I.getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
             I.getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
             !I.getOperand(1).getImm();
    };

    return ::getWaitStatesSince(IsHazardFn, &I, IsExpiredFn) !=
           std::numeric_limits<int>::max();
  };

  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false;

  const SIInstrInfo *TII = ST.getInstrInfo();
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII->get(AMDGPU::S_WAITCNT_VSCNT))
      .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
      .addImm(0);

  return true;
}
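
// The hazard is an LDS access and a VMEM access of opposite kinds on the two
// sides of a branch; the inserted "s_waitcnt_vscnt null, 0" is exactly the
// mitigation that the IsExpiredFn callbacks above already accept.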

int GCNHazardRecognizer::checkNSAtoVMEMHazard(MachineInstr *MI) {
  int NSAtoVMEMWaitStates = 1;

  if (!ST.hasNSAtoVMEMBug())
    return 0;

  if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isMTBUF(*MI))
    return 0;

  const SIInstrInfo *TII = ST.getInstrInfo();
  const auto *Offset = TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
  if (!Offset || (Offset->getImm() & 6) == 0)
    return 0;

  auto IsHazardFn = [TII](const MachineInstr &I) {
    if (!SIInstrInfo::isMIMG(I))
      return false;
    const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(I.getOpcode());
    return Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA &&
           TII->getInstSizeInBytes(I) >= 16;
  };

  return NSAtoVMEMWaitStates - getWaitStatesSince(IsHazardFn, 1);
}

int GCNHazardRecognizer::checkFPAtomicToDenormModeHazard(MachineInstr *MI) {
  int FPAtomicToDenormModeWaitStates = 3;

  if (MI->getOpcode() != AMDGPU::S_DENORM_MODE)
    return 0;

  auto IsHazardFn = [](const MachineInstr &I) {
    if (!SIInstrInfo::isVMEM(I) && !SIInstrInfo::isFLAT(I))
      return false;
    return SIInstrInfo::isFPAtomic(I);
  };

  auto IsExpiredFn = [](const MachineInstr &MI, int WaitStates) {
    if (WaitStates >= 3 || SIInstrInfo::isVALU(MI))
      return true;

    switch (MI.getOpcode()) {
    case AMDGPU::S_WAITCNT:
    case AMDGPU::S_WAITCNT_VSCNT:
    case AMDGPU::S_WAITCNT_VMCNT:
    case AMDGPU::S_WAITCNT_EXPCNT:
    case AMDGPU::S_WAITCNT_LGKMCNT:
    case AMDGPU::S_WAIT_IDLE:
      return true;
    default:
      break;
    }

    return false;
  };

  return FPAtomicToDenormModeWaitStates -
         ::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn);
}

int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) {
  assert(SIInstrInfo::isMAI(*MI));

  return ST.hasGFX90AInsts() ? checkMAIHazards90A(MI) : checkMAIHazards908(MI);
}
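
// gfx908 and gfx90a have different MFMA pipelines and therefore different
// wait-state tables, which the two helpers below encode separately.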

int GCNHazardRecognizer::checkMAIHazards908(MachineInstr *MI) {
  int WaitStatesNeeded = 0;
  unsigned Opc = MI->getOpcode();

  auto IsVALUFn = [](const MachineInstr &MI) {
    return SIInstrInfo::isVALU(MI);
  };

  if (Opc != AMDGPU::V_ACCVGPR_READ_B32_e64) { // MFMA or v_accvgpr_write
    const int LegacyVALUWritesVGPRWaitStates = 2;
    const int VALUWritesExecWaitStates = 4;
    const int MaxWaitStates = 4;

    int WaitStatesNeededForUse = VALUWritesExecWaitStates -
      getWaitStatesSinceDef(AMDGPU::EXEC, IsVALUFn, MaxWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

    if (WaitStatesNeeded < MaxWaitStates) {
      for (const MachineOperand &Use : MI->explicit_uses()) {
        const int MaxWaitStates = 2;

        if (!Use.isReg() || !TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
          continue;

        int WaitStatesNeededForUse = LegacyVALUWritesVGPRWaitStates -
          getWaitStatesSinceDef(Use.getReg(), IsVALUFn, MaxWaitStates);
        WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

        if (WaitStatesNeeded == MaxWaitStates)
          break;
      }
    }
  }

  auto IsMFMAFn = [](const MachineInstr &MI) {
    return SIInstrInfo::isMAI(MI) &&
           MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
           MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64;
  };

  for (const MachineOperand &Op : MI->explicit_operands()) {
    if (!Op.isReg() || !TRI.isAGPR(MF.getRegInfo(), Op.getReg()))
      continue;

    if (Op.isDef() && Opc != AMDGPU::V_ACCVGPR_WRITE_B32_e64)
      continue;

    const int MFMAWritesAGPROverlappedSrcABWaitStates = 4;
    const int MFMAWritesAGPROverlappedSrcCWaitStates = 2;
    const int MFMA4x4WritesAGPRAccVgprReadWaitStates = 4;
    const int MFMA16x16WritesAGPRAccVgprReadWaitStates = 10;
    const int MFMA32x32WritesAGPRAccVgprReadWaitStates = 18;
    const int MFMA4x4WritesAGPRAccVgprWriteWaitStates = 1;
    const int MFMA16x16WritesAGPRAccVgprWriteWaitStates = 7;
    const int MFMA32x32WritesAGPRAccVgprWriteWaitStates = 15;
    const int MaxWaitStates = 18;
    Register Reg = Op.getReg();
    unsigned HazardDefLatency = 0;

    auto IsOverlappedMFMAFn = [Reg, &IsMFMAFn, &HazardDefLatency,
                               this](const MachineInstr &MI) {
      if (!IsMFMAFn(MI))
        return false;
      Register DstReg = MI.getOperand(0).getReg();
      if (DstReg == Reg)
        return false;
      HazardDefLatency =
          std::max(HazardDefLatency, TSchedModel.computeInstrLatency(&MI));
      return TRI.regsOverlap(DstReg, Reg);
    };

    int WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsOverlappedMFMAFn,
                                                   MaxWaitStates);
    int NeedWaitStates = MFMAWritesAGPROverlappedSrcABWaitStates;
    int SrcCIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
    int OpNo = MI->getOperandNo(&Op);
    if (OpNo == SrcCIdx) {
      NeedWaitStates = MFMAWritesAGPROverlappedSrcCWaitStates;
    } else if (Opc == AMDGPU::V_ACCVGPR_READ_B32_e64) {
      switch (HazardDefLatency) {
      case 2:  NeedWaitStates = MFMA4x4WritesAGPRAccVgprReadWaitStates;
        break;
      case 8:  NeedWaitStates = MFMA16x16WritesAGPRAccVgprReadWaitStates;
        break;
      case 16: LLVM_FALLTHROUGH;
      default: NeedWaitStates = MFMA32x32WritesAGPRAccVgprReadWaitStates;
        break;
      }
    } else if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64) {
      switch (HazardDefLatency) {
      case 2:  NeedWaitStates = MFMA4x4WritesAGPRAccVgprWriteWaitStates;
        break;
      case 8:  NeedWaitStates = MFMA16x16WritesAGPRAccVgprWriteWaitStates;
        break;
      case 16: LLVM_FALLTHROUGH;
      default: NeedWaitStates = MFMA32x32WritesAGPRAccVgprWriteWaitStates;
        break;
      }
    }

    int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef;
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

    if (WaitStatesNeeded == MaxWaitStates)
      return WaitStatesNeeded; // Early exit.

    auto IsAccVgprWriteFn = [Reg, this](const MachineInstr &MI) {
      if (MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64)
        return false;
      Register DstReg = MI.getOperand(0).getReg();
      return TRI.regsOverlap(Reg, DstReg);
    };

    const int AccVGPRWriteMFMAReadSrcCWaitStates = 1;
    const int AccVGPRWriteMFMAReadSrcABWaitStates = 3;
    const int AccVGPRWriteAccVgprReadWaitStates = 3;
    NeedWaitStates = AccVGPRWriteMFMAReadSrcABWaitStates;
    if (OpNo == SrcCIdx)
      NeedWaitStates = AccVGPRWriteMFMAReadSrcCWaitStates;
    else if (Opc == AMDGPU::V_ACCVGPR_READ_B32_e64)
      NeedWaitStates = AccVGPRWriteAccVgprReadWaitStates;

    WaitStatesNeededForUse = NeedWaitStates -
      getWaitStatesSinceDef(Reg, IsAccVgprWriteFn, MaxWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

    if (WaitStatesNeeded == MaxWaitStates)
      return WaitStatesNeeded; // Early exit.
  }

  if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64) {
    const int MFMA4x4ReadSrcCAccVgprWriteWaitStates = 0;
    const int MFMA16x16ReadSrcCAccVgprWriteWaitStates = 5;
    const int MFMA32x32ReadSrcCAccVgprWriteWaitStates = 13;
    const int MaxWaitStates = 13;
    Register DstReg = MI->getOperand(0).getReg();
    unsigned HazardDefLatency = 0;

    auto IsSrcCMFMAFn = [DstReg, &IsMFMAFn, &HazardDefLatency,
                         this](const MachineInstr &MI) {
      if (!IsMFMAFn(MI))
        return false;
      Register Reg = TII.getNamedOperand(MI, AMDGPU::OpName::src2)->getReg();
      HazardDefLatency =
          std::max(HazardDefLatency, TSchedModel.computeInstrLatency(&MI));
      return TRI.regsOverlap(Reg, DstReg);
    };

    int WaitStatesSince = getWaitStatesSince(IsSrcCMFMAFn, MaxWaitStates);
    int NeedWaitStates;
    switch (HazardDefLatency) {
    case 2:  NeedWaitStates = MFMA4x4ReadSrcCAccVgprWriteWaitStates;
      break;
    case 8:  NeedWaitStates = MFMA16x16ReadSrcCAccVgprWriteWaitStates;
      break;
    case 16: LLVM_FALLTHROUGH;
    default: NeedWaitStates = MFMA32x32ReadSrcCAccVgprWriteWaitStates;
      break;
    }

    int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSince;
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  }

  return WaitStatesNeeded;
}
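
// In the latency switches above, 2, 8 and 16 cycles correspond to 4x4, 16x16
// and 32x32 MFMA shapes respectively (as reported by TSchedModel); the same
// mapping is used by the gfx90a checks below.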

int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
  int WaitStatesNeeded = 0;
  unsigned Opc = MI->getOpcode();

  auto IsMFMAFn = [](const MachineInstr &MI) {
    return SIInstrInfo::isMAI(MI) &&
           MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
           MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64;
  };

  auto IsLegacyVALUFn = [&IsMFMAFn](const MachineInstr &MI) {
    return SIInstrInfo::isVALU(MI) && !IsMFMAFn(MI);
  };

  auto IsLegacyVALUNotDotFn = [&IsMFMAFn](const MachineInstr &MI) {
    return SIInstrInfo::isVALU(MI) && !IsMFMAFn(MI) && !SIInstrInfo::isDOT(MI);
  };

  if (!IsMFMAFn(*MI))
    return WaitStatesNeeded;

  const int VALUWritesExecWaitStates = 4;
  int WaitStatesNeededForUse = VALUWritesExecWaitStates -
    getWaitStatesSinceDef(AMDGPU::EXEC, IsLegacyVALUFn,
                          VALUWritesExecWaitStates);
  WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

  int SrcCIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);

  // Loop for both DGEMM and S/HGEMM 2nd instruction.
  for (const MachineOperand &Use : MI->explicit_uses()) {
    const int LegacyVALUNotDotWritesVGPRWaitStates = 2;
    const int SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates = 2;
    const int SMFMA16x16WritesVGPROverlappedSMFMASrcCWaitStates = 8;
    const int SMFMA32x32WritesVGPROverlappedSMFMASrcCWaitStates = 16;
    const int SMFMA4x4WritesVGPROverlappedDMFMASrcCWaitStates = 3;
    const int SMFMA16x16WritesVGPROverlappedDMFMASrcCWaitStates = 9;
    const int SMFMA32x32WritesVGPROverlappedDMFMASrcCWaitStates = 17;
    const int DMFMA16x16WritesVGPROverlappedSrcCWaitStates = 9;
    const int DMFMA4x4WritesVGPROverlappedSrcCWaitStates = 4;
    const int SMFMA4x4WritesVGPROverlappedSrcABWaitStates = 5;
    const int SMFMA16x16WritesVGPROverlappedSrcABWaitStates = 11;
    const int SMFMA32x32WritesVGPROverlappedSrcABWaitStates = 19;
    const int DMFMA4x4WritesVGPROverlappedMFMASrcABWaitStates = 6;
    const int DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates = 11;
    const int DMFMA4x4WritesVGPRFullSrcCWaitStates = 4;
    const int MaxWaitStates = 19;

    if (!Use.isReg())
      continue;
    Register Reg = Use.getReg();
    bool FullReg;
    const MachineInstr *MI1;

    auto IsOverlappedMFMAFn = [Reg, &IsMFMAFn, &FullReg, &MI1,
                               this](const MachineInstr &MI) {
      if (!IsMFMAFn(MI))
        return false;
      Register DstReg = MI.getOperand(0).getReg();
      FullReg = (DstReg == Reg);
      MI1 = &MI;
      return TRI.regsOverlap(DstReg, Reg);
    };

    WaitStatesNeededForUse = LegacyVALUNotDotWritesVGPRWaitStates -
      getWaitStatesSinceDef(Reg, IsLegacyVALUNotDotFn, MaxWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

    int NumWaitStates =
        getWaitStatesSinceDef(Reg, IsOverlappedMFMAFn, MaxWaitStates);
    if (NumWaitStates == std::numeric_limits<int>::max())
      continue;

    int OpNo = MI->getOperandNo(&Use);
    unsigned Opc1 = MI1->getOpcode();
    int NeedWaitStates = 0;
    if (OpNo == SrcCIdx) {
      if (!isDGEMM(Opc) && isDGEMM(Opc1)) {
        NeedWaitStates = 0;
      } else if (FullReg) {
        if ((Opc == AMDGPU::V_MFMA_F64_4X4X4F64_e64 ||
             Opc == AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64) &&
            (Opc1 == AMDGPU::V_MFMA_F64_4X4X4F64_e64 ||
             Opc1 == AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64))
          NeedWaitStates = DMFMA4x4WritesVGPRFullSrcCWaitStates;
      } else {
        switch (Opc1) {
        case AMDGPU::V_MFMA_F64_16X16X4F64_e64:
        case AMDGPU::V_MFMA_F64_16X16X4F64_vgprcd_e64:
          if (!isXDL(ST, *MI))
            NeedWaitStates = DMFMA16x16WritesVGPROverlappedSrcCWaitStates;
          break;
        case AMDGPU::V_MFMA_F64_4X4X4F64_e64:
        case AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64:
          if (!isXDL(ST, *MI))
            NeedWaitStates = DMFMA4x4WritesVGPROverlappedSrcCWaitStates;
          break;
        default:
          switch (TSchedModel.computeInstrLatency(MI1)) {
          case 2:
            NeedWaitStates = isDGEMM(Opc)
              ? SMFMA4x4WritesVGPROverlappedDMFMASrcCWaitStates
              : SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates;
            break;
          case 8:
            NeedWaitStates = isDGEMM(Opc)
              ? SMFMA16x16WritesVGPROverlappedDMFMASrcCWaitStates
              : SMFMA16x16WritesVGPROverlappedSMFMASrcCWaitStates;
            break;
          case 16: LLVM_FALLTHROUGH;
          default:
            NeedWaitStates = isDGEMM(Opc)
              ? SMFMA32x32WritesVGPROverlappedDMFMASrcCWaitStates
              : SMFMA32x32WritesVGPROverlappedSMFMASrcCWaitStates;
          }
        }
      }
    } else {
      switch (Opc1) {
      case AMDGPU::V_MFMA_F64_16X16X4F64_e64:
      case AMDGPU::V_MFMA_F64_16X16X4F64_vgprcd_e64:
        NeedWaitStates = DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates;
        break;
      case AMDGPU::V_MFMA_F64_4X4X4F64_e64:
      case AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64:
        NeedWaitStates = DMFMA4x4WritesVGPROverlappedMFMASrcABWaitStates;
        break;
      default:
        switch (TSchedModel.computeInstrLatency(MI1)) {
        case 2:
          NeedWaitStates = SMFMA4x4WritesVGPROverlappedSrcABWaitStates;
          break;
        case 8:
          NeedWaitStates = SMFMA16x16WritesVGPROverlappedSrcABWaitStates;
          break;
        case 16: LLVM_FALLTHROUGH;
        default:
          NeedWaitStates = SMFMA32x32WritesVGPROverlappedSrcABWaitStates;
        }
      }
    }
    if (WaitStatesNeeded >= NeedWaitStates)
      continue;

    WaitStatesNeededForUse = NeedWaitStates - NumWaitStates;
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

    if (WaitStatesNeeded == MaxWaitStates)
      break;
  }

  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) {
  // On gfx90a+ relevant hazards are checked in checkMAIVALUHazards().
  if (!ST.hasMAIInsts() || ST.hasGFX90AInsts())
    return 0;

  int WaitStatesNeeded = 0;

  auto IsAccVgprReadFn = [](const MachineInstr &MI) {
    return MI.getOpcode() == AMDGPU::V_ACCVGPR_READ_B32_e64;
  };

  for (const MachineOperand &Op : MI->explicit_uses()) {
    if (!Op.isReg() || !TRI.isVGPR(MF.getRegInfo(), Op.getReg()))
      continue;

    Register Reg = Op.getReg();

    const int AccVgprReadLdStWaitStates = 2;
    const int VALUWriteAccVgprRdWrLdStDepVALUWaitStates = 1;
    const int MaxWaitStates = 2;

    int WaitStatesNeededForUse = AccVgprReadLdStWaitStates -
      getWaitStatesSinceDef(Reg, IsAccVgprReadFn, MaxWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

    if (WaitStatesNeeded == MaxWaitStates)
      return WaitStatesNeeded; // Early exit.

    auto IsVALUAccVgprRdWrCheckFn = [Reg, this](const MachineInstr &MI) {
      if (MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64 &&
          MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64)
        return false;
      auto IsVALUFn = [](const MachineInstr &MI) {
        return SIInstrInfo::isVALU(MI) && !SIInstrInfo::isMAI(MI);
      };
      return getWaitStatesSinceDef(Reg, IsVALUFn, 2 /*MaxWaitStates*/) <
             std::numeric_limits<int>::max();
    };

    WaitStatesNeededForUse = VALUWriteAccVgprRdWrLdStDepVALUWaitStates -
      getWaitStatesSince(IsVALUAccVgprRdWrCheckFn, MaxWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  }

  return WaitStatesNeeded;
}
1591 
1592 int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
1593  if (!ST.hasGFX90AInsts())
1594  return 0;
1595 
1596  auto IsMFMAFn = [](const MachineInstr &MI) -> bool {
1597  return SIInstrInfo::isMAI(MI) &&
1598  MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
1599  MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64;
1600  };
1601 
1602  auto IsDGEMMFn = [](const MachineInstr &MI) -> bool {
1603  return isDGEMM(MI.getOpcode());
1604  };
1605 
1606  // This is checked in checkMAIHazards90A()
1607  if (IsMFMAFn(*MI))
1608  return 0;
1609 
1610  int WaitStatesNeeded = 0;
1611 
1612  bool IsMemOrExport = SIInstrInfo::isVMEM(*MI) ||
1613  SIInstrInfo::isFLAT(*MI) ||
1614  SIInstrInfo::isDS(*MI) ||
1616  bool IsVALU = SIInstrInfo::isVALU(*MI);
1617 
1618  const MachineInstr *MFMA = nullptr;
1619  unsigned Reg;
1620  auto IsMFMAWriteFn = [&Reg, &IsMFMAFn, &MFMA, this](const MachineInstr &MI) {
1621  if (!IsMFMAFn(MI) || !TRI.regsOverlap(MI.getOperand(0).getReg(), Reg))
1622  return false;
1623  MFMA = &MI;
1624  return true;
1625  };
1626 
1627  const MachineInstr *DOT = nullptr;
1628  auto IsDotWriteFn = [&Reg, &DOT, this](const MachineInstr &MI) {
1629  if (!SIInstrInfo::isDOT(MI) ||
1630  !TRI.regsOverlap(MI.getOperand(0).getReg(), Reg))
1631  return false;
1632  DOT = &MI;
1633  return true;
1634  };
1635 
1636  int SrcCIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
1637  AMDGPU::OpName::src2);
1638 
1639  if (IsMemOrExport || IsVALU) {
1640  const int SMFMA4x4WriteVgprVALUMemExpReadWaitStates = 5;
1641  const int SMFMA16x16WriteVgprVALUMemExpReadWaitStates = 11;
1642  const int SMFMA32x32WriteVgprVALUMemExpReadWaitStates = 19;
1643  const int DMFMA4x4WriteVgprMemExpReadWaitStates = 9;
1644  const int DMFMA16x16WriteVgprMemExpReadWaitStates = 18;
1645  const int DMFMA4x4WriteVgprVALUReadWaitStates = 6;
1646  const int DMFMA16x16WriteVgprVALUReadWaitStates = 11;
1647  const int DotWriteSameDotReadSrcAB = 3;
1648  const int DotWriteDifferentVALURead = 3;
1649  const int MaxWaitStates = 19;
1650 
1651  for (const MachineOperand &Use : MI->explicit_uses()) {
1652  if (!Use.isReg())
1653  continue;
1654  Reg = Use.getReg();
1655 
1656  DOT = nullptr;
1657  int WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsDotWriteFn,
1658  MaxWaitStates);
1659  if (DOT) {
1660  int NeedWaitStates = 0;
1661  if (DOT->getOpcode() == MI->getOpcode()) {
1662  if (&Use - &MI->getOperand(0) != SrcCIdx)
1663  NeedWaitStates = DotWriteSameDotReadSrcAB;
1664  } else {
1665  NeedWaitStates = DotWriteDifferentVALURead;
1666  }
1667 
1668  int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef;
1669  WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
1670  }
1671 
1672  MFMA = nullptr;
1673  WaitStatesSinceDef =
1674  getWaitStatesSinceDef(Reg, IsMFMAWriteFn, MaxWaitStates);
1675  if (!MFMA)
1676  continue;
1677 
1678  unsigned HazardDefLatency = TSchedModel.computeInstrLatency(MFMA);
1679  int NeedWaitStates = MaxWaitStates;
1680  switch (HazardDefLatency) {
1681  case 2:
1682  NeedWaitStates = SMFMA4x4WriteVgprVALUMemExpReadWaitStates;
1683  break;
1684  case 4:
1685  assert(isDGEMM(MFMA->getOpcode()));
1686  NeedWaitStates =
1687  IsMemOrExport ? DMFMA4x4WriteVgprMemExpReadWaitStates
1688  : DMFMA4x4WriteVgprVALUReadWaitStates;
1689  break;
1690  case 8:
1691  NeedWaitStates = SMFMA16x16WriteVgprVALUMemExpReadWaitStates;
1692  break;
1693  case 16: LLVM_FALLTHROUGH;
1694  default:
1695  NeedWaitStates =
1696  isDGEMM(MFMA->getOpcode())
1697  ? IsMemOrExport ? DMFMA16x16WriteVgprMemExpReadWaitStates
1698  : DMFMA16x16WriteVgprVALUReadWaitStates
1699  : SMFMA32x32WriteVgprVALUMemExpReadWaitStates;
1700  break;
1701  }
1702 
1703  int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef;
1704  WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
1705 
1706  if (WaitStatesNeeded == MaxWaitStates)
1707  break;
1708  }
1709  }
1710 
1711  unsigned Opc = MI->getOpcode();
1712  const int DMFMAToFMA64WaitStates = 2;
1713  if ((Opc == AMDGPU::V_FMA_F64_e64 ||
1714  Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64 ||
1715  Opc == AMDGPU::V_FMAC_F64_dpp) &&
1716  WaitStatesNeeded < DMFMAToFMA64WaitStates) {
1717  int WaitStatesNeededForUse = DMFMAToFMA64WaitStates -
1718  getWaitStatesSince(IsDGEMMFn, DMFMAToFMA64WaitStates);
1719  WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
1720  }
1721 
1722  if (!IsVALU && !IsMemOrExport)
1723  return WaitStatesNeeded;
1724 
  for (const MachineOperand &Def : MI->defs()) {
    const int SMFMA4x4WriteVgprVALUWawWaitStates = 5;
    const int SMFMA16x16WriteVgprVALUWawWaitStates = 11;
    const int SMFMA32x32WriteVgprVALUWawWaitStates = 19;
    const int SMFMA4x4ReadVgprVALUWarWaitStates = 1;
    const int SMFMA16x16ReadVgprVALUWarWaitStates = 7;
    const int SMFMA32x32ReadVgprVALUWarWaitStates = 15;
    const int DMFMA4x4WriteVgprVALUWriteWaitStates = 6;
    const int DMFMA16x16WriteVgprVALUWriteWaitStates = 11;
    const int DotWriteDifferentVALUWrite = 3;
    const int MaxWaitStates = 19;
    const int MaxWarWaitStates = 15;

    Reg = Def.getReg();

    DOT = nullptr;
    int WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsDotWriteFn,
                                                   MaxWaitStates);
    if (DOT && DOT->getOpcode() != MI->getOpcode())
      WaitStatesNeeded = std::max(WaitStatesNeeded, DotWriteDifferentVALUWrite -
                                                    WaitStatesSinceDef);

    MFMA = nullptr;
    WaitStatesSinceDef =
        getWaitStatesSinceDef(Reg, IsMFMAWriteFn, MaxWaitStates);
    if (MFMA) {
      int NeedWaitStates = MaxWaitStates;
      switch (TSchedModel.computeInstrLatency(MFMA)) {
      case 2:
        NeedWaitStates = SMFMA4x4WriteVgprVALUWawWaitStates;
        break;
      case 4:
        assert(isDGEMM(MFMA->getOpcode()));
        NeedWaitStates = DMFMA4x4WriteVgprVALUWriteWaitStates;
        break;
      case 8:
        NeedWaitStates = SMFMA16x16WriteVgprVALUWawWaitStates;
        break;
      case 16: LLVM_FALLTHROUGH;
      default:
        NeedWaitStates = isDGEMM(MFMA->getOpcode())
                             ? DMFMA16x16WriteVgprVALUWriteWaitStates
                             : SMFMA32x32WriteVgprVALUWawWaitStates;
        break;
      }

      int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef;
      WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

      if (WaitStatesNeeded == MaxWaitStates)
        break;
    }

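    // WAR: an SMFMA issued up to MaxWarWaitStates ago may still be reading
    // this register as src2; overwriting it too early would corrupt the
    // accumulator input.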
    auto IsSMFMAReadAsCFn = [&Reg, &IsMFMAFn, &MFMA,
                             this](const MachineInstr &MI) {
      if (!IsMFMAFn(MI) || isDGEMM(MI.getOpcode()) ||
          !MI.readsRegister(Reg, &TRI))
        return false;

      const MachineOperand *SrcC =
          TII.getNamedOperand(MI, AMDGPU::OpName::src2);
      assert(SrcC);
      if (!SrcC->isReg() || !TRI.regsOverlap(SrcC->getReg(), Reg))
        return false;

      MFMA = &MI;
      return true;
    };

    MFMA = nullptr;
    int WaitStatesSinceUse = getWaitStatesSince(IsSMFMAReadAsCFn,
                                                MaxWarWaitStates);
    if (!MFMA)
      continue;

    unsigned HazardDefLatency = TSchedModel.computeInstrLatency(MFMA);
    int NeedWaitStates = MaxWaitStates;
    switch (HazardDefLatency) {
    case 2:  NeedWaitStates = SMFMA4x4ReadVgprVALUWarWaitStates;
      break;
    case 8:  NeedWaitStates = SMFMA16x16ReadVgprVALUWarWaitStates;
      break;
    case 16: LLVM_FALLTHROUGH;
    default: NeedWaitStates = SMFMA32x32ReadVgprVALUWarWaitStates;
      break;
    }

    int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceUse;
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  }

  return WaitStatesNeeded;
}

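// Invoked when getHazardType returns NoHazard: advise the scheduler to pick a
// different candidate if this MFMA would issue while the previously issued
// MFMA's result latency has not yet elapsed.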
bool GCNHazardRecognizer::ShouldPreferAnother(SUnit *SU) {
  if (!SU->isInstr())
    return false;

  const MachineInstr *MAI = nullptr;
  auto IsMFMAFn = [&MAI](const MachineInstr &MI) {
    MAI = nullptr;
    if (SIInstrInfo::isMAI(MI) &&
        MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
        MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64)
      MAI = &MI;
    return MAI != nullptr;
  };

  MachineInstr *MI = SU->getInstr();
  if (IsMFMAFn(*MI)) {
    int W = getWaitStatesSince(IsMFMAFn, 16);
    if (MAI)
      return W < (int)TSchedModel.computeInstrLatency(MAI);
  }

  return false;
}