struct MFMAPaddingRatioParser : public cl::parser<unsigned> {
      return O.error("'" + Arg + "' value invalid for uint argument!");
      return O.error("'" + Arg + "' value must be in the range [0, 100]!");
                   cl::desc("Fill a percentage of the latency between "
                            "neighboring MFMA with s_nops."));
    IsHazardRecognizerMode(false),
    CurrCycleInstr(nullptr),
    TII(*ST.getInstrInfo()),
    TRI(TII.getRegisterInfo()),
    ClauseUses(TRI.getNumRegUnits()),
    ClauseDefs(TRI.getNumRegUnits()) {
  TSchedModel.init(&ST);
  EmittedInstrs.clear();
  return Opcode == AMDGPU::V_DIV_FMAS_F32_e64 ||
         Opcode == AMDGPU::V_DIV_FMAS_F64_e64;
  return Opcode == AMDGPU::S_GETREG_B32;
  case AMDGPU::S_SETREG_B32:
  case AMDGPU::S_SETREG_B32_mode:
  case AMDGPU::S_SETREG_IMM32_B32:
  case AMDGPU::S_SETREG_IMM32_B32_mode:
  return Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32;
  return Opcode == AMDGPU::S_RFE_B64;
  case AMDGPU::S_MOVRELS_B32:
  case AMDGPU::S_MOVRELS_B64:
  case AMDGPU::S_MOVRELD_B32:
  case AMDGPU::S_MOVRELD_B64:
  unsigned Opcode = MI.getOpcode();
      Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
      Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64)
  if (!ST.hasGFX940Insts())
  if (TII.isAlwaysGDS(MI.getOpcode()))
  switch (MI.getOpcode()) {
  case AMDGPU::S_SENDMSG:
  case AMDGPU::S_SENDMSGHALT:
  case AMDGPU::S_TTRACEDATA:
  case AMDGPU::DS_PERMUTE_B32:
  case AMDGPU::DS_BPERMUTE_B32:
    if (TII.isDS(MI.getOpcode())) {
                                           AMDGPU::OpName::gds);
      if (MI.getOperand(GDS).getImm())
  unsigned Opcode = MI.getOpcode();
  return Opcode == AMDGPU::V_PERMLANE16_B32_e64 ||
         Opcode == AMDGPU::V_PERMLANEX16_B32_e64;
                                           AMDGPU::OpName::simm16);
  if (checkFPAtomicToDenormModeHazard(MI) > 0)
      && checkVMEMHazards(MI) > 0)
  if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0)
  if (isRWLane(MI->getOpcode()) && checkRWLaneHazards(MI) > 0)
  if (isSGetReg(MI->getOpcode()) && checkGetRegHazards(MI) > 0)
  if (isSSetReg(MI->getOpcode()) && checkSetRegHazards(MI) > 0)
  if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0)
        MI->getOpcode() == AMDGPU::DS_WRITE_ADDTID_B32 ||
        MI->getOpcode() == AMDGPU::DS_READ_ADDTID_B32)) ||
        MI->readsRegister(AMDGPU::LDS_DIRECT))) &&
      checkReadM0Hazards(MI) > 0)
  if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0)
  while (Quantity > 0) {
    unsigned Arg = std::min(Quantity, 8u);
GCNHazardRecognizer::getMFMAPipelineWaitStates(const MachineInstr &MI) const {
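// processBundle() walks each instruction inside the current bundle, runs the
// hazard checks on it (fixing hazards first when running in hazard recognizer
// mode), and pads EmittedInstrs with null entries for the wait states it
// requires.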
void GCNHazardRecognizer::processBundle() {
  for (; MI != E && MI->isInsideBundle(); ++MI) {
    CurrCycleInstr = &*MI;
    if (IsHazardRecognizerMode) {
      fixHazards(CurrCycleInstr);
    for (unsigned i = 0, e = std::min(WaitStates, MaxLookAhead - 1); i < e; ++i)
      EmittedInstrs.push_front(nullptr);
    EmittedInstrs.push_front(CurrCycleInstr);
  CurrCycleInstr = nullptr;
  assert(IsHazardRecognizerMode);
  if (MI->isInsideBundle())
  IsHazardRecognizerMode = true;
  CurrCycleInstr = nullptr;
    return std::max(WaitStates, checkSMRDHazards(MI));
    WaitStates = std::max(WaitStates, checkNSAtoVMEMHazard(MI));
    WaitStates = std::max(WaitStates, checkFPAtomicToDenormModeHazard(MI));
      WaitStates = std::max(WaitStates, checkVMEMHazards(MI));
      WaitStates = std::max(WaitStates, checkVALUHazards(MI));
      WaitStates = std::max(WaitStates, checkDPPHazards(MI));
      WaitStates = std::max(WaitStates, checkDivFMasHazards(MI));
      WaitStates = std::max(WaitStates, checkRWLaneHazards(MI));
      WaitStates = std::max(WaitStates, checkMAIVALUHazards(MI));
    if (MI->isInlineAsm())
      return std::max(WaitStates, checkInlineAsmHazards(MI));
    return std::max(WaitStates, checkGetRegHazards(MI));
    return std::max(WaitStates, checkSetRegHazards(MI));
    return std::max(WaitStates, checkRFEHazards(MI));
        MI->getOpcode() == AMDGPU::DS_WRITE_ADDTID_B32 ||
        MI->getOpcode() == AMDGPU::DS_READ_ADDTID_B32)) ||
    return std::max(WaitStates, checkReadM0Hazards(MI));
    return std::max(WaitStates, checkMAIHazards(MI));
    return std::max(WaitStates, checkMAILdStHazards(MI));
  EmittedInstrs.push_front(nullptr);
  if (!CurrCycleInstr) {
    EmittedInstrs.push_front(nullptr);
  if (!NumWaitStates) {
    CurrCycleInstr = nullptr;
  EmittedInstrs.push_front(CurrCycleInstr);
    EmittedInstrs.push_front(nullptr);
  CurrCycleInstr = nullptr;
  llvm_unreachable("hazard recognizer does not support bottom-up scheduling.");
template <typename StateT>
    switch (IsHazard(State, *I)) {
    if (I->isInlineAsm() || I->isMetaInstruction())
    UpdateState(State, *I);
    if (!Visited.insert(Pred).second)
    if (hasHazard(State, IsHazard, UpdateState, Pred, Pred->instr_rbegin(),
    if (I->isInlineAsm())
    WaitStates += GetNumWaitStates(*I);
    if (IsExpired(*I, WaitStates))
      return std::numeric_limits<int>::max();
  int MinWaitStates = std::numeric_limits<int>::max();
    if (!Visited.insert(Pred).second)
                                   IsExpired, Visited, GetNumWaitStates);
    MinWaitStates = std::min(MinWaitStates, W);
  return MinWaitStates;
                              std::next(MI->getReverseIterator()),
                              0, IsExpired, Visited);
int GCNHazardRecognizer::getWaitStatesSince(IsHazardFn IsHazard, int Limit) {
  if (IsHazardRecognizerMode) {
      return WaitStates >= Limit;
    return ::getWaitStatesSince(IsHazard, CurrCycleInstr, IsExpiredFn);
    if (MI->isInlineAsm())
    if (WaitStates >= Limit)
  return std::numeric_limits<int>::max();
int GCNHazardRecognizer::getWaitStatesSinceDef(unsigned Reg,
                                               IsHazardFn IsHazardDef,
    return IsHazardDef(MI) && MI.modifiesRegister(Reg, TRI);
int GCNHazardRecognizer::getWaitStatesSinceSetReg(IsHazardFn IsHazard,
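// checkSoftClauseHazards() keeps a memory (SMEM or VMEM) soft clause legal: no
// instruction in the clause may write a register that another instruction in
// the same clause reads. Register units of the clause are accumulated in the
// ClauseDefs/ClauseUses bit vectors, and one wait state is requested to break
// the clause when they intersect.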
int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) {
  bool IsSMRD = TII.isSMRD(*MEM);
  if (ClauseDefs.none())
  return ClauseDefs.anyCommon(ClauseUses) ? 1 : 0;
int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
  int WaitStatesNeeded = 0;
  WaitStatesNeeded = checkSoftClauseHazards(SMRD);
    return WaitStatesNeeded;
  int SmrdSgprWaitStates = 4;
    int WaitStatesNeededForUse =
        SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn,
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
      int WaitStatesNeededForUse =
          SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(),
      WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  return WaitStatesNeeded;
int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) {
  int WaitStatesNeeded = checkSoftClauseHazards(VMEM);
  const int VmemSgprWaitStates = 5;
    int WaitStatesNeededForUse =
        VmemSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn,
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  return WaitStatesNeeded;
  int DppVgprWaitStates = 2;
  int DppExecWaitStates = 5;
  int WaitStatesNeeded = 0;
    return TII->isVALU(MI);
      int WaitStatesNeededForUse =
          DppVgprWaitStates - getWaitStatesSinceDef(
      WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  WaitStatesNeeded = std::max(
      DppExecWaitStates - getWaitStatesSinceDef(AMDGPU::EXEC, IsHazardDefFn,
  return WaitStatesNeeded;
int GCNHazardRecognizer::checkDivFMasHazards(MachineInstr *DivFMas) {
  const int DivFMasWaitStates = 4;
    return TII->isVALU(MI);
  int WaitStatesNeeded = getWaitStatesSinceDef(AMDGPU::VCC, IsHazardDefFn,
  return DivFMasWaitStates - WaitStatesNeeded;
int GCNHazardRecognizer::checkGetRegHazards(MachineInstr *GetRegInstr) {
  unsigned GetRegHWReg = getHWReg(TII, *GetRegInstr);
  const int GetRegWaitStates = 2;
  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, GetRegWaitStates);
  return GetRegWaitStates - WaitStatesNeeded;
int GCNHazardRecognizer::checkSetRegHazards(MachineInstr *SetRegInstr) {
  unsigned HWReg = getHWReg(TII, *SetRegInstr);
  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, SetRegWaitStates);
  return SetRegWaitStates - WaitStatesNeeded;
int GCNHazardRecognizer::createsVALUHazard(const MachineInstr &MI) {
  unsigned Opcode = MI.getOpcode();
    VDataRCID = Desc.operands()[VDataIdx].RegClass;
        TII->getNamedOperand(MI, AMDGPU::OpName::soffset);
        (!SOffset || !SOffset->isReg()))
  if (TII->isMIMG(MI)) {
  if (TII->isFLAT(MI)) {
GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def,
  int WaitStatesNeeded = 0;
  if (!TRI->isVectorRegister(MRI, Def.getReg()))
    return WaitStatesNeeded;
    int DataIdx = createsVALUHazard(MI);
    return DataIdx >= 0 &&
           TRI->regsOverlap(MI.getOperand(DataIdx).getReg(), Reg);
  int WaitStatesNeededForDef =
      VALUWaitStates - getWaitStatesSince(IsHazardFn, VALUWaitStates);
  WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
  return WaitStatesNeeded;
int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) {
  int WaitStatesNeeded = 0;
    const int TransDefWaitstates = 1;
        if (Use.isReg() && TRI->regsOverlap(Def, Use.getReg()))
    int WaitStatesNeededForDef =
        getWaitStatesSince(IsTransDefFn, TransDefWaitstates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
    const int Shift16DefWaitstates = 1;
      if (auto *DstSel = TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel))
          !(TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)
      if (auto *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst)) {
          if (Use.isReg() && TRI->regsOverlap(Def, Use.getReg()))
    int WaitStatesNeededForDef =
        Shift16DefWaitstates -
        getWaitStatesSince(IsShift16BitDefFn, Shift16DefWaitstates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
    const int VALUWriteSGPRVALUReadWaitstates = 2;
    const int VALUWriteEXECRWLane = 4;
    const int VALUWriteVGPRReadlaneRead = 1;
      return MI.modifiesRegister(UseReg, TRI);
      int WaitStatesNeededForDef =
          VALUWriteSGPRVALUReadWaitstates -
          getWaitStatesSince(IsVALUDefSGPRFn,
                             VALUWriteSGPRVALUReadWaitstates);
      WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
    if (VALU->readsRegister(AMDGPU::VCC, TRI)) {
      int WaitStatesNeededForDef =
          VALUWriteSGPRVALUReadWaitstates -
          getWaitStatesSince(IsVALUDefSGPRFn, VALUWriteSGPRVALUReadWaitstates);
      WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
    switch (VALU->getOpcode()) {
    case AMDGPU::V_READLANE_B32:
    case AMDGPU::V_READFIRSTLANE_B32: {
      int WaitStatesNeededForDef =
          VALUWriteVGPRReadlaneRead -
          getWaitStatesSince(IsVALUDefSGPRFn, VALUWriteVGPRReadlaneRead);
      WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
    case AMDGPU::V_WRITELANE_B32: {
      int WaitStatesNeededForDef =
          VALUWriteEXECRWLane -
          getWaitStatesSince(IsVALUDefSGPRFn, VALUWriteEXECRWLane);
      WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
    return WaitStatesNeeded;
    WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Def, MRI));
  return WaitStatesNeeded;
int GCNHazardRecognizer::checkInlineAsmHazards(MachineInstr *IA) {
  int WaitStatesNeeded = 0;
    if (Op.isReg() && Op.isDef()) {
          std::max(WaitStatesNeeded, checkVALUHazardsHelper(Op, MRI));
  return WaitStatesNeeded;
int GCNHazardRecognizer::checkRWLaneHazards(MachineInstr *RWLane) {
      TII->getNamedOperand(*RWLane, AMDGPU::OpName::src1);
  const int RWLaneWaitStates = 4;
  int WaitStatesSince = getWaitStatesSinceDef(LaneSelectReg, IsHazardFn,
  return RWLaneWaitStates - WaitStatesSince;
int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) {
  const int RFEWaitStates = 1;
  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, RFEWaitStates);
  return RFEWaitStates - WaitStatesNeeded;
  const int ReadM0WaitStates = 1;
  return ReadM0WaitStates -
         getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn, ReadM0WaitStates);
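// fixHazards() is the entry point used in hazard recognizer mode: it applies
// every workaround that rewrites or augments the instruction stream (inserting
// waitcnt/depctr or nop instructions) rather than merely reporting wait states.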
  fixVMEMtoScalarWriteHazards(MI);
  fixVcmpxPermlaneHazards(MI);
  fixSMEMtoVectorWriteHazards(MI);
  fixVcmpxExecWARHazard(MI);
  fixLdsBranchVmemWARHazard(MI);
    fixLdsDirectVALUHazard(MI);
    fixLdsDirectVMEMHazard(MI);
    fixVALUPartialForwardingHazard(MI);
    fixVALUTransUseHazard(MI);
    fixShift64HighRegBug(MI);
    fixVALUMaskWriteHazard(MI);
bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) {
    return (TII->isVOPC(MI) ||
            ((TII->isVOP3(MI) || TII->isSDWA(MI)) && MI.isCompare())) &&
           MI.modifiesRegister(AMDGPU::EXEC, TRI);
    unsigned Opc = MI.getOpcode();
           Opc != AMDGPU::V_NOP_e64 && Opc != AMDGPU::V_NOP_sdwa;
      std::numeric_limits<int>::max())
  auto *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0);
  bool IsUndef = Src0->isUndef();
          TII->get(AMDGPU::V_MOV_B32_e32))
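// fixVMEMtoScalarWriteHazards(): on targets with this hazard, a VMEM/FLAT read
// still in flight must not be followed by an SALU/SMEM write to one of its
// source registers; the fix inserts an s_waitcnt_depctr that waits for
// vm_vsrc(0) before the scalar write.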
bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) {
  if (MI->getNumDefs() == 0)
          I.findRegisterUseOperand(Def.getReg(), false, TRI);
           (MI.getOpcode() == AMDGPU::S_WAITCNT &&
            !MI.getOperand(0).getImm()) ||
           (MI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
      std::numeric_limits<int>::max())
          TII->get(AMDGPU::S_WAITCNT_DEPCTR))
bool GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  case AMDGPU::V_READLANE_B32:
  case AMDGPU::V_READFIRSTLANE_B32:
    SDSTName = AMDGPU::OpName::vdst;
    SDSTName = AMDGPU::OpName::sdst;
    for (const auto &MO : MI->implicit_operands()) {
      if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegBaseClass(MO.getReg()))) {
    if (TII->isSALU(MI)) {
      switch (MI.getOpcode()) {
      case AMDGPU::S_SETVSKIP:
      case AMDGPU::S_VERSION:
      case AMDGPU::S_WAITCNT_VSCNT:
      case AMDGPU::S_WAITCNT_VMCNT:
      case AMDGPU::S_WAITCNT_EXPCNT:
      case AMDGPU::S_WAITCNT_LGKMCNT:
        return (MI.getOperand(1).getImm() == 0) &&
               (MI.getOperand(0).getReg() == AMDGPU::SGPR_NULL);
      case AMDGPU::S_WAITCNT: {
        const int64_t Imm = MI.getOperand(0).getImm();
        return (Decoded.LgkmCnt == 0);
    if (TII->isSOPP(MI))
      std::numeric_limits<int>::max())
          TII->get(AMDGPU::S_MOV_B32), AMDGPU::SGPR_NULL)
bool GCNHazardRecognizer::fixVcmpxExecWARHazard(MachineInstr *MI) {
  if (!MI->modifiesRegister(AMDGPU::EXEC, TRI))
    return I.readsRegister(AMDGPU::EXEC, TRI);
    if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst))
    for (auto MO : MI.implicit_operands())
      if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegBaseClass(MO.getReg())))
    if (MI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
      std::numeric_limits<int>::max())
          TII->get(AMDGPU::S_WAITCNT_DEPCTR))
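// The LDS-branch-VMEM WAR workaround is only worth running when the function
// mixes LDS and VMEM accesses at all; shouldRunLdsBranchVmemWARHazardFixup()
// scans the function once and caches that decision.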
  if (!ST.hasLdsBranchVmemWARHazard())
  bool HasLds = false;
  bool HasVmem = false;
  for (auto &MBB : MF) {
    for (auto &MI : MBB) {
      if (HasLds && HasVmem)
    return I.getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
           I.getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
           !I.getOperand(1).getImm();
bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) {
  if (!RunLdsBranchVmemWARHazardFixup)
  auto InstType = IsHazardInst(*MI);
    auto InstType2 = IsHazardInst(I);
    return InstType2 && InstType != InstType2;
    auto InstType2 = IsHazardInst(I);
    if (InstType == InstType2)
           std::numeric_limits<int>::max();
      std::numeric_limits<int>::max())
          TII->get(AMDGPU::S_WAITCNT_VSCNT))
bool GCNHazardRecognizer::fixLdsDirectVALUHazard(MachineInstr *MI) {
  const int NoHazardWaitStates = 15;
  bool VisitedTrans = false;
    return I.readsRegister(VDSTReg, &TRI) || I.modifiesRegister(VDSTReg, &TRI);
    if (WaitStates >= NoHazardWaitStates)
  auto Count = ::getWaitStatesSince(IsHazardFn, MI->getParent(),
                                    std::next(MI->getReverseIterator()), 0,
      TII.getNamedOperand(*MI, AMDGPU::OpName::waitvdst);
  WaitVdstOp->setImm(std::min(Count, NoHazardWaitStates));
bool GCNHazardRecognizer::fixLdsDirectVMEMHazard(MachineInstr *MI) {
    return I.readsRegister(VDSTReg, &TRI) || I.modifiesRegister(VDSTReg, &TRI);
           (I.getOpcode() == AMDGPU::S_WAITCNT && !I.getOperand(0).getImm()) ||
           (I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
      std::numeric_limits<int>::max())
          TII.get(AMDGPU::S_WAITCNT_DEPCTR))
bool GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) {
  if (SrcVGPRs.size() <= 1)
  const int Intv1plus2MaxVALUs = 2;
  const int Intv3MaxVALUs = 4;
  const int IntvMaxVALUs = 6;
  const int NoHazardVALUWaitStates = IntvMaxVALUs + 2;
    int ExecPos = std::numeric_limits<int>::max();
    if (State.VALUs > NoHazardVALUWaitStates)
        (I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
    bool Changed = false;
      if (!State.DefPos.count(Src) && I.modifiesRegister(Src, &TRI)) {
        State.DefPos[Src] = State.VALUs;
    if (State.ExecPos == std::numeric_limits<int>::max()) {
      if (!State.DefPos.empty() && I.modifiesRegister(AMDGPU::EXEC, &TRI)) {
        State.ExecPos = State.VALUs;
    if (State.VALUs > Intv3MaxVALUs && State.DefPos.empty())
    if (State.ExecPos == std::numeric_limits<int>::max())
    int PreExecPos = std::numeric_limits<int>::max();
    int PostExecPos = std::numeric_limits<int>::max();
    for (auto Entry : State.DefPos) {
      int DefVALUs = Entry.second;
      if (DefVALUs != std::numeric_limits<int>::max()) {
        if (DefVALUs >= State.ExecPos)
          PreExecPos = std::min(PreExecPos, DefVALUs);
        else if (DefVALUs < State.ExecPos)
          PostExecPos = std::min(PostExecPos, DefVALUs);
    if (PostExecPos == std::numeric_limits<int>::max())
    int Intv3VALUs = PostExecPos;
    if (Intv3VALUs > Intv3MaxVALUs)
    int Intv2VALUs = (State.ExecPos - PostExecPos) - 1;
    if (Intv2VALUs > Intv1plus2MaxVALUs)
    if (PreExecPos == std::numeric_limits<int>::max())
    int Intv1VALUs = PreExecPos - State.ExecPos;
    if (Intv1VALUs > Intv1plus2MaxVALUs)
    if (Intv1VALUs + Intv2VALUs > Intv1plus2MaxVALUs)
  auto UpdateStateFn = [](StateType &State, const MachineInstr &MI) {
  if (!hasHazard<StateType>(State, IsHazardFn, UpdateStateFn, MI->getParent(),
                            std::next(MI->getReverseIterator()), Visited))
          TII.get(AMDGPU::S_WAITCNT_DEPCTR))
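// fixVALUTransUseHazard(): a VALU instruction that consumes the result of a
// recent transcendental (TRANS) instruction too early must be preceded by an
// s_waitcnt_depctr va_vdst(0); a state-carrying backwards walk counts the
// intervening VALU and TRANS instructions to decide whether the wait is
// actually needed.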
bool GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) {
  const int IntvMaxVALUs = 5;
  const int IntvMaxTRANS = 1;
    if (State.VALUs > IntvMaxVALUs || State.TRANS > IntvMaxTRANS)
        (I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
         I.getOperand(0).getImm() == 0x0fff))
      if (I.modifiesRegister(Src, &TRI)) {
  auto UpdateStateFn = [](StateType &State, const MachineInstr &MI) {
  if (!hasHazard<StateType>(State, IsHazardFn, UpdateStateFn, MI->getParent(),
                            std::next(MI->getReverseIterator()), Visited))
          TII.get(AMDGPU::S_WAITCNT_DEPCTR))
      TII->getNamedOperand(*MI, AMDGPU::OpName::src0)->getReg();
      TII->getNamedOperand(*MI, AMDGPU::OpName::src1)->getReg();
        TII->getNamedOperand(I, AMDGPU::OpName::vdst)->getReg();
    if (TRI->regsOverlap(PrevDstReg, CurSrc0Reg) ||
        TRI->regsOverlap(PrevDstReg, CurSrc1Reg)) {
        TII->getNamedOperand(*MI, AMDGPU::OpName::src2);
    if (CurSrc2Reg != AMDGPU::NoRegister &&
        TRI->regsOverlap(PrevDstReg, CurSrc2Reg)) {
          TII->getNamedOperand(*MI, AMDGPU::OpName::src2_modifiers);
      const bool NoSrc2Mods =
      return !(NoSrc2Mods && (TII->pseudoToMCOpcode(I.getOpcode()) ==
                              TII->pseudoToMCOpcode(MI->getOpcode())));
      std::numeric_limits<int>::max())
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::V_NOP_e32));
bool GCNHazardRecognizer::fixShift64HighRegBug(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_ASHRREV_I64_e64:
  if (!TRI.isVGPR(MRI, AmtReg) || ((AmtReg - AMDGPU::VGPR0) & 7) != 7)
  if (AmtReg != AMDGPU::VGPR255 && MRI.isPhysRegUsed(AmtReg + 1))
  bool OverlappedSrc = Src1->isReg() && TRI.regsOverlap(Src1->getReg(), AmtReg);
  bool OverlappedDst = MI->modifiesRegister(AmtReg, &TRI);
  bool Overlapped = OverlappedSrc || OverlappedDst;
  assert(!OverlappedDst || !OverlappedSrc ||
         Src1->getReg() == MI->getOperand(0).getReg());
  static_assert(AMDGPU::VGPR0 + 1 == AMDGPU::VGPR1);
  for (MCRegister Reg : Overlapped ? AMDGPU::VReg_64_Align2RegClass
                                   : AMDGPU::VGPR_32RegClass) {
    if (!MI->modifiesRegister(Reg, &TRI) && !MI->readsRegister(Reg, &TRI)) {
    NewAmtLo = TRI.getSubReg(NewReg, AMDGPU::sub0);
  MI->getOperand(0).setReg(NewReg);
  if (OverlappedSrc) {
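// checkNSAtoVMEMHazard(): on GFX10 parts with the NSA-to-VMEM bug, a buffer
// (MUBUF/MTBUF) access whose immediate offset has certain low bits set must
// not immediately follow an NSA-encoded MIMG instruction of 16 bytes or more,
// so one wait state is requested in that case.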
  int NSAtoVMEMWaitStates = 1;
  const auto *Offset = TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
    return Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA &&
           TII->getInstSizeInBytes(I) >= 16;
  return NSAtoVMEMWaitStates - getWaitStatesSince(IsHazardFn, 1);
int GCNHazardRecognizer::checkFPAtomicToDenormModeHazard(MachineInstr *MI) {
  int FPAtomicToDenormModeWaitStates = 3;
  if (MI->getOpcode() != AMDGPU::S_DENORM_MODE)
    switch (MI.getOpcode()) {
    case AMDGPU::S_WAITCNT:
    case AMDGPU::S_WAITCNT_VSCNT:
    case AMDGPU::S_WAITCNT_VMCNT:
    case AMDGPU::S_WAITCNT_EXPCNT:
    case AMDGPU::S_WAITCNT_LGKMCNT:
    case AMDGPU::S_WAIT_IDLE:
  return FPAtomicToDenormModeWaitStates -
  int NeighborMFMALatency = 0;
  auto IsNeighboringMFMA = [&NeighborMFMALatency,
    NeighborMFMALatency = this->getMFMAPipelineWaitStates(MI);
  const int MaxMFMAPipelineWaitStates = 16;
  int WaitStatesSinceNeighborMFMA =
      getWaitStatesSince(IsNeighboringMFMA, MaxMFMAPipelineWaitStates);
  int NeighborMFMAPaddingNeeded =
      (NeighborMFMALatency * MFMAPaddingRatio / 100) -
      WaitStatesSinceNeighborMFMA;
  return std::max(0, NeighborMFMAPaddingNeeded);
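// checkMAIHazards908() handles the gfx908 MAI (MFMA) hazards: wait states are
// required between an MFMA writing an AGPR and an overlapping read of that
// AGPR as srcA/srcB/srcC of another MFMA, and around v_accvgpr_read/write,
// with the exact count keyed off the producing MFMA's latency (2, 8 or 16).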
  int WaitStatesNeeded = 0;
  unsigned Opc = MI->getOpcode();
  if (Opc != AMDGPU::V_ACCVGPR_READ_B32_e64) {
    const int LegacyVALUWritesVGPRWaitStates = 2;
    const int VALUWritesExecWaitStates = 4;
    const int MaxWaitStates = 4;
    int WaitStatesNeededForUse = VALUWritesExecWaitStates -
      getWaitStatesSinceDef(AMDGPU::EXEC, IsVALUFn, MaxWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
    if (WaitStatesNeeded < MaxWaitStates) {
        const int MaxWaitStates = 2;
        int WaitStatesNeededForUse = LegacyVALUWritesVGPRWaitStates -
          getWaitStatesSinceDef(Use.getReg(), IsVALUFn, MaxWaitStates);
        WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
        if (WaitStatesNeeded == MaxWaitStates)
    if (Op.isDef() && Opc != AMDGPU::V_ACCVGPR_WRITE_B32_e64)
    const int MFMAWritesAGPROverlappedSrcABWaitStates = 4;
    const int MFMAWritesAGPROverlappedSrcCWaitStates = 2;
    const int MFMA4x4WritesAGPRAccVgprReadWaitStates = 4;
    const int MFMA16x16WritesAGPRAccVgprReadWaitStates = 10;
    const int MFMA32x32WritesAGPRAccVgprReadWaitStates = 18;
    const int MFMA4x4WritesAGPRAccVgprWriteWaitStates = 1;
    const int MFMA16x16WritesAGPRAccVgprWriteWaitStates = 7;
    const int MFMA32x32WritesAGPRAccVgprWriteWaitStates = 15;
    const int MaxWaitStates = 18;
    unsigned HazardDefLatency = 0;
    auto IsOverlappedMFMAFn = [Reg, &HazardDefLatency,
          std::max(HazardDefLatency, TSchedModel.computeInstrLatency(&MI));
      return TRI.regsOverlap(DstReg, Reg);
    int WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsOverlappedMFMAFn,
    int NeedWaitStates = MFMAWritesAGPROverlappedSrcABWaitStates;
    int OpNo = Op.getOperandNo();
    if (OpNo == SrcCIdx) {
      NeedWaitStates = MFMAWritesAGPROverlappedSrcCWaitStates;
    } else if (Opc == AMDGPU::V_ACCVGPR_READ_B32_e64) {
      switch (HazardDefLatency) {
      case 2:  NeedWaitStates = MFMA4x4WritesAGPRAccVgprReadWaitStates;
      case 8:  NeedWaitStates = MFMA16x16WritesAGPRAccVgprReadWaitStates;
      case 16: [[fallthrough]];
      default: NeedWaitStates = MFMA32x32WritesAGPRAccVgprReadWaitStates;
    } else if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64) {
      switch (HazardDefLatency) {
      case 2:  NeedWaitStates = MFMA4x4WritesAGPRAccVgprWriteWaitStates;
      case 8:  NeedWaitStates = MFMA16x16WritesAGPRAccVgprWriteWaitStates;
      case 16: [[fallthrough]];
      default: NeedWaitStates = MFMA32x32WritesAGPRAccVgprWriteWaitStates;
    int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef;
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
    if (WaitStatesNeeded == MaxWaitStates)
      return WaitStatesNeeded;
      if (MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64)
      return TRI.regsOverlap(Reg, DstReg);
    const int AccVGPRWriteMFMAReadSrcCWaitStates = 1;
    const int AccVGPRWriteMFMAReadSrcABWaitStates = 3;
    const int AccVGPRWriteAccVgprReadWaitStates = 3;
    NeedWaitStates = AccVGPRWriteMFMAReadSrcABWaitStates;
    if (OpNo == SrcCIdx)
      NeedWaitStates = AccVGPRWriteMFMAReadSrcCWaitStates;
    else if (Opc == AMDGPU::V_ACCVGPR_READ_B32_e64)
      NeedWaitStates = AccVGPRWriteAccVgprReadWaitStates;
    WaitStatesNeededForUse = NeedWaitStates -
      getWaitStatesSinceDef(Reg, IsAccVgprWriteFn, MaxWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
    if (WaitStatesNeeded == MaxWaitStates)
      return WaitStatesNeeded;
  if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64) {
    const int MFMA4x4ReadSrcCAccVgprWriteWaitStates = 0;
    const int MFMA16x16ReadSrcCAccVgprWriteWaitStates = 5;
    const int MFMA32x32ReadSrcCAccVgprWriteWaitStates = 13;
    const int MaxWaitStates = 13;
    Register DstReg = MI->getOperand(0).getReg();
    unsigned HazardDefLatency = 0;
    auto IsSrcCMFMAFn = [DstReg, &HazardDefLatency,
          std::max(HazardDefLatency, TSchedModel.computeInstrLatency(&MI));
      return TRI.regsOverlap(Reg, DstReg);
    int WaitStatesSince = getWaitStatesSince(IsSrcCMFMAFn, MaxWaitStates);
    switch (HazardDefLatency) {
    case 2:  NeedWaitStates = MFMA4x4ReadSrcCAccVgprWriteWaitStates;
    case 8:  NeedWaitStates = MFMA16x16ReadSrcCAccVgprWriteWaitStates;
    case 16: [[fallthrough]];
    default: NeedWaitStates = MFMA32x32ReadSrcCAccVgprWriteWaitStates;
    int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSince;
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  WaitStatesNeeded = std::max(WaitStatesNeeded, checkMFMAPadding(MI));
  return WaitStatesNeeded;
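// checkMAIHazards90A() does the same job for gfx90a and gfx940: the wait-state
// tables below distinguish DGEMM (double-precision) MFMAs, gfx940 XDL and
// non-XDL MFMAs, and key off the number of passes (2, 4, 8 or 16) reported by
// the scheduling model for the producing instruction.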
  int WaitStatesNeeded = 0;
  unsigned Opc = MI->getOpcode();
    return WaitStatesNeeded;
  const int VALUWritesExecWaitStates = 4;
  int WaitStatesNeededForUse = VALUWritesExecWaitStates -
    getWaitStatesSinceDef(AMDGPU::EXEC, IsLegacyVALUFn,
                          VALUWritesExecWaitStates);
  WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  const int LegacyVALUNotDotWritesVGPRWaitStates = 2;
  const int SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates = 2;
  const int GFX940_XDL2PassWritesVGPROverlappedSMFMASrcCWaitStates = 3;
  const int GFX940_XDL4PassWritesVGPROverlappedSMFMASrcCWaitStates = 5;
  const int GFX940_SMFMA4PassWritesVGPROverlappedSMFMASrcCWaitStates = 4;
  const int GFX940_XDL8PassWritesVGPROverlappedSMFMASrcCWaitStates = 9;
  const int GFX940_SMFMA8PassWritesVGPROverlappedSMFMASrcCWaitStates = 8;
  const int GFX940_XDL16PassWritesVGPROverlappedSMFMASrcCWaitStates = 17;
  const int GFX940_SMFMA16PassWritesVGPROverlappedSMFMASrcCWaitStates = 16;
  const int SMFMA16x16WritesVGPROverlappedSMFMASrcCWaitStates = 8;
  const int SMFMA32x32WritesVGPROverlappedSMFMASrcCWaitStates = 16;
  const int SMFMA4x4WritesVGPROverlappedDMFMASrcCWaitStates = 3;
  const int SMFMA16x16WritesVGPROverlappedDMFMASrcCWaitStates = 9;
  const int SMFMA32x32WritesVGPROverlappedDMFMASrcCWaitStates = 17;
  const int DMFMA16x16WritesVGPROverlappedSrcCWaitStates = 9;
  const int DMFMA4x4WritesVGPROverlappedSrcCWaitStates = 4;
  const int SMFMA4x4WritesVGPROverlappedSrcABWaitStates = 5;
  const int SMFMA16x16WritesVGPROverlappedSrcABWaitStates = 11;
  const int SMFMA32x32WritesVGPROverlappedSrcABWaitStates = 19;
  const int GFX940_SMFMA2PassWritesVGPROverlappedSrcABWaitStates = 4;
  const int GFX940_SMFMA4PassWritesVGPROverlappedSrcABWaitStates = 6;
  const int GFX940_SMFMA8PassWritesVGPROverlappedSrcABWaitStates = 10;
  const int GFX940_SMFMA16PassWritesVGPROverlappedSrcABWaitStates = 18;
  const int GFX940_XDL2PassWritesVGPROverlappedSrcABWaitStates = 5;
  const int GFX940_XDL4PassWritesVGPROverlappedSrcABWaitStates = 7;
  const int GFX940_XDL8PassWritesVGPROverlappedSrcABWaitStates = 11;
  const int GFX940_XDL16PassWritesVGPROverlappedSrcABWaitStates = 19;
  const int DMFMA4x4WritesVGPROverlappedMFMASrcABWaitStates = 6;
  const int DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates = 11;
  const int DMFMA4x4WritesVGPRFullSrcCWaitStates = 4;
  const int GFX940_SMFMA4x4WritesVGPRFullSrcCWaitStates = 2;
  const int MaxWaitStates = 19;
    auto IsOverlappedMFMAFn = [Reg, &FullReg, &MI1,
      FullReg = (DstReg == Reg);
      return TRI.regsOverlap(DstReg, Reg);
    WaitStatesNeededForUse = LegacyVALUNotDotWritesVGPRWaitStates -
      getWaitStatesSinceDef(Reg, IsLegacyVALUNotDotFn, MaxWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
        getWaitStatesSinceDef(Reg, IsOverlappedMFMAFn, MaxWaitStates);
    if (NumWaitStates == std::numeric_limits<int>::max())
    int NeedWaitStates = 0;
    if (OpNo == SrcCIdx) {
    } else if (FullReg) {
      if ((Opc == AMDGPU::V_MFMA_F64_4X4X4F64_e64 ||
           Opc == AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64) &&
          (Opc1 == AMDGPU::V_MFMA_F64_4X4X4F64_e64 ||
           Opc1 == AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64))
        NeedWaitStates = DMFMA4x4WritesVGPRFullSrcCWaitStates;
               TSchedModel.computeInstrLatency(MI1) == 2)
        NeedWaitStates = GFX940_SMFMA4x4WritesVGPRFullSrcCWaitStates;
      case AMDGPU::V_MFMA_F64_16X16X4F64_e64:
      case AMDGPU::V_MFMA_F64_16X16X4F64_vgprcd_e64:
      case AMDGPU::V_MFMA_F64_16X16X4F64_mac_e64:
      case AMDGPU::V_MFMA_F64_16X16X4F64_mac_vgprcd_e64:
        NeedWaitStates = DMFMA16x16WritesVGPROverlappedSrcCWaitStates;
      case AMDGPU::V_MFMA_F64_4X4X4F64_e64:
      case AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64:
        NeedWaitStates = DMFMA4x4WritesVGPROverlappedSrcCWaitStates;
        switch (TSchedModel.computeInstrLatency(MI1)) {
                ? GFX940_XDL2PassWritesVGPROverlappedSMFMASrcCWaitStates
                : SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates
                ? SMFMA4x4WritesVGPROverlappedDMFMASrcCWaitStates
                : SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates;
          NeedWaitStates = isXDL(ST, *MI1)
            ? GFX940_XDL4PassWritesVGPROverlappedSMFMASrcCWaitStates
            : GFX940_SMFMA4PassWritesVGPROverlappedSMFMASrcCWaitStates;
                ? GFX940_XDL8PassWritesVGPROverlappedSMFMASrcCWaitStates
                : GFX940_SMFMA8PassWritesVGPROverlappedSMFMASrcCWaitStates
                ? SMFMA16x16WritesVGPROverlappedDMFMASrcCWaitStates
                : SMFMA16x16WritesVGPROverlappedSMFMASrcCWaitStates;
        case 16: [[fallthrough]];
                ? GFX940_XDL16PassWritesVGPROverlappedSMFMASrcCWaitStates
                : GFX940_SMFMA16PassWritesVGPROverlappedSMFMASrcCWaitStates
                ? SMFMA32x32WritesVGPROverlappedDMFMASrcCWaitStates
                : SMFMA32x32WritesVGPROverlappedSMFMASrcCWaitStates;
      case AMDGPU::V_MFMA_F64_16X16X4F64_e64:
      case AMDGPU::V_MFMA_F64_16X16X4F64_vgprcd_e64:
      case AMDGPU::V_MFMA_F64_16X16X4F64_mac_e64:
      case AMDGPU::V_MFMA_F64_16X16X4F64_mac_vgprcd_e64:
        NeedWaitStates = DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates;
      case AMDGPU::V_MFMA_F64_4X4X4F64_e64:
      case AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64:
        NeedWaitStates = DMFMA4x4WritesVGPROverlappedMFMASrcABWaitStates;
        switch (TSchedModel.computeInstrLatency(MI1)) {
              ? GFX940_XDL2PassWritesVGPROverlappedSrcABWaitStates
              : GFX940_SMFMA2PassWritesVGPROverlappedSrcABWaitStates
            : SMFMA4x4WritesVGPROverlappedSrcABWaitStates;
          NeedWaitStates = isXDL(ST, *MI1)
            ? GFX940_XDL4PassWritesVGPROverlappedSrcABWaitStates
            : GFX940_SMFMA4PassWritesVGPROverlappedSrcABWaitStates;
              ? GFX940_XDL8PassWritesVGPROverlappedSrcABWaitStates
              : GFX940_SMFMA8PassWritesVGPROverlappedSrcABWaitStates
            : SMFMA16x16WritesVGPROverlappedSrcABWaitStates;
        case 16: [[fallthrough]];
              ? GFX940_XDL16PassWritesVGPROverlappedSrcABWaitStates
              : GFX940_SMFMA16PassWritesVGPROverlappedSrcABWaitStates
            : SMFMA32x32WritesVGPROverlappedSrcABWaitStates;
    if (WaitStatesNeeded >= NeedWaitStates)
    WaitStatesNeededForUse = NeedWaitStates - NumWaitStates;
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
    if (WaitStatesNeeded == MaxWaitStates)
  return WaitStatesNeeded;
  int WaitStatesNeeded = 0;
    return MI.getOpcode() == AMDGPU::V_ACCVGPR_READ_B32_e64;
    const int AccVgprReadLdStWaitStates = 2;
    const int VALUWriteAccVgprRdWrLdStDepVALUWaitStates = 1;
    const int MaxWaitStates = 2;
    int WaitStatesNeededForUse = AccVgprReadLdStWaitStates -
      getWaitStatesSinceDef(Reg, IsAccVgprReadFn, MaxWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
    if (WaitStatesNeeded == MaxWaitStates)
      return WaitStatesNeeded;
      if (MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64 &&
          MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64)
      return getWaitStatesSinceDef(Reg, IsVALUFn, 2) <
             std::numeric_limits<int>::max();
    WaitStatesNeededForUse = VALUWriteAccVgprRdWrLdStDepVALUWaitStates -
      getWaitStatesSince(IsVALUAccVgprRdWrCheckFn, MaxWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  return WaitStatesNeeded;
  int WaitStatesNeeded = 0;
      !TRI.regsOverlap(MI.getOperand(0).getReg(), Reg))
      !TRI.regsOverlap(MI.getOperand(0).getReg(), Reg))
  bool DGEMMAfterVALUWrite = false;
  auto IsDGEMMHazard = [&DGEMMAfterVALUWrite, this](const MachineInstr &MI) {
      DGEMMAfterVALUWrite = true;
    if (!TII.isVALU(MI) || !DGEMMAfterVALUWrite)
                                          AMDGPU::OpName::src2);
  if (IsMemOrExport || IsVALU) {
    const int SMFMA4x4WriteVgprVALUMemExpReadWaitStates = 5;
    const int SMFMA16x16WriteVgprVALUMemExpReadWaitStates = 11;
    const int SMFMA32x32WriteVgprVALUMemExpReadWaitStates = 19;
    const int GFX940_SMFMA2PassWriteVgprVALUMemExpReadWaitStates = 4;
    const int GFX940_SMFMA4PassWriteVgprVALUMemExpReadWaitStates = 6;
    const int GFX940_SMFMA8PassWriteVgprVALUMemExpReadWaitStates = 10;
    const int GFX940_SMFMA16PassWriteVgprVALUMemExpReadWaitStates = 18;
    const int GFX940_XDL2PassWriteVgprVALUMemExpReadWaitStates = 5;
    const int GFX940_XDL4PassWriteVgprVALUMemExpReadWaitStates = 7;
    const int GFX940_XDL8PassWriteVgprVALUMemExpReadWaitStates = 11;
    const int GFX940_XDL16PassWriteVgprVALUMemExpReadWaitStates = 19;
    const int DMFMA4x4WriteVgprMemExpReadWaitStates = 9;
    const int DMFMA16x16WriteVgprMemExpReadWaitStates = 18;
    const int DMFMA4x4WriteVgprVALUReadWaitStates = 6;
    const int DMFMA16x16WriteVgprVALUReadWaitStates = 11;
    const int DotWriteSameDotReadSrcAB = 3;
    const int DotWriteDifferentVALURead = 3;
    const int DMFMABetweenVALUWriteVMEMRead = 2;
    const int MaxWaitStates = 19;
      int WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsDotWriteFn,
        int NeedWaitStates = 0;
        if (DOT->getOpcode() == MI->getOpcode()) {
          if (&Use - &MI->getOperand(0) != SrcCIdx)
            NeedWaitStates = DotWriteSameDotReadSrcAB;
          NeedWaitStates = DotWriteDifferentVALURead;
        int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef;
        WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
        DGEMMAfterVALUWrite = false;
        if (TRI.isVectorRegister(MRI, Reg)) {
          int WaitStatesNeededForUse =
              DMFMABetweenVALUWriteVMEMRead -
              getWaitStatesSinceDef(Reg, IsDGEMMHazard,
                                    DMFMABetweenVALUWriteVMEMRead);
          WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
      WaitStatesSinceDef =
          getWaitStatesSinceDef(Reg, IsMFMAWriteFn, MaxWaitStates);
      unsigned HazardDefLatency = TSchedModel.computeInstrLatency(MFMA);
      int NeedWaitStates = MaxWaitStates;
      switch (HazardDefLatency) {
            ? GFX940_XDL2PassWriteVgprVALUMemExpReadWaitStates
            : GFX940_SMFMA2PassWriteVgprVALUMemExpReadWaitStates
          : SMFMA4x4WriteVgprVALUMemExpReadWaitStates;
          ? IsMemOrExport ? DMFMA4x4WriteVgprMemExpReadWaitStates
                          : DMFMA4x4WriteVgprVALUReadWaitStates
            ? GFX940_XDL4PassWriteVgprVALUMemExpReadWaitStates
            : GFX940_SMFMA4PassWriteVgprVALUMemExpReadWaitStates;
            ? GFX940_XDL8PassWriteVgprVALUMemExpReadWaitStates
            : GFX940_SMFMA8PassWriteVgprVALUMemExpReadWaitStates
          : SMFMA16x16WriteVgprVALUMemExpReadWaitStates;
      case 16: [[fallthrough]];
          ? IsMemOrExport ? DMFMA16x16WriteVgprMemExpReadWaitStates
                          : DMFMA16x16WriteVgprVALUReadWaitStates
            ? GFX940_XDL16PassWriteVgprVALUMemExpReadWaitStates
            : GFX940_SMFMA16PassWriteVgprVALUMemExpReadWaitStates
          : SMFMA32x32WriteVgprVALUMemExpReadWaitStates;
      int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef;
      WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
      if (WaitStatesNeeded == MaxWaitStates)
  unsigned Opc = MI->getOpcode();
  const int DMFMAToFMA64WaitStates = 2;
  if ((Opc == AMDGPU::V_FMA_F64_e64 ||
       Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64 ||
       Opc == AMDGPU::V_FMAC_F64_dpp) &&
      WaitStatesNeeded < DMFMAToFMA64WaitStates) {
    int WaitStatesNeededForUse = DMFMAToFMA64WaitStates -
      getWaitStatesSince(IsDGEMMFn, DMFMAToFMA64WaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  if (!IsVALU && !IsMemOrExport)
    return WaitStatesNeeded;
  const int SMFMA4x4WriteVgprVALUWawWaitStates = 5;
  const int SMFMA16x16WriteVgprVALUWawWaitStates = 11;
  const int SMFMA32x32WriteVgprVALUWawWaitStates = 19;
  const int GFX940_SMFMA2PassWriteVgprVALUWawWaitStates = 4;
  const int GFX940_SMFMA4PassWriteVgprVALUWawWaitStates = 6;
  const int GFX940_SMFMA8PassWriteVgprVALUWawWaitStates = 10;
  const int GFX940_SMFMA16PassWriteVgprVALUWawWaitStates = 18;
  const int GFX940_XDL2PassWriteVgprVALUWawWaitStates = 5;
  const int GFX940_XDL4PassWriteVgprVALUWawWaitStates = 7;
  const int GFX940_XDL8PassWriteVgprVALUWawWaitStates = 11;
  const int GFX940_XDL16PassWriteVgprVALUWawWaitStates = 19;
  const int SMFMA4x4ReadVgprVALUWarWaitStates = 1;
  const int GFX940_XDL4PassReadVgprVALUWarWaitStates = 3;
  const int SMFMA16x16ReadVgprVALUWarWaitStates = 7;
  const int SMFMA32x32ReadVgprVALUWarWaitStates = 15;
  const int DMFMA4x4WriteVgprVALUWriteWaitStates = 6;
  const int DMFMA16x16WriteVgprVALUWriteWaitStates = 11;
  const int DotWriteDifferentVALUWrite = 3;
  const int MaxWaitStates = 19;
  const int MaxWarWaitStates = 15;
    int WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsDotWriteFn,
    if (DOT && DOT->getOpcode() != MI->getOpcode())
      WaitStatesNeeded = std::max(WaitStatesNeeded, DotWriteDifferentVALUWrite -
                                                    WaitStatesSinceDef);
    WaitStatesSinceDef =
        getWaitStatesSinceDef(Reg, IsMFMAWriteFn, MaxWaitStates);
      int NeedWaitStates = MaxWaitStates;
      switch (TSchedModel.computeInstrLatency(MFMA)) {
            ? GFX940_XDL2PassWriteVgprVALUWawWaitStates
            : GFX940_SMFMA2PassWriteVgprVALUWawWaitStates
          : SMFMA4x4WriteVgprVALUWawWaitStates;
        NeedWaitStates = isDGEMM(MFMA->getOpcode())
          ? DMFMA4x4WriteVgprVALUWriteWaitStates
            ? GFX940_XDL4PassWriteVgprVALUWawWaitStates
            : GFX940_SMFMA4PassWriteVgprVALUWawWaitStates;
            ? GFX940_XDL8PassWriteVgprVALUWawWaitStates
            : GFX940_SMFMA8PassWriteVgprVALUWawWaitStates
          : SMFMA16x16WriteVgprVALUWawWaitStates;
      case 16: [[fallthrough]];
        NeedWaitStates = isDGEMM(MFMA->getOpcode())
          ? DMFMA16x16WriteVgprVALUWriteWaitStates
            ? GFX940_XDL16PassWriteVgprVALUWawWaitStates
            : GFX940_SMFMA16PassWriteVgprVALUWawWaitStates
          : SMFMA32x32WriteVgprVALUWawWaitStates;
      int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef;
      WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
      if (WaitStatesNeeded == MaxWaitStates)
          !MI.readsRegister(Reg, &TRI))
          TII.getNamedOperand(MI, AMDGPU::OpName::src2);
    int WaitStatesSinceUse = getWaitStatesSince(IsSMFMAReadAsCFn,
      unsigned HazardDefLatency = TSchedModel.computeInstrLatency(MFMA);
      int NeedWaitStates = MaxWaitStates;
      switch (HazardDefLatency) {
      case 2:  NeedWaitStates = SMFMA4x4ReadVgprVALUWarWaitStates;
        NeedWaitStates = GFX940_XDL4PassReadVgprVALUWarWaitStates;
      case 8:  NeedWaitStates = SMFMA16x16ReadVgprVALUWarWaitStates;
      case 16: [[fallthrough]];
      default: NeedWaitStates = SMFMA32x32ReadVgprVALUWarWaitStates;
      int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceUse;
      WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  return WaitStatesNeeded;
    return MAI != nullptr;
  if (IsMFMAFn(*MI)) {
    int W = getWaitStatesSince(IsMFMAFn, 16);
    return W < (int)TSchedModel.computeInstrLatency(MAI);
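// fixVALUMaskWriteHazard(): an SALU write to an SGPR that a pending VALU
// instruction is still reading as a lane mask (e.g. the carry-in of
// v_cndmask/v_addc via VCC) is followed by an s_waitcnt_depctr sa_sdst(0);
// when the write is s_getpc_b64, the offsets of global operands in its bundled
// uses are bumped by 4 to account for the inserted instruction.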
bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) {
  if (!SDSTOp || !SDSTOp->isReg())
  if (HazardReg == AMDGPU::EXEC ||
      HazardReg == AMDGPU::EXEC_LO ||
      HazardReg == AMDGPU::EXEC_HI ||
      HazardReg == AMDGPU::M0)
    switch (I.getOpcode()) {
    case AMDGPU::V_ADDC_U32_e32:
    case AMDGPU::V_ADDC_U32_dpp:
    case AMDGPU::V_CNDMASK_B16_e32:
    case AMDGPU::V_CNDMASK_B16_dpp:
    case AMDGPU::V_CNDMASK_B32_e32:
    case AMDGPU::V_CNDMASK_B32_dpp:
    case AMDGPU::V_DIV_FMAS_F32_e64:
    case AMDGPU::V_DIV_FMAS_F64_e64:
    case AMDGPU::V_SUBB_U32_e32:
    case AMDGPU::V_SUBB_U32_dpp:
    case AMDGPU::V_SUBBREV_U32_e32:
    case AMDGPU::V_SUBBREV_U32_dpp:
      return HazardReg == AMDGPU::VCC ||
             HazardReg == AMDGPU::VCC_LO ||
             HazardReg == AMDGPU::VCC_HI;
    case AMDGPU::V_ADDC_U32_e64:
    case AMDGPU::V_ADDC_U32_e64_dpp:
    case AMDGPU::V_CNDMASK_B16_e64:
    case AMDGPU::V_CNDMASK_B16_e64_dpp:
    case AMDGPU::V_CNDMASK_B32_e64:
    case AMDGPU::V_CNDMASK_B32_e64_dpp:
    case AMDGPU::V_SUBB_U32_e64:
    case AMDGPU::V_SUBB_U32_e64_dpp:
    case AMDGPU::V_SUBBREV_U32_e64:
    case AMDGPU::V_SUBBREV_U32_e64_dpp: {
      return TRI.regsOverlap(SSRCOp->getReg(), HazardReg);
    if (I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
    for (int OpNo = 0, End = I.getNumOperands(); OpNo < End; ++OpNo) {
      if (OpReg == AMDGPU::EXEC ||
          OpReg == AMDGPU::EXEC_LO ||
          OpReg == AMDGPU::EXEC_HI)
      if (Op.isImplicit()) {
        if (OpReg == AMDGPU::VCC ||
            OpReg == AMDGPU::VCC_LO ||
            OpReg == AMDGPU::VCC_HI)
      if (TRI.isSGPRReg(MRI, OpReg))
      if (!TII.isInlineConstant(Op, OpInfo))
      std::numeric_limits<int>::max())
  auto NextMI = std::next(MI->getIterator());
  BuildMI(*MI->getParent(), NextMI, MI->getDebugLoc(),
          TII.get(AMDGPU::S_WAITCNT_DEPCTR))
  if (MI->getOpcode() == AMDGPU::S_GETPC_B64) {
    while (NextMI != MI->getParent()->end() &&
           NextMI->isBundledWithPred()) {
      for (auto &Operand : NextMI->operands()) {
        if (Operand.isGlobal())
          Operand.setOffset(Operand.getOffset() + 4);