#define DEBUG_TYPE "machine-scheduler"

static cl::opt<bool> DisableUnclusterHighRP(
    "amdgpu-disable-unclustered-high-rp-reschedule", cl::Hidden,
    cl::desc("Disable unclustered high register pressure "
             "reduction scheduling stage."),
    cl::init(false));

static cl::opt<bool> DisableClusteredLowOccupancy(
    "amdgpu-disable-clustered-low-occupancy-reschedule", cl::Hidden,
    cl::desc("Disable clustered low occupancy "
             "rescheduling for ILP scheduling stage."),
    cl::init(false));

static cl::opt<unsigned> ScheduleMetricBias(
    "amdgpu-schedule-metric-bias", cl::Hidden,
    cl::desc(
        "Sets the bias which adds weight to occupancy vs latency. Set it to "
        "100 to chase the occupancy only."),
    cl::init(10));

static cl::opt<bool> RelaxedOcc(
    "amdgpu-schedule-relaxed-occupancy", cl::Hidden,
    cl::desc("Relax occupancy targets for kernels which are memory "
             "bound (amdgpu-membound-threshold), or "
             "Wave Limited (amdgpu-limit-wave-threshold)."),
    cl::init(false));

static cl::opt<bool> GCNTrackers(
    "amdgpu-use-amdgpu-trackers", cl::Hidden,
    cl::desc("Use the AMDGPU specific RPTrackers during scheduling"),
    cl::init(false));

static cl::opt<unsigned> PendingQueueLimit(
    "amdgpu-scheduler-pending-queue-limit", cl::Hidden,
    cl::desc(
        "Max (Available+Pending) size to inspect pending queue (0 disables)"),
    cl::init(256));

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
#define DUMP_MAX_REG_PRESSURE
static cl::opt<bool> PrintMaxRPRegUsageBeforeScheduler(
    "amdgpu-print-max-reg-pressure-regusage-before-scheduler", cl::Hidden,
    cl::desc("Print a list of live registers along with their def/uses at the "
             "point of maximum register pressure before scheduling."),
    cl::init(false));

static cl::opt<bool> PrintMaxRPRegUsageAfterScheduler(
    "amdgpu-print-max-reg-pressure-regusage-after-scheduler", cl::Hidden,
    cl::desc("Print a list of live registers along with their def/uses at the "
             "point of maximum register pressure after scheduling."),
    cl::init(false));
#endif

static cl::opt<bool> DisableRewriteMFMAFormSchedStage(
    "amdgpu-disable-rewrite-mfma-form-sched-stage", cl::Hidden,
    cl::desc("Disable the rewrite MFMA form scheduling stage"),
    cl::init(true));
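// Usage sketch (illustrative, not from this file): these are ordinary LLVM
// cl::opt flags, so they can be passed straight to llc, e.g.
//
//   llc -mtriple=amdgcn -mcpu=gfx90a foo.ll \
//       -amdgpu-use-amdgpu-trackers -amdgpu-schedule-metric-bias=100
//
// Per the descriptions above, a metric bias of 100 makes the rescheduling
// profit check chase occupancy only, and a pending-queue limit of 0 disables
// inspection of the pending queue entirely.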
  SGPRExcessLimit =
      Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
  VGPRExcessLimit =
      Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);
137 "VGPRCriticalLimit calculation method.\n");
141 unsigned Addressable =
144 VGPRBudget = std::max(VGPRBudget, Granule);
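    // Worked example (illustrative numbers): with Addressable = 256 VGPRs,
    // TargetOccupancy = 6 waves, and an allocation Granule of 4,
    // alignDown(256 / 6, 4) = alignDown(42, 4) = 40, so VGPRBudget = 40.
    // The std::max above then guarantees at least one granule's worth of
    // VGPRs regardless of how high the occupancy target is.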
    if (!Op.isReg() || Op.isImplicit())
      continue;
    if (Op.getReg().isPhysical() ||
        (Op.isDef() && Op.getSubReg() != AMDGPU::NoSubRegister))
      return false;
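    // Example of an operand that defeats the cached pressure diffs (assumed
    // MIR syntax): "undef %0.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec".
    // The def writes only one 32-bit lane of a 64-bit vreg, so a per-set diff
    // cannot tell how much of the register actually becomes live here, and
    // physical-register operands are similarly imprecise.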
static void getRegisterPressures(
    bool AtTop, const RegPressureTracker &RPTracker, SUnit *SU,
    std::vector<unsigned> &Pressure, std::vector<unsigned> &MaxPressure,
    GCNDownwardRPTracker &DownwardTracker, GCNUpwardRPTracker &UpwardTracker,
    ScheduleDAGMI *DAG, const SIRegisterInfo *SRI) {
  // ...
  Pressure.resize(4, 0);
  // ...
  Pressure[AMDGPU::RegisterPressureSets::SReg_32] = NewPressure.getSGPRNum();
  Pressure[AMDGPU::RegisterPressureSets::VGPR_32] =
      NewPressure.getArchVGPRNum();
  Pressure[AMDGPU::RegisterPressureSets::AGPR_32] = NewPressure.getAGPRNum();
void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
                                     bool AtTop,
                                     const RegPressureTracker &RPTracker,
                                     const SIRegisterInfo *SRI,
                                     unsigned SGPRPressure,
                                     unsigned VGPRPressure, bool IsBottomUp) {
  // ...
  if (!DAG->isTrackingPressure())
    return;
    Pressure[AMDGPU::RegisterPressureSets::SReg_32] = SGPRPressure;
    Pressure[AMDGPU::RegisterPressureSets::VGPR_32] = VGPRPressure;
    // ...
    for (const auto &Diff : DAG->getPressureDiff(SU)) {
      // ...
      // PressureDiffs are computed bottom-up, so the sign is flipped when
      // scheduling top-down.
      Pressure[Diff.getPSet()] +=
          (IsBottomUp ? Diff.getUnitInc() : -Diff.getUnitInc());
    }
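    // Example: a diff recording "+2 SReg_32" for this SU (bottom-up view)
    // adds 2 to the running SGPR pressure when picking bottom-up, but
    // subtracts 2 when the same node is crossed while scheduling top-down,
    // since the def becomes live there instead of dying.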
#ifdef EXPENSIVE_CHECKS
    std::vector<unsigned> CheckPressure, CheckMaxPressure;
    getRegisterPressures(AtTop, RPTracker, SU, CheckPressure, CheckMaxPressure,
                         DownwardTracker, UpwardTracker, DAG, SRI);
    if (Pressure[AMDGPU::RegisterPressureSets::SReg_32] !=
            CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] ||
        Pressure[AMDGPU::RegisterPressureSets::VGPR_32] !=
            CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32]) {
      errs() << "Register Pressure is inaccurate when calculated through "
                "PressureDiff\n"
             << "SGPR got " << Pressure[AMDGPU::RegisterPressureSets::SReg_32]
             << ", expected "
             << CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] << "\n"
             << "VGPR got " << Pressure[AMDGPU::RegisterPressureSets::VGPR_32]
             << ", expected "
             << CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32] << "\n";
      report_fatal_error("inaccurate register pressure calculation");
    }
#endif
  unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
  unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
  // ...
  const unsigned MaxVGPRPressureInc = 16;
  bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
  bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;
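  // Illustration: with VGPRExcessLimit = 256, VGPR excess only starts being
  // tracked once current VGPR pressure reaches 240 (256 - MaxVGPRPressureInc),
  // and SGPR excess is only tracked while VGPRs are not, so a candidate is
  // penalized for at most one kind of register excess at a time.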
  if (SGPRDelta >= 0 || VGPRDelta >= 0) {
    // ...
    if (SGPRDelta > VGPRDelta) {
static bool shouldCheckPending(SchedBoundary &Zone,
                               const TargetSchedModel *SchedModel) {
  // Only inspect the pending queue when the ready queues are small enough and
  // the schedule model can actually buffer micro-ops (a limit of 0 disables
  // pending-queue inspection).
  bool HasBufferedModel =
      SchedModel->hasInstrSchedModel() && SchedModel->getMicroOpBufferSize();
  unsigned Size = Zone.Available.size() + Zone.Pending.size();
  return Size <= PendingQueueLimit && HasBufferedModel;
}
    dbgs() << "Prefer:\t\t";
    DAG->dumpNode(*Preferred.SU);
    // ...
    DAG->dumpNode(*Current.SU);
    // ...
    dbgs() << "Reason:\t\t";
  unsigned SGPRPressure = 0;
  unsigned VGPRPressure = 0;
  if (DAG->isTrackingPressure()) {
    if (!GCNTrackers) {
      SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
      VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
    } else {
      GCNRPTracker *T = IsBottomUp
                            ? static_cast<GCNRPTracker *>(&UpwardTracker)
                            : static_cast<GCNRPTracker *>(&DownwardTracker);
      SGPRPressure = T->getPressure().getSGPRNum();
      VGPRPressure = T->getPressure().getArchVGPRNum();
    }
  }
  // ...
  for (SUnit *SU : AQ) {
    // ...
    initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, SGPRPressure,
                  VGPRPressure, IsBottomUp);
    // ...
  }
  // ...
  for (SUnit *SU : PQ) {
    // ...
    initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, SGPRPressure,
                  VGPRPressure, IsBottomUp);
    // ...
  }
SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode,
                                               bool &PickedPending) {
  // ...
  bool BotPending = false;
  // ...
    assert(TCand.SU == BotCand.SU &&
           "Last pick result should correspond to re-picking right now");
  // ...
  bool TopPending = false;
  // ...
    assert(TCand.SU == TopCand.SU &&
           "Last pick result should correspond to re-picking right now");
  // ...
  PickedPending = BotPending && TopPending;
  // ...
  if (BotPending || TopPending) {
    // ...
    Cand.setBest(TryCand);
    // ...
  }
  IsTopNode = Cand.AtTop;
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return nullptr;
  }
  // ...
  PickedPending = false;
    if (ReadyCycle > CurrentCycle)
  // Avoid exceeding the target's limit.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
                  RegExcess, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  // Avoid increasing the max critical pressure in the scheduled region.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
                  TryCand, Cand, RegCritical, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  bool SameBoundary = Zone != nullptr;
  // ...

  if (DAG->isTrackingPressure() &&
      tryPressure(/* ... */))
    return TryCand.Reason != NoCand;

  bool SameBoundary = Zone != nullptr;
  // ...

  // Keep clustered nodes together.
  bool CandIsClusterSucc =
      isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
  bool TryCandIsClusterSucc =
      isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
  if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
                 Cluster))
    return TryCand.Reason != NoCand;

  if (DAG->isTrackingPressure() &&
      tryPressure(/* ... */))
    return TryCand.Reason != NoCand;

  if (DAG->isTrackingPressure() &&
      tryPressure(/* ... */))
    return TryCand.Reason != NoCand;
  // ...

  if (DAG->isTrackingPressure()) {
    // ...
  }
  // ...

  // Keep clustered nodes together.
  bool CandIsClusterSucc =
      isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
  bool TryCandIsClusterSucc =
      isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
  if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
                 Cluster))
    return TryCand.Reason != NoCand;
  // ...

  bool SameBoundary = Zone != nullptr;
  // ...

  if (TryMayLoad || CandMayLoad) {
    bool TryLongLatency =
        TryCand.SU->Latency > 10 * AvgLatency && !TryMayLoad;
    bool CandLongLatency =
        10 * AvgLatency < Cand.SU->Latency && !CandMayLoad;
    if (tryGreater(Zone->isTop() ? TryLongLatency : CandLongLatency,
                   Zone->isTop() ? CandLongLatency : TryLongLatency, TryCand,
                   Cand, Stall))
      return TryCand.Reason != NoCand;
  }
  // ...

  if (DAG->isTrackingPressure() &&
      tryPressure(/* ... */))
    return TryCand.Reason != NoCand;
  // ...

  if (!Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
    return TryCand.Reason != NoCand;
GCNScheduleDAGMILive::GCNScheduleDAGMILive(
    MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S)
    : ScheduleDAGMILive(C, std::move(S)), ST(MF.getSubtarget<GCNSubtarget>()),
      MFI(*MF.getInfo<SIMachineFunctionInfo>()),
      StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy),
      RegionLiveOuts(this, /*IsLiveOut=*/true) {
  // ...
  LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
  if (RelaxedOcc) {
    MinOccupancy = std::min(MFI.getMinAllowedOccupancy(), StartingOccupancy);
    if (MinOccupancy != StartingOccupancy)
      LLVM_DEBUG(dbgs() << "Allowing Occupancy drops to " << MinOccupancy
                        << ".\n");
  }
}
std::unique_ptr<GCNSchedStage>
GCNScheduleDAGMILive::createSchedStage(GCNSchedStageID SchedStageID) {
  switch (SchedStageID) {
  case GCNSchedStageID::OccInitialSchedule:
    return std::make_unique<OccInitialScheduleStage>(SchedStageID, *this);
  case GCNSchedStageID::RewriteMFMAForm:
    return std::make_unique<RewriteMFMAFormStage>(SchedStageID, *this);
  case GCNSchedStageID::UnclusteredHighRPReschedule:
    return std::make_unique<UnclusteredHighRPStage>(SchedStageID, *this);
  case GCNSchedStageID::ClusteredLowOccupancyReschedule:
    return std::make_unique<ClusteredLowOccStage>(SchedStageID, *this);
  case GCNSchedStageID::PreRARematerialize:
    return std::make_unique<PreRARematStage>(SchedStageID, *this);
  case GCNSchedStageID::ILPInitialSchedule:
    return std::make_unique<ILPInitialScheduleStage>(SchedStageID, *this);
  case GCNSchedStageID::MemoryClauseInitialSchedule:
    return std::make_unique<MemoryClauseInitialScheduleStage>(SchedStageID,
                                                              *this);
  }
  llvm_unreachable("Unknown SchedStageID.");
}
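// Stage-pipeline sketch (assumed composition; the exact lists live in the
// GCNSchedStrategy constructors): the max-occupancy strategy queues roughly
//   OccInitialSchedule -> UnclusteredHighRPReschedule ->
//   ClusteredLowOccupancyReschedule -> PreRARematerialize
// while the ILP and memory-clause strategies run their single initial stage.
// Each stage can also be skipped wholesale via the cl::opt switches above.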
GCNRegPressure
GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {
  GCNDownwardRPTracker RPTracker(*LIS);
  RPTracker.advance(Regions[RegionIdx].first, Regions[RegionIdx].second,
                    &LiveIns[RegionIdx]);
  return RPTracker.moveMaxPressure();
}

static MachineInstr *
getLastMIForRegion(MachineBasicBlock::iterator RegionBegin,
                   MachineBasicBlock::iterator RegionEnd) {
  assert(RegionBegin != RegionEnd && "Region must not be empty");
  // ...
void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
                                                const MachineBasicBlock *MBB) {
  GCNDownwardRPTracker RPTracker(*LIS);

  // If the block has the only successor then live-ins of that successor are
  // live-outs of the current block, so the tracker's live set can be reused
  // when the successor is scheduled right after this block.
  const MachineBasicBlock *OnlySucc = nullptr;
  if (MBB->succ_size() == 1) {
    auto *Candidate = *MBB->succ_begin();
    if (!Candidate->empty() && Candidate->pred_size() == 1) {
      SlotIndexes *Ind = LIS->getSlotIndexes();
      if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(Candidate))
        OnlySucc = Candidate;
    }
  }

  // The scheduler sends regions from the end of the block upwards.
  size_t CurRegion = RegionIdx;
  for (size_t E = Regions.size(); CurRegion != E; ++CurRegion)
    if (Regions[CurRegion].first->getParent() != MBB)
      break;
  --CurRegion;

  auto LiveInIt = MBBLiveIns.find(MBB);
  auto &Rgn = Regions[CurRegion];
  auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
  if (LiveInIt != MBBLiveIns.end()) {
    auto LiveIn = std::move(LiveInIt->second);
    RPTracker.reset(*MBB->begin(), &LiveIn);
    MBBLiveIns.erase(LiveInIt);
  } else {
    auto LRS = BBLiveInMap.lookup(NonDbgMI);
#ifdef EXPENSIVE_CHECKS
    assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
#endif
    RPTracker.reset(*Rgn.first, &LRS);
  }

  for (;;) {
    // (I advances over the block's instructions via the tracker)
    // ...
    if (Regions[CurRegion].first == I || NonDbgMI == I) {
      LiveIns[CurRegion] = RPTracker.getLiveRegs();
      // ...
    }
    if (Regions[CurRegion].second == I) {
      Pressure[CurRegion] = RPTracker.moveMaxPressure();
      if (CurRegion-- == RegionIdx)
        break;
      auto &Rgn = Regions[CurRegion];
      NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
    }
    // ...
  }

  if (OnlySucc) {
    // ...
    MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();
  }
}
DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
GCNScheduleDAGMILive::getRegionLiveInMap() const {
  assert(!Regions.empty());
  std::vector<MachineInstr *> RegionFirstMIs;
  RegionFirstMIs.reserve(Regions.size());
  // ...
    RegionFirstMIs.push_back(
        &*skipDebugInstructionsForward(I->first, I->second));
  // ...
  return getLiveRegMap(RegionFirstMIs, /*After=*/false, *LIS);
}

DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
GCNScheduleDAGMILive::getRegionLiveOutMap() const {
  assert(!Regions.empty());
  std::vector<MachineInstr *> RegionLastMIs;
  RegionLastMIs.reserve(Regions.size());
  for (auto &[RegionBegin, RegionEnd] : reverse(Regions))
    RegionLastMIs.push_back(getLastMIForRegion(RegionBegin, RegionEnd));
  return getLiveRegMap(RegionLastMIs, /*After=*/true, *LIS);
}
  IdxToInstruction.clear();
  // ...
  RegionLiveRegMap =
      IsLiveOut ? DAG->getRegionLiveOutMap() : DAG->getRegionLiveInMap();
  for (unsigned I = 0; I < DAG->Regions.size(); I++) {
    auto &[RegionBegin, RegionEnd] = DAG->Regions[I];
    // Skip empty regions.
    if (RegionBegin == RegionEnd)
      continue;
    MachineInstr *RegionKey =
        IsLiveOut ? getLastMIForRegion(RegionBegin, RegionEnd)
                  : &*skipDebugInstructionsForward(RegionBegin, RegionEnd);
    IdxToInstruction[I] = RegionKey;
  }
  LiveIns.resize(Regions.size());
  Pressure.resize(Regions.size());
  RegionsWithHighRP.resize(Regions.size());
  RegionsWithExcessRP.resize(Regions.size());
  RegionsWithIGLPInstrs.resize(Regions.size());
  RegionsWithHighRP.reset();
  RegionsWithExcessRP.reset();
  RegionsWithIGLPInstrs.reset();
void GCNScheduleDAGMILive::runSchedStages() {
  LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");

  if (!Regions.empty()) {
    BBLiveInMap = getRegionLiveInMap();
    if (GCNTrackers)
      RegionLiveOuts.buildLiveRegMap();
  }

#ifdef DUMP_MAX_REG_PRESSURE
  // ...
#endif

  GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl);
  while (S.advanceStage()) {
    auto Stage = createSchedStage(S.getCurrentStage());
    if (!Stage->initGCNSchedStage())
      continue;

    for (auto Region : Regions) {
      // ...
      if (!Stage->initGCNRegion()) {
        Stage->advanceRegion();
        exitRegion();
        continue;
      }

      if (GCNTrackers) {
        GCNDownwardRPTracker *DownwardTracker = S.getDownwardTracker();
        GCNUpwardRPTracker *UpwardTracker = S.getUpwardTracker();
        GCNRPTracker::LiveRegSet *RegionLiveIns =
            &LiveIns[Stage->getRegionIdx()];

        reinterpret_cast<GCNRPTracker *>(DownwardTracker)
            ->reset(MRI, *RegionLiveIns);
        reinterpret_cast<GCNRPTracker *>(UpwardTracker)
            ->reset(MRI, RegionLiveOuts.getLiveRegsForRegionIdx(
                             Stage->getRegionIdx()));
      }

      ScheduleDAGMILive::schedule();
      Stage->finalizeGCNRegion();
      Stage->advanceRegion();
    }

    Stage->finalizeGCNSchedStage();
  }
#ifdef DUMP_MAX_REG_PRESSURE
  // ...
#endif
}

raw_ostream &llvm::operator<<(raw_ostream &OS, const GCNSchedStageID &StageID) {
  switch (StageID) {
  case GCNSchedStageID::OccInitialSchedule:
    OS << "Max Occupancy Initial Schedule";
    break;
  case GCNSchedStageID::RewriteMFMAForm:
    OS << "Instruction Rewriting Reschedule";
    break;
  case GCNSchedStageID::UnclusteredHighRPReschedule:
    OS << "Unclustered High Register Pressure Reschedule";
    break;
  case GCNSchedStageID::ClusteredLowOccupancyReschedule:
    OS << "Clustered Low Occupancy Reschedule";
    break;
  case GCNSchedStageID::PreRARematerialize:
    OS << "Pre-RA Rematerialize";
    break;
  case GCNSchedStageID::ILPInitialSchedule:
    OS << "Max ILP Initial Schedule";
    break;
  case GCNSchedStageID::MemoryClauseInitialSchedule:
    OS << "Max memory clause Initial Schedule";
    break;
  }
  return OS;
}
// (Parameter lists follow the call sites later in this file.)
void RewriteMFMAFormStage::findReachingDefs(
    MachineOperand &UseMO, LiveIntervals *LIS,
    SmallVectorImpl<SlotIndex> &ReachingDefIndexes) {
  // ...
  SmallVector<MachineBasicBlock *, 8> Worklist;
  // ...
  while (!Worklist.empty()) {
    // ...
    for (MachineBasicBlock *PredMBB : DefMBB->predecessors()) {
      if (Visited.insert(PredMBB).second)
        Worklist.push_back(PredMBB);
    }
  }
}

void RewriteMFMAFormStage::findReachingUses(
    MachineInstr *DefMI, LiveIntervals *LIS,
    SmallVectorImpl<MachineOperand *> &ReachingUses) {
  // ...
  for (MachineOperand &UseMO :
       /* ... non-debug uses of DefMI's def register ... */) {
    // ...
    findReachingDefs(UseMO, LIS, ReachingDefIndexes);
    // A use is "reaching" if one of its reaching defs is DefMI itself.
    if (any_of(ReachingDefIndexes,
               [DefIdx](SlotIndex RDIdx) { /* ... */ }))
bool RewriteMFMAFormStage::initGCNSchedStage() {
  // ...
  if (!ST.hasGFX90AInsts() || MFI.getMinWavesPerEU() > 1)
    return false;

  RegionsWithExcessArchVGPR.resize(DAG.Regions.size());
  RegionsWithExcessArchVGPR.reset();
  // ...
      RegionsWithExcessArchVGPR[Region] = true;
  // ...
  if (RegionsWithExcessArchVGPR.none())
    return false;

  TII = ST.getInstrInfo();
  SRI = ST.getRegisterInfo();

  std::vector<std::pair<MachineInstr *, unsigned>> RewriteCands;
  // ...
  if (!initHeuristics(RewriteCands, CopyForUse, CopyForDef))
    return false;

  int64_t Cost = getRewriteCost(RewriteCands, CopyForUse, CopyForDef);
  // ...
  return rewrite(RewriteCands);
}
bool UnclusteredHighRPStage::initGCNSchedStage() {
  // ...
  if (DAG.RegionsWithHighRP.none() && DAG.RegionsWithExcessRP.none())
    return false;
  // ...
  InitialOccupancy = DAG.MinOccupancy;
  // ...
  TempTargetOccupancy = MFI.getMaxWavesPerEU() > DAG.MinOccupancy
                            ? InitialOccupancy + 1
                            : InitialOccupancy;
  IsAnyRegionScheduled = false;
  S.SGPRLimitBias = S.HighRPSGPRBias;
  S.VGPRLimitBias = S.HighRPVGPRBias;
  // ...
  LLVM_DEBUG(
      dbgs()
      << "Retrying function scheduling without clustering. "
         "Aggressively try to reduce register pressure to achieve occupancy "
      << TempTargetOccupancy << ".\n");

  return true;
}
bool ClusteredLowOccStage::initGCNSchedStage() {
  // ...
  if (DAG.StartingOccupancy <= DAG.MinOccupancy)
    return false;

  LLVM_DEBUG(
      dbgs() << "Retrying function scheduling with lowest recorded occupancy "
             << DAG.MinOccupancy << ".\n");
  return true;
}
#define REMAT_PREFIX "[PreRARemat] "
#define REMAT_DEBUG(X) LLVM_DEBUG(dbgs() << REMAT_PREFIX; X;)
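// Usage example for the macro above: under -debug, the statement
//   REMAT_DEBUG(dbgs() << "rematerializing instruction\n");
// expands to LLVM_DEBUG(dbgs() << "[PreRARemat] "; dbgs() << ...;), so every
// message from this stage carries the prefix and is easy to filter for.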
bool PreRARematStage::initGCNSchedStage() {
  // ...
  const unsigned NumRegions = DAG.Regions.size();
  RegionBB.reserve(NumRegions);
  for (unsigned I = 0; I < NumRegions; ++I) {
    // ...
      MIRegion.insert({&*MI, I});
    // ...
  }

  if (!canIncreaseOccupancyOrReduceSpill())
    return false;
  // ...
  DAG.RegionLiveOuts.buildLiveRegMap();
  REMAT_DEBUG({
    dbgs() << "Retrying function scheduling with new min. occupancy of "
           << AchievedOcc << " from rematerializing (original was "
           << DAG.MinOccupancy;
    if (TargetOcc)
      dbgs() << ", target was " << *TargetOcc;
    dbgs() << ")\n";
  });
  // ...
  if (AchievedOcc > DAG.MinOccupancy) {
    DAG.MinOccupancy = AchievedOcc;
    // ...
    MFI.increaseOccupancy(MF, DAG.MinOccupancy);
  }

  return true;
}

void UnclusteredHighRPStage::finalizeGCNSchedStage() {
  // ...
  S.SGPRLimitBias = S.VGPRLimitBias = 0;
  if (DAG.MinOccupancy > InitialOccupancy) {
    assert(IsAnyRegionScheduled);
    LLVM_DEBUG(dbgs() << StageID
                      << " stage successfully increased occupancy to "
                      << DAG.MinOccupancy << '\n');
  } else if (!IsAnyRegionScheduled) {
    assert(DAG.MinOccupancy == InitialOccupancy);
    LLVM_DEBUG(dbgs() << StageID
                      << ": No regions scheduled, min occupancy stays at "
                      << DAG.MinOccupancy << ", MFI occupancy stays at "
                      << MFI.getOccupancy() << ".\n");
  }
  // ...
}
bool GCNSchedStage::initGCNRegion() {
  // ...
  unsigned NumRegionInstrs = std::distance(DAG.begin(), DAG.end());
  // ...
  // Skip empty scheduling regions and single-instruction regions.
  if (DAG.begin() == DAG.end() || DAG.begin() == std::prev(DAG.end()))
    return false;

  LLVM_DEBUG(dbgs() << "\n  From: " << *DAG.begin() << "    To: ";
             if (DAG.RegionEnd != CurrentMBB->end()) dbgs() << *DAG.RegionEnd;
             else dbgs() << "End";
             dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
  // ...
  for (auto &I : DAG) {
    // ...
  }
  // ...
  LLVM_DEBUG(dbgs() << "Pressure before scheduling:\nRegion live-ins:"
                    << print(DAG.LiveIns[RegionIdx], DAG.MRI)
                    << "Region live-in pressure: "
                    << print(llvm::getRegPressure(DAG.MRI,
                                                  DAG.LiveIns[RegionIdx])));
  // ...
  S.HasHighPressure = false;
bool UnclusteredHighRPStage::initGCNRegion() {
  // ...
  unsigned DynamicVGPRBlockSize = DAG.MFI.getDynamicVGPRBlockSize();
  // ...
  unsigned CurrentTargetOccupancy =
      IsAnyRegionScheduled ? DAG.MinOccupancy : TempTargetOccupancy;
  bool IsSchedulingThisRegion =
      // ...
      (CurrentTargetOccupancy <= InitialOccupancy ||
       DAG.Pressure[RegionIdx].getOccupancy(ST, DynamicVGPRBlockSize) !=
           CurrentTargetOccupancy);
  // ...
  if (!IsAnyRegionScheduled && IsSchedulingThisRegion) {
    IsAnyRegionScheduled = true;
    if (MFI.getMaxWavesPerEU() > DAG.MinOccupancy) {
      DAG.MinOccupancy = TempTargetOccupancy;
      MFI.increaseOccupancy(MF, TempTargetOccupancy);
    }
  }
  return IsSchedulingThisRegion;
}
  if (S.HasHighPressure)
    DAG.RegionsWithHighRP[RegionIdx] = true;
  // ...

  unsigned DynamicVGPRBlockSize = DAG.MFI.getDynamicVGPRBlockSize();
  // ...
  unsigned TargetOccupancy = std::min(
      S.getTargetOccupancy(), ST.getOccupancyWithWorkGroupSizes(MF).second);
  unsigned WavesAfter = std::min(
      TargetOccupancy, PressureAfter.getOccupancy(ST, DynamicVGPRBlockSize));
  unsigned WavesBefore = std::min(
      TargetOccupancy, PressureBefore.getOccupancy(ST, DynamicVGPRBlockSize));
  LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
                    << ", after " << WavesAfter << ".\n");
  // ...
  unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);
  if (WavesAfter < WavesBefore && WavesAfter < DAG.MinOccupancy &&
      WavesAfter >= MFI.getMinAllowedOccupancy()) {
    LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
                      << MFI.getMinAllowedOccupancy() << " waves\n");
    NewOccupancy = WavesAfter;
  }

  if (NewOccupancy < DAG.MinOccupancy) {
    DAG.MinOccupancy = NewOccupancy;
    MFI.limitOccupancy(DAG.MinOccupancy);
    LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
                      << DAG.MinOccupancy << ".\n");
  }
  // ...
  unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
  // ...
  unsigned MaxArchVGPRs = std::min(MaxVGPRs, ST.getAddressableNumArchVGPRs());
  unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
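  // Worked example (illustrative numbers): with DAG.MinOccupancy = 8,
  // WavesBefore = 8 and WavesAfter = 6, NewOccupancy starts as max(6, 8) = 8;
  // if the function is memory bound and MFI.getMinAllowedOccupancy() <= 6,
  // the drop is tolerated, NewOccupancy becomes 6, and both DAG.MinOccupancy
  // and the MFI occupancy limit are lowered to 6.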
  unsigned ReadyCycle = CurrCycle;
  for (auto &D : SU.Preds) {
    if (D.isAssignedRegDep()) {
      MachineInstr *DefMI = D.getSUnit()->getInstr();
      unsigned Latency = SM.computeInstrLatency(DefMI);
      unsigned DefReady = ReadyCycles[DAG.getSUnit(DefMI)->NodeNum];
      ReadyCycle = std::max(ReadyCycle, DefReady + Latency);
    }
  }
  ReadyCycles[SU.NodeNum] = ReadyCycle;
  return ReadyCycle;
}

struct EarlierIssuingCycle {
  bool operator()(std::pair<MachineInstr *, unsigned> A,
                  std::pair<MachineInstr *, unsigned> B) const {
    return A.second < B.second;
  }
};
static void printScheduleModel(std::set<std::pair<MachineInstr *, unsigned>,
                                        EarlierIssuingCycle> &ReadyCycles) {
  if (ReadyCycles.empty())
    return;
  unsigned BBNum = ReadyCycles.begin()->first->getParent()->getNumber();
  dbgs() << "\n################## Schedule time ReadyCycles for MBB : " << BBNum
         << " ##################\n# Cycle #\t\t\tInstruction\n";
  unsigned IPrev = 0;
  for (auto &I : ReadyCycles) {
    if (I.second > IPrev + 1)
      dbgs() << "****************************** BUBBLE OF " << I.second - IPrev
             << " CYCLES DETECTED ******************************\n\n";
    dbgs() << "[ " << I.second << " ] : " << *I.first << "\n";
    IPrev = I.second;
  }
}
  unsigned SumBubbles = 0;
  DenseMap<unsigned, unsigned> ReadyCycles;
  unsigned CurrCycle = 0;
  for (auto &SU : InputSchedule) {
    unsigned ReadyCycle =
        computeSUnitReadyCycle(SU, CurrCycle, ReadyCycles, SM);
    SumBubbles += ReadyCycle - CurrCycle;
#ifndef NDEBUG
    ReadyCyclesSorted.insert(std::make_pair(SU.getInstr(), ReadyCycle));
#endif
    CurrCycle = ++ReadyCycle;
  }
  unsigned SumBubbles = 0;
  DenseMap<unsigned, unsigned> ReadyCycles;
  unsigned CurrCycle = 0;
  for (auto &MI : DAG) {
    SUnit *SU = DAG.getSUnit(&MI);
    if (!SU)
      continue;
    unsigned ReadyCycle =
        computeSUnitReadyCycle(*SU, CurrCycle, ReadyCycles, SM);
    SumBubbles += ReadyCycle - CurrCycle;
#ifndef NDEBUG
    ReadyCyclesSorted.insert(std::make_pair(SU->getInstr(), ReadyCycle));
#endif
    CurrCycle = ++ReadyCycle;
  }
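  // Metric sketch (assumed from ScheduleMetrics::getMetric() and ScaleFactor):
  // schedules are compared by their fraction of stall cycles, roughly
  //   metric = (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
  // so with ScaleFactor = 100, 25 bubble cycles in a 200-cycle schedule give
  // a metric of 12 (integer division); lower is better.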
  if (WavesAfter < DAG.MinOccupancy)
    return true;
  // ...
  if (DAG.MFI.isDynamicVGPREnabled()) {
    unsigned BlocksBefore = AMDGPU::IsaInfo::getAllocatedNumVGPRBlocks(
        &ST, DAG.MFI.getDynamicVGPRBlockSize(), /* ... */);
    unsigned BlocksAfter = AMDGPU::IsaInfo::getAllocatedNumVGPRBlocks(
        &ST, DAG.MFI.getDynamicVGPRBlockSize(), /* ... */);
    if (BlocksAfter > BlocksBefore)
      return true;
  }
  // ...

  LLVM_DEBUG(dbgs()
             << "\n\t  *** In shouldRevertScheduling ***\n"
             << "  *********** BEFORE UnclusteredHighRPStage ***********\n");
  // ...
  LLVM_DEBUG(dbgs()
             << "\n  *********** AFTER UnclusteredHighRPStage ***********\n");
  unsigned OldMetric = MBefore.getMetric();
  unsigned NewMetric = MAfter.getMetric();
  unsigned WavesBefore = std::min(
      S.getTargetOccupancy(),
      PressureBefore.getOccupancy(ST, DAG.MFI.getDynamicVGPRBlockSize()));
  unsigned Profit =
      ((WavesAfter * ScheduleMetrics::ScaleFactor) / WavesBefore *
       ((OldMetric + ScheduleMetricBias) * ScheduleMetrics::ScaleFactor) /
       NewMetric) /
      ScheduleMetrics::ScaleFactor;
  LLVM_DEBUG(dbgs() << "\tMetric before " << MBefore << "\tMetric after "
                    << MAfter << "Profit: " << Profit << "\n");
  return Profit < ScheduleMetrics::ScaleFactor;
bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) {
  // ...
    LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
    return true;
  // ...
}

void GCNSchedStage::modifyRegionSchedule(unsigned RegionIdx,
                                         MachineBasicBlock *MBB,
                                         ArrayRef<MachineInstr *> MIOrder) {
  // ...
  assert(/* ... */ && "instruction number mismatch");
  if (MIOrder.empty())
    return;
  // ...
    if (MII != RegionEnd) {
      // ...
      bool NonDebugReordered =
          !MI->isDebugInstr() &&
          /* ... */;
      // ...
      if (NonDebugReordered)
        DAG.LIS->handleMove(*MI, /*UpdateFlags=*/true);
      // ...
    }
    // ...
    if (MI->isDebugInstr()) {
      // ...
    }
    // ...
        Op.setIsUndef(false);
    // ...
    if (DAG.ShouldTrackLaneMasks) {
      SlotIndex SlotIdx = DAG.LIS->getInstructionIndex(*MI).getRegSlot();
      // ...
    }
bool RewriteMFMAFormStage::isRewriteCandidate(MachineInstr *MI) const {
  // ...
}

bool RewriteMFMAFormStage::initHeuristics(
    std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,
    /* ... */) {
  // ...
      if (!isRewriteCandidate(&MI))
        continue;
      int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp(MI.getOpcode());
      assert(ReplacementOp != -1);
      // Tentatively flip the MFMA to its AGPR-dst form, remembering the
      // original opcode so the change can be undone.
      RewriteCands.push_back({&MI, MI.getOpcode()});
      MI.setDesc(TII->get(ReplacementOp));

      MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
      if (Src2->isReg()) {
        SmallVector<SlotIndex, 8> Src2ReachingDefs;
        findReachingDefs(*Src2, DAG.LIS, Src2ReachingDefs);
        // Any non-MAI reaching def of src2 will need a cross-class copy.
        for (SlotIndex RDIdx : Src2ReachingDefs) {
          MachineInstr *RD = DAG.LIS->getInstructionFromIndex(RDIdx);
          if (!TII->isMAI(*RD))
            // ...
        }
      }
      MachineOperand &Dst = MI.getOperand(0);
      // ...
      SmallVector<MachineOperand *, 8> DstReachingUses;
      findReachingUses(&MI, DAG.LIS, DstReachingUses);
      // ...
      for (MachineOperand *RUOp : DstReachingUses) {
        if (TII->isMAI(*RUOp->getParent()))
          continue;
        // ...
        // A non-MAI user of the dst will need a copy back to VGPR in its
        // block.
        CopyForUse[RUOp->getParent()->getParent()].insert(RUOp->getReg());
        // ...
        SmallVector<SlotIndex, 8> DstUsesReachingDefs;
        findReachingDefs(*RUOp, DAG.LIS, DstUsesReachingDefs);
        // ...
        for (SlotIndex RDIndex : DstUsesReachingDefs) {
          MachineInstr *RD = DAG.LIS->getInstructionFromIndex(RDIndex);
          if (TII->isMAI(*RD))
            continue;
          // ...
        }
      }
      // ...
      // Tentatively move the dst (and register src2) into the AGPR class so
      // the cost model can measure the pressure effect.
      const TargetRegisterClass *VGPRRC = DAG.MRI.getRegClass(Dst.getReg());
      const TargetRegisterClass *AGPRRC = SRI->getEquivalentAGPRClass(VGPRRC);
      DAG.MRI.setRegClass(Dst.getReg(), AGPRRC);
      // ...
      DAG.MRI.setRegClass(Src2->getReg(), AGPRRC);
int64_t RewriteMFMAFormStage::getRewriteCost(
    const std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,
    /* ... */) {
  MachineBlockFrequencyInfo *MBFI = DAG.MBFI;
  // ...
  int64_t BestSpillCost = 0;
  int64_t Cost = 0;
  // ...
  std::pair<unsigned, unsigned> MaxVectorRegs =
      ST.getMaxNumVectorRegs(MF.getFunction());
  unsigned ArchVGPRThreshold = MaxVectorRegs.first;
  unsigned AGPRThreshold = MaxVectorRegs.second;
  unsigned CombinedThreshold = ST.getMaxNumVGPRs(MF);
  // ...
    if (!RegionsWithExcessArchVGPR[Region])
      continue;
    // Region spill costs before and after the tentative rewrite, both
    // evaluated against:
    //     (MF, ArchVGPRThreshold, AGPRThreshold, CombinedThreshold)
    // ...
    uint64_t BlockFreq =
        MBFI->getBlockFreq(DAG.Regions[Region].first->getParent())
            .getFrequency();
    uint64_t EntryFreq = MBFI->getEntryFreq().getFrequency();
    bool RelativeFreqIsDenom = EntryFreq > BlockFreq;
    uint64_t RelativeFreq = EntryFreq && BlockFreq
                                ? (RelativeFreqIsDenom ? EntryFreq / BlockFreq
                                                       : BlockFreq / EntryFreq)
                                : 1;
    // Weight the change in spill cost by how hot the block is relative to the
    // function entry.
    int64_t SpillCost = ((int)SpillCostAfter - (int)SpillCostBefore) * 2;
    if (RelativeFreqIsDenom)
      SpillCost /= (int64_t)RelativeFreq;
    else
      SpillCost *= (int64_t)RelativeFreq;
    // ...
    if (SpillCost < BestSpillCost)
      BestSpillCost = SpillCost;
  // ...
  Cost = BestSpillCost;
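  // Frequency-scaling example (illustrative numbers): with EntryFreq = 1000
  // and BlockFreq = 250, RelativeFreqIsDenom is true and RelativeFreq = 4, so
  // a raw SpillCost of -8 (spilling removed) becomes -8 / 4 = -2; in a hot
  // block with BlockFreq = 4000, RelativeFreq = 4 multiplies instead and the
  // same delta counts as -32, making rewrites in hot code far more valuable.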
  unsigned CopyCost = 0;
  // ...
  for (MachineInstr *DefMI : CopyForDef) {
    // ...
    const TargetRegisterClass *RC = DAG.MRI.getRegClass(DefReg);
    CopyCost += RC->getCopyCost();
    // ...
  }
  for (auto &[UseBlock, UseRegs] : CopyForUse) {
    // ...
    const TargetRegisterClass *RC = DAG.MRI.getRegClass(UseReg);
    CopyCost += RC->getCopyCost();
    // ...
  }

  // Undo the tentative rewrites; the caller commits them only if the final
  // cost is profitable.
  for (auto &[MI, OriginalOpcode] : RewriteCands) {
    // ...
    const TargetRegisterClass *AGPRRC =
        DAG.MRI.getRegClass(MI->getOperand(0).getReg());
    const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(AGPRRC);
    MachineOperand *Src2 = TII->getNamedOperand(*MI, AMDGPU::OpName::src2);
    // ...
    DAG.MRI.setRegClass(Src2->getReg(), VGPRRC);
    DAG.MRI.setRegClass(MI->getOperand(0).getReg(), VGPRRC);
    MI->setDesc(TII->get(OriginalOpcode));
  }

  return Cost + CopyCost;
bool RewriteMFMAFormStage::rewrite(
    const std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands) {
  // Map the first and last MI of each region so boundaries can be updated
  // after copies are inserted.
  DenseMap<MachineInstr *, unsigned> FirstMIToRegion;
  DenseMap<MachineInstr *, unsigned> LastMIToRegion;
  // ...
    if (Entry.second != Entry.first->getParent()->end())
      // ...
  // ...
  // Registers that must be rewritten from VGPR to AGPR.
  DenseSet<Register> RewriteRegs;
  // Maps an original register to the new register it is redefined as.
  DenseMap<Register, Register> RedefMap;
  // Operands that must be rewritten to a remapped register.
  DenseMap<Register, DenseSet<MachineOperand *>> ReplaceMap;
  // Reaching defs that already received a copy.
  DenseMap<Register, SmallPtrSet<MachineInstr *, 8>> ReachingDefCopyMap;
  // Per-block reaching uses that still need a copy inserted.
  DenseMap<unsigned, DenseMap<Register, SmallPtrSet<MachineOperand *, 8>>>
      ReachingUseTracker;
  for (auto &[MI, OriginalOpcode] : RewriteCands) {
    int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp(MI->getOpcode());
    if (ReplacementOp == -1)
      continue;
    MI->setDesc(TII->get(ReplacementOp));

    MachineOperand *Src2 = TII->getNamedOperand(*MI, AMDGPU::OpName::src2);
    if (Src2->isReg()) {
      // ...
      SmallVector<SlotIndex, 8> Src2ReachingDefs;
      findReachingDefs(*Src2, DAG.LIS, Src2ReachingDefs);
      SmallSetVector<MachineInstr *, 8> Src2DefsReplace;
      // ...
      for (SlotIndex RDIndex : Src2ReachingDefs) {
        MachineInstr *RD = DAG.LIS->getInstructionFromIndex(RDIndex);
        if (TII->isMAI(*RD))
          continue;
        // ...
        Src2DefsReplace.insert(RD);
      }

      if (!Src2DefsReplace.empty()) {
        Register Src2Reg = Src2->getReg();
        Register MappedReg;
        auto RI = RedefMap.find(Src2Reg);
        if (RI != RedefMap.end()) {
          MappedReg = RI->second;
        } else {
          const TargetRegisterClass *Src2RC = DAG.MRI.getRegClass(Src2Reg);
          const TargetRegisterClass *VGPRRC =
              SRI->getEquivalentVGPRClass(Src2RC);
          // ...
          MappedReg = DAG.MRI.createVirtualRegister(VGPRRC);
          RedefMap[Src2Reg] = MappedReg;
        }
        // Insert a VGPR copy after each non-MAI reaching def, once per def.
        for (MachineInstr *RD : Src2DefsReplace) {
          // ...
          if (ReachingDefCopyMap[Src2Reg].insert(RD).second) {
            MachineInstrBuilder VGPRCopy =
                BuildMI(/* ... */)
                    .addDef(MappedReg, {}, 0)
                    .addUse(Src2Reg, {}, 0);
            DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy);
            // If the copy lands past a region's last MI, extend the region.
            unsigned UpdateRegion = LastMIToRegion[RD];
            DAG.Regions[UpdateRegion].second = VGPRCopy;
            LastMIToRegion.erase(RD);
          }
        }
        // ...
        RewriteRegs.insert(Src2Reg);
      }
    }
    // Handle the MFMA dst operand analogously.
    MachineOperand *Dst = &MI->getOperand(0);
    Register DstReg = Dst->getReg();
    // ...
    SmallVector<MachineInstr *, 8> DstUseDefsReplace;
    SmallVector<MachineOperand *, 8> DstReachingUses;
    SmallVector<MachineOperand *, 8> DstReachingUseCopies;
    findReachingUses(MI, DAG.LIS, DstReachingUses);
    // ...
    for (MachineOperand *RUOp : DstReachingUses) {
      if (TII->isMAI(*RUOp->getParent()))
        continue;
      // ...
      if (find(DstReachingUseCopies, RUOp) == DstReachingUseCopies.end())
        DstReachingUseCopies.push_back(RUOp);
      // ...
      SmallVector<SlotIndex, 8> DstUsesReachingDefs;
      findReachingDefs(*RUOp, DAG.LIS, DstUsesReachingDefs);
      // ...
      for (SlotIndex RDIndex : DstUsesReachingDefs) {
        MachineInstr *RD = DAG.LIS->getInstructionFromIndex(RDIndex);
        if (TII->isMAI(*RD))
          continue;
        // ...
        if (find(DstUseDefsReplace, RD) == DstUseDefsReplace.end())
          DstUseDefsReplace.push_back(RD);
      }
    }
    // ...
    if (!DstUseDefsReplace.empty()) {
      Register MappedReg;
      auto RI = RedefMap.find(DstReg);
      if (RI != RedefMap.end()) {
        MappedReg = RI->second;
      } else {
        const TargetRegisterClass *DstRC = DAG.MRI.getRegClass(DstReg);
        const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(DstRC);
        // ...
        MappedReg = DAG.MRI.createVirtualRegister(VGPRRC);
        RedefMap[DstReg] = MappedReg;
      }
      // ...
      for (MachineInstr *RD : DstUseDefsReplace) {
        // ...
        if (ReachingDefCopyMap[DstReg].insert(RD).second) {
          MachineInstrBuilder VGPRCopy =
              BuildMI(/* ... */)
                  .addDef(MappedReg, {}, 0)
                  .addUse(DstReg, {}, 0);
          DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy);
          // ...
          auto LMI = LastMIToRegion.find(RD);
          if (LMI != LastMIToRegion.end()) {
            unsigned UpdateRegion = LMI->second;
            DAG.Regions[UpdateRegion].second = VGPRCopy;
            LastMIToRegion.erase(RD);
          }
        }
      }
    }
    // ...
    DenseSet<MachineOperand *> &DstRegSet = ReplaceMap[DstReg];
    for (MachineOperand *RU : DstReachingUseCopies) {
      MachineBasicBlock *RUBlock = RU->getParent()->getParent();
      // ...
      if (RUBlock != MI->getParent()) {
        // Defer cross-block uses; one copy per (block, register) pair is
        // inserted by the ReachingUseTracker pass below.
        // ...
        continue;
      }
      // ...
      const TargetRegisterClass *DstRC = DAG.MRI.getRegClass(DstReg);
      const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(DstRC);
      Register NewUseReg = DAG.MRI.createVirtualRegister(VGPRRC);
      MachineInstr *UseInst = RU->getParent();
      MachineInstrBuilder VGPRCopy =
          BuildMI(/* ... */)
              .addDef(NewUseReg, {}, 0)
              .addUse(DstReg, {}, 0);
      DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy);
      // ...
      RU->setReg(NewUseReg);
    }
    // ...
    RewriteRegs.insert(DstReg);
  }
  using RUBType =
      std::pair<unsigned, DenseMap<Register, SmallPtrSet<MachineOperand *, 8>>>;
  for (RUBType RUBlockEntry : ReachingUseTracker) {
    using RUDType = std::pair<Register, SmallPtrSet<MachineOperand *, 8>>;
    for (RUDType RUDst : RUBlockEntry.second) {
      // Find the earliest user in this block and insert a single copy there.
      MachineOperand *OpBegin = *RUDst.second.begin();
      SlotIndex InstPt = DAG.LIS->getInstructionIndex(*OpBegin->getParent());
      // ...
      for (MachineOperand *User : RUDst.second) {
        SlotIndex NewInstPt =
            DAG.LIS->getInstructionIndex(*User->getParent());
        if (SlotIndex::isEarlierInstr(NewInstPt, InstPt))
          InstPt = NewInstPt;
      }
      // ...
      const TargetRegisterClass *DstRC = DAG.MRI.getRegClass(RUDst.first);
      const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(DstRC);
      Register NewUseReg = DAG.MRI.createVirtualRegister(VGPRRC);
      MachineInstr *UseInst = DAG.LIS->getInstructionFromIndex(InstPt);
      // ...
      MachineInstrBuilder VGPRCopy =
          BuildMI(/* ... */)
              .addDef(NewUseReg, {}, 0)
              .addUse(RUDst.first, {}, 0);
      DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy);
      // If the copy lands before a region's first MI, extend the region.
      auto FI = FirstMIToRegion.find(UseInst);
      if (FI != FirstMIToRegion.end()) {
        unsigned UpdateRegion = FI->second;
        DAG.Regions[UpdateRegion].first = VGPRCopy;
        FirstMIToRegion.erase(UseInst);
      }
      // ...
      for (MachineOperand *User : RUDst.second) {
        User->setReg(NewUseReg);
      }
    }
  }
  // Rewrite all recorded operands to the remapped registers.
  for (std::pair<Register, Register> NewDef : RedefMap) {
    Register OldReg = NewDef.first;
    Register NewReg = NewDef.second;
    // ...
    for (MachineOperand *ReplaceOp : ReplaceMap[OldReg])
      ReplaceOp->setReg(NewReg);
  }
  // ...
  // Move each rewritten register (or its remapped replacement) into the
  // equivalent AGPR class.
  for (Register RewriteReg : RewriteRegs) {
    Register RegToRewrite = RewriteReg;
    auto RI = RedefMap.find(RewriteReg);
    if (RI != RedefMap.end())
      RegToRewrite = RI->second;
    // ...
    const TargetRegisterClass *CurrRC = DAG.MRI.getRegClass(RegToRewrite);
    const TargetRegisterClass *AGPRRC = SRI->getEquivalentAGPRClass(CurrRC);
    // ...
    DAG.MRI.setRegClass(RegToRewrite, AGPRRC);
  }
  // ...
  DAG.LIS->reanalyze(DAG.MF);
  // ...
  RegionPressureMap LiveInUpdater(&DAG, /*IsLiveOut=*/false);
  LiveInUpdater.buildLiveRegMap();
  // ...
    DAG.LiveIns[Region] = LiveInUpdater.getLiveRegsForRegionIdx(Region);
  // ...
  return true;
}
bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
  // ...
  // Regions with a register-pressure objective to achieve, keyed by region
  // index.
  DenseMap<unsigned, GCNRPTarget> OptRegions;
  unsigned MaxSGPRs = ST.getMaxNumSGPRs(F);
  unsigned MaxVGPRs = ST.getMaxNumVGPRs(F);
  bool HasVectorRegisterExcess;

  auto ResetTargetRegions = [&]() {
    OptRegions.clear();
    HasVectorRegisterExcess = false;
    for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
      const GCNRegPressure &RP = DAG.Pressure[I];
      GCNRPTarget Target(MaxSGPRs, MaxVGPRs, MF, RP);
      // ...
      HasVectorRegisterExcess |= Target.hasVectorRegisterExcess();
    }
  };
  ResetTargetRegions();
  if (HasVectorRegisterExcess || DAG.MinOccupancy >= MFI.getMaxWavesPerEU()) {
    // Spilling (or maximum occupancy already reached): just try to reduce RP.
    TargetOcc = std::nullopt;
  } else {
    // Try to increase occupancy by one wave.
    TargetOcc = DAG.MinOccupancy + 1;
    unsigned VGPRBlockSize = MFI.getDynamicVGPRBlockSize();
    MaxSGPRs = ST.getMaxNumSGPRs(*TargetOcc, false);
    MaxVGPRs = ST.getMaxNumVGPRs(*TargetOcc, VGPRBlockSize);
    ResetTargetRegions();
  }
  REMAT_DEBUG({
    dbgs() << "Analyzing ";
    MF.getFunction().printAsOperand(dbgs(), false);
    dbgs() << ": ";
    if (OptRegions.empty()) {
      dbgs() << "no objective to achieve, occupancy is maximal at "
             << MFI.getMaxWavesPerEU();
    } else if (!TargetOcc) {
      dbgs() << "reduce spilling (minimum target occupancy is "
             << MFI.getMinWavesPerEU() << ')';
    } else {
      dbgs() << "increase occupancy from " << DAG.MinOccupancy << " to "
             << *TargetOcc;
    }
    dbgs() << '\n';
    for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
      if (auto OptIt = OptRegions.find(I); OptIt != OptRegions.end()) {
        // ...
      }
    }
  });
  if (OptRegions.empty())
    return false;

  // Accounts for the reduction of RP in a region and returns whether all
  // regions have reached their pressure objective.
  auto ReduceRPInRegion = [&](auto OptIt, Register Reg, LaneBitmask Mask,
                              bool &Progress) -> bool {
    GCNRPTarget &Target = OptIt->getSecond();
    if (!Target.isSaveBeneficial(Reg))
      return false;
    Progress = true;
    Target.saveReg(Reg, Mask, DAG.MRI);
    if (Target.satisfied())
      OptRegions.erase(OptIt->getFirst());
    return OptRegions.empty();
  };
  // ...
  DAG.RegionLiveOuts.buildLiveRegMap();
  // Registers whose defining instruction was already picked for remat.
  DenseSet<unsigned> RematRegs;
  // ...
  for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
    // ...
      if (!isReMaterializable(DefMI))
        continue;
      // ...
      // The candidate must have a single non-debug user in another region.
      MachineInstr *UseMI = DAG.MRI.getOneNonDBGUser(Reg);
      // ...
      auto UseRegion = MIRegion.find(UseMI);
      if (UseRegion != MIRegion.end() && UseRegion->second == I)
        continue;
      // ...
      if (Rematerializations.contains(UseMI) ||
          any_of(DefMI.operands(), [&RematRegs](MachineOperand &MO) {
            return MO.isReg() && RematRegs.contains(MO.getReg());
          }))
        continue;
      // ...
      SlotIndex UseIdx = DAG.LIS->getInstructionIndex(*UseMI).getRegSlot(true);
      // ...
      RematInstruction &Remat =
          Rematerializations.try_emplace(&DefMI, UseMI).first->second;

      bool RematUseful = false;
      if (auto It = OptRegions.find(I); It != OptRegions.end()) {
        // ...
        LaneBitmask Mask = DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I)[Reg];
        if (ReduceRPInRegion(It, Reg, Mask, RematUseful))
          return true;
      }

      for (unsigned LIRegion = 0; LIRegion != E; ++LIRegion) {
        // ...
        auto It = DAG.LiveIns[LIRegion].find(Reg);
        if (It == DAG.LiveIns[LIRegion].end() || It->second.none())
          continue;
        Remat.LiveInRegions.insert(LIRegion);
        // ...
        if (auto It = OptRegions.find(LIRegion); It != OptRegions.end()) {
          // ...
          if (ReduceRPInRegion(It, Reg, DAG.LiveIns[LIRegion][Reg],
                               RematUseful))
            return true;
        }
      }

      // Drop the candidate if it did not help any region.
      if (!RematUseful) {
        Rematerializations.pop_back();
        REMAT_DEBUG(dbgs() << "  No impact, not rematerializing instruction\n");
      }
    // ...
  }
  // ...
  Rematerializations.clear();
  // ...
  return !Rematerializations.empty();
void PreRARematStage::rematerialize() {
  const SIInstrInfo *TII = MF.getSubtarget<GCNSubtarget>().getInstrInfo();
  // ...
  // Regions whose pressure must be recomputed after rematerializing.
  DenseSet<unsigned> RecomputeRP;
  // ...
  for (auto &[DefMI, Remat] : Rematerializations) {
    Register Reg = DefMI->getOperand(0).getReg();
    unsigned DefRegion = MIRegion.at(DefMI);
    MachineBasicBlock::iterator InsertPos(Remat.UseMI);
    // Rematerialize the instruction right before its single use.
    TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg,
                       AMDGPU::NoSubRegister, *DefMI);
    Remat.RematMI = &*std::prev(InsertPos);
    DAG.LIS->InsertMachineInstrInMaps(*Remat.RematMI);

    // Update the boundaries of the regions the def moved out of / into.
    DAG.updateRegionBoundaries(DAG.Regions[DefRegion], DefMI, nullptr);
    auto UseRegion = MIRegion.find(Remat.UseMI);
    if (UseRegion != MIRegion.end()) {
      DAG.updateRegionBoundaries(DAG.Regions[UseRegion->second], InsertPos,
                                 Remat.RematMI);
    }
    // ...
    DAG.LIS->RemoveMachineInstrFromMaps(*DefMI);
    DefMI->eraseFromParent();
    // ...
    for (unsigned I : Remat.LiveInRegions) {
      ImpactedRegions.insert({I, DAG.Pressure[I]});
      // ...
#ifdef EXPENSIVE_CHECKS
      // ...
        LiveInterval &LI = DAG.LIS->getInterval(UseReg);
        LaneBitmask LM = DAG.MRI.getMaxLaneMaskForVReg(MO.getReg());
        // ...
        LaneBitmask LiveInMask = RegionLiveIns.at(UseReg);
        LaneBitmask UncoveredLanes = LM & ~(LiveInMask & LM);
        // If the used register has lanes not covered by the region live-ins,
        // they must not map to any subrange of the live interval.
        if (UncoveredLanes.any()) {
          assert(LI.hasSubRanges());
          for (LiveInterval::SubRange &SR : LI.subranges())
            assert((SR.LaneMask & UncoveredLanes).none());
        }
      // ...
#endif
      LaneBitmask PrevMask = RegionLiveIns[Reg];
      // ...
      RegMasks.insert({{I, Remat.RematMI->getOperand(0).getReg()}, PrevMask});
      if (Remat.UseMI->getParent() != DAG.Regions[I].first->getParent())
        RecomputeRP.insert(I);
    }
    // ...
    ImpactedRegions.insert({DefRegion, DAG.Pressure[DefRegion]});
    RecomputeRP.insert(DefRegion);
    // ...
    Register RematReg = Remat.RematMI->getOperand(0).getReg();
    DAG.LIS->removeInterval(RematReg);
    DAG.LIS->createAndComputeVirtRegInterval(RematReg);
  }

  // ...
  unsigned DynamicVGPRBlockSize = MFI.getDynamicVGPRBlockSize();
  AchievedOcc = MFI.getMaxWavesPerEU();
  for (auto &[I, OriginalRP] : ImpactedRegions) {
    bool IsEmptyRegion = DAG.Regions[I].first == DAG.Regions[I].second;
    RescheduleRegions[I] = !IsEmptyRegion;
    // ...
    if (IsEmptyRegion) {
      // ...
    } else {
      GCNDownwardRPTracker RPT(*DAG.LIS);
      auto *NonDbgMI = &*skipDebugInstructionsForward(DAG.Regions[I].first,
                                                      DAG.Regions[I].second);
      if (NonDbgMI == DAG.Regions[I].second) {
        // Region has only debug instructions.
        // ...
      } else {
        RPT.reset(*NonDbgMI, &DAG.LiveIns[I]);
        RPT.advance(DAG.Regions[I].second);
        RP = RPT.moveMaxPressure();
      }
    }
    // ...
    AchievedOcc =
        std::min(AchievedOcc, RP.getOccupancy(ST, DynamicVGPRBlockSize));
  }
bool PreRARematStage::isReMaterializable(const MachineInstr &MI) {
  if (!DAG.TII->isReMaterializable(MI))
    return false;

  for (const MachineOperand &MO : MI.all_uses()) {
    // ...
    if (DAG.MRI.isConstantPhysReg(MO.getReg()) || DAG.TII->isIgnorableUse(MO))
      continue;
    // ...
  }
  return true;
}
  // Keep the rematerializations if the target occupancy was achieved (or no
  // target was set); otherwise roll them back.
  unsigned MaxOcc = std::max(AchievedOcc, DAG.MinOccupancy);
  if (!TargetOcc || MaxOcc >= *TargetOcc)
    return;
  // ...
  const SIInstrInfo *TII = MF.getSubtarget<GCNSubtarget>().getInstrInfo();
  // ...
  // Rollback: re-rematerialize each instruction back at its original point.
  for (const auto &[DefMI, Remat] : Rematerializations) {
    MachineInstr &RematMI = *Remat.RematMI;
    unsigned DefRegion = MIRegion.at(DefMI);
    MachineBasicBlock::iterator InsertPos(DAG.Regions[DefRegion].second);
    MachineBasicBlock *MBB = RegionBB[DefRegion];
    Register Reg = RematMI.getOperand(0).getReg();
    // ...
    TII->reMaterialize(*MBB, InsertPos, Reg, AMDGPU::NoSubRegister, RematMI);
    MachineInstr *NewMI = &*std::prev(InsertPos);
    DAG.LIS->InsertMachineInstrInMaps(*NewMI);
    // ...
    auto UseRegion = MIRegion.find(Remat.UseMI);
    if (UseRegion != MIRegion.end()) {
      DAG.updateRegionBoundaries(DAG.Regions[UseRegion->second], RematMI,
                                 nullptr);
    }
    DAG.updateRegionBoundaries(DAG.Regions[DefRegion], InsertPos, NewMI);
    // ...
    DAG.LIS->RemoveMachineInstrFromMaps(RematMI);
    RematMI.eraseFromParent();
    // ...
    DAG.LIS->removeInterval(Reg);
    DAG.LIS->createAndComputeVirtRegInterval(Reg);
    // ...
    for (unsigned LIRegion : Remat.LiveInRegions)
      DAG.LiveIns[LIRegion].insert({Reg, RegMasks.at({LIRegion, Reg})});
  }
  // ...
  // Restore the original pressure of every impacted region.
  for (auto &[I, OriginalRP] : ImpactedRegions)
    DAG.Pressure[I] = OriginalRP;
void GCNScheduleDAGMILive::updateRegionBoundaries(
    RegionBoundaries &RegionBounds, MachineBasicBlock::iterator MI,
    MachineInstr *NewMI) {
  assert((!NewMI || NewMI != RegionBounds.second) &&
         "cannot remove at region end");

  if (RegionBounds.first == RegionBounds.second) {
    assert(NewMI && "cannot remove from an empty region");
    RegionBounds.first = NewMI;
    return;
  }

  // Only changes at the region's first instruction can affect the bounds.
  if (MI != RegionBounds.first)
    return;
  if (!NewMI)
    RegionBounds.first = std::next(MI); // Removal.
  else
    RegionBounds.first = NewMI; // Insertion.
}
  if (HasIGLPInstrs) {
    SavedMutations.clear();