#define DEBUG_TYPE "machine-scheduler"

static cl::opt<bool> DisableUnclusterHighRP(
    "amdgpu-disable-unclustered-high-rp-reschedule", cl::Hidden,
    cl::desc("Disable unclustered high register pressure "
             "reduction scheduling stage."),
    cl::init(false));

static cl::opt<bool> DisableClusteredLowOccupancy(
    "amdgpu-disable-clustered-low-occupancy-reschedule", cl::Hidden,
    cl::desc("Disable clustered low occupancy "
             "rescheduling for ILP scheduling stage."),
    cl::init(false));

static cl::opt<unsigned> ScheduleMetricBias(
    "amdgpu-schedule-metric-bias", cl::Hidden,
    cl::desc(
        "Sets the bias which adds weight to occupancy vs latency. Set it to "
        "100 to chase the occupancy only."),
    cl::init(10));

static cl::opt<bool> RelaxedOcc(
    "amdgpu-schedule-relaxed-occupancy", cl::Hidden,
    cl::desc("Relax occupancy targets for kernels which are memory "
             "bound (amdgpu-membound-threshold), or "
             "Wave Limited (amdgpu-limit-wave-threshold)."),
    cl::init(false));

static cl::opt<bool> GCNTrackers(
    "amdgpu-use-amdgpu-trackers", cl::Hidden,
    cl::desc("Use the AMDGPU specific RPTrackers during scheduling"),
    cl::init(false));

static cl::opt<unsigned> PendingQueueLimit(
    "amdgpu-scheduler-pending-queue-limit", cl::Hidden,
    cl::desc(
        "Max (Available+Pending) size to inspect pending queue (0 disables)"),
    cl::init(256));

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
#define DUMP_MAX_REG_PRESSURE
static cl::opt<bool> PrintMaxRPRegUsageBeforeScheduler(
    "amdgpu-print-max-reg-pressure-regusage-before-scheduler", cl::Hidden,
    cl::desc("Print a list of live registers along with their def/uses at the "
             "point of maximum register pressure before scheduling."),
    cl::init(false));

static cl::opt<bool> PrintMaxRPRegUsageAfterScheduler(
    "amdgpu-print-max-reg-pressure-regusage-after-scheduler", cl::Hidden,
    cl::desc("Print a list of live registers along with their def/uses at the "
             "point of maximum register pressure after scheduling."),
    cl::init(false));
#endif
// GCNSchedStrategy::initialize() (fragments)
SGPRExcessLimit =
    Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
VGPRExcessLimit =
    Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);
// ...
LLVM_DEBUG(dbgs() << "Region is known to spill, use alternative "
                     "VGPRCriticalLimit calculation method.\n");
unsigned Addressable =
    AMDGPU::IsaInfo::getAddressableNumVGPRs(&ST, DynamicVGPRBlockSize);
// ...
VGPRBudget = std::max(VGPRBudget, Granule);
// canUsePressureDiffs() (fragment): pressure diffs are imprecise for
// physical registers and for subregister defs, so bail out on those.
if (!Op.isReg() || Op.isImplicit())
  continue;
if (Op.getReg().isPhysical() ||
    (Op.isDef() && Op.getSubReg() != AMDGPU::NoSubRegister))
  return false;
static void getRegisterPressures(
    bool AtTop, const RegPressureTracker &RPTracker, SUnit *SU,
    std::vector<unsigned> &Pressure, std::vector<unsigned> &MaxPressure,
    GCNDownwardRPTracker &DownwardTracker, GCNUpwardRPTracker &UpwardTracker,
    ScheduleDAGMI *DAG, const SIRegisterInfo *SRI) {
  // ...
  // The GCN trackers only maintain four pressure sets.
  Pressure.resize(4, 0);
  // ...
  Pressure[AMDGPU::RegisterPressureSets::SReg_32] = NewPressure.getSGPRNum();
  Pressure[AMDGPU::RegisterPressureSets::VGPR_32] =
      NewPressure.getArchVGPRNum();
  Pressure[AMDGPU::RegisterPressureSets::AGPR_32] = NewPressure.getAGPRNum();
}
void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
                                     bool AtTop,
                                     const RegPressureTracker &RPTracker,
                                     const SIRegisterInfo *SRI,
                                     unsigned SGPRPressure,
                                     unsigned VGPRPressure, bool IsBottomUp) {
  Cand.SU = SU;
  Cand.AtTop = AtTop;

  if (!DAG->isTrackingPressure())
    return;
  // ...
  Pressure[AMDGPU::RegisterPressureSets::SReg_32] = SGPRPressure;
  Pressure[AMDGPU::RegisterPressureSets::VGPR_32] = VGPRPressure;
  for (const auto &Diff : DAG->getPressureDiff(SU)) {
    // ...
    // PressureDiffs are computed bottom-up, so the sign must be inverted
    // when scheduling top-down.
    Pressure[Diff.getPSet()] +=
        (IsBottomUp ? Diff.getUnitInc() : -Diff.getUnitInc());
  }
#ifdef EXPENSIVE_CHECKS
  std::vector<unsigned> CheckPressure, CheckMaxPressure;
  getRegisterPressures(AtTop, RPTracker, SU, CheckPressure, CheckMaxPressure,
                       DownwardTracker, UpwardTracker, DAG, SRI);
  if (Pressure[AMDGPU::RegisterPressureSets::SReg_32] !=
          CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] ||
      Pressure[AMDGPU::RegisterPressureSets::VGPR_32] !=
          CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32]) {
    errs() << "Register Pressure is inaccurate when calculated through "
              "PressureDiffs\n"
           << "SGPR got " << Pressure[AMDGPU::RegisterPressureSets::SReg_32]
           << ", expected "
           << CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] << "\n"
           << "VGPR got " << Pressure[AMDGPU::RegisterPressureSets::VGPR_32]
           << ", expected "
           << CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32] << "\n";
    report_fatal_error("inaccurate register pressure calculation");
  }
#endif
  unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
  unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
  // ...
  const unsigned MaxVGPRPressureInc = 16;
  bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
  bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;
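  // Reading of the heuristic above (an interpretation, not source
  // commentary): a single instruction can only raise VGPR pressure by a
  // bounded amount (MaxVGPRPressureInc), so precise VGPR tracking is deferred
  // until pressure comes within that margin of VGPRExcessLimit; SGPR tracking
  // only engages when VGPR tracking does not.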
  // ...
  if (SGPRDelta >= 0 || VGPRDelta >= 0) {
    // ...
    if (SGPRDelta > VGPRDelta) {
      // ...
    }
  }
}

// shouldCheckPending() (fragment)
bool HasBufferedModel =
    SchedModel->hasInstrSchedModel() && SchedModel->getMicroOpBufferSize();

// GCNSchedStrategy::printCandidateDecision() (fragments)
dbgs() << "Prefer:\t\t";
DAG->dumpNode(*Preferred.SU);
// ...
DAG->dumpNode(*Current.SU);
// ...
dbgs() << "Reason:\t\t";
// GCNSchedStrategy::pickNodeFromQueue() (fragments)
unsigned SGPRPressure = 0;
unsigned VGPRPressure = 0;
if (DAG->isTrackingPressure()) {
  if (!GCNTrackers) {
    SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
    VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
  } else {
    GCNRPTracker *T = IsBottomUp
                          ? static_cast<GCNRPTracker *>(&UpwardTracker)
                          : static_cast<GCNRPTracker *>(&DownwardTracker);
    SGPRPressure = T->getPressure().getSGPRNum();
    VGPRPressure = T->getPressure().getArchVGPRNum();
  }
}
// ...
for (SUnit *SU : AQ) {
  // ...
  initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, SGPRPressure,
                VGPRPressure, IsBottomUp);
  // ...
}
// ...
for (SUnit *SU : PQ) {
  // ...
  initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, SGPRPressure,
                VGPRPressure, IsBottomUp);
  // ...
}
SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode,
                                               bool &PickedPending) {
  // ...
  bool BotPending = false;
  // ...
  assert(/* ... */ "Last pick result should correspond to re-picking right now");
  // ...
  bool TopPending = false;
  // ...
  assert(/* ... */ "Last pick result should correspond to re-picking right now");
  // ...
  PickedPending = BotPending && TopPending;
  // ...
  if (BotPending || TopPending) {
    // ...
  }
  // ...
  Cand.setBest(TryCand);
  // ...
  IsTopNode = Cand.AtTop;
  return Cand.SU;
}

SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return nullptr;
  }
  // ...
  PickedPending = false;
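  // How these fragments fit together: each boundary (Top/Bot) proposes a
  // candidate, and BotPending/TopPending record whether that winner had to
  // come from the boundary's Pending queue rather than its Available queue.
  // The overall pick only counts as "pending" when both sides fell back to
  // pending nodes, which is what PickedPending = BotPending && TopPending
  // expresses.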
  // ...
  if (ReadyCycle > CurrentCycle)
    /* ... */

// Candidate-comparison heuristics (fragments). The elided conditions follow
// the usual tryPressure()/tryLess()/tryGreater() pattern of GenericScheduler.

// GCNSchedStrategy::tryPendingCandidate() (fragments)
if (DAG->isTrackingPressure() &&
    /* ... */)
  return TryCand.Reason != NoCand;
if (DAG->isTrackingPressure() &&
    /* ... */)
  return TryCand.Reason != NoCand;
bool SameBoundary = Zone != nullptr;
// ...

// GCNMaxILPSchedStrategy::tryCandidate() (fragments)
if (DAG->isTrackingPressure() &&
    /* ... */)
  return TryCand.Reason != NoCand;
bool SameBoundary = Zone != nullptr;
// ...
bool CandIsClusterSucc =
    isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
bool TryCandIsClusterSucc =
    isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
               Cluster))
  return TryCand.Reason != NoCand;
if (DAG->isTrackingPressure() &&
    /* ... */)
  return TryCand.Reason != NoCand;
if (DAG->isTrackingPressure() &&
    /* ... */)
  return TryCand.Reason != NoCand;

// GCNMaxMemoryClauseSchedStrategy::tryCandidate() (fragments)
if (DAG->isTrackingPressure()) {
  // ...
}
bool CandIsClusterSucc =
    isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
bool TryCandIsClusterSucc =
    isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
               Cluster))
  return TryCand.Reason != NoCand;
bool SameBoundary = Zone != nullptr;
// ...
if (TryMayLoad || CandMayLoad) {
  bool TryLongLatency =
      TryCand.SU->Latency > 10 * Cand.SU->Latency && TryMayLoad;
  bool CandLongLatency =
      10 * TryCand.SU->Latency < Cand.SU->Latency && CandMayLoad;
  if (tryGreater(Zone->isTop() ? TryLongLatency : CandLongLatency,
                 Zone->isTop() ? CandLongLatency : TryLongLatency, TryCand,
                 Cand, Stall))
    return TryCand.Reason != NoCand;
}
// ...
if (DAG->isTrackingPressure() &&
    /* ... */)
  return TryCand.Reason != NoCand;
// ...
if (/* ... */
    !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
  return TryCand.Reason != NoCand;
// ...
GCNScheduleDAGMILive::GCNScheduleDAGMILive(
    MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S)
    : ScheduleDAGMILive(C, std::move(S)), ST(MF.getSubtarget<GCNSubtarget>()),
      MFI(*MF.getInfo<SIMachineFunctionInfo>()),
      StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy),
      RegionLiveOuts(this, /*IsLiveOut=*/true) {
  // ...
  LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
  if (RelaxedOcc) {
    MinOccupancy = std::min(MFI.getMinAllowedOccupancy(), StartingOccupancy);
    if (MinOccupancy != StartingOccupancy)
      LLVM_DEBUG(dbgs() << "Allowing Occupancy drops to " << MinOccupancy
                        << ".\n");
  }
}
std::unique_ptr<GCNSchedStage>
GCNScheduleDAGMILive::createSchedStage(GCNSchedStageID SchedStageID) {
  switch (SchedStageID) {
  case GCNSchedStageID::OccInitialSchedule:
    return std::make_unique<OccInitialScheduleStage>(SchedStageID, *this);
  case GCNSchedStageID::UnclusteredHighRPReschedule:
    return std::make_unique<UnclusteredHighRPStage>(SchedStageID, *this);
  case GCNSchedStageID::ClusteredLowOccupancyReschedule:
    return std::make_unique<ClusteredLowOccStage>(SchedStageID, *this);
  case GCNSchedStageID::PreRARematerialize:
    return std::make_unique<PreRARematStage>(SchedStageID, *this);
  case GCNSchedStageID::ILPInitialSchedule:
    return std::make_unique<ILPInitialScheduleStage>(SchedStageID, *this);
  case GCNSchedStageID::MemoryClauseInitialSchedule:
    return std::make_unique<MemoryClauseInitialScheduleStage>(SchedStageID,
                                                              *this);
  }
  llvm_unreachable("Unknown SchedStageID.");
}
GCNRegPressure
GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {
  GCNDownwardRPTracker RPTracker(*LIS);
  RPTracker.advance(Regions[RegionIdx].first, Regions[RegionIdx].second,
                    &LiveIns[RegionIdx]);
  return RPTracker.moveMaxPressure();
}

static MachineInstr *
getLastMIForRegion(MachineBasicBlock::iterator RegionBegin,
                   MachineBasicBlock::iterator RegionEnd) {
  assert(RegionBegin != RegionEnd && "Region must not be empty");
  // ...
}
void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
                                                const MachineBasicBlock *MBB) {
  GCNDownwardRPTracker RPTracker(*LIS);

  // If the block has the only successor then live-ins of that successor are
  // live-outs of the current block. We can reuse the calculated live set if
  // the successor will be sent to scheduling past the current block.
  const MachineBasicBlock *OnlySucc = nullptr;
  if (MBB->succ_size() == 1) {
    auto *Candidate = *MBB->succ_begin();
    if (!Candidate->empty() && Candidate->pred_size() == 1) {
      SlotIndexes *Ind = LIS->getSlotIndexes();
      if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(Candidate))
        OnlySucc = Candidate;
    }
  }

  // Scheduler sends regions from the end of the block upwards.
  size_t CurRegion = RegionIdx;
  for (size_t E = Regions.size(); CurRegion != E; ++CurRegion)
    if (Regions[CurRegion].first->getParent() != MBB)
      break;
  --CurRegion;

  auto I = MBB->begin();
  auto LiveInIt = MBBLiveIns.find(MBB);
  auto &Rgn = Regions[CurRegion];
  auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
  if (LiveInIt != MBBLiveIns.end()) {
    auto LiveIn = std::move(LiveInIt->second);
    RPTracker.reset(*MBB->begin(), &LiveIn);
    MBBLiveIns.erase(LiveInIt);
  } else {
    I = Rgn.first;
    auto LRS = BBLiveInMap.lookup(NonDbgMI);
#ifdef EXPENSIVE_CHECKS
    assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
#endif
    RPTracker.reset(*I, &LRS);
  }

  for (;;) {
    I = RPTracker.getNext();

    if (Regions[CurRegion].first == I || NonDbgMI == I) {
      LiveIns[CurRegion] = RPTracker.getLiveRegs();
      RPTracker.clearMaxPressure();
    }

    if (Regions[CurRegion].second == I) {
      Pressure[CurRegion] = RPTracker.moveMaxPressure();
      if (CurRegion-- == RegionIdx)
        break;
      auto &Rgn = Regions[CurRegion];
      NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
    }
    // ...
  }

  if (OnlySucc) {
    // ...
    MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();
  }
}
DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
GCNScheduleDAGMILive::getRegionLiveInMap() const {
  assert(!Regions.empty());
  std::vector<MachineInstr *> RegionFirstMIs;
  RegionFirstMIs.reserve(Regions.size());
  for (auto &[RegionBegin, RegionEnd] : reverse(Regions))
    RegionFirstMIs.push_back(
        &*skipDebugInstructionsForward(RegionBegin, RegionEnd));
  return getLiveRegMap(RegionFirstMIs, /*After=*/false, *LIS);
}

DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
GCNScheduleDAGMILive::getRegionLiveOutMap() const {
  assert(!Regions.empty());
  std::vector<MachineInstr *> RegionLastMIs;
  RegionLastMIs.reserve(Regions.size());
  for (auto &[RegionBegin, RegionEnd] : reverse(Regions))
    RegionLastMIs.push_back(getLastMIForRegion(RegionBegin, RegionEnd));
  return getLiveRegMap(RegionLastMIs, /*After=*/true, *LIS);
}
void RegionPressureMap::buildLiveRegMap() {
  IdxToInstruction.clear();

  RegionLiveRegMap =
      IsLiveOut ? DAG->getRegionLiveOutMap() : DAG->getRegionLiveInMap();
  for (unsigned I = 0; I < DAG->Regions.size(); I++) {
    auto &[RegionBegin, RegionEnd] = DAG->Regions[I];
    // Skip empty regions.
    if (RegionBegin == RegionEnd)
      continue;
    MachineInstr *RegionKey =
        IsLiveOut ? getLastMIForRegion(RegionBegin, RegionEnd)
                  : &*skipDebugInstructionsForward(RegionBegin, RegionEnd);
    IdxToInstruction[I] = RegionKey;
  }
}
void GCNScheduleDAGMILive::finalizeSchedule() {
  // ...
  LiveIns.resize(Regions.size());
  Pressure.resize(Regions.size());
  RegionsWithHighRP.resize(Regions.size());
  RegionsWithExcessRP.resize(Regions.size());
  RegionsWithIGLPInstrs.resize(Regions.size());
  RegionsWithHighRP.reset();
  RegionsWithExcessRP.reset();
  RegionsWithIGLPInstrs.reset();

  runSchedStages();
}
void GCNScheduleDAGMILive::runSchedStages() {
  LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");

  if (!Regions.empty()) {
    BBLiveInMap = getRegionLiveInMap();
    if (GCNTrackers)
      RegionLiveOuts.buildLiveRegMap();
  }

#ifdef DUMP_MAX_REG_PRESSURE
  if (PrintMaxRPRegUsageBeforeScheduler) {
    // ...
  }
#endif

  GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl);
  while (S.advanceStage()) {
    auto Stage = createSchedStage(S.getCurrentStage());
    if (!Stage->initGCNSchedStage())
      continue;

    for (auto Region : Regions) {
      // Setup for scheduling the region and check whether it should be
      // skipped.
      if (!Stage->initGCNRegion()) {
        Stage->advanceRegion();
        exitRegion();
        continue;
      }

      if (GCNTrackers) {
        GCNDownwardRPTracker *DownwardTracker = S.getDownwardTracker();
        GCNUpwardRPTracker *UpwardTracker = S.getUpwardTracker();
        GCNRPTracker::LiveRegSet *RegionLiveIns =
            &LiveIns[Stage->getRegionIdx()];

        reinterpret_cast<GCNRPTracker *>(DownwardTracker)
            ->reset(MRI, *RegionLiveIns);
        reinterpret_cast<GCNRPTracker *>(UpwardTracker)
            ->reset(MRI, RegionLiveOuts.getLiveRegsForRegionIdx(
                             Stage->getRegionIdx()));
      }

      ScheduleDAGMILive::schedule();
      Stage->finalizeGCNRegion();
    }

    Stage->finalizeGCNSchedStage();
  }

#ifdef DUMP_MAX_REG_PRESSURE
  if (PrintMaxRPRegUsageAfterScheduler) {
    // ...
  }
#endif
}
#ifndef NDEBUG
raw_ostream &llvm::operator<<(raw_ostream &OS, const GCNSchedStageID &StageID) {
  switch (StageID) {
  case GCNSchedStageID::OccInitialSchedule:
    OS << "Max Occupancy Initial Schedule";
    break;
  case GCNSchedStageID::UnclusteredHighRPReschedule:
    OS << "Unclustered High Register Pressure Reschedule";
    break;
  case GCNSchedStageID::ClusteredLowOccupancyReschedule:
    OS << "Clustered Low Occupancy Reschedule";
    break;
  case GCNSchedStageID::PreRARematerialize:
    OS << "Pre-RA Rematerialize";
    break;
  case GCNSchedStageID::ILPInitialSchedule:
    OS << "Max ILP Initial Schedule";
    break;
  case GCNSchedStageID::MemoryClauseInitialSchedule:
    OS << "Max memory clause Initial Schedule";
    break;
  }
  return OS;
}
#endif
bool UnclusteredHighRPStage::initGCNSchedStage() {
  // ...
  if (DAG.RegionsWithHighRP.none() && DAG.RegionsWithExcessRP.none())
    return false;

  InitialOccupancy = DAG.MinOccupancy;
  TempTargetOccupancy = MFI.getMaxWavesPerEU() > DAG.MinOccupancy
                            ? InitialOccupancy + 1
                            : InitialOccupancy;
  IsAnyRegionScheduled = false;
  S.SGPRLimitBias = S.HighRPSGPRBias;
  S.VGPRLimitBias = S.HighRPVGPRBias;
  // ...

  LLVM_DEBUG(
      dbgs()
      << "Retrying function scheduling without clustering. "
         "Aggressively try to reduce register pressure to achieve occupancy "
      << TempTargetOccupancy << ".\n");

  return true;
}
bool ClusteredLowOccStage::initGCNSchedStage() {
  // ...
  if (DAG.StartingOccupancy <= DAG.MinOccupancy)
    return false;

  LLVM_DEBUG(
      dbgs() << "Retrying function scheduling with lowest recorded occupancy "
             << DAG.MinOccupancy << ".\n");
  return true;
}
/// Allows to easily filter for this stage's debug output.
#define REMAT_PREFIX "[PreRARemat] "
#define REMAT_DEBUG(X) LLVM_DEBUG(dbgs() << REMAT_PREFIX; X;)
bool PreRARematStage::initGCNSchedStage() {
  // ...
  const unsigned NumRegions = DAG.Regions.size();
  RegionBB.reserve(NumRegions);
  for (unsigned I = 0; I < NumRegions; ++I) {
    RegionBoundaries Region = DAG.Regions[I];
    for (auto MI = Region.first; MI != Region.second; ++MI)
      MIRegion.insert({&*MI, I});
    RegionBB.push_back(Region.first->getParent());
  }

  if (!canIncreaseOccupancyOrReduceSpill())
    return false;

  // Rematerialize identified instructions and update scheduler's state.
  rematerialize();
  if (GCNTrackers)
    DAG.RegionLiveOuts.buildLiveRegMap();
  REMAT_DEBUG({
    dbgs() << "Retrying function scheduling with new min. occupancy of "
           << AchievedOcc << " from rematerializing (original was "
           << DAG.MinOccupancy;
    if (TargetOcc)
      dbgs() << ", target was " << *TargetOcc;
    dbgs() << ")\n";
  });

  if (AchievedOcc > DAG.MinOccupancy) {
    DAG.MinOccupancy = AchievedOcc;
    MFI.increaseOccupancy(MF, DAG.MinOccupancy);
  }

  return true;
}
void UnclusteredHighRPStage::finalizeGCNSchedStage() {
  SavedMutations.swap(DAG.Mutations);
  S.SGPRLimitBias = S.VGPRLimitBias = 0;
  if (DAG.MinOccupancy > InitialOccupancy) {
    assert(IsAnyRegionScheduled);
    LLVM_DEBUG(dbgs() << StageID
                      << " stage successfully increased occupancy to "
                      << DAG.MinOccupancy << '\n');
  } else if (!IsAnyRegionScheduled) {
    assert(DAG.MinOccupancy == InitialOccupancy);
    LLVM_DEBUG(dbgs() << StageID
                      << ": No regions scheduled, min occupancy stays at "
                      << DAG.MinOccupancy << ", MFI occupancy stays at "
                      << MFI.getOccupancy() << ".\n");
  }

  GCNSchedStage::finalizeGCNSchedStage();
}
bool GCNSchedStage::initGCNRegion() {
  // ...
  unsigned NumRegionInstrs = std::distance(DAG.begin(), DAG.end());
  DAG.enterRegion(CurrentMBB, DAG.begin(), DAG.end(), NumRegionInstrs);

  // Skip empty scheduling regions (0 or 1 schedulable instructions).
  if (DAG.begin() == DAG.end() || DAG.begin() == std::prev(DAG.end()))
    return false;

  LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
  LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*CurrentMBB)
                    << " " << CurrentMBB->getName()
                    << "\n  From: " << *DAG.begin() << "    To: ";
             if (DAG.RegionEnd != CurrentMBB->end()) dbgs() << *DAG.RegionEnd;
             else dbgs() << "End";
             dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');

  // Save original instruction order before scheduling for possible revert.
  Unsched.clear();
  Unsched.reserve(NumRegionInstrs);
  for (auto &I : DAG) {
    Unsched.push_back(&I);
    // ...
  }

  PressureBefore = DAG.Pressure[RegionIdx];

  LLVM_DEBUG(
      dbgs() << "Pressure before scheduling:\nRegion live-ins:"
             << print(DAG.LiveIns[RegionIdx], DAG.MRI)
             << "Region live-in pressure:  "
             << print(llvm::getRegPressure(DAG.MRI, DAG.LiveIns[RegionIdx]))
             << "Region register pressure: " << print(PressureBefore));

  S.HasHighPressure = false;
  // ...
bool UnclusteredHighRPStage::initGCNRegion() {
  // ...
  unsigned DynamicVGPRBlockSize = DAG.MFI.getDynamicVGPRBlockSize();
  // ...
  unsigned CurrentTargetOccupancy =
      IsAnyRegionScheduled ? DAG.MinOccupancy : TempTargetOccupancy;
  bool IsSchedulingThisRegion =
      /* ... */
      (CurrentTargetOccupancy <= InitialOccupancy ||
       DAG.Pressure[RegionIdx].getOccupancy(ST, DynamicVGPRBlockSize) !=
           CurrentTargetOccupancy) /* ... */;
  // ...
  if (!IsAnyRegionScheduled && IsSchedulingThisRegion) {
    IsAnyRegionScheduled = true;
    if (MFI.getMaxWavesPerEU() > DAG.MinOccupancy) {
      DAG.MinOccupancy = TempTargetOccupancy;
      MFI.increaseOccupancy(MF, TempTargetOccupancy);
    }
  }
  return IsSchedulingThisRegion;
}

void GCNSchedStage::finalizeGCNRegion() {
  // ...
  if (S.HasHighPressure)
    DAG.RegionsWithHighRP[RegionIdx] = true;
  // ...
}
void GCNSchedStage::checkScheduling() {
  // ...
  unsigned DynamicVGPRBlockSize = DAG.MFI.getDynamicVGPRBlockSize();
  // ...
  unsigned TargetOccupancy = std::min(
      S.getTargetOccupancy(), ST.getOccupancyWithWorkGroupSizes(MF).second);
  unsigned WavesAfter = std::min(
      TargetOccupancy, PressureAfter.getOccupancy(ST, DynamicVGPRBlockSize));
  unsigned WavesBefore = std::min(
      TargetOccupancy, PressureBefore.getOccupancy(ST, DynamicVGPRBlockSize));
  LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
                    << ", after " << WavesAfter << ".\n");
  unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);

  // Allow memory bound functions to drop to 4 waves if not limited by an
  // attribute.
  if (WavesAfter < WavesBefore && WavesAfter < DAG.MinOccupancy &&
      WavesAfter >= MFI.getMinAllowedOccupancy()) {
    LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
                      << MFI.getMinAllowedOccupancy() << " waves\n");
    NewOccupancy = WavesAfter;
  }

  if (NewOccupancy < DAG.MinOccupancy) {
    DAG.MinOccupancy = NewOccupancy;
    MFI.limitOccupancy(DAG.MinOccupancy);
    LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
                      << DAG.MinOccupancy << ".\n");
  }
  unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
  unsigned MaxArchVGPRs = std::min(MaxVGPRs, ST.getAddressableNumArchVGPRs());
  unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
  // ...
unsigned GCNSchedStage::computeSUnitReadyCycle(
    const SUnit &SU, unsigned CurrCycle,
    DenseMap<unsigned, unsigned> &ReadyCycles, const TargetSchedModel &SM) {
  unsigned ReadyCycle = CurrCycle;
  for (auto &D : SU.Preds) {
    if (D.isAssignedRegDep()) {
      MachineInstr *DefMI = D.getSUnit()->getInstr();
      unsigned Latency = SM.computeInstrLatency(DefMI);
      unsigned DefReady = ReadyCycles[DAG.getSUnit(DefMI)->NodeNum];
      ReadyCycle = std::max(ReadyCycle, DefReady + Latency);
    }
  }
  ReadyCycles[SU.NodeNum] = ReadyCycle;
  return ReadyCycle;
}
struct EarlierIssuingCycle {
  bool operator()(std::pair<MachineInstr *, unsigned> A,
                  std::pair<MachineInstr *, unsigned> B) const {
    return A.second < B.second;
  }
};
static void
printScheduleModel(std::set<std::pair<MachineInstr *, unsigned>,
                            EarlierIssuingCycle> &ReadyCycles) {
  if (ReadyCycles.empty())
    return;
  unsigned BBNum = ReadyCycles.begin()->first->getParent()->getNumber();
  dbgs() << "\n################## Schedule time ReadyCycles for MBB : " << BBNum
         << " ##################\n# Cycle #\t\t\tInstruction\n";
  unsigned IPrev = 1;
  for (auto &I : ReadyCycles) {
    if (I.second > IPrev + 1)
      dbgs() << "****************************** BUBBLE OF " << I.second - IPrev
             << " CYCLES DETECTED ******************************\n\n";
    dbgs() << "[ " << I.second << " ] : " << *I.first << "\n";
    IPrev = I.second;
  }
}
ScheduleMetrics
GCNSchedStage::getScheduleMetrics(const std::vector<SUnit> &InputSchedule) {
  // ...
  unsigned SumBubbles = 0;
  DenseMap<unsigned, unsigned> ReadyCycles;
  unsigned CurrCycle = 0;
  for (auto &SU : InputSchedule) {
    unsigned ReadyCycle =
        computeSUnitReadyCycle(SU, CurrCycle, ReadyCycles, SM);
    SumBubbles += ReadyCycle - CurrCycle;
#ifndef NDEBUG
    ReadyCyclesSorted.insert(std::make_pair(SU.getInstr(), ReadyCycle));
#endif
    CurrCycle = ++ReadyCycle;
  }
  // ...
}

ScheduleMetrics
GCNSchedStage::getScheduleMetrics(const GCNScheduleDAGMILive &DAG) {
  // ...
  unsigned SumBubbles = 0;
  DenseMap<unsigned, unsigned> ReadyCycles;
  unsigned CurrCycle = 0;
  for (auto &MI : DAG) {
    SUnit *SU = DAG.getSUnit(&MI);
    if (!SU)
      continue;
    unsigned ReadyCycle =
        computeSUnitReadyCycle(*SU, CurrCycle, ReadyCycles, SM);
    SumBubbles += ReadyCycle - CurrCycle;
#ifndef NDEBUG
    ReadyCyclesSorted.insert(std::make_pair(SU->getInstr(), ReadyCycle));
#endif
    CurrCycle = ++ReadyCycle;
  }
  // ...
}
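// Illustrative sketch (not part of this file): the bubble accounting in both
// overloads reduces to the loop below, where a "bubble" is the gap between
// the cycle at which the next instruction becomes ready and the cycle after
// the previous one issued. readyCyclesInScheduleOrder is hypothetical.
//
//   unsigned SumBubbles = 0, CurrCycle = 0;
//   for (unsigned ReadyCycle : readyCyclesInScheduleOrder) {
//     SumBubbles += ReadyCycle - CurrCycle; // idle cycles before this MI
//     CurrCycle = ReadyCycle + 1;           // cycle after it issues
//   }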
bool GCNSchedStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (WavesAfter < DAG.MinOccupancy)
    return true;

  // For dynamic VGPR mode, we don't want to waste any VGPR blocks.
  if (DAG.MFI.isDynamicVGPREnabled()) {
    unsigned BlocksBefore = AMDGPU::IsaInfo::getAllocatedNumVGPRBlocks(
        &ST, DAG.MFI.getDynamicVGPRBlockSize(),
        PressureBefore.getVGPRNum(false));
    unsigned BlocksAfter = AMDGPU::IsaInfo::getAllocatedNumVGPRBlocks(
        &ST, DAG.MFI.getDynamicVGPRBlockSize(),
        PressureAfter.getVGPRNum(false));
    if (BlocksAfter > BlocksBefore)
      return true;
  }

  return false;
}
bool UnclusteredHighRPStage::shouldRevertScheduling(unsigned WavesAfter) {
  // ...
  LLVM_DEBUG(
      dbgs() << "\n\t      *** In shouldRevertScheduling ***\n"
             << "      *********** BEFORE UnclusteredHighRPStage ***********\n");
  ScheduleMetrics MBefore = getScheduleMetrics(DAG.SUnits);
  LLVM_DEBUG(
      dbgs() << "\n      *********** AFTER UnclusteredHighRPStage ***********\n");
  ScheduleMetrics MAfter = getScheduleMetrics(DAG);
  unsigned OldMetric = MBefore.getMetric();
  unsigned NewMetric = MAfter.getMetric();
  unsigned WavesBefore = std::min(
      S.getTargetOccupancy(),
      PressureBefore.getOccupancy(ST, DAG.MFI.getDynamicVGPRBlockSize()));
  unsigned Profit =
      ((WavesAfter * ScheduleMetrics::ScaleFactor) / WavesBefore *
       ((OldMetric + ScheduleMetricBias) * ScheduleMetrics::ScaleFactor) /
       NewMetric) /
      ScheduleMetrics::ScaleFactor;
  LLVM_DEBUG(dbgs() << "\tMetric before " << MBefore << "\tMetric after "
                    << MAfter << "Profit: " << Profit << "\n");
  return ScheduleMetrics::ScaleFactor > Profit;
}
bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) {
  if (WavesAfter <= MFI.getMinWavesPerEU() && isRegionWithExcessRP() &&
      !PressureAfter.less(MF, PressureBefore)) {
    LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
    return true;
  }

  return false;
}
void GCNSchedStage::revertScheduling() {
  // ...
  DAG.RegionEnd = DAG.RegionBegin;
  int SkippedDebugInstr = 0;
  for (MachineInstr *MI : Unsched) {
    if (MI->isDebugInstr()) {
      ++SkippedDebugInstr;
      continue;
    }

    if (MI->getIterator() != DAG.RegionEnd) {
      DAG.BB->remove(MI);
      DAG.BB->insert(DAG.RegionEnd, MI);
      if (!MI->isDebugInstr())
        DAG.LIS->handleMove(*MI, true);
    }

    // Reset read-undef flags and update them later.
    for (auto &Op : MI->all_defs())
      Op.setIsUndef(false);
    RegisterOperands RegOpers;
    RegOpers.collect(*MI, *DAG.TRI, DAG.MRI, DAG.ShouldTrackLaneMasks, false);
    if (!MI->isDebugInstr()) {
      if (DAG.ShouldTrackLaneMasks) {
        // Adjust liveness and add missing dead+read-undef flags.
        SlotIndex SlotIdx = DAG.LIS->getInstructionIndex(*MI).getRegSlot();
        RegOpers.adjustLaneLiveness(*DAG.LIS, DAG.MRI, SlotIdx, MI);
      } else {
        // Adjust for missing dead-def flags.
        RegOpers.detectDeadDefs(*MI, *DAG.LIS);
      }
    }
    DAG.RegionEnd = MI->getIterator();
    ++DAG.RegionEnd;
  }

  // After reverting the schedule, debug instrs end up at the end of the
  // block; increment RegionEnd past them to the actual end of the region.
  while (SkippedDebugInstr-- > 0)
    ++DAG.RegionEnd;

  // If Unsched.front() is a debug instruction, the region begin may have
  // shrunk; find the first non-debug instruction.
  DAG.RegionBegin = Unsched.front()->getIterator();
  if (DAG.RegionBegin->isDebugInstr()) {
    for (MachineInstr *MI : Unsched) {
      if (MI->isDebugInstr())
        continue;
      DAG.RegionBegin = MI->getIterator();
      break;
    }
  }

  // Then move the debug instructions back into their correct place.
  DAG.placeDebugValues();
  // ...
}
bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
  // ...
  bool HasVectorRegisterExcess;

  auto ResetTargetRegions = [&]() {
    HasVectorRegisterExcess = false;
    for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
      // ...
      HasVectorRegisterExcess |= Target.hasVectorRegisterExcess();
    }
  };

  ResetTargetRegions();
  if (HasVectorRegisterExcess || DAG.MinOccupancy >= MFI.getMaxWavesPerEU()) {
    // In presence of excess RP the objective is to reduce spilling, not to
    // increase occupancy.
    TargetOcc = std::nullopt;
  } else {
    // There is no spilling and room to improve occupancy; set up increased
    // occupancy targets for all regions.
    TargetOcc = DAG.MinOccupancy + 1;
    unsigned VGPRBlockSize =
        MFI.getDynamicVGPRBlockSize();
    MaxSGPRs = ST.getMaxNumSGPRs(*TargetOcc, false);
    MaxVGPRs = ST.getMaxNumVGPRs(*TargetOcc, VGPRBlockSize);
    ResetTargetRegions();
  }
  REMAT_DEBUG({
    dbgs() << "Analyzing ";
    MF.getFunction().printAsOperand(dbgs(), false);
    dbgs() << ": ";
    if (OptRegions.empty()) {
      dbgs() << "no objective to achieve, occupancy is maximal at "
             << MFI.getMaxWavesPerEU();
    } else if (!TargetOcc) {
      dbgs() << "reduce spilling (minimum target occupancy is "
             << MFI.getMinWavesPerEU() << ')';
    } else {
      dbgs() << "increase occupancy from " << DAG.MinOccupancy << " to "
             << *TargetOcc;
    }
    dbgs() << '\n';
    for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
      if (auto OptIt = OptRegions.find(I); OptIt != OptRegions.end()) {
        // ...
      }
    }
  });
  if (OptRegions.empty())
    return false;
  // Tracks estimated pressure savings in a target region; once a region's
  // target is reached it is dropped from the optimization set.
  auto ReduceRPInRegion = [&](auto OptIt, Register Reg, LaneBitmask Mask,
                              bool &Progress) -> bool {
    GCNRPTarget &Target = OptIt->getSecond();
    // ...
    OptRegions.erase(OptIt->getFirst());
    return OptRegions.empty();
  };

  // ...
  DAG.RegionLiveOuts.buildLiveRegMap();

  DenseSet<unsigned> RematRegs;
  // Identify rematerialization candidates in every region.
  for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
    // ... (identify candidate def DefMI of virtual register Reg)
    if (!isReMaterializable(DefMI))
      continue;

    // We only support rematerializing virtual registers with one definition
    // and one non-debug user.
    MachineInstr *UseMI = DAG.MRI.getOneNonDBGUser(Reg);
    if (!UseMI)
      continue;

    // The instruction must be rematerialized in a different region.
    auto UseRegion = MIRegion.find(UseMI);
    if (UseRegion != MIRegion.end() && UseRegion->second == I)
      continue;

    // Do not rematerialize an instruction if it uses or is used by an
    // instruction that we have designated for rematerialization.
    if (Rematerializations.contains(UseMI) ||
        llvm::any_of(DefMI.operands(), [&RematRegs](MachineOperand &MO) {
          return MO.isReg() && RematRegs.contains(MO.getReg());
        }))
      continue;

    // Do not rematerialize an instruction if it uses registers that are not
    // available at its use; this keeps the rematerialization legal.
    SlotIndex UseIdx = DAG.LIS->getInstructionIndex(*UseMI).getRegSlot(true);
    if (!allUsesAvailableAt(&DefMI, UseIdx, *DAG.LIS, DAG.MRI, *DAG.TII))
      continue;

    RematInstruction &Remat =
        Rematerializations.try_emplace(&DefMI, UseMI).first->second;

    bool RematUseful = false;
    if (auto It = OptRegions.find(I); It != OptRegions.end()) {
      // Credit the savings to the defining region's live-outs first.
      LaneBitmask Mask = DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I)[Reg];
      if (ReduceRPInRegion(It, Reg, Mask, RematUseful))
        return true;
    }
    for (unsigned LIRegion = 0; LIRegion != E; ++LIRegion) {
      // We are only collecting regions in which the register is a live-in
      // (and may be live-through).
      auto It = DAG.LiveIns[LIRegion].find(Reg);
      if (It == DAG.LiveIns[LIRegion].end() || It->second.none())
        continue;
      Remat.LiveInRegions.insert(LIRegion);
      // ...
      if (auto It = OptRegions.find(LIRegion); It != OptRegions.end()) {
        // ...
        if (ReduceRPInRegion(It, Reg, DAG.LiveIns[LIRegion][Reg],
                             RematUseful))
          return true;
      }
    }

    // Candidates that help no target region are dropped again.
    if (!RematUseful) {
      Rematerializations.pop_back();
      REMAT_DEBUG(dbgs() << "  No impact, not rematerializing instruction\n");
    } else {
      RematRegs.insert(Reg);
    }
  }

  if (TargetOcc) {
    // We were trying to increase occupancy and did not reach the target.
    // ...
    Rematerializations.clear();
    return false;
  }
  return !Rematerializations.empty();
}
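// Shape of the analysis above, as far as the fragments show: every
// rematerializable def with a single non-debug user in another region becomes
// a tentative candidate; its pressure savings are credited first to the
// defining region and then to every region where the register is a live-in.
// Candidates that help no target region are dropped again via
// Rematerializations.pop_back(), and the stage proceeds only if at least one
// useful candidate survives.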
void PreRARematStage::rematerialize() {
  const SIInstrInfo *TII = MF.getSubtarget<GCNSubtarget>().getInstrInfo();
  // ...
  DenseSet<unsigned> RecomputeRP;

  // Rematerialize all identified instructions.
  for (auto &[DefMI, Remat] : Rematerializations) {
    MachineBasicBlock::iterator InsertPos(Remat.UseMI);
    Register Reg = DefMI->getOperand(0).getReg();
    unsigned DefRegion = MIRegion.at(DefMI);

    // Rematerialize the instruction right before its single use.
    TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg,
                       AMDGPU::NoSubRegister, *DefMI);
    Remat.RematMI = &*std::prev(InsertPos);
    DAG.LIS->InsertMachineInstrInMaps(*Remat.RematMI);

    // Update region boundaries in regions we sank from (remove the defining
    // MI) and to (insert the rematerialized MI).
    DAG.updateRegionBoundaries(DAG.Regions[DefRegion], DefMI, nullptr);
    auto UseRegion = MIRegion.find(Remat.UseMI);
    if (UseRegion != MIRegion.end()) {
      DAG.updateRegionBoundaries(DAG.Regions[UseRegion->second], InsertPos,
                                 Remat.RematMI);
    }
    DAG.LIS->RemoveMachineInstrFromMaps(*DefMI);
    DefMI->eraseFromParent();

    // Collect all regions impacted by the rematerialization and update their
    // live-in/RP information.
    for (unsigned I : Remat.LiveInRegions) {
      ImpactedRegions.insert({I, DAG.Pressure[I]});
      // ...
#ifdef EXPENSIVE_CHECKS
      // All uses are known to be available / live at the remat point; check
      // that each used register is fully covered by the region's live-ins.
      // ...
      LiveInterval &LI = DAG.LIS->getInterval(UseReg);
      LaneBitmask LM = DAG.MRI.getMaxLaneMaskForVReg(MO.getReg());
      // ...
      LaneBitmask LiveInMask = RegionLiveIns.at(UseReg);
      LaneBitmask UncoveredLanes = LM & ~(LiveInMask & LM);
      // If this register has lanes not covered by the live-ins, be sure they
      // do not map to any subrange.
      if (UncoveredLanes.any()) {
        assert(LI.hasSubRanges());
        for (LiveInterval::SubRange &SR : LI.subranges())
          assert((SR.LaneMask & UncoveredLanes).none());
      }
#endif

      // Update the region's live-ins for the rematerialized register.
      LaneBitmask PrevMask = RegionLiveIns[Reg];
      // ...
      RegMasks.insert({{I, Remat.RematMI->getOperand(0).getReg()}, PrevMask});
      if (Remat.UseMI->getParent() != DAG.Regions[I].first->getParent())
        RecomputeRP.insert(I);
    }

    // RP in the region from which the instruction was rematerialized may or
    // may not decrease.
    ImpactedRegions.insert({DefRegion, DAG.Pressure[DefRegion]});
    RecomputeRP.insert(DefRegion);

    // Recompute the live interval for the rematerialized register.
    Register RematReg = Remat.RematMI->getOperand(0).getReg();
    DAG.LIS->removeInterval(RematReg);
    DAG.LIS->createAndComputeVirtRegInterval(RematReg);
  }
  // All regions impacted by at least one rematerialization must be
  // rescheduled; maximum pressure must be recomputed where it may have
  // changed.
  unsigned DynamicVGPRBlockSize = DAG.MFI.getDynamicVGPRBlockSize();
  AchievedOcc = MFI.getMaxWavesPerEU();
  for (auto &[I, OriginalRP] : ImpactedRegions) {
    bool IsEmptyRegion = DAG.Regions[I].first == DAG.Regions[I].second;
    RescheduleRegions[I] = !IsEmptyRegion;
    if (!RecomputeRP.contains(I))
      continue;

    GCNRegPressure RP;
    if (IsEmptyRegion) {
      RP = getRegPressure(DAG.MRI, DAG.LiveIns[I]);
    } else {
      GCNDownwardRPTracker RPT(*DAG.LIS);
      auto *NonDbgMI = &*skipDebugInstructionsForward(DAG.Regions[I].first,
                                                      DAG.Regions[I].second);
      if (NonDbgMI == DAG.Regions[I].second) {
        // The region only contains debug instructions.
        RP = getRegPressure(DAG.MRI, DAG.LiveIns[I]);
      } else {
        RPT.reset(*NonDbgMI, &DAG.LiveIns[I]);
        RPT.advance(DAG.Regions[I].second);
        RP = RPT.moveMaxPressure();
      }
    }
    DAG.Pressure[I] = RP;
    AchievedOcc =
        std::min(AchievedOcc, RP.getOccupancy(ST, DynamicVGPRBlockSize));
  }
}
bool PreRARematStage::isReMaterializable(const MachineInstr &MI) {
  if (!DAG.TII->isReMaterializable(MI))
    return false;

  for (const MachineOperand &MO : MI.all_uses()) {
    if (MO.getReg().isVirtual())
      continue;

    // Physical-register uses are only tolerated when constant or ignorable
    // (e.g. an implicit exec use on VALU instructions).
    if (DAG.MRI.isConstantPhysReg(MO.getReg()) || DAG.TII->isIgnorableUse(MO))
      continue;

    return false;
  }

  return true;
}
void PreRARematStage::finalizeGCNSchedStage() {
  // Reducing spilling is considered always beneficial, so rematerializations
  // are never rolled back in that case. It is also possible that rescheduling
  // lowers occupancy below what rematerialization alone achieved, in which
  // case we do not want to roll back either.
  unsigned MaxOcc = std::max(AchievedOcc, DAG.MinOccupancy);
  if (!TargetOcc || MaxOcc >= *TargetOcc)
    return;

  REMAT_DEBUG(dbgs() << "Rolling back all rematerializations\n");
  const SIInstrInfo *TII = MF.getSubtarget<GCNSubtarget>().getInstrInfo();

  // Rollback the rematerializations.
  for (const auto &[DefMI, Remat] : Rematerializations) {
    MachineInstr &RematMI = *Remat.RematMI;
    unsigned DefRegion = MIRegion.at(DefMI);
    MachineBasicBlock::iterator InsertPos(DAG.Regions[DefRegion].second);
    MachineBasicBlock *MBB = RegionBB[DefRegion];
    Register Reg = RematMI.getOperand(0).getReg();

    // Re-rematerialize the instruction at the end of its original region.
    TII->reMaterialize(*MBB, InsertPos, Reg, AMDGPU::NoSubRegister, RematMI);
    MachineInstr *NewMI = &*std::prev(InsertPos);
    DAG.LIS->InsertMachineInstrInMaps(*NewMI);

    auto UseRegion = MIRegion.find(Remat.UseMI);
    if (UseRegion != MIRegion.end()) {
      DAG.updateRegionBoundaries(DAG.Regions[UseRegion->second], RematMI,
                                 nullptr);
    }
    DAG.updateRegionBoundaries(DAG.Regions[DefRegion], InsertPos, NewMI);

    // Erase the rematerialized instruction and recompute the live interval.
    DAG.LIS->RemoveMachineInstrFromMaps(RematMI);
    RematMI.eraseFromParent();
    DAG.LIS->removeInterval(Reg);
    DAG.LIS->createAndComputeVirtRegInterval(Reg);

    // Re-add the register as a live-in in all regions it used to be one in.
    for (unsigned LIRegion : Remat.LiveInRegions)
      DAG.LiveIns[LIRegion].insert({Reg, RegMasks.at({LIRegion, Reg})});
  }

  // Reset RP in all impacted regions.
  for (auto &[I, OriginalRP] : ImpactedRegions)
    DAG.Pressure[I] = OriginalRP;

  GCNSchedStage::finalizeGCNSchedStage();
}
void GCNScheduleDAGMILive::updateRegionBoundaries(
    RegionBoundaries &RegionBounds, MachineBasicBlock::iterator MI,
    MachineInstr *NewMI) {
  assert((!NewMI || NewMI != RegionBounds.second) &&
         "cannot remove at region end");

  if (RegionBounds.first == RegionBounds.second) {
    assert(NewMI && "cannot remove from an empty region");
    RegionBounds.first = NewMI;
    return;
  }

  // Only modifications at the beginning of the region matter; the upper
  // region boundary is exclusive.
  if (MI != RegionBounds.first)
    return;
  if (!NewMI)
    RegionBounds.first = std::next(MI); // Removal.
  else
    RegionBounds.first = NewMI; // Insertion.
}
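// Semantics implied by the asserts above: NewMI == nullptr means "MI is being
// removed", non-null means "NewMI was inserted at MI's position". Only
// changes at RegionBounds.first can move a boundary: removals advance the
// region begin past MI, insertions rebase it onto NewMI.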
// GCNPostScheduleDAGMILive::schedule() (fragment)
if (HasIGLPInstrs) {
  SavedMutations.clear();
  SavedMutations.swap(Mutations);
  addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PostRA));
}