#define DEBUG_TYPE "machine-scheduler"

static cl::opt<bool> DisableUnclusterHighRP(
    "amdgpu-disable-unclustered-high-rp-reschedule", cl::Hidden,
    cl::desc("Disable unclustered high register pressure "
             "reduction scheduling stage."),
    cl::init(false));

static cl::opt<bool> DisableClusteredLowOccupancy(
    "amdgpu-disable-clustered-low-occupancy-reschedule", cl::Hidden,
    cl::desc("Disable clustered low occupancy "
             "rescheduling for ILP scheduling stage."),
    cl::init(false));

static cl::opt<unsigned> ScheduleMetricBias(
    "amdgpu-schedule-metric-bias", cl::Hidden,
    cl::desc(
        "Sets the bias which adds weight to occupancy vs latency. Set it to "
        "100 to chase the occupancy only."),
    cl::init(10));

static cl::opt<bool> RelaxedOcc(
    "amdgpu-schedule-relaxed-occupancy", cl::Hidden,
    cl::desc("Relax occupancy targets for kernels which are memory "
             "bound (amdgpu-membound-threshold), or "
             "Wave Limited (amdgpu-limit-wave-threshold)."),
    cl::init(false));

static cl::opt<bool> GCNTrackers(
    "amdgpu-use-amdgpu-trackers", cl::Hidden,
    cl::desc("Use the AMDGPU specific RPTrackers during scheduling"),
    cl::init(false));

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
#define DUMP_MAX_REG_PRESSURE

static cl::opt<bool> PrintMaxRPRegUsageBeforeScheduler(
    "amdgpu-print-max-reg-pressure-regusage-before-scheduler", cl::Hidden,
    cl::desc("Print a list of live registers along with their def/uses at the "
             "point of maximum register pressure before scheduling."),
    cl::init(false));

static cl::opt<bool> PrintMaxRPRegUsageAfterScheduler(
    "amdgpu-print-max-reg-pressure-regusage-after-scheduler", cl::Hidden,
    cl::desc("Print a list of live registers along with their def/uses at the "
             "point of maximum register pressure after scheduling."),
    cl::init(false));
#endif
  SGPRExcessLimit =
      Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
  VGPRExcessLimit =
      Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);
  // ...
    LLVM_DEBUG(dbgs() << "Region is known to spill, use alternative "
                         "VGPRCriticalLimit calculation method.\n");
    unsigned Granule =
        AMDGPU::IsaInfo::getVGPRAllocGranule(&ST, DynamicVGPRBlockSize);
    unsigned Addressable =
        AMDGPU::IsaInfo::getAddressableNumVGPRs(&ST, DynamicVGPRBlockSize);
    unsigned VGPRBudget = alignDown(Addressable / TargetOccupancy, Granule);
    VGPRBudget = std::max(VGPRBudget, Granule);
    VGPRCriticalLimit = std::min(VGPRBudget, VGPRExcessLimit);
  for (const auto &Op : SU.getInstr()->operands()) {
    if (!Op.isReg() || Op.isImplicit())
      continue;
    if (Op.getReg().isPhysical() ||
        (Op.isDef() && Op.getSubReg() != AMDGPU::NoSubRegister))
      return false;
  }
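// Note: cached PressureDiffs are only safe to reuse when the instruction
// touches no physical registers and defines no subregisters; in both cases
// the cached diffs would be imprecise, so callers of canUsePressureDiffs()
// fall back to querying the pressure tracker directly.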
static void getRegisterPressures(
    bool AtTop, const RegPressureTracker &RPTracker, SUnit *SU,
    std::vector<unsigned> &Pressure, std::vector<unsigned> &MaxPressure,
    GCNDownwardRPTracker &DownwardTracker, GCNUpwardRPTracker &UpwardTracker,
    ScheduleDAGMI *DAG, const SIRegisterInfo *SRI) {
  // ...
  Pressure.resize(4, 0);
  // ...
  Pressure[AMDGPU::RegisterPressureSets::SReg_32] = NewPressure.getSGPRNum();
  Pressure[AMDGPU::RegisterPressureSets::VGPR_32] =
      NewPressure.getArchVGPRNum();
  Pressure[AMDGPU::RegisterPressureSets::AGPR_32] = NewPressure.getAGPRNum();
}
void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
                                     bool AtTop,
                                     const RegPressureTracker &RPTracker,
                                     const SIRegisterInfo *SRI,
                                     unsigned SGPRPressure,
                                     unsigned VGPRPressure, bool IsBottomUp) {
  Cand.SU = SU;
  Cand.AtTop = AtTop;

  if (!DAG->isTrackingPressure())
    return;
  // ...
    Pressure[AMDGPU::RegisterPressureSets::SReg_32] = SGPRPressure;
    Pressure[AMDGPU::RegisterPressureSets::VGPR_32] = VGPRPressure;

    for (const auto &Diff : DAG->getPressureDiff(SU)) {
      if (!Diff.isValid())
        continue;
      // PressureDiffs is always bottom-up so if we're working top-down we
      // need to invert its sign.
      Pressure[Diff.getPSet()] +=
          (IsBottomUp ? Diff.getUnitInc() : -Diff.getUnitInc());
    }
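// Example of the sign convention above: a cached diff of +1 on SReg_32 for an
// SU means pressure grows by one unit when the SU is scheduled bottom-up, and
// shrinks by one unit when the same SU is scheduled top-down; hence the
// negation in the top-down case.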
#ifdef EXPENSIVE_CHECKS
    std::vector<unsigned> CheckPressure, CheckMaxPressure;
    getRegisterPressures(AtTop, RPTracker, SU, CheckPressure, CheckMaxPressure,
                         DownwardTracker, UpwardTracker, DAG, SRI);
    if (Pressure[AMDGPU::RegisterPressureSets::SReg_32] !=
            CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] ||
        Pressure[AMDGPU::RegisterPressureSets::VGPR_32] !=
            CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32]) {
      errs() << "Register Pressure is inaccurate when calculated through "
                "PressureDiff\n"
             << "SGPR got " << Pressure[AMDGPU::RegisterPressureSets::SReg_32]
             << ", expected "
             << CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] << "\n"
             << "VGPR got " << Pressure[AMDGPU::RegisterPressureSets::VGPR_32]
             << ", expected "
             << CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32] << "\n";
      report_fatal_error("inaccurate register pressure calculation");
    }
#endif
  unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
  unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
  // ...
  const unsigned MaxVGPRPressureInc = 16;
  bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
  bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;
  // ...
  if (SGPRDelta >= 0 || VGPRDelta >= 0) {
    HasHighPressure = true;
    if (SGPRDelta > VGPRDelta) {
      Cand.RPDelta.CriticalMax =
          PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
      Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
    } else {
      Cand.RPDelta.CriticalMax =
          PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
      Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
    }
  }
}
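// The MaxVGPRPressureInc headroom above is a heuristic: VGPR tracking only
// kicks in once current pressure is within 16 units of the excess limit,
// on the assumption that a single candidate rarely increases pressure by
// more than that; SGPRs are only tracked when VGPRs are not the bottleneck.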
void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
                                         const CandPolicy &ZonePolicy,
                                         const RegPressureTracker &RPTracker,
                                         SchedCandidate &Cand,
                                         bool IsBottomUp) {
  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(TRI);
  unsigned SGPRPressure = 0;
  unsigned VGPRPressure = 0;
  if (DAG->isTrackingPressure()) {
    if (!GCNTrackers) {
      SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
      VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
    } else {
      GCNRPTracker *T = IsBottomUp
                            ? static_cast<GCNRPTracker *>(&UpwardTracker)
                            : static_cast<GCNRPTracker *>(&DownwardTracker);
      SGPRPressure = T->getPressure().getSGPRNum();
      VGPRPressure = T->getPressure().getArchVGPRNum();
    }
  }
  ReadyQueue &Q = Zone.Available;
  for (SUnit *SU : Q) {
    SchedCandidate TryCand(ZonePolicy);
    initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, SGPRPressure,
                  VGPRPressure, IsBottomUp);
    // ...
  }
}
SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    IsTopNode = true;
    return SU;
  }
  // ...
      assert(TCand.SU == BotCand.SU &&
             "Last pick result should correspond to re-picking right now");
  // ...
      assert(TCand.SU == TopCand.SU &&
             "Last pick result should correspond to re-picking right now");
  // ...
  IsTopNode = Cand.AtTop;
  return Cand.SU;
}
SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return nullptr;
  }
  SUnit *SU;
  do {
    if (RegionPolicy.OnlyTopDown) {
      SU = Top.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        TopCand.reset(NoPolicy);
        pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand,
                          /*IsBottomUp=*/false);
        assert(TopCand.Reason != NoCand && "failed to find a candidate");
        SU = TopCand.SU;
      }
      IsTopNode = true;
    } else if (RegionPolicy.OnlyBottomUp) {
      SU = Bot.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        BotCand.reset(NoPolicy);
        pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand,
                          /*IsBottomUp=*/true);
        assert(BotCand.Reason != NoCand && "failed to find a candidate");
        SU = BotCand.SU;
      }
      IsTopNode = false;
    } else {
      SU = pickNodeBidirectional(IsTopNode);
    }
  } while (SU->isScheduled);
  // ...
  return SU;
}
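// pickNode keeps looping until an unscheduled candidate is found:
// pickOnlyChoice() is the cheap fast path that returns an SU only when a zone
// has effectively a single available instruction, while the bidirectional
// path runs the full heuristic comparison between the top and bottom zones.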
bool GCNMaxILPSchedStrategy::tryCandidate(SchedCandidate &Cand,
                                          SchedCandidate &TryCand,
                                          SchedBoundary *Zone) const {
  // ...
  // Avoid spilling by exceeding the register limit.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
                  RegExcess, TRI, DAG->MF))
    return TryCand.Reason != NoCand;
  // ...
  bool SameBoundary = Zone != nullptr;
  // ...
  bool CandIsClusterSucc =
      isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
  bool TryCandIsClusterSucc =
      isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
  if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
                 Cluster))
    return TryCand.Reason != NoCand;

  // Avoid increasing the max critical pressure in the scheduled region.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
                  TryCand, Cand, RegCritical, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  // Avoid increasing the max pressure of the entire region.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
                  Cand, RegMax, TRI, DAG->MF))
    return TryCand.Reason != NoCand;
bool GCNMaxMemoryClauseSchedStrategy::tryCandidate(SchedCandidate &Cand,
                                                   SchedCandidate &TryCand,
                                                   SchedBoundary *Zone) const {
  // ...
  if (DAG->isTrackingPressure()) {
    // ...
  }

  // MaxMemoryClause-specific: prioritize clustered instructions, since we get
  // more benefit from clausing these memory instructions.
  bool CandIsClusterSucc =
      isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
  bool TryCandIsClusterSucc =
      isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
  if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
                 Cluster))
    return TryCand.Reason != NoCand;

  // We only compare a subset of features when comparing nodes between the
  // Top and Bottom boundary.
  bool SameBoundary = Zone != nullptr;
  if (SameBoundary) {
    // ...
    if (TryMayLoad || CandMayLoad) {
      bool TryLongLatency =
          TryCand.SU->Latency > 10 * AvgLatency && TryMayLoad;
      bool CandLongLatency =
          10 * AvgLatency < Cand.SU->Latency && CandMayLoad;

      if (tryGreater(Zone->isTop() ? TryLongLatency : CandLongLatency,
                     Zone->isTop() ? CandLongLatency : TryLongLatency, TryCand,
                     Cand, Stall))
        return TryCand.Reason != NoCand;
    }
    // ...
    if (DAG->isTrackingPressure() &&
        tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax,
                    TryCand, Cand, RegMax, TRI, DAG->MF))
      return TryCand.Reason != NoCand;
    // ...
    // Avoid serializing long latency dependence chains.
    if (Cand.Policy.ReduceLatency && !Rem.IsAcyclicLatencyLimited &&
        tryLatency(TryCand, Cand, *Zone))
      return TryCand.Reason != NoCand;
GCNScheduleDAGMILive::GCNScheduleDAGMILive(
    MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S)
    : ScheduleDAGMILive(C, std::move(S)), ST(MF.getSubtarget<GCNSubtarget>()),
      MFI(*MF.getInfo<SIMachineFunctionInfo>()),
      StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy),
      RegionLiveOuts(this, /*IsLiveOut=*/true) {
  // ...
  LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
  if (RelaxedOcc) {
    MinOccupancy = std::min(MFI.getMinAllowedOccupancy(), StartingOccupancy);
    if (MinOccupancy != StartingOccupancy)
      LLVM_DEBUG(dbgs() << "Allowing Occupancy drops to " << MinOccupancy
                        << ".\n");
  }
}
std::unique_ptr<GCNSchedStage>
GCNScheduleDAGMILive::createSchedStage(GCNSchedStageID SchedStageID) {
  switch (SchedStageID) {
  case GCNSchedStageID::OccInitialSchedule:
    return std::make_unique<OccInitialScheduleStage>(SchedStageID, *this);
  case GCNSchedStageID::UnclusteredHighRPReschedule:
    return std::make_unique<UnclusteredHighRPStage>(SchedStageID, *this);
  case GCNSchedStageID::ClusteredLowOccupancyReschedule:
    return std::make_unique<ClusteredLowOccStage>(SchedStageID, *this);
  case GCNSchedStageID::PreRARematerialize:
    return std::make_unique<PreRARematStage>(SchedStageID, *this);
  case GCNSchedStageID::ILPInitialSchedule:
    return std::make_unique<ILPInitialScheduleStage>(SchedStageID, *this);
  case GCNSchedStageID::MemoryClauseInitialSchedule:
    return std::make_unique<MemoryClauseInitialScheduleStage>(SchedStageID,
                                                              *this);
  }
  llvm_unreachable("Unknown SchedStageID.");
}
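// A sketch of how these stages are typically chained (an assumption based on
// the stage IDs above, not spelled out at this point in the file): the
// strategy populates its SchedStages list at construction, e.g.
// OccInitialSchedule -> UnclusteredHighRPReschedule ->
// ClusteredLowOccupancyReschedule -> PreRARematerialize for the
// max-occupancy scheduler, and runSchedStages() walks that list in order.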
GCNRegPressure
GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {
  GCNDownwardRPTracker RPTracker(*LIS);
  RPTracker.advance(Regions[RegionIdx].first, Regions[RegionIdx].second,
                    &LiveIns[RegionIdx]);
  return RPTracker.moveMaxPressure();
}

static MachineInstr *
getLastMIForRegion(MachineBasicBlock::iterator RegionBegin,
                   MachineBasicBlock::iterator RegionEnd) {
  auto REnd = RegionEnd == RegionBegin->getParent()->end()
                  ? std::prev(RegionEnd)
                  : RegionEnd;
  return &*skipDebugInstructionsBackward(REnd, RegionBegin);
}
void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
                                                const MachineBasicBlock *MBB) {
  GCNDownwardRPTracker RPTracker(*LIS);

  // If the block has the only successor then live-ins of that successor are
  // live-outs of the current block. We can reuse calculated live set if the
  // successor will be sent to scheduling past current block.
  const MachineBasicBlock *OnlySucc = nullptr;
  if (MBB->succ_size() == 1) {
    auto *Candidate = *MBB->succ_begin();
    if (!Candidate->empty() && Candidate->pred_size() == 1) {
      SlotIndexes *Ind = LIS->getSlotIndexes();
      if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(Candidate))
        OnlySucc = Candidate;
    }
  }

  // Scheduler sends regions from the end of the block upwards.
  size_t CurRegion = RegionIdx;
  for (size_t E = Regions.size(); CurRegion != E; ++CurRegion)
    if (Regions[CurRegion].first->getParent() != MBB)
      break;
  --CurRegion;

  auto I = MBB->begin();
  auto LiveInIt = MBBLiveIns.find(MBB);
  auto &Rgn = Regions[CurRegion];
  auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
  if (LiveInIt != MBBLiveIns.end()) {
    auto LiveIn = std::move(LiveInIt->second);
    RPTracker.reset(*MBB->begin(), &LiveIn);
    MBBLiveIns.erase(LiveInIt);
  } else {
    I = Rgn.first;
    auto LRS = BBLiveInMap.lookup(NonDbgMI);
#ifdef EXPENSIVE_CHECKS
    assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
#endif
    RPTracker.reset(*I, &LRS);
  }

  for (;;) {
    I = RPTracker.getNext();

    if (Regions[CurRegion].first == I || NonDbgMI == I) {
      LiveIns[CurRegion] = RPTracker.getLiveRegs();
      RPTracker.clearMaxPressure();
    }

    if (Regions[CurRegion].second == I) {
      Pressure[CurRegion] = RPTracker.moveMaxPressure();
      if (CurRegion-- == RegionIdx)
        break;
      auto &Rgn = Regions[CurRegion];
      NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
    }

    if (I == MBB->end() || !RPTracker.advanceBeforeNext())
      break;
  }

  if (OnlySucc) {
    if (I != MBB->end()) {
      RPTracker.advanceToNext();
      RPTracker.advance(MBB->end());
    }
    RPTracker.advanceBeforeNext();
    MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();
  }
}
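// Reusing MBBLiveIns this way means that, for a chain of blocks where each
// block has a single successor/predecessor, the tracker's live set is handed
// from one block to the next instead of being recomputed from LiveIntervals,
// which keeps computeBlockPressure close to linear in practice.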
DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
GCNScheduleDAGMILive::getRegionLiveInMap() const {
  std::vector<MachineInstr *> RegionFirstMIs;
  RegionFirstMIs.reserve(Regions.size());
  for (auto &[RegionBegin, RegionEnd] : reverse(Regions))
    RegionFirstMIs.push_back(
        &*skipDebugInstructionsForward(RegionBegin, RegionEnd));
  return getLiveRegMap(RegionFirstMIs, /*After=*/false, *LIS);
}

DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
GCNScheduleDAGMILive::getRegionLiveOutMap() const {
  std::vector<MachineInstr *> RegionLastMIs;
  RegionLastMIs.reserve(Regions.size());
  for (auto &[RegionBegin, RegionEnd] : reverse(Regions))
    RegionLastMIs.push_back(getLastMIForRegion(RegionBegin, RegionEnd));
  return getLiveRegMap(RegionLastMIs, /*After=*/true, *LIS);
}
void RegionPressureMap::buildLiveRegMap() {
  IdxToInstruction.clear();

  RegionLiveRegMap =
      IsLiveOut ? DAG->getRegionLiveOutMap() : DAG->getRegionLiveInMap();
  for (unsigned I = 0; I < DAG->Regions.size(); I++) {
    MachineInstr *RegionKey =
        IsLiveOut
            ? getLastMIForRegion(DAG->Regions[I].first, DAG->Regions[I].second)
            : &*DAG->Regions[I].first;
    IdxToInstruction[I] = RegionKey;
  }
}
  LiveIns.resize(Regions.size());
  Pressure.resize(Regions.size());
  RegionsWithHighRP.resize(Regions.size());
  RegionsWithExcessRP.resize(Regions.size());
  RegionsWithIGLPInstrs.resize(Regions.size());
  RegionsWithHighRP.reset();
  RegionsWithExcessRP.reset();
  RegionsWithIGLPInstrs.reset();
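// The three bitvectors reset above are per-region flags accumulated during
// scheduling: RegionsWithHighRP marks regions close to the pressure limits,
// RegionsWithExcessRP marks regions whose pressure exceeds them (and is thus
// likely to spill), and RegionsWithIGLPInstrs marks regions containing IGLP
// scheduling intrinsics whose DAG mutations must be re-applied.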
void GCNScheduleDAGMILive::runSchedStages() {
  LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");

  if (!Regions.empty()) {
    BBLiveInMap = getRegionLiveInMap();
    if (GCNTrackers)
      RegionLiveOuts.buildLiveRegMap();
  }

#ifdef DUMP_MAX_REG_PRESSURE
  // ...
#endif

  GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl);
  while (S.advanceStage()) {
    auto Stage = createSchedStage(S.getCurrentStage());
    if (!Stage->initGCNSchedStage())
      continue;

    for (auto Region : Regions) {
      RegionBegin = Region.first;
      RegionEnd = Region.second;
      // Setup for scheduling the region and check whether it should be
      // skipped.
      if (!Stage->initGCNRegion()) {
        Stage->advanceRegion();
        exitRegion();
        continue;
      }

      if (GCNTrackers) {
        GCNDownwardRPTracker *DownwardTracker = S.getDownwardTracker();
        GCNUpwardRPTracker *UpwardTracker = S.getUpwardTracker();
        GCNRPTracker::LiveRegSet *RegionLiveIns =
            &LiveIns[Stage->getRegionIdx()];

        reinterpret_cast<GCNRPTracker *>(DownwardTracker)
            ->reset(MRI, *RegionLiveIns);
        reinterpret_cast<GCNRPTracker *>(UpwardTracker)
            ->reset(MRI, RegionLiveOuts.getLiveRegsForRegionIdx(
                             Stage->getRegionIdx()));
      }

      ScheduleDAGMILive::schedule();
      Stage->finalizeGCNRegion();
    }

    Stage->finalizeGCNSchedStage();
  }

#ifdef DUMP_MAX_REG_PRESSURE
  // ...
#endif
}
raw_ostream &llvm::operator<<(raw_ostream &OS, const GCNSchedStageID &StageID) {
  switch (StageID) {
  case GCNSchedStageID::OccInitialSchedule:
    OS << "Max Occupancy Initial Schedule";
    break;
  case GCNSchedStageID::UnclusteredHighRPReschedule:
    OS << "Unclustered High Register Pressure Reschedule";
    break;
  case GCNSchedStageID::ClusteredLowOccupancyReschedule:
    OS << "Clustered Low Occupancy Reschedule";
    break;
  case GCNSchedStageID::PreRARematerialize:
    OS << "Pre-RA Rematerialize";
    break;
  case GCNSchedStageID::ILPInitialSchedule:
    OS << "Max ILP Initial Schedule";
    break;
  case GCNSchedStageID::MemoryClauseInitialSchedule:
    OS << "Max memory clause Initial Schedule";
    break;
  }
  return OS;
}
bool UnclusteredHighRPStage::initGCNSchedStage() {
  if (DisableUnclusterHighRP)
    return false;

  if (!GCNSchedStage::initGCNSchedStage())
    return false;

  if (DAG.RegionsWithHighRP.none() && DAG.RegionsWithExcessRP.none())
    return false;

  // ...
  InitialOccupancy = DAG.MinOccupancy;
  // Aggressively try to reduce register pressure in the unclustered high RP
  // stage. Temporarily increase occupancy target in the region.
  S.SGPRLimitBias = S.HighRPSGPRBias;
  S.VGPRLimitBias = S.HighRPVGPRBias;
  if (MFI.getMaxWavesPerEU() > DAG.MinOccupancy)
    MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);

  LLVM_DEBUG(
      dbgs()
      << "Retrying function scheduling without clustering. "
         "Aggressively try to reduce register pressure to achieve occupancy "
      << DAG.MinOccupancy << ".\n");

  return true;
}

bool ClusteredLowOccStage::initGCNSchedStage() {
  if (DisableClusteredLowOccupancy)
    return false;

  if (!GCNSchedStage::initGCNSchedStage())
    return false;

  // Don't bother trying to improve ILP in lower RP regions if occupancy has
  // not been dropped. All regions will have already been scheduled with the
  // ideal occupancy targets.
  if (DAG.StartingOccupancy <= DAG.MinOccupancy)
    return false;

  LLVM_DEBUG(
      dbgs() << "Retrying function scheduling with lowest recorded occupancy "
             << DAG.MinOccupancy << ".\n");
  return true;
}
#define REMAT_PREFIX "[PreRARemat] "
#define REMAT_DEBUG(X) LLVM_DEBUG(dbgs() << REMAT_PREFIX; X;)

bool PreRARematStage::initGCNSchedStage() {
  // ...
  const unsigned NumRegions = DAG.Regions.size();
  RegionBB.reserve(NumRegions);
  for (unsigned I = 0; I < NumRegions; ++I) {
    RegionBoundaries Region = DAG.Regions[I];
    for (auto MI = Region.first; MI != Region.second; ++MI)
      MIRegion.insert({&*MI, I});
    RegionBB.push_back(Region.first->getParent());
  }

  if (!canIncreaseOccupancyOrReduceSpill())
    return false;

  // Rematerialize identified instructions and update scheduler's state.
  rematerialize();
  if (GCNTrackers)
    DAG.RegionLiveOuts.buildLiveRegMap();
  REMAT_DEBUG({
    dbgs() << "Retrying function scheduling with new min. occupancy of "
           << AchievedOcc << " from rematerializing (original was "
           << DAG.MinOccupancy;
    if (TargetOcc)
      dbgs() << ", target was " << *TargetOcc;
    dbgs() << ")\n";
  });

  if (AchievedOcc > DAG.MinOccupancy) {
    DAG.MinOccupancy = AchievedOcc;
    MFI.increaseOccupancy(MF, DAG.MinOccupancy);
  }
  return true;
}

// ...

void UnclusteredHighRPStage::finalizeGCNSchedStage() {
  SavedMutations.swap(DAG.Mutations);
  S.SGPRLimitBias = S.VGPRLimitBias = 0;
  if (DAG.MinOccupancy > InitialOccupancy) {
    LLVM_DEBUG(dbgs() << StageID
                      << " stage successfully increased occupancy to "
                      << DAG.MinOccupancy << '\n');
  }

  GCNSchedStage::finalizeGCNSchedStage();
}
bool GCNSchedStage::initGCNRegion() {
  // ...
  unsigned NumRegionInstrs = std::distance(DAG.begin(), DAG.end());
  DAG.SchedImpl->initPolicy(DAG.begin(), DAG.end(), NumRegionInstrs);

  // Skip empty scheduling regions.
  if (DAG.begin() == DAG.end() || DAG.begin() == std::prev(DAG.end()))
    return false;

  LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
  LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*CurrentMBB)
                    << " " << CurrentMBB->getName()
                    << "\n  From: " << *DAG.begin() << "    To: ";
             if (DAG.RegionEnd != CurrentMBB->end()) dbgs() << *DAG.RegionEnd;
             else dbgs() << "End";
             dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');

  // Save original instruction order before scheduling for possible revert.
  for (auto &I : DAG) {
    Unsched.push_back(&I);
    // ...
  }

  // ...
  LLVM_DEBUG(
      dbgs() << "Pressure before scheduling:\nRegion live-ins:"
             << print(DAG.LiveIns[RegionIdx], DAG.MRI)
             << "Region live-in pressure: "
             << print(llvm::getRegPressure(DAG.MRI, DAG.LiveIns[RegionIdx]))
             << "Region register pressure: " << print(PressureBefore));

  S.HasHighPressure = false;
  // ...
bool UnclusteredHighRPStage::initGCNRegion() {
  // Only reschedule regions that have excess register pressure (i.e. spilling)
  // or had minimum occupancy at the beginning of the stage (as long as
  // rescheduling of previous regions did not make occupancy drop back down to
  // the initial minimum).
  unsigned DynamicVGPRBlockSize = DAG.MFI.getDynamicVGPRBlockSize();
  if (!DAG.RegionsWithExcessRP[RegionIdx] &&
      (DAG.MinOccupancy <= InitialOccupancy ||
       DAG.Pressure[RegionIdx].getOccupancy(ST, DynamicVGPRBlockSize) !=
           InitialOccupancy))
    return false;

  return GCNSchedStage::initGCNRegion();
}

// ...

  if (S.HasHighPressure)
    DAG.RegionsWithHighRP[RegionIdx] = true;
void GCNSchedStage::checkScheduling() {
  // Check the results of scheduling.
  PressureAfter = DAG.getRealRegPressure(RegionIdx);
  unsigned DynamicVGPRBlockSize = DAG.MFI.getDynamicVGPRBlockSize();
  // ...

  unsigned TargetOccupancy = std::min(
      S.getTargetOccupancy(), ST.getOccupancyWithWorkGroupSizes(MF).second);
  unsigned WavesAfter = std::min(
      TargetOccupancy, PressureAfter.getOccupancy(ST, DynamicVGPRBlockSize));
  unsigned WavesBefore = std::min(
      TargetOccupancy, PressureBefore.getOccupancy(ST, DynamicVGPRBlockSize));
  LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
                    << ", after " << WavesAfter << ".\n");

  // We may not be able to keep the current target occupancy because of the
  // just scheduled region; figure out the achievable occupancy.
  unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);

  // Allow memory bound functions to drop to the minimum allowed occupancy if
  // not limited by an attribute.
  if (WavesAfter < WavesBefore && WavesAfter < DAG.MinOccupancy &&
      WavesAfter >= MFI.getMinAllowedOccupancy()) {
    LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
                      << MFI.getMinAllowedOccupancy() << " waves\n");
    NewOccupancy = WavesAfter;
  }

  if (NewOccupancy < DAG.MinOccupancy) {
    DAG.MinOccupancy = NewOccupancy;
    MFI.limitOccupancy(DAG.MinOccupancy);
    LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
                      << DAG.MinOccupancy << ".\n");
  }

  unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
  unsigned MaxArchVGPRs = std::min(MaxVGPRs, ST.getAddressableNumArchVGPRs());
  unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
  // ...
unsigned
GCNSchedStage::computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
                                      DenseMap<unsigned, unsigned> &ReadyCycles,
                                      const TargetSchedModel &SM) {
  unsigned ReadyCycle = CurrCycle;
  for (auto &D : SU.Preds) {
    if (D.isAssignedRegDep()) {
      MachineInstr *DefMI = D.getSUnit()->getInstr();
      unsigned Latency = SM.computeInstrLatency(DefMI);
      unsigned DefReady = ReadyCycles[DAG.getSUnit(DefMI)->NodeNum];
      ReadyCycle = std::max(ReadyCycle, DefReady + Latency);
    }
  }
  ReadyCycles[SU.NodeNum] = ReadyCycle;
  return ReadyCycle;
}

struct EarlierIssuingCycle {
  bool operator()(std::pair<MachineInstr *, unsigned> A,
                  std::pair<MachineInstr *, unsigned> B) const {
    return A.second < B.second;
  }
};
static void printScheduleModel(std::set<std::pair<MachineInstr *, unsigned>,
                                        EarlierIssuingCycle> &ReadyCycles) {
  if (ReadyCycles.empty())
    return;
  unsigned BBNum = ReadyCycles.begin()->first->getParent()->getNumber();
  dbgs() << "\n################## Schedule time ReadyCycles for MBB : " << BBNum
         << " ##################\n# Cycle #\t\t\tInstruction "
            "                                        \n";
  unsigned IPrev = 1;
  for (auto &I : ReadyCycles) {
    if (I.second > IPrev + 1)
      dbgs() << "****************************** BUBBLE OF " << I.second - IPrev
             << " CYCLES DETECTED ******************************\n\n";
    dbgs() << "[ " << I.second << " ] : " << *I.first << "\n";
    IPrev = I.second;
  }
}
ScheduleMetrics
GCNSchedStage::getScheduleMetrics(const std::vector<SUnit> &InputSchedule) {
#ifndef NDEBUG
  std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>
      ReadyCyclesSorted;
#endif
  const TargetSchedModel &SM = ST.getInstrInfo()->getSchedModel();
  unsigned SumBubbles = 0;
  DenseMap<unsigned, unsigned> ReadyCycles;
  unsigned CurrCycle = 0;
  for (auto &SU : InputSchedule) {
    unsigned ReadyCycle =
        computeSUnitReadyCycle(SU, CurrCycle, ReadyCycles, SM);
    SumBubbles += ReadyCycle - CurrCycle;
#ifndef NDEBUG
    ReadyCyclesSorted.insert(std::make_pair(SU.getInstr(), ReadyCycle));
#endif
    CurrCycle = ++ReadyCycle;
  }
  // ...
  return ScheduleMetrics(CurrCycle, SumBubbles);
}

ScheduleMetrics
GCNSchedStage::getScheduleMetrics(const GCNScheduleDAGMILive &DAG) {
#ifndef NDEBUG
  std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>
      ReadyCyclesSorted;
#endif
  const TargetSchedModel &SM = ST.getInstrInfo()->getSchedModel();
  unsigned SumBubbles = 0;
  DenseMap<unsigned, unsigned> ReadyCycles;
  unsigned CurrCycle = 0;
  for (auto &MI : DAG) {
    SUnit *SU = DAG.getSUnit(&MI);
    if (!SU)
      continue;
    unsigned ReadyCycle =
        computeSUnitReadyCycle(*SU, CurrCycle, ReadyCycles, SM);
    SumBubbles += ReadyCycle - CurrCycle;
#ifndef NDEBUG
    ReadyCyclesSorted.insert(std::make_pair(SU->getInstr(), ReadyCycle));
#endif
    CurrCycle = ++ReadyCycle;
  }
  // ...
  return ScheduleMetrics(CurrCycle, SumBubbles);
}
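// Worked example of the bubble accounting above: CurrCycle starts at 0; if
// three SUnits become ready at cycles 0, 3 and 4 (after applying predecessor
// latencies), the loop adds (0 - 0) + (3 - 1) + (4 - 4) = 2 bubble cycles,
// since CurrCycle advances to ReadyCycle + 1 after each instruction issues.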
bool GCNSchedStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (WavesAfter < DAG.MinOccupancy)
    return true;

  // For dynamic VGPR mode, we don't want to waste any VGPR blocks.
  if (DAG.MFI.isDynamicVGPREnabled()) {
    unsigned BlocksBefore = AMDGPU::IsaInfo::getAllocatedNumVGPRBlocks(
        &ST, DAG.MFI.getDynamicVGPRBlockSize(),
        PressureBefore.getVGPRNum(false));
    unsigned BlocksAfter = AMDGPU::IsaInfo::getAllocatedNumVGPRBlocks(
        &ST, DAG.MFI.getDynamicVGPRBlockSize(),
        PressureAfter.getVGPRNum(false));
    if (BlocksAfter > BlocksBefore)
      return true;
  }

  return false;
}
  LLVM_DEBUG(
      dbgs()
      << "\n\t      *** In shouldRevertScheduling ***\n"
      << "      *********** BEFORE UnclusteredHighRPStage ***********\n");
  ScheduleMetrics MBefore = getScheduleMetrics(DAG.SUnits);
  LLVM_DEBUG(
      dbgs()
      << "\n      *********** AFTER UnclusteredHighRPStage ***********\n");
  ScheduleMetrics MAfter = getScheduleMetrics(DAG);
  unsigned OldMetric = MBefore.getMetric();
  unsigned NewMetric = MAfter.getMetric();
  unsigned WavesBefore = std::min(
      S.getTargetOccupancy(),
      PressureBefore.getOccupancy(ST, DAG.MFI.getDynamicVGPRBlockSize()));
  unsigned Profit =
      ((WavesAfter * ScheduleMetrics::ScaleFactor) / WavesBefore *
       ((OldMetric + ScheduleMetricBias) * ScheduleMetrics::ScaleFactor) /
       NewMetric) /
      ScheduleMetrics::ScaleFactor;
  LLVM_DEBUG(dbgs() << "\tMetric before " << MBefore << "\tMetric after "
                    << MAfter << "Profit: " << Profit << "\n");
  return Profit < ScheduleMetrics::ScaleFactor;
bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) {
  if (WavesAfter <= MFI.getMinWavesPerEU() && isRegionWithExcessRP() &&
      !PressureAfter.less(MF, PressureBefore)) {
    LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
    return true;
  }

  return false;
}
void GCNSchedStage::revertScheduling() {
  LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
  DAG.RegionEnd = DAG.RegionBegin;
  int SkippedDebugInstr = 0;
  for (MachineInstr *MI : Unsched) {
    if (MI->isDebugInstr()) {
      ++SkippedDebugInstr;
      continue;
    }

    if (MI->getIterator() != DAG.RegionEnd) {
      DAG.BB->remove(MI);
      DAG.BB->insert(DAG.RegionEnd, MI);
      if (!MI->isDebugInstr())
        DAG.LIS->handleMove(*MI, true);
    }

    // Reset read-undef flags and update them later.
    for (auto &Op : MI->all_defs())
      Op.setIsUndef(false);
    RegisterOperands RegOpers;
    RegOpers.collect(*MI, *DAG.TRI, DAG.MRI, DAG.ShouldTrackLaneMasks, false);
    if (!MI->isDebugInstr()) {
      if (DAG.ShouldTrackLaneMasks) {
        // Adjust liveness and add missing dead+read-undef flags.
        SlotIndex SlotIdx = DAG.LIS->getInstructionIndex(*MI).getRegSlot();
        RegOpers.adjustLaneLiveness(*DAG.LIS, DAG.MRI, SlotIdx, MI);
      } else {
        // Adjust for missing dead-def flags.
        RegOpers.detectDeadDefs(*MI, *DAG.LIS);
      }
    }
    DAG.RegionEnd = MI->getIterator();
    ++DAG.RegionEnd;
    LLVM_DEBUG(dbgs() << "Scheduling " << *MI);
  }

  // After reverting schedule, debug instrs will now be at the end of the
  // block and RegionEnd will point to the first debug instr. Increment
  // RegionEnd past debug instrs to the actual end of the scheduling region.
  while (SkippedDebugInstr-- > 0)
    ++DAG.RegionEnd;

  // If Unsched.front() instruction is a debug instruction, this will actually
  // shrink the region since we moved all debug instructions to the end of the
  // block. Find the first instruction that is not a debug instruction.
  DAG.RegionBegin = Unsched.front()->getIterator();
  if (DAG.RegionBegin->isDebugInstr()) {
    for (MachineInstr *MI : Unsched) {
      if (MI->isDebugInstr())
        continue;
      DAG.RegionBegin = MI->getIterator();
      break;
    }
  }

  // Then move the debug instructions back into their correct place and set
  // RegionBegin and RegionEnd if needed.
  DAG.placeDebugValues();
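// After this point the region is back in its pre-scheduling order:
// revertScheduling replays Unsched (the instruction order recorded in
// initGCNRegion before scheduling), restores debug instructions via
// placeDebugValues(), and re-establishes the region boundaries, so later
// stages see the region as if this stage had never run.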
bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
  // Maps optimizable regions (i.e., regions at minimum and register-limited
  // occupancy, or regions with spilling) to the target RP we would like to
  // reach.
  DenseMap<unsigned, GCNRPTarget> OptRegions;
  // ...
  auto ResetTargetRegions = [&]() {
    OptRegions.clear();
    for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
      const GCNRegPressure &RP = DAG.Pressure[I];
      GCNRPTarget Target(MaxSGPRs, MaxVGPRs, MF, RP);
      if (!Target.satisfied())
        OptRegions.insert({I, Target});
    }
  };

  ResetTargetRegions();
  if (!OptRegions.empty() || DAG.MinOccupancy >= MFI.getMaxWavesPerEU()) {
    // In addition to register usage being above addressable limits, occupancy
    // below the minimum is considered like "spilling" as well.
    TargetOcc = std::nullopt;
  } else {
    // There is no spilling and room to improve occupancy; set up "increased
    // occupancy targets" for all regions.
    TargetOcc = DAG.MinOccupancy + 1;
    unsigned VGPRBlockSize =
        MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
    MaxSGPRs = ST.getMaxNumSGPRs(*TargetOcc, false);
    MaxVGPRs = ST.getMaxNumVGPRs(*TargetOcc, VGPRBlockSize);
    ResetTargetRegions();
  }
  REMAT_DEBUG({
    dbgs() << "Analyzing ";
    MF.getFunction().printAsOperand(dbgs(), false);
    dbgs() << ": ";
    if (OptRegions.empty()) {
      dbgs() << "no objective to achieve, occupancy is maximal at "
             << MFI.getMaxWavesPerEU();
    } else if (!TargetOcc) {
      dbgs() << "reduce spilling (minimum target occupancy is "
             << MFI.getMinWavesPerEU() << ')';
    } else {
      dbgs() << "increase occupancy from " << DAG.MinOccupancy << " to "
             << TargetOcc;
    }
    dbgs() << '\n';
    for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
      if (auto OptIt = OptRegions.find(I); OptIt != OptRegions.end()) {
        dbgs() << REMAT_PREFIX << "  [" << I << "] " << OptIt->getSecond()
               << '\n';
      }
    }
  });
  if (OptRegions.empty())
    return false;
  // Accounts for a reduction in RP in an optimizable region. Returns whether
  // we have identified enough rematerialization opportunities to achieve the
  // objective in every region.
  auto ReduceRPInRegion = [&](auto OptIt, Register Reg, LaneBitmask Mask,
                              bool &Progress) -> bool {
    GCNRPTarget &Target = OptIt->getSecond();
    if (!Target.isSaveBeneficial(Reg))
      return false;
    Progress = true;
    Target.saveReg(Reg, Mask, DAG.MRI);
    if (Target.satisfied())
      OptRegions.erase(OptIt->getFirst());
    return OptRegions.empty();
  };

  // We need up-to-date live-out info to query live-out register masks in
  // regions containing rematerializable instructions.
  DAG.RegionLiveOuts.buildLiveRegMap();

  // Cache set of registers that are going to be rematerialized.
  DenseSet<unsigned> RematRegs;

  // Identify rematerializable instructions in the function.
  for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
    auto Region = DAG.Regions[I];
    for (auto MI = Region.first; MI != Region.second; ++MI) {
      // The instruction must be trivially rematerializable.
      MachineInstr &DefMI = *MI;
      if (!isReMaterializable(DefMI))
        continue;

      // We only support rematerializing virtual registers with one
      // definition.
      Register Reg = DefMI.getOperand(0).getReg();
      if (!Reg.isVirtual() || !DAG.MRI.hasOneDef(Reg))
        continue;

      // We only care to rematerialize the instruction if it has a single
      // non-debug user in a different region.
      MachineInstr *UseMI = DAG.MRI.getOneNonDBGUser(Reg);
      if (!UseMI)
        continue;
      auto UseRegion = MIRegion.find(UseMI);
      if (UseRegion != MIRegion.end() && UseRegion->second == I)
        continue;

      // Do not rematerialize an instruction if it uses or is used by an
      // instruction that we have designated for rematerialization.
      if (Rematerializations.contains(UseMI) ||
          llvm::any_of(DefMI.operands(), [&RematRegs](MachineOperand &MO) {
            return MO.isReg() && RematRegs.contains(MO.getReg());
          }))
        continue;

      // Do not rematerialize an instruction if it uses registers that aren't
      // available at its use. This ensures that we are not extending any live
      // range while rematerializing.
      SlotIndex UseIdx = DAG.LIS->getInstructionIndex(*UseMI).getRegSlot(true);
      if (!allUsesAvailableAt(&DefMI, UseIdx, *DAG.LIS, DAG.MRI, *DAG.TII))
        continue;

      RematInstruction &Remat =
          Rematerializations.try_emplace(&DefMI, UseMI).first->second;

      bool RematUseful = false;
      if (auto It = OptRegions.find(I); It != OptRegions.end()) {
        // Optimistically consider that moving the instruction out of its
        // defining region will reduce RP in the latter.
        LaneBitmask Mask = DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I)[Reg];
        if (ReduceRPInRegion(It, Reg, Mask, RematUseful))
          return true;
      }

      for (unsigned LIRegion = 0; LIRegion != E; ++LIRegion) {
        // We are only collecting regions in which the register is a live-in
        // (and may be live-through).
        auto It = DAG.LiveIns[LIRegion].find(Reg);
        if (It == DAG.LiveIns[LIRegion].end() || It->second.none())
          continue;
        Remat.LiveInRegions.insert(LIRegion);

        // Account for the reduction in RP due to the rematerialization in
        // optimizable regions in which the register is a live-in.
        if (auto It = OptRegions.find(LIRegion); It != OptRegions.end()) {
          if (ReduceRPInRegion(It, Reg, DAG.LiveIns[LIRegion][Reg],
                               RematUseful))
            return true;
        }
      }

      // If the instruction has no RP impact in any optimizable region, drop
      // it from the candidate set.
      if (!RematUseful) {
        Rematerializations.pop_back();
        REMAT_DEBUG(dbgs() << "  No impact, not rematerializing instruction\n");
      } else {
        RematRegs.insert(Reg);
      }
    }
  }

  if (TargetOcc) {
    // We were trying to increase occupancy but ran out of candidates before
    // every optimizable region reached the target; abandon the stage.
    // ...
    Rematerializations.clear();
    return false;
  }
  return !Rematerializations.empty();
}
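// Contract of canIncreaseOccupancyOrReduceSpill, as far as the fragments
// above show: it returns true only when rematerialization candidates were
// found that achieve the objective, with TargetOcc set to DAG.MinOccupancy +
// 1 when the goal is an occupancy increase, and left as std::nullopt when the
// goal is merely to reduce spilling.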
void PreRARematStage::rematerialize() {
  const SIInstrInfo *TII = MF.getSubtarget<GCNSubtarget>().getInstrInfo();

  // Collect regions whose RP changes in unpredictable ways; their RP is fully
  // recomputed below.
  DenseSet<unsigned> RecomputeRP;

  // Rematerialize all instructions.
  for (auto &[DefMI, Remat] : Rematerializations) {
    MachineBasicBlock::iterator InsertPos(Remat.UseMI);
    Register Reg = DefMI->getOperand(0).getReg();
    unsigned DefRegion = MIRegion.at(DefMI);

    // Rematerialize DefMI to its use block.
    TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg,
                       AMDGPU::NoSubRegister, *DefMI, *DAG.TRI);
    Remat.RematMI = &*std::prev(InsertPos);
    DAG.LIS->InsertMachineInstrInMaps(*Remat.RematMI);

    // Update region boundaries in the region we sinked from (remove the
    // defining MI) and to (insert the rematerialized MI). Only then can we
    // erase the original MI.
    DAG.updateRegionBoundaries(DAG.Regions[DefRegion], DefMI, nullptr);
    auto UseRegion = MIRegion.find(Remat.UseMI);
    if (UseRegion != MIRegion.end()) {
      DAG.updateRegionBoundaries(DAG.Regions[UseRegion->second], InsertPos,
                                 Remat.RematMI);
    }
    DAG.LIS->RemoveMachineInstrFromMaps(*DefMI);
    DefMI->eraseFromParent();

    // Collect all regions impacted by the rematerialization and update their
    // live-in/RP information.
    for (unsigned I : Remat.LiveInRegions) {
      ImpactedRegions.insert({I, DAG.Pressure[I]});
      GCNRPTracker::LiveRegSet &RegionLiveIns = DAG.LiveIns[I];

#ifdef EXPENSIVE_CHECKS
      // All uses are known to be available / live at the remat point. Thus,
      // the uses should already be live-in to the region.
      for (MachineOperand &MO : DefMI->operands()) {
        if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
          continue;

        Register UseReg = MO.getReg();
        if (!UseReg.isVirtual())
          continue;

        LiveInterval &LI = DAG.LIS->getInterval(UseReg);
        LaneBitmask LM = DAG.MRI.getMaxLaneMaskForVReg(MO.getReg());
        if (LI.hasSubRanges() && MO.getSubReg())
          LM = DAG.TRI->getSubRegIndexLaneMask(MO.getSubReg());

        LaneBitmask LiveInMask = RegionLiveIns.at(UseReg);
        LaneBitmask UncoveredLanes = LM & ~(LiveInMask & LM);
        // If this register has lanes not covered by the LiveIns, they must
        // not map to any subrange.
        if (UncoveredLanes.any()) {
          assert(LI.hasSubRanges());
          for (LiveInterval::SubRange &SR : LI.subranges())
            assert((SR.LaneMask & UncoveredLanes).none());
        }
      }
#endif

      // The register is no longer a live-in in all regions but the one
      // containing the single use. In live-through regions, maximum RP
      // decreases predictably; in the use's region it may or may not, so mark
      // the latter for recomputation.
      LaneBitmask PrevMask = RegionLiveIns[Reg];
      RegionLiveIns.erase(Reg);
      RegMasks.insert({{I, Remat.RematMI->getOperand(0).getReg()}, PrevMask});
      if (Remat.UseMI->getParent() != DAG.Regions[I].first->getParent())
        DAG.Pressure[I].inc(Reg, PrevMask, LaneBitmask::getNone(), DAG.MRI);
      else
        RecomputeRP.insert(I);
    }

    // RP in the region from which the instruction was rematerialized may or
    // may not decrease.
    ImpactedRegions.insert({DefRegion, DAG.Pressure[DefRegion]});
    RecomputeRP.insert(DefRegion);

    // Recompute the live interval to reflect the register's
    // rematerialization.
    Register RematReg = Remat.RematMI->getOperand(0).getReg();
    DAG.LIS->removeInterval(RematReg);
    DAG.LIS->createAndComputeVirtRegInterval(RematReg);
  }

  // All regions impacted by at least one rematerialization must be
  // rescheduled; recompute max RP where it changed unpredictably.
  unsigned DynamicVGPRBlockSize = DAG.MFI.getDynamicVGPRBlockSize();
  AchievedOcc = MFI.getMaxWavesPerEU();
  for (auto &[I, OriginalRP] : ImpactedRegions) {
    bool IsEmptyRegion = DAG.Regions[I].first == DAG.Regions[I].second;
    RescheduleRegions[I] = !IsEmptyRegion;
    if (!RecomputeRP.contains(I))
      continue;

    GCNRegPressure RP;
    if (IsEmptyRegion) {
      RP = getRegPressure(DAG.MRI, DAG.LiveIns[I]);
    } else {
      GCNDownwardRPTracker RPT(*DAG.LIS);
      auto *NonDbgMI = &*skipDebugInstructionsForward(DAG.Regions[I].first,
                                                      DAG.Regions[I].second);
      if (NonDbgMI == DAG.Regions[I].second) {
        // Region contains only debug instructions; its RP is the live-in set.
        RP = getRegPressure(DAG.MRI, DAG.LiveIns[I]);
      } else {
        RPT.reset(*NonDbgMI, &DAG.LiveIns[I]);
        RPT.advance(DAG.Regions[I].second);
        RP = RPT.moveMaxPressure();
      }
    }
    DAG.Pressure[I] = RP;
    AchievedOcc =
        std::min(AchievedOcc, RP.getOccupancy(ST, DynamicVGPRBlockSize));
  }
}
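// AchievedOcc computed above is the occupancy the function can reach purely
// from rematerialization, taken as the minimum over all impacted regions; the
// subsequent rescheduling may still lower it, which is what
// finalizeGCNSchedStage() below checks when deciding whether to roll back.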
// Copied from MachineLICM.
bool PreRARematStage::isReMaterializable(const MachineInstr &MI) {
  if (!DAG.TII->isReMaterializable(MI))
    return false;

  for (const MachineOperand &MO : MI.all_uses()) {
    if (MO.getReg().isVirtual())
      continue;

    // We can't remat physreg uses, unless it is a constant or an ignorable
    // use (e.g. the implicit exec use on VALU instructions).
    if (DAG.MRI.isConstantPhysReg(MO.getReg()) || DAG.TII->isIgnorableUse(MO))
      continue;

    return false;
  }

  return true;
}

void PreRARematStage::finalizeGCNSchedStage() {
  // We consider that reducing spilling is always beneficial so we never
  // rollback rematerializations in such cases. It is also possible that
  // rescheduling lowers occupancy below the one achieved just through remats,
  // in which case we do not want to rollback either.
  unsigned MaxOcc = std::max(AchievedOcc, DAG.MinOccupancy);
  if (!TargetOcc || MaxOcc >= *TargetOcc)
    return;
  const SIInstrInfo *TII = MF.getSubtarget<GCNSubtarget>().getInstrInfo();

  // Rollback the rematerializations.
  for (const auto &[DefMI, Remat] : Rematerializations) {
    MachineInstr &RematMI = *Remat.RematMI;
    unsigned DefRegion = MIRegion.at(DefMI);
    MachineBasicBlock::iterator InsertPos(DAG.Regions[DefRegion].second);
    MachineBasicBlock *MBB = RegionBB[DefRegion];
    Register Reg = RematMI.getOperand(0).getReg();

    // Re-rematerialize the MI at the end of its original region. Note that it
    // may not end up at exactly its original position within the region, but
    // that should not matter much.
    TII->reMaterialize(*MBB, InsertPos, Reg, AMDGPU::NoSubRegister, RematMI,
                       *DAG.TRI);
    MachineInstr *NewMI = &*std::prev(InsertPos);
    DAG.LIS->InsertMachineInstrInMaps(*NewMI);

    auto UseRegion = MIRegion.find(Remat.UseMI);
    if (UseRegion != MIRegion.end()) {
      DAG.updateRegionBoundaries(DAG.Regions[UseRegion->second], RematMI,
                                 nullptr);
    }
    DAG.updateRegionBoundaries(DAG.Regions[DefRegion], InsertPos, NewMI);

    // Erase the rematerialized MI.
    DAG.LIS->RemoveMachineInstrFromMaps(RematMI);
    RematMI.eraseFromParent();

    // Recompute the live interval for the re-rematerialized register.
    DAG.LIS->removeInterval(Reg);
    DAG.LIS->createAndComputeVirtRegInterval(Reg);

    // Re-add the register as a live-in in all regions it used to be one in.
    for (unsigned LIRegion : Remat.LiveInRegions)
      DAG.LiveIns[LIRegion].insert({Reg, RegMasks.at({LIRegion, Reg})});
  }

  // Reset RP in all impacted regions.
  for (auto &[I, OriginalRP] : ImpactedRegions)
    DAG.Pressure[I] = OriginalRP;

  GCNSchedStage::finalizeGCNSchedStage();
}
void GCNScheduleDAGMILive::updateRegionBoundaries(
    RegionBoundaries &RegionBounds, MachineBasicBlock::iterator MI,
    MachineInstr *NewMI) {
  assert((!NewMI || NewMI != RegionBounds.second) &&
         "cannot remove at region end");

  if (RegionBounds.first == RegionBounds.second) {
    assert(NewMI && "cannot remove from an empty region");
    RegionBounds.first = NewMI;
    return;
  }

  // We only care about modifications at the beginning of a non-empty region
  // since the upper region boundary is exclusive.
  if (MI != RegionBounds.first)
    return;
  if (!NewMI)
    RegionBounds.first = std::next(MI); // Removal.
  else
    RegionBounds.first = NewMI; // Insertion.
}
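// Example of the cases handled above: for a region [A, B) with MI == A,
// deleting A (NewMI == nullptr) moves the region begin to std::next(A), while
// inserting NewMI right before A moves the begin to NewMI; modifications at
// or past the exclusive upper bound never change the stored boundaries.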
  if (HasIGLPInstrs) {
    SavedMutations.clear();
    SavedMutations.swap(Mutations);
    // ...
  }