47#define DEBUG_TYPE "machine-scheduler"
52 "amdgpu-disable-unclustered-high-rp-reschedule",
cl::Hidden,
53 cl::desc(
"Disable unclustered high register pressure "
54 "reduction scheduling stage."),
58 "amdgpu-disable-clustered-low-occupancy-reschedule",
cl::Hidden,
59 cl::desc(
"Disable clustered low occupancy "
60 "rescheduling for ILP scheduling stage."),
66 "Sets the bias which adds weight to occupancy vs latency. Set it to "
67 "100 to chase the occupancy only."),
72 cl::desc(
"Relax occupancy targets for kernels which are memory "
73 "bound (amdgpu-membound-threshold), or "
74 "Wave Limited (amdgpu-limit-wave-threshold)."),
79 cl::desc(
"Use the AMDGPU specific RPTrackers during scheduling"),
83 "amdgpu-scheduler-pending-queue-limit",
cl::Hidden,
85 "Max (Available+Pending) size to inspect pending queue (0 disables)"),
88#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
89#define DUMP_MAX_REG_PRESSURE
91 "amdgpu-print-max-reg-pressure-regusage-before-scheduler",
cl::Hidden,
92 cl::desc(
"Print a list of live registers along with their def/uses at the "
93 "point of maximum register pressure before scheduling."),
97 "amdgpu-print-max-reg-pressure-regusage-after-scheduler",
cl::Hidden,
98 cl::desc(
"Print a list of live registers along with their def/uses at the "
99 "point of maximum register pressure after scheduling."),
104 "amdgpu-disable-rewrite-mfma-form-sched-stage",
cl::Hidden,
124 Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
126 Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);
148 "VGPRCriticalLimit calculation method.\n");
152 unsigned Addressable =
155 VGPRBudget = std::max(VGPRBudget, Granule);
193 if (!
Op.isReg() ||
Op.isImplicit())
195 if (
Op.getReg().isPhysical() ||
196 (
Op.isDef() &&
Op.getSubReg() != AMDGPU::NoSubRegister))
231 Pressure[AMDGPU::RegisterPressureSets::VGPR_32] =
239 if (!Zone.
isTop() || !SU)
256 if (NextAvail > CurrCycle)
257 Stall = std::max(
Stall, NextAvail - CurrCycle);
277 unsigned SGPRPressure,
278 unsigned VGPRPressure,
bool IsBottomUp) {
282 if (!
DAG->isTrackingPressure())
305 Pressure[AMDGPU::RegisterPressureSets::SReg_32] = SGPRPressure;
306 Pressure[AMDGPU::RegisterPressureSets::VGPR_32] = VGPRPressure;
308 for (
const auto &Diff :
DAG->getPressureDiff(SU)) {
314 (IsBottomUp ? Diff.getUnitInc() : -Diff.getUnitInc());
317#ifdef EXPENSIVE_CHECKS
318 std::vector<unsigned> CheckPressure, CheckMaxPressure;
321 if (
Pressure[AMDGPU::RegisterPressureSets::SReg_32] !=
322 CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] ||
323 Pressure[AMDGPU::RegisterPressureSets::VGPR_32] !=
324 CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32]) {
325 errs() <<
"Register Pressure is inaccurate when calculated through "
327 <<
"SGPR got " <<
Pressure[AMDGPU::RegisterPressureSets::SReg_32]
329 << CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] <<
"\n"
330 <<
"VGPR got " <<
Pressure[AMDGPU::RegisterPressureSets::VGPR_32]
332 << CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32] <<
"\n";
338 unsigned NewSGPRPressure =
Pressure[AMDGPU::RegisterPressureSets::SReg_32];
339 unsigned NewVGPRPressure =
Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
349 const unsigned MaxVGPRPressureInc = 16;
350 bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >=
VGPRExcessLimit;
351 bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >=
SGPRExcessLimit;
382 if (SGPRDelta >= 0 || VGPRDelta >= 0) {
384 if (SGPRDelta > VGPRDelta) {
398 bool HasBufferedModel =
417 dbgs() <<
"Prefer:\t\t";
418 DAG->dumpNode(*Preferred.
SU);
422 DAG->dumpNode(*Current.
SU);
425 dbgs() <<
"Reason:\t\t";
439 unsigned SGPRPressure = 0;
440 unsigned VGPRPressure = 0;
442 if (
DAG->isTrackingPressure()) {
444 SGPRPressure =
Pressure[AMDGPU::RegisterPressureSets::SReg_32];
445 VGPRPressure =
Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
450 SGPRPressure =
T->getPressure().getSGPRNum();
451 VGPRPressure =
T->getPressure().getArchVGPRNum();
456 for (
SUnit *SU : AQ) {
460 VGPRPressure, IsBottomUp);
480 for (
SUnit *SU : PQ) {
484 VGPRPressure, IsBottomUp);
504 bool &PickedPending) {
524 bool BotPending =
false;
544 "Last pick result should correspond to re-picking right now");
549 bool TopPending =
false;
569 "Last pick result should correspond to re-picking right now");
579 PickedPending = BotPending && TopPending;
582 if (BotPending || TopPending) {
589 Cand.setBest(TryCand);
594 IsTopNode = Cand.AtTop;
601 if (
DAG->top() ==
DAG->bottom()) {
603 Bot.Available.empty() &&
Bot.Pending.empty() &&
"ReadyQ garbage");
609 PickedPending =
false;
643 if (ReadyCycle > CurrentCycle)
715 if (
DAG->isTrackingPressure() &&
721 if (
DAG->isTrackingPressure() &&
726 bool SameBoundary = Zone !=
nullptr;
750 if (IsLegacyScheduler)
769 if (
DAG->isTrackingPressure() &&
779 bool SameBoundary = Zone !=
nullptr;
814 bool CandIsClusterSucc =
816 bool TryCandIsClusterSucc =
818 if (
tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
823 if (
DAG->isTrackingPressure() &&
829 if (
DAG->isTrackingPressure() &&
875 if (
DAG->isTrackingPressure()) {
891 bool CandIsClusterSucc =
893 bool TryCandIsClusterSucc =
895 if (
tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
904 bool SameBoundary = Zone !=
nullptr;
921 if (TryMayLoad || CandMayLoad) {
922 bool TryLongLatency =
924 bool CandLongLatency =
928 Zone->
isTop() ? CandLongLatency : TryLongLatency, TryCand,
946 if (
DAG->isTrackingPressure() &&
965 !
Rem.IsAcyclicLatencyLimited &&
tryLatency(TryCand, Cand, *Zone))
983 StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy),
984 RegionLiveOuts(this,
true) {
990 LLVM_DEBUG(
dbgs() <<
"Starting occupancy is " << StartingOccupancy <<
".\n");
992 MinOccupancy = std::min(MFI.getMinAllowedOccupancy(), StartingOccupancy);
993 if (MinOccupancy != StartingOccupancy)
994 LLVM_DEBUG(
dbgs() <<
"Allowing Occupancy drops to " << MinOccupancy
999std::unique_ptr<GCNSchedStage>
1001 switch (SchedStageID) {
1003 return std::make_unique<OccInitialScheduleStage>(SchedStageID, *
this);
1005 return std::make_unique<RewriteMFMAFormStage>(SchedStageID, *
this);
1007 return std::make_unique<UnclusteredHighRPStage>(SchedStageID, *
this);
1009 return std::make_unique<ClusteredLowOccStage>(SchedStageID, *
this);
1011 return std::make_unique<PreRARematStage>(SchedStageID, *
this);
1013 return std::make_unique<ILPInitialScheduleStage>(SchedStageID, *
this);
1015 return std::make_unique<MemoryClauseInitialScheduleStage>(SchedStageID,
1029GCNScheduleDAGMILive::getRealRegPressure(
unsigned RegionIdx)
const {
1030 if (Regions[RegionIdx].first == Regions[RegionIdx].second)
1034 &LiveIns[RegionIdx]);
1040 assert(RegionBegin != RegionEnd &&
"Region must not be empty");
1044void GCNScheduleDAGMILive::computeBlockPressure(
unsigned RegionIdx,
1056 const MachineBasicBlock *OnlySucc =
nullptr;
1059 if (!Candidate->empty() && Candidate->pred_size() == 1) {
1060 SlotIndexes *Ind =
LIS->getSlotIndexes();
1062 OnlySucc = Candidate;
1067 size_t CurRegion = RegionIdx;
1068 for (
size_t E = Regions.size(); CurRegion !=
E; ++CurRegion)
1069 if (Regions[CurRegion].first->getParent() !=
MBB)
1074 auto LiveInIt = MBBLiveIns.find(
MBB);
1075 auto &Rgn = Regions[CurRegion];
1077 if (LiveInIt != MBBLiveIns.end()) {
1078 auto LiveIn = std::move(LiveInIt->second);
1080 MBBLiveIns.erase(LiveInIt);
1083 auto LRS = BBLiveInMap.lookup(NonDbgMI);
1084#ifdef EXPENSIVE_CHECKS
1093 if (Regions[CurRegion].first ==
I || NonDbgMI ==
I) {
1094 LiveIns[CurRegion] =
RPTracker.getLiveRegs();
1098 if (Regions[CurRegion].second ==
I) {
1099 Pressure[CurRegion] =
RPTracker.moveMaxPressure();
1100 if (CurRegion-- == RegionIdx)
1102 auto &Rgn = Regions[CurRegion];
1115 MBBLiveIns[OnlySucc] =
RPTracker.moveLiveRegs();
1120GCNScheduleDAGMILive::getRegionLiveInMap()
const {
1121 assert(!Regions.empty());
1122 std::vector<MachineInstr *> RegionFirstMIs;
1123 RegionFirstMIs.reserve(Regions.size());
1125 RegionFirstMIs.push_back(
1132GCNScheduleDAGMILive::getRegionLiveOutMap()
const {
1133 assert(!Regions.empty());
1134 std::vector<MachineInstr *> RegionLastMIs;
1135 RegionLastMIs.reserve(Regions.size());
1146 IdxToInstruction.clear();
1149 IsLiveOut ? DAG->getRegionLiveOutMap() : DAG->getRegionLiveInMap();
1150 for (
unsigned I = 0;
I < DAG->Regions.size();
I++) {
1151 auto &[RegionBegin, RegionEnd] = DAG->Regions[
I];
1153 if (RegionBegin == RegionEnd)
1157 IdxToInstruction[
I] = RegionKey;
1165 LiveIns.resize(Regions.size());
1166 Pressure.resize(Regions.size());
1167 RegionsWithHighRP.resize(Regions.size());
1168 RegionsWithExcessRP.resize(Regions.size());
1169 RegionsWithIGLPInstrs.resize(Regions.size());
1170 RegionsWithHighRP.reset();
1171 RegionsWithExcessRP.reset();
1172 RegionsWithIGLPInstrs.reset();
1177void GCNScheduleDAGMILive::runSchedStages() {
1178 LLVM_DEBUG(
dbgs() <<
"All regions recorded, starting actual scheduling.\n");
1181 if (!Regions.
empty()) {
1182 BBLiveInMap = getRegionLiveInMap();
1187#ifdef DUMP_MAX_REG_PRESSURE
1197 if (!Stage->initGCNSchedStage())
1200 for (
auto Region : Regions) {
1204 if (!Stage->initGCNRegion()) {
1205 Stage->advanceRegion();
1214 &LiveIns[Stage->getRegionIdx()];
1216 reinterpret_cast<GCNRPTracker *
>(DownwardTracker)
1217 ->reset(
MRI, *RegionLiveIns);
1218 reinterpret_cast<GCNRPTracker *
>(UpwardTracker)
1219 ->reset(
MRI, RegionLiveOuts.getLiveRegsForRegionIdx(
1220 Stage->getRegionIdx()));
1224 Stage->finalizeGCNRegion();
1225 Stage->advanceRegion();
1229 Stage->finalizeGCNSchedStage();
1232#ifdef DUMP_MAX_REG_PRESSURE
1245 OS <<
"Max Occupancy Initial Schedule";
1248 OS <<
"Instruction Rewriting Reschedule";
1251 OS <<
"Unclustered High Register Pressure Reschedule";
1254 OS <<
"Clustered Low Occupancy Reschedule";
1257 OS <<
"Pre-RA Rematerialize";
1260 OS <<
"Max ILP Initial Schedule";
1263 OS <<
"Max memory clause Initial Schedule";
1283void RewriteMFMAFormStage::findReachingDefs(
1297 SmallVector<MachineBasicBlock *, 8> Worklist;
1305 while (!Worklist.
empty()) {
1320 for (MachineBasicBlock *PredMBB : DefMBB->
predecessors()) {
1321 if (Visited.
insert(PredMBB).second)
1327void RewriteMFMAFormStage::findReachingUses(
1331 for (MachineOperand &UseMO :
1334 findReachingDefs(UseMO, LIS, ReachingDefIndexes);
1338 if (
any_of(ReachingDefIndexes, [DefIdx](SlotIndex RDIdx) {
1350 if (!
ST.hasGFX90AInsts() ||
MFI.getMinWavesPerEU() > 1)
1353 RegionsWithExcessArchVGPR.resize(
DAG.Regions.size());
1354 RegionsWithExcessArchVGPR.reset();
1358 RegionsWithExcessArchVGPR[
Region] =
true;
1361 if (RegionsWithExcessArchVGPR.none())
1364 TII =
ST.getInstrInfo();
1365 SRI =
ST.getRegisterInfo();
1367 std::vector<std::pair<MachineInstr *, unsigned>> RewriteCands;
1371 if (!initHeuristics(RewriteCands, CopyForUse, CopyForDef))
1374 int64_t
Cost = getRewriteCost(RewriteCands, CopyForUse, CopyForDef);
1381 return rewrite(RewriteCands);
1391 if (
DAG.RegionsWithHighRP.none() &&
DAG.RegionsWithExcessRP.none())
1398 InitialOccupancy =
DAG.MinOccupancy;
1401 TempTargetOccupancy =
MFI.getMaxWavesPerEU() >
DAG.MinOccupancy
1402 ? InitialOccupancy + 1
1404 IsAnyRegionScheduled =
false;
1405 S.SGPRLimitBias =
S.HighRPSGPRBias;
1406 S.VGPRLimitBias =
S.HighRPVGPRBias;
1410 <<
"Retrying function scheduling without clustering. "
1411 "Aggressively try to reduce register pressure to achieve occupancy "
1412 << TempTargetOccupancy <<
".\n");
1427 if (
DAG.StartingOccupancy <=
DAG.MinOccupancy)
1431 dbgs() <<
"Retrying function scheduling with lowest recorded occupancy "
1432 <<
DAG.MinOccupancy <<
".\n");
1437#define REMAT_PREFIX "[PreRARemat] "
1438#define REMAT_DEBUG(X) LLVM_DEBUG(dbgs() << REMAT_PREFIX; X;)
1440#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1441Printable PreRARematStage::ScoredRemat::print()
const {
1443 OS <<
'(' << MaxFreq <<
", " << FreqDiff <<
", " << RegionImpact <<
')';
1464 const unsigned NumRegions =
DAG.Regions.size();
1465 for (
unsigned I = 0;
I < NumRegions; ++
I) {
1472 RegionBB.push_back(ParentMBB);
1476 auto PrintTargetRegions = [&]() ->
void {
1477 if (TargetRegions.none()) {
1482 for (
unsigned I : TargetRegions.set_bits())
1485 auto PrintRematReg = [&](
const RematReg &Remat) ->
Printable {
1489 bool HasLiveThroughRegion =
false;
1490 OS <<
'[' << Remat.DefRegion <<
" -";
1491 for (
unsigned I = 0;
I < NumRegions; ++
I) {
1492 if (Remat.isUnusedLiveThrough(
I)) {
1493 if (HasLiveThroughRegion) {
1497 HasLiveThroughRegion =
true;
1502 if (HasLiveThroughRegion)
1504 OS <<
"-> " << Remat.UseRegion <<
"] ";
1505 Remat.DefMI->print(OS,
true,
false,
1513 dbgs() <<
"Analyzing ";
1514 MF.getFunction().printAsOperand(
dbgs(),
false);
1517 if (!setObjective()) {
1518 LLVM_DEBUG(
dbgs() <<
"no objective to achieve, occupancy is maximal at "
1519 <<
MFI.getMaxWavesPerEU() <<
'\n');
1524 dbgs() <<
"increase occupancy from " << *TargetOcc - 1 <<
'\n';
1526 dbgs() <<
"reduce spilling (minimum target occupancy is "
1527 <<
MFI.getMinWavesPerEU() <<
")\n";
1529 PrintTargetRegions();
1532 if (!collectRematRegs(MIRegion)) {
1538 dbgs() <<
"Rematerializable registers:\n";
1539 for (
const RematReg &Remat : RematRegs)
1547 dbgs() <<
"unknown ";
1548 dbgs() <<
" | " << *
DAG.Regions[
I].first;
1553 for (RematReg &Remat : RematRegs)
1559 unsigned RoundNum = 0;
1563 assert(!ScoredRemats.empty() &&
"no more remat candidates");
1566 for (ScoredRemat &Remat : ScoredRemats)
1567 Remat.update(TargetRegions, RPTargets, FreqInfo, !TargetOcc);
1571 dbgs() <<
"==== ROUND " << RoundNum++ <<
" ====\n"
1573 <<
"Candidates with non-null score, in rematerialization order:\n";
1574 for (
const ScoredRemat &RematDecision :
reverse(ScoredRemats)) {
1575 if (RematDecision.hasNullScore())
1578 << *RematDecision.Remat->DefMI;
1580 PrintTargetRegions();
1583 RecomputeRP.
reset();
1584 unsigned RematIdx = ScoredRemats.
size();
1589 for (; RematIdx && TargetRegions.any(); --RematIdx) {
1590 const ScoredRemat &Candidate = ScoredRemats[RematIdx - 1];
1594 if (Candidate.hasNullScore()) {
1599 RematReg &Remat = *Candidate.Remat;
1606 if (!Remat.maybeBeneficial(TargetRegions, RPTargets))
1611 Candidate.rematerialize(RecomputeRP, RPTargets,
DAG);
1612 RescheduleRegions |= Remat.Live;
1621 RollbackInfo &Rollback = Rollbacks.emplace_back(&Remat);
1622 Rollback.RematMI = RematMI;
1628 Remat.DefMI->
setDesc(
DAG.TII->get(TargetOpcode::DBG_VALUE));
1630 if (MO.isReg() && MO.readsReg()) {
1631 Rollback.RegMap.insert({Idx, MO.getReg()});
1637 DAG.deleteMI(Remat.DefRegion, Remat.DefMI);
1640 unsetSatisfiedRPTargets(Remat.Live);
1644 if (!TargetRegions.any()) {
1645 dbgs() <<
"** Interrupt round on all targets achieved\n";
1646 }
else if (RematIdx) {
1647 dbgs() <<
"** Interrupt round on stale score for "
1648 << *ScoredRemats[RematIdx - 1].Remat->DefMI;
1650 dbgs() <<
"** Stop on exhausted rematerialization candidates\n";
1655 ScoredRemats.truncate(RematIdx);
1656 }
while ((updateAndVerifyRPTargets(RecomputeRP) || TargetRegions.any()) &&
1657 !ScoredRemats.empty());
1658 if (RescheduleRegions.none())
1664 unsigned DynamicVGPRBlockSize =
MFI.getDynamicVGPRBlockSize();
1665 for (
unsigned I : RescheduleRegions.set_bits()) {
1666 DAG.Pressure[
I] = RPTargets[
I].getCurrentRP();
1668 <<
DAG.Pressure[
I].getOccupancy(
ST, DynamicVGPRBlockSize)
1669 <<
" (" << RPTargets[
I] <<
")\n");
1671 AchievedOcc =
MFI.getMaxWavesPerEU();
1674 std::min(AchievedOcc, RP.getOccupancy(
ST, DynamicVGPRBlockSize));
1678 dbgs() <<
"Retrying function scheduling with new min. occupancy of "
1679 << AchievedOcc <<
" from rematerializing (original was "
1680 <<
DAG.MinOccupancy;
1682 dbgs() <<
", target was " << *TargetOcc;
1686 DAG.setTargetOccupancy(getStageTargetOccupancy());
1697 S.SGPRLimitBias =
S.VGPRLimitBias = 0;
1698 if (
DAG.MinOccupancy > InitialOccupancy) {
1699 assert(IsAnyRegionScheduled);
1701 <<
" stage successfully increased occupancy to "
1702 <<
DAG.MinOccupancy <<
'\n');
1703 }
else if (!IsAnyRegionScheduled) {
1704 assert(
DAG.MinOccupancy == InitialOccupancy);
1706 <<
": No regions scheduled, min occupancy stays at "
1707 <<
DAG.MinOccupancy <<
", MFI occupancy stays at "
1708 <<
MFI.getOccupancy() <<
".\n");
1716 if (
DAG.begin() ==
DAG.end())
1723 unsigned NumRegionInstrs = std::distance(
DAG.begin(),
DAG.end());
1727 if (
DAG.begin() == std::prev(
DAG.end()))
1733 <<
"\n From: " << *
DAG.begin() <<
" To: ";
1735 else dbgs() <<
"End";
1736 dbgs() <<
" RegionInstrs: " << NumRegionInstrs <<
'\n');
1744 for (
auto &
I :
DAG) {
1757 dbgs() <<
"Pressure before scheduling:\nRegion live-ins:"
1759 <<
"Region live-in pressure: "
1763 S.HasHighPressure =
false;
1785 unsigned DynamicVGPRBlockSize =
DAG.MFI.getDynamicVGPRBlockSize();
1788 unsigned CurrentTargetOccupancy =
1789 IsAnyRegionScheduled ?
DAG.MinOccupancy : TempTargetOccupancy;
1791 (CurrentTargetOccupancy <= InitialOccupancy ||
1792 DAG.Pressure[
RegionIdx].getOccupancy(
ST, DynamicVGPRBlockSize) !=
1799 if (!IsAnyRegionScheduled && IsSchedulingThisRegion) {
1800 IsAnyRegionScheduled =
true;
1801 if (
MFI.getMaxWavesPerEU() >
DAG.MinOccupancy)
1802 DAG.setTargetOccupancy(TempTargetOccupancy);
1804 return IsSchedulingThisRegion;
1839 if (
S.HasHighPressure)
1860 if (
DAG.MinOccupancy < *TargetOcc) {
1862 <<
" cannot meet occupancy target, interrupting "
1863 "re-scheduling in all regions\n");
1864 RescheduleRegions.reset();
1875 unsigned DynamicVGPRBlockSize =
DAG.MFI.getDynamicVGPRBlockSize();
1886 unsigned TargetOccupancy = std::min(
1887 S.getTargetOccupancy(),
ST.getOccupancyWithWorkGroupSizes(
MF).second);
1888 unsigned WavesAfter = std::min(
1889 TargetOccupancy,
PressureAfter.getOccupancy(
ST, DynamicVGPRBlockSize));
1890 unsigned WavesBefore = std::min(
1892 LLVM_DEBUG(
dbgs() <<
"Occupancy before scheduling: " << WavesBefore
1893 <<
", after " << WavesAfter <<
".\n");
1899 unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);
1903 if (WavesAfter < WavesBefore && WavesAfter <
DAG.MinOccupancy &&
1904 WavesAfter >=
MFI.getMinAllowedOccupancy()) {
1905 LLVM_DEBUG(
dbgs() <<
"Function is memory bound, allow occupancy drop up to "
1906 <<
MFI.getMinAllowedOccupancy() <<
" waves\n");
1907 NewOccupancy = WavesAfter;
1910 if (NewOccupancy <
DAG.MinOccupancy) {
1911 DAG.MinOccupancy = NewOccupancy;
1912 MFI.limitOccupancy(
DAG.MinOccupancy);
1914 <<
DAG.MinOccupancy <<
".\n");
1918 unsigned MaxVGPRs =
ST.getMaxNumVGPRs(
MF);
1921 unsigned MaxArchVGPRs = std::min(MaxVGPRs,
ST.getAddressableNumArchVGPRs());
1922 unsigned MaxSGPRs =
ST.getMaxNumSGPRs(
MF);
1946 unsigned ReadyCycle = CurrCycle;
1947 for (
auto &
D : SU.
Preds) {
1948 if (
D.isAssignedRegDep()) {
1951 unsigned DefReady = ReadyCycles[
DAG.getSUnit(
DefMI)->NodeNum];
1952 ReadyCycle = std::max(ReadyCycle, DefReady +
Latency);
1955 ReadyCycles[SU.
NodeNum] = ReadyCycle;
1962 std::pair<MachineInstr *, unsigned>
B)
const {
1963 return A.second <
B.second;
1969 if (ReadyCycles.empty())
1971 unsigned BBNum = ReadyCycles.begin()->first->getParent()->getNumber();
1972 dbgs() <<
"\n################## Schedule time ReadyCycles for MBB : " << BBNum
1973 <<
" ##################\n# Cycle #\t\t\tInstruction "
1977 for (
auto &
I : ReadyCycles) {
1978 if (
I.second > IPrev + 1)
1979 dbgs() <<
"****************************** BUBBLE OF " <<
I.second - IPrev
1980 <<
" CYCLES DETECTED ******************************\n\n";
1981 dbgs() <<
"[ " <<
I.second <<
" ] : " << *
I.first <<
"\n";
1994 unsigned SumBubbles = 0;
1996 unsigned CurrCycle = 0;
1997 for (
auto &SU : InputSchedule) {
1998 unsigned ReadyCycle =
2000 SumBubbles += ReadyCycle - CurrCycle;
2002 ReadyCyclesSorted.insert(std::make_pair(SU.getInstr(), ReadyCycle));
2004 CurrCycle = ++ReadyCycle;
2027 unsigned SumBubbles = 0;
2029 unsigned CurrCycle = 0;
2030 for (
auto &
MI :
DAG) {
2034 unsigned ReadyCycle =
2036 SumBubbles += ReadyCycle - CurrCycle;
2038 ReadyCyclesSorted.insert(std::make_pair(SU->
getInstr(), ReadyCycle));
2040 CurrCycle = ++ReadyCycle;
2057 if (WavesAfter <
DAG.MinOccupancy)
2061 if (
DAG.MFI.isDynamicVGPREnabled()) {
2063 &
ST,
DAG.MFI.getDynamicVGPRBlockSize(),
2066 &
ST,
DAG.MFI.getDynamicVGPRBlockSize(),
2068 if (BlocksAfter > BlocksBefore)
2105 <<
"\n\t *** In shouldRevertScheduling ***\n"
2106 <<
" *********** BEFORE UnclusteredHighRPStage ***********\n");
2110 <<
"\n *********** AFTER UnclusteredHighRPStage ***********\n");
2112 unsigned OldMetric = MBefore.
getMetric();
2113 unsigned NewMetric = MAfter.
getMetric();
2114 unsigned WavesBefore = std::min(
2115 S.getTargetOccupancy(),
2122 LLVM_DEBUG(
dbgs() <<
"\tMetric before " << MBefore <<
"\tMetric after "
2123 << MAfter <<
"Profit: " << Profit <<
"\n");
2154 unsigned WavesAfter) {
2161 LLVM_DEBUG(
dbgs() <<
"New pressure will result in more spilling.\n");
2174 "instruction number mismatch");
2175 if (MIOrder.
empty())
2187 if (MII != RegionEnd) {
2189 bool NonDebugReordered =
2190 !
MI->isDebugInstr() &&
2196 if (NonDebugReordered)
2197 DAG.LIS->handleMove(*
MI,
true);
2201 if (
MI->isDebugInstr()) {
2208 Op.setIsUndef(
false);
2211 if (
DAG.ShouldTrackLaneMasks) {
2213 SlotIndex SlotIdx =
DAG.LIS->getInstructionIndex(*MI).getRegSlot();
2228bool RewriteMFMAFormStage::isRewriteCandidate(
MachineInstr *
MI)
const {
2235bool RewriteMFMAFormStage::initHeuristics(
2236 std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,
2244 if (!isRewriteCandidate(&
MI))
2248 assert(ReplacementOp != -1);
2250 RewriteCands.push_back({&
MI,
MI.getOpcode()});
2251 MI.setDesc(
TII->get(ReplacementOp));
2253 MachineOperand *Src2 =
TII->getNamedOperand(
MI, AMDGPU::OpName::src2);
2254 if (Src2->
isReg()) {
2256 findReachingDefs(*Src2,
DAG.LIS, Src2ReachingDefs);
2260 for (SlotIndex RDIdx : Src2ReachingDefs) {
2261 MachineInstr *RD =
DAG.LIS->getInstructionFromIndex(RDIdx);
2262 if (!
TII->isMAI(*RD))
2267 MachineOperand &Dst =
MI.getOperand(0);
2270 findReachingUses(&
MI,
DAG.LIS, DstReachingUses);
2272 for (MachineOperand *RUOp : DstReachingUses) {
2273 if (
TII->isMAI(*RUOp->getParent()))
2279 CopyForUse[RUOp->getParent()->getParent()].insert(RUOp->getReg());
2282 findReachingDefs(*RUOp,
DAG.LIS, DstUsesReachingDefs);
2284 for (SlotIndex RDIndex : DstUsesReachingDefs) {
2285 MachineInstr *RD =
DAG.LIS->getInstructionFromIndex(RDIndex);
2286 if (
TII->isMAI(*RD))
2298 const TargetRegisterClass *VDefRC =
DAG.MRI.getRegClass(Dst.getReg());
2299 const TargetRegisterClass *ADefRC = SRI->getEquivalentAGPRClass(VDefRC);
2300 DAG.MRI.setRegClass(Dst.getReg(), ADefRC);
2301 if (Src2->
isReg()) {
2305 const TargetRegisterClass *VUseRC =
DAG.MRI.getRegClass(Src2->
getReg());
2306 const TargetRegisterClass *AUseRC = SRI->getEquivalentAGPRClass(VUseRC);
2307 DAG.MRI.setRegClass(Src2->
getReg(), AUseRC);
2316int64_t RewriteMFMAFormStage::getRewriteCost(
2317 const std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,
2318 const DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
2319 const SmallPtrSetImpl<MachineInstr *> &CopyForDef) {
2320 MachineBlockFrequencyInfo *MBFI =
DAG.MBFI;
2322 int64_t BestSpillCost = 0;
2326 std::pair<unsigned, unsigned> MaxVectorRegs =
2327 ST.getMaxNumVectorRegs(
MF.getFunction());
2328 unsigned ArchVGPRThreshold = MaxVectorRegs.first;
2329 unsigned AGPRThreshold = MaxVectorRegs.second;
2330 unsigned CombinedThreshold =
ST.getMaxNumVGPRs(
MF);
2333 if (!RegionsWithExcessArchVGPR[Region])
2338 MF, ArchVGPRThreshold, AGPRThreshold, CombinedThreshold);
2346 MF, ArchVGPRThreshold, AGPRThreshold, CombinedThreshold);
2348 uint64_t BlockFreq =
2352 bool RelativeFreqIsDenom = EntryFreq > BlockFreq;
2353 uint64_t RelativeFreq = EntryFreq && BlockFreq
2354 ? (RelativeFreqIsDenom ? EntryFreq / BlockFreq
2355 : BlockFreq / EntryFreq)
2360 int64_t SpillCost = ((int)SpillCostAfter - (int)SpillCostBefore) * 2;
2363 if (RelativeFreqIsDenom)
2364 SpillCost /= (int64_t)RelativeFreq;
2366 SpillCost *= (int64_t)RelativeFreq;
2372 if (SpillCost < BestSpillCost)
2373 BestSpillCost = SpillCost;
2378 Cost = BestSpillCost;
2381 unsigned CopyCost = 0;
2385 for (MachineInstr *
DefMI : CopyForDef) {
2392 const TargetRegisterClass *RC =
DAG.MRI.getRegClass(DefReg);
2397 for (
auto &[UseBlock, UseRegs] : CopyForUse) {
2402 const TargetRegisterClass *RC =
DAG.MRI.getRegClass(
UseReg);
2411 for (
auto &[
MI, OriginalOpcode] : RewriteCands) {
2413 const TargetRegisterClass *ADefRC =
2414 DAG.MRI.getRegClass(
MI->getOperand(0).getReg());
2415 const TargetRegisterClass *VDefRC = SRI->getEquivalentVGPRClass(ADefRC);
2416 DAG.MRI.setRegClass(
MI->getOperand(0).getReg(), VDefRC);
2417 MI->setDesc(
TII->get(OriginalOpcode));
2419 MachineOperand *Src2 =
TII->getNamedOperand(*
MI, AMDGPU::OpName::src2);
2427 const TargetRegisterClass *AUseRC =
DAG.MRI.getRegClass(Src2->
getReg());
2428 const TargetRegisterClass *VUseRC = SRI->getEquivalentVGPRClass(AUseRC);
2429 DAG.MRI.setRegClass(Src2->
getReg(), VUseRC);
2432 return Cost + CopyCost;
2435bool RewriteMFMAFormStage::rewrite(
2436 const std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands) {
2437 DenseMap<MachineInstr *, unsigned> FirstMIToRegion;
2438 DenseMap<MachineInstr *, unsigned> LastMIToRegion;
2446 if (
Entry.second !=
Entry.first->getParent()->end())
2489 DenseSet<Register> RewriteRegs;
2492 DenseMap<Register, Register> RedefMap;
2494 DenseMap<Register, DenseSet<MachineOperand *>>
ReplaceMap;
2496 DenseMap<Register, SmallPtrSet<MachineInstr *, 8>> ReachingDefCopyMap;
2499 DenseMap<unsigned, DenseMap<Register, SmallPtrSet<MachineOperand *, 8>>>
2502 for (
auto &[
MI, OriginalOpcode] : RewriteCands) {
2504 if (ReplacementOp == -1)
2506 MI->setDesc(
TII->get(ReplacementOp));
2509 MachineOperand *Src2 =
TII->getNamedOperand(*
MI, AMDGPU::OpName::src2);
2510 if (Src2->
isReg()) {
2517 findReachingDefs(*Src2,
DAG.LIS, Src2ReachingDefs);
2518 SmallSetVector<MachineInstr *, 8> Src2DefsReplace;
2520 for (SlotIndex RDIndex : Src2ReachingDefs) {
2521 MachineInstr *RD =
DAG.LIS->getInstructionFromIndex(RDIndex);
2522 if (
TII->isMAI(*RD))
2526 Src2DefsReplace.
insert(RD);
2529 if (!Src2DefsReplace.
empty()) {
2531 if (RI != RedefMap.
end()) {
2532 MappedReg = RI->second;
2535 const TargetRegisterClass *Src2RC =
DAG.MRI.getRegClass(Src2Reg);
2536 const TargetRegisterClass *VGPRRC =
2537 SRI->getEquivalentVGPRClass(Src2RC);
2540 MappedReg =
DAG.MRI.createVirtualRegister(VGPRRC);
2541 RedefMap[Src2Reg] = MappedReg;
2546 for (MachineInstr *RD : Src2DefsReplace) {
2548 if (ReachingDefCopyMap[Src2Reg].insert(RD).second) {
2549 MachineInstrBuilder VGPRCopy =
2552 .
addDef(MappedReg, {}, 0)
2553 .addUse(Src2Reg, {}, 0);
2554 DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy);
2559 unsigned UpdateRegion = LastMIToRegion[RD];
2560 DAG.Regions[UpdateRegion].second = VGPRCopy;
2561 LastMIToRegion.
erase(RD);
2568 RewriteRegs.
insert(Src2Reg);
2578 MachineOperand *Dst = &
MI->getOperand(0);
2587 SmallVector<MachineInstr *, 8> DstUseDefsReplace;
2589 findReachingUses(
MI,
DAG.LIS, DstReachingUses);
2591 for (MachineOperand *RUOp : DstReachingUses) {
2592 if (
TII->isMAI(*RUOp->getParent()))
2596 if (
find(DstReachingUseCopies, RUOp) == DstReachingUseCopies.
end())
2599 findReachingDefs(*RUOp,
DAG.LIS, DstUsesReachingDefs);
2601 for (SlotIndex RDIndex : DstUsesReachingDefs) {
2602 MachineInstr *RD =
DAG.LIS->getInstructionFromIndex(RDIndex);
2603 if (
TII->isMAI(*RD))
2608 if (
find(DstUseDefsReplace, RD) == DstUseDefsReplace.
end())
2613 if (!DstUseDefsReplace.
empty()) {
2615 if (RI != RedefMap.
end()) {
2616 MappedReg = RI->second;
2619 const TargetRegisterClass *DstRC =
DAG.MRI.getRegClass(DstReg);
2620 const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(DstRC);
2623 MappedReg =
DAG.MRI.createVirtualRegister(VGPRRC);
2624 RedefMap[DstReg] = MappedReg;
2629 for (MachineInstr *RD : DstUseDefsReplace) {
2631 if (ReachingDefCopyMap[DstReg].insert(RD).second) {
2632 MachineInstrBuilder VGPRCopy =
2635 .
addDef(MappedReg, {}, 0)
2636 .addUse(DstReg, {}, 0);
2637 DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy);
2642 LastMIToRegion.
find(RD);
2643 if (LMI != LastMIToRegion.
end()) {
2644 unsigned UpdateRegion = LMI->second;
2645 DAG.Regions[UpdateRegion].second = VGPRCopy;
2646 LastMIToRegion.
erase(RD);
2652 DenseSet<MachineOperand *> &DstRegSet =
ReplaceMap[DstReg];
2653 for (MachineOperand *RU : DstReachingUseCopies) {
2654 MachineBasicBlock *RUBlock = RU->getParent()->getParent();
2657 if (RUBlock !=
MI->getParent()) {
2664 const TargetRegisterClass *DstRC =
DAG.MRI.getRegClass(DstReg);
2665 const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(DstRC);
2666 Register NewUseReg =
DAG.MRI.createVirtualRegister(VGPRRC);
2667 MachineInstr *UseInst = RU->getParent();
2668 MachineInstrBuilder VGPRCopy =
2671 .
addDef(NewUseReg, {}, 0)
2672 .addUse(DstReg, {}, 0);
2673 DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy);
2676 RU->setReg(NewUseReg);
2682 RewriteRegs.
insert(DstReg);
2692 std::pair<unsigned, DenseMap<Register, SmallPtrSet<MachineOperand *, 8>>>;
2693 for (RUBType RUBlockEntry : ReachingUseTracker) {
2694 using RUDType = std::pair<Register, SmallPtrSet<MachineOperand *, 8>>;
2695 for (RUDType RUDst : RUBlockEntry.second) {
2696 MachineOperand *OpBegin = *RUDst.second.begin();
2697 SlotIndex InstPt =
DAG.LIS->getInstructionIndex(*OpBegin->
getParent());
2700 for (MachineOperand *User : RUDst.second) {
2701 SlotIndex NewInstPt =
DAG.LIS->getInstructionIndex(*
User->getParent());
2706 const TargetRegisterClass *DstRC =
DAG.MRI.getRegClass(RUDst.first);
2707 const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(DstRC);
2708 Register NewUseReg =
DAG.MRI.createVirtualRegister(VGPRRC);
2709 MachineInstr *UseInst =
DAG.LIS->getInstructionFromIndex(InstPt);
2711 MachineInstrBuilder VGPRCopy =
2714 .
addDef(NewUseReg, {}, 0)
2715 .addUse(RUDst.first, {}, 0);
2716 DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy);
2721 FirstMIToRegion.
find(UseInst);
2722 if (FI != FirstMIToRegion.
end()) {
2723 unsigned UpdateRegion = FI->second;
2724 DAG.Regions[UpdateRegion].first = VGPRCopy;
2725 FirstMIToRegion.
erase(UseInst);
2729 for (MachineOperand *User : RUDst.second) {
2730 User->setReg(NewUseReg);
2741 for (std::pair<Register, Register> NewDef : RedefMap) {
2746 for (MachineOperand *ReplaceOp :
ReplaceMap[OldReg])
2747 ReplaceOp->setReg(NewReg);
2751 for (
Register RewriteReg : RewriteRegs) {
2752 Register RegToRewrite = RewriteReg;
2756 if (RI != RedefMap.end())
2757 RegToRewrite = RI->second;
2759 const TargetRegisterClass *CurrRC =
DAG.MRI.getRegClass(RegToRewrite);
2760 const TargetRegisterClass *AGPRRC = SRI->getEquivalentAGPRClass(CurrRC);
2762 DAG.MRI.setRegClass(RegToRewrite, AGPRRC);
2766 DAG.LIS->reanalyze(
DAG.MF);
2768 RegionPressureMap LiveInUpdater(&
DAG,
false);
2769 LiveInUpdater.buildLiveRegMap();
2772 DAG.LiveIns[Region] = LiveInUpdater.getLiveRegsForRegionIdx(Region);
2779unsigned PreRARematStage::getStageTargetOccupancy()
const {
2780 return TargetOcc ? *TargetOcc :
MFI.getMinWavesPerEU();
2783bool PreRARematStage::setObjective() {
2787 unsigned MaxSGPRs =
ST.getMaxNumSGPRs(
F);
2788 unsigned MaxVGPRs =
ST.getMaxNumVGPRs(
F);
2789 bool HasVectorRegisterExcess =
false;
2790 for (
unsigned I = 0,
E =
DAG.Regions.size();
I !=
E; ++
I) {
2791 const GCNRegPressure &
RP =
DAG.Pressure[
I];
2792 GCNRPTarget &
Target = RPTargets.emplace_back(MaxSGPRs, MaxVGPRs,
MF, RP);
2794 TargetRegions.set(
I);
2795 HasVectorRegisterExcess |=
Target.hasVectorRegisterExcess();
2798 if (HasVectorRegisterExcess ||
DAG.MinOccupancy >=
MFI.getMaxWavesPerEU()) {
2801 TargetOcc = std::nullopt;
2805 TargetOcc =
DAG.MinOccupancy + 1;
2806 const unsigned VGPRBlockSize =
MFI.getDynamicVGPRBlockSize();
2807 MaxSGPRs =
ST.getMaxNumSGPRs(*TargetOcc,
false);
2808 MaxVGPRs =
ST.getMaxNumVGPRs(*TargetOcc, VGPRBlockSize);
2809 for (
auto [
I, Target] :
enumerate(RPTargets)) {
2810 Target.setTarget(MaxSGPRs, MaxVGPRs);
2812 TargetRegions.set(
I);
2816 return TargetRegions.any();
2819bool PreRARematStage::collectRematRegs(
2820 const DenseMap<MachineInstr *, unsigned> &MIRegion) {
2823 DAG.RegionLiveOuts.buildLiveRegMap();
2827 SmallSet<Register, 4> MarkedRegs;
2828 auto IsMarkedForRemat = [&MarkedRegs](
const MachineOperand &MO) ->
bool {
2829 return MO.isReg() && MarkedRegs.
contains(MO.getReg());
2833 for (
unsigned I = 0,
E =
DAG.Regions.size();
I !=
E; ++
I) {
2835 for (
auto MI = Bounds.first;
MI != Bounds.second; ++
MI) {
2838 if (!isReMaterializable(
DefMI))
2851 MachineInstr *
UseMI =
DAG.MRI.getOneNonDBGUser(
Reg);
2855 if (UseRegion == MIRegion.
end() || UseRegion->second ==
I)
2866 if (IsMarkedForRemat(UseMO) ||
2873 SlotIndex UseIdx =
DAG.LIS->getInstructionIndex(*UseMI).getRegSlot(
true);
2884 return !RematRegs.empty();
2887PreRARematStage::RematReg::RematReg(
2888 MachineInstr *
DefMI, MachineInstr *
UseMI, GCNScheduleDAGMILive &
DAG,
2889 const DenseMap<MachineInstr *, unsigned> &MIRegion)
2892 DefRegion(MIRegion.at(
DefMI)), UseRegion(MIRegion.at(
UseMI)) {
2896 for (
unsigned I = 0,
E =
DAG.Regions.size();
I !=
E; ++
I) {
2897 auto LiveInIt =
DAG.LiveIns[
I].find(
Reg);
2898 if (LiveInIt !=
DAG.LiveIns[
I].end())
2900 const auto &LiveOuts =
DAG.RegionLiveOuts.getLiveRegsForRegionIdx(
I);
2901 if (
auto LiveOutIt = LiveOuts.find(
Reg); LiveOutIt != LiveOuts.end())
2906 Mask =
DAG.RegionLiveOuts.getLiveRegsForRegionIdx(DefRegion).at(
Reg);
2909bool PreRARematStage::RematReg::maybeBeneficial(
2912 for (
unsigned I : TargetRegions.
set_bits()) {
2913 if (Live[
I] && RPTargets[
I].isSaveBeneficial(
Reg))
2919void PreRARematStage::RematReg::insertMI(
unsigned RegionIdx,
2920 MachineInstr *RematMI,
2921 GCNScheduleDAGMILive &
DAG)
const {
2924 Bounds.first = RematMI;
2925 DAG.LIS->InsertMachineInstrInMaps(*RematMI);
2931 assert(
DAG.MLI &&
"MLI not defined in DAG");
2935 const unsigned NumRegions =
DAG.Regions.size();
2939 for (
unsigned I = 0;
I < NumRegions; ++
I) {
2943 if (BlockFreq && BlockFreq <
MinFreq)
2952 if (
MinFreq >= ScaleFactor * ScaleFactor) {
2954 Freq /= ScaleFactor;
2960PreRARematStage::ScoredRemat::ScoredRemat(RematReg *Remat,
const FreqInfo &Freq,
2962 : Remat(Remat), FreqDiff(getFreqDiff(Freq)) {
2966int64_t PreRARematStage::ScoredRemat::getFreqDiff(
const FreqInfo &Freq)
const {
2974 int64_t DefOrMin = std::max(Freq.Regions[Remat->DefRegion], Freq.MinFreq);
2975 int64_t UseOrMax = Freq.Regions[Remat->UseRegion];
2977 UseOrMax = Freq.MaxFreq;
2978 return DefOrMin - UseOrMax;
2981void PreRARematStage::ScoredRemat::update(
const BitVector &TargetRegions,
2983 const FreqInfo &FreqInfo,
2987 for (
unsigned I : TargetRegions.
set_bits()) {
2988 if (!Remat->Live[
I])
2996 if (!NumRegsBenefit)
2999 bool UnusedLT = Remat->isUnusedLiveThrough(
I);
3002 RegionImpact += (UnusedLT ? 2 : 1) * NumRegsBenefit;
3011 MaxFreq = std::max(MaxFreq, Freq);
3016MachineInstr *PreRARematStage::ScoredRemat::rematerialize(
3017 BitVector &RecomputeRP, SmallVectorImpl<GCNRPTarget> &RPTargets,
3018 GCNScheduleDAGMILive &
DAG)
const {
3020 MachineInstr &
DefMI = *Remat->DefMI;
3026 TII->reMaterialize(*InsertPos->getParent(), InsertPos, NewReg, 0,
DefMI);
3027 MachineInstr *RematMI = &*std::prev(InsertPos);
3028 Remat->UseMI->substituteRegister(
Reg, NewReg, 0, *
DAG.
TRI);
3029 Remat->insertMI(Remat->UseRegion, RematMI,
DAG);
3031#ifdef EXPENSIVE_CHECKS
3034 for (MachineOperand &MO :
DefMI.operands()) {
3035 if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
3047 LaneBitmask LiveInMask =
DAG.LiveIns[Remat->UseRegion].at(
UseReg);
3048 LaneBitmask UncoveredLanes = LM & ~(LiveInMask & LM);
3052 if (UncoveredLanes.
any()) {
3054 for (LiveInterval::SubRange &SR : LI.
subranges())
3055 assert((SR.LaneMask & UncoveredLanes).none());
3064 for (
unsigned I : Remat->Live.set_bits()) {
3065 RPTargets[
I].saveRP(RPSave);
3068 if (!Remat->isUnusedLiveThrough(
I))
3075void PreRARematStage::commitRematerializations()
const {
3077 for (
const RollbackInfo &Rollback : Rollbacks)
3078 DAG.deleteMI(Rollback.Remat->DefRegion, Rollback.Remat->DefMI);
3081void PreRARematStage::unsetSatisfiedRPTargets(
const BitVector &Regions) {
3083 if (TargetRegions[
I] && RPTargets[
I].satisfied()) {
3090bool PreRARematStage::updateAndVerifyRPTargets(
const BitVector &Regions) {
3091 bool TooOptimistic =
false;
3093 GCNRPTarget &
Target = RPTargets[
I];
3099 if (!TargetRegions[
I] && !
Target.satisfied()) {
3101 TooOptimistic =
true;
3102 TargetRegions.
set(
I);
3105 return TooOptimistic;
3109bool PreRARematStage::isReMaterializable(
const MachineInstr &
MI) {
3113 for (
const MachineOperand &MO :
MI.all_uses()) {
3116 if (MO.getReg().isPhysical()) {
3135 if (
DAG.MinOccupancy >= *TargetOcc) {
3136 commitRematerializations();
3143 const bool ShouldRollbackRemats = AchievedOcc < *TargetOcc;
3148 if (ShouldRollbackRemats) {
3149 for (
const RollbackInfo &Rollback : Rollbacks) {
3150 const auto &[Remat, RematMI, RegMap] = Rollback;
3151 Remat->DefMI->setDesc(
DAG.
TII->
get(RematMI->getOpcode()));
3152 for (
const auto &[MOIdx,
Reg] : RegMap)
3153 Remat->DefMI->getOperand(MOIdx).setReg(
Reg);
3158 for (
const auto &[
RegionIdx, OrigMIOrder, MaxPressure] : RegionReverts) {
3165 if (!ShouldRollbackRemats) {
3166 commitRematerializations();
3167 DAG.setTargetOccupancy(AchievedOcc);
3172 DAG.setTargetOccupancy(*TargetOcc - 1);
3177 BitVector RecomputeRP(
DAG.Regions.
size());
3178 DenseSet<Register> RecomputeLI;
3179 for (
const RollbackInfo &Rollback : Rollbacks) {
3180 const auto &[Remat, RematMI, RegMap] = Rollback;
3185 Register OriginalReg = Remat->DefMI->getOperand(0).getReg();
3186 Remat->UseMI->substituteRegister(
Reg, OriginalReg, 0, *
DAG.
TRI);
3188 <<
"] Deleting rematerialization " << *RematMI);
3189 DAG.deleteMI(Remat->UseRegion, RematMI);
3193 std::pair<Register, LaneBitmask> LiveReg(OriginalReg, Remat->Mask);
3194 for (
unsigned I : Remat->LiveIn.set_bits())
3195 DAG.LiveIns[
I].insert(LiveReg);
3196 for (
unsigned I : Remat->LiveOut.set_bits())
3199 RecomputeRP |= Rollback.Remat->Live;
3202 for (MachineOperand &MO : Rollback.Remat->DefMI->operands()) {
3203 if (MO.isReg() && MO.getReg().isVirtual())
3204 RecomputeLI.
insert(MO.getReg());
3211#ifdef EXPENSIVE_CHECKS
3216 for (
unsigned I : RecomputeRP.
set_bits())
3217 DAG.Pressure[
I] =
DAG.getRealRegPressure(
I);
3222void GCNScheduleDAGMILive::deleteMI(
unsigned RegionIdx, MachineInstr *
MI) {
3229 MI->eraseFromParent();
3232void GCNScheduleDAGMILive::setTargetOccupancy(
unsigned TargetOccupancy) {
3233 MinOccupancy = TargetOccupancy;
3254 if (HasIGLPInstrs) {
3255 SavedMutations.clear();
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static SUnit * pickOnlyChoice(SchedBoundary &Zone)
This file implements the BitVector class.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file defines the GCNRegPressure class, which tracks registry pressure by bookkeeping number of S...
static cl::opt< bool > GCNTrackers("amdgpu-use-amdgpu-trackers", cl::Hidden, cl::desc("Use the AMDGPU specific RPTrackers during scheduling"), cl::init(false))
static cl::opt< bool > DisableClusteredLowOccupancy("amdgpu-disable-clustered-low-occupancy-reschedule", cl::Hidden, cl::desc("Disable clustered low occupancy " "rescheduling for ILP scheduling stage."), cl::init(false))
#define REMAT_PREFIX
Allows to easily filter for this stage's debug output.
static MachineInstr * getLastMIForRegion(MachineBasicBlock::iterator RegionBegin, MachineBasicBlock::iterator RegionEnd)
static bool shouldCheckPending(SchedBoundary &Zone, const TargetSchedModel *SchedModel)
static cl::opt< bool > RelaxedOcc("amdgpu-schedule-relaxed-occupancy", cl::Hidden, cl::desc("Relax occupancy targets for kernels which are memory " "bound (amdgpu-membound-threshold), or " "Wave Limited (amdgpu-limit-wave-threshold)."), cl::init(false))
static cl::opt< bool > DisableUnclusterHighRP("amdgpu-disable-unclustered-high-rp-reschedule", cl::Hidden, cl::desc("Disable unclustered high register pressure " "reduction scheduling stage."), cl::init(false))
static void printScheduleModel(std::set< std::pair< MachineInstr *, unsigned >, EarlierIssuingCycle > &ReadyCycles)
static cl::opt< bool > PrintMaxRPRegUsageAfterScheduler("amdgpu-print-max-reg-pressure-regusage-after-scheduler", cl::Hidden, cl::desc("Print a list of live registers along with their def/uses at the " "point of maximum register pressure after scheduling."), cl::init(false))
static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG)
static cl::opt< bool > DisableRewriteMFMAFormSchedStage("amdgpu-disable-rewrite-mfma-form-sched-stage", cl::Hidden, cl::desc("Disable rewrite mfma rewrite scheduling stage"), cl::init(true))
static bool canUsePressureDiffs(const SUnit &SU)
Checks whether SU can use the cached DAG pressure diffs to compute the current register pressure.
static cl::opt< unsigned > PendingQueueLimit("amdgpu-scheduler-pending-queue-limit", cl::Hidden, cl::desc("Max (Available+Pending) size to inspect pending queue (0 disables)"), cl::init(256))
static cl::opt< bool > PrintMaxRPRegUsageBeforeScheduler("amdgpu-print-max-reg-pressure-regusage-before-scheduler", cl::Hidden, cl::desc("Print a list of live registers along with their def/uses at the " "point of maximum register pressure before scheduling."), cl::init(false))
static cl::opt< unsigned > ScheduleMetricBias("amdgpu-schedule-metric-bias", cl::Hidden, cl::desc("Sets the bias which adds weight to occupancy vs latency. Set it to " "100 to chase the occupancy only."), cl::init(10))
static Register UseReg(const MachineOperand &MO)
const HexagonInstrInfo * TII
static constexpr std::pair< StringLiteral, StringLiteral > ReplaceMap[]
A common definition of LaneBitmask for use in TableGen and CodeGen.
Promote Memory to Register
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static constexpr unsigned SM(unsigned Version)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
iterator_range< const_set_bits_iterator > set_bits() const
size_type size() const
size - Returns the number of bits in this bitvector.
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
bool initGCNSchedStage() override
bool shouldRevertScheduling(unsigned WavesAfter) override
bool initGCNRegion() override
iterator find(const_arg_type_t< KeyT > Val)
bool erase(const KeyT &Val)
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
GCNRegPressure bumpDownwardPressure(const MachineInstr *MI, const SIRegisterInfo *TRI) const
Mostly copy/paste from CodeGen/RegisterPressure.cpp Calculate the impact MI will have on CurPressure ...
GCNMaxILPSchedStrategy(const MachineSchedContext *C)
bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override
Apply a set of heuristics to a new candidate.
bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override
GCNMaxMemoryClauseSchedStrategy tries best to clause memory instructions as much as possible.
GCNMaxMemoryClauseSchedStrategy(const MachineSchedContext *C)
GCNMaxOccupancySchedStrategy(const MachineSchedContext *C, bool IsLegacyScheduler=false)
void finalizeSchedule() override
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
void schedule() override
Orders nodes according to selected style.
GCNPostScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S, bool RemoveKillFlags)
Models a register pressure target, allowing to evaluate and track register savings against that targe...
unsigned getNumRegsBenefit(const GCNRegPressure &SaveRP) const
Returns the benefit towards achieving the RP target that saving SaveRP represents,...
GCNRegPressure getPressure() const
DenseMap< unsigned, LaneBitmask > LiveRegSet
virtual bool initGCNRegion()
GCNRegPressure PressureBefore
bool isRegionWithExcessRP() const
bool mayCauseSpilling(unsigned WavesAfter)
ScheduleMetrics getScheduleMetrics(const std::vector< SUnit > &InputSchedule)
GCNScheduleDAGMILive & DAG
const GCNSchedStageID StageID
std::vector< MachineInstr * > Unsched
GCNRegPressure PressureAfter
virtual void finalizeGCNRegion()
SIMachineFunctionInfo & MFI
unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle, DenseMap< unsigned, unsigned > &ReadyCycles, const TargetSchedModel &SM)
virtual void finalizeGCNSchedStage()
virtual bool initGCNSchedStage()
virtual bool shouldRevertScheduling(unsigned WavesAfter)
std::vector< std::unique_ptr< ScheduleDAGMutation > > SavedMutations
GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
void modifyRegionSchedule(unsigned RegionIdx, MachineBasicBlock *MBB, ArrayRef< MachineInstr * > MIOrder)
Sets the schedule of region RegionIdx in block MBB to MIOrder.
MachineBasicBlock * CurrentMBB
This is a minimal scheduler strategy.
GCNDownwardRPTracker DownwardTracker
bool useGCNTrackers() const
void getRegisterPressures(bool AtTop, const RegPressureTracker &RPTracker, SUnit *SU, std::vector< unsigned > &Pressure, std::vector< unsigned > &MaxPressure, GCNDownwardRPTracker &DownwardTracker, GCNUpwardRPTracker &UpwardTracker, ScheduleDAGMI *DAG, const SIRegisterInfo *SRI)
GCNSchedStrategy(const MachineSchedContext *C)
SmallVector< GCNSchedStageID, 4 > SchedStages
unsigned SGPRCriticalLimit
std::vector< unsigned > MaxPressure
bool hasNextStage() const
SUnit * pickNodeBidirectional(bool &IsTopNode, bool &PickedPending)
GCNSchedStageID getCurrentStage()
bool tryPendingCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Evaluates instructions in the pending queue using a subset of scheduling heuristics.
SmallVectorImpl< GCNSchedStageID >::iterator CurrentStage
unsigned VGPRCriticalLimit
void schedNode(SUnit *SU, bool IsTopNode) override
Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an instruction and updated scheduled/rem...
std::optional< bool > GCNTrackersOverride
GCNDownwardRPTracker * getDownwardTracker()
std::vector< unsigned > Pressure
void initialize(ScheduleDAGMI *DAG) override
Initialize the strategy after building the DAG for a new region.
GCNUpwardRPTracker UpwardTracker
void printCandidateDecision(const SchedCandidate &Current, const SchedCandidate &Preferred)
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Cand, bool &IsPending, bool IsBottomUp)
unsigned getStructuralStallCycles(SchedBoundary &Zone, SUnit *SU) const
Estimate how many cycles SU must wait due to structural hazards at the current boundary cycle.
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, const SIRegisterInfo *SRI, unsigned SGPRPressure, unsigned VGPRPressure, bool IsBottomUp)
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule, or return NULL.
GCNUpwardRPTracker * getUpwardTracker()
GCNSchedStageID getNextStage() const
void finalizeSchedule() override
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
void schedule() override
Orders nodes according to selected style.
GCNScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S)
void recede(const MachineInstr &MI)
Move to the state of RP just before the MI .
void traceCandidate(const SchedCandidate &Cand)
LLVM_ABI void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone)
Set the CandPolicy given a scheduling zone given the current resources and latencies inside and outsi...
MachineSchedPolicy RegionPolicy
const TargetSchedModel * SchedModel
const MachineSchedContext * Context
const TargetRegisterInfo * TRI
SchedCandidate BotCand
Candidate last picked from Bot boundary.
SchedCandidate TopCand
Candidate last picked from Top boundary.
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Apply a set of heuristics to a new candidate.
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
void schedNode(SUnit *SU, bool IsTopNode) override
Update the scheduler's state after scheduling a node.
GenericScheduler(const MachineSchedContext *C)
bool shouldRevertScheduling(unsigned WavesAfter) override
LiveInterval - This class represents the liveness of a register, or stack slot.
bool hasSubRanges() const
Returns true if subregister liveness information is available.
iterator_range< subrange_iterator > subranges()
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
void RemoveMachineInstrFromMaps(MachineInstr &MI)
SlotIndex getMBBEndIdx(const MachineBasicBlock *mbb) const
Return the last index in the given basic block.
LiveInterval & getInterval(Register Reg)
void removeInterval(Register Reg)
Interval removal.
LLVM_ABI void dump() const
MachineBasicBlock * getMBBFromIndex(SlotIndex index) const
LiveInterval & createAndComputeVirtRegInterval(Register Reg)
VNInfo * getVNInfoAt(SlotIndex Idx) const
getVNInfoAt - Return the VNInfo that is live at Idx, or NULL.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
succ_iterator succ_begin()
unsigned succ_size() const
iterator_range< pred_iterator > predecessors()
MachineInstrBundleIterator< MachineInstr > iterator
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
LLVM_ABI BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const
getblockFreq - Return block frequency.
LLVM_ABI BlockFrequency getEntryFreq() const
Divide a block's BlockFrequency::getFrequency() value by this value to obtain the entry block - relat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
bool verify(Pass *p=nullptr, const char *Banner=nullptr, raw_ostream *OS=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier, useful for debugger use.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Register getReg() const
getReg - Returns the register number.
LLVM_ABI LaneBitmask getMaxLaneMaskForVReg(Register Reg) const
Returns a mask covering all bits that can appear in lane masks of subregisters of the virtual registe...
LLVM_ABI bool isConstantPhysReg(MCRegister PhysReg) const
Returns true if PhysReg is unallocatable and constant throughout the function.
LLVM_ABI Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
bool shouldRevertScheduling(unsigned WavesAfter) override
bool shouldRevertScheduling(unsigned WavesAfter) override
bool shouldRevertScheduling(unsigned WavesAfter) override
void finalizeGCNRegion() override
bool initGCNRegion() override
bool initGCNSchedStage() override
Capture a change in pressure for a single pressure set.
Simple wrapper around std::function<void(raw_ostream&)>.
Helpers for implementing custom MachineSchedStrategy classes.
Track the current register pressure at some position in the instruction stream, and remember the high...
LLVM_ABI void advance()
Advance across the current instruction.
LLVM_ABI void getDownwardPressure(const MachineInstr *MI, std::vector< unsigned > &PressureResult, std::vector< unsigned > &MaxPressureResult)
Get the pressure of each PSet after traversing this instruction top-down.
const std::vector< unsigned > & getRegSetPressureAtPos() const
Get the register set pressure at the current position, which may be less than the pressure across the...
LLVM_ABI void getUpwardPressure(const MachineInstr *MI, std::vector< unsigned > &PressureResult, std::vector< unsigned > &MaxPressureResult)
Get the pressure of each PSet after traversing this instruction bottom-up.
RegionT * getParent() const
Get the parent of the Region.
GCNRPTracker::LiveRegSet & getLiveRegsForRegionIdx(unsigned RegionIdx)
List of registers defined and used by a machine instruction.
LLVM_ABI void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
LLVM_ABI void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the VReg...
LLVM_ABI void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS)
Use liveness information to find dead defs not marked with a dead flag and move them to the DeadDefs ...
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
bool isIGLPMutationOnly(unsigned Opcode) const
static bool isMAI(const MCInstrDesc &Desc)
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
unsigned getOccupancy() const
void increaseOccupancy(const MachineFunction &MF, unsigned Limit)
unsigned getDynamicVGPRBlockSize() const
unsigned getMinAllowedOccupancy() const
void limitOccupancy(const MachineFunction &MF)
Scheduling unit. This is a node in the scheduling DAG.
bool isInstr() const
Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.
unsigned TopReadyCycle
Cycle relative to start when node is ready.
unsigned NodeNum
Entry # of node in the node vector.
unsigned short Latency
Node latency.
bool isScheduled
True once scheduled.
unsigned ParentClusterIdx
The parent cluster id.
unsigned BotReadyCycle
Cycle relative to end when node is ready.
bool hasReservedResource
Uses a reserved resource.
bool isBottomReady() const
SmallVector< SDep, 4 > Preds
All sunit predecessors.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Each Scheduling boundary is associated with ready queues.
LLVM_ABI void releasePending()
Release pending ready nodes in to the available queue.
LLVM_ABI unsigned getLatencyStallCycles(SUnit *SU)
Get the difference between the given SUnit's ready time and the current cycle.
LLVM_ABI SUnit * pickOnlyChoice()
Call this before applying any other heuristics to the Available queue.
ScheduleHazardRecognizer * HazardRec
LLVM_ABI void bumpCycle(unsigned NextCycle)
Move the boundary of scheduled code by one cycle.
unsigned getCurrMOps() const
Micro-ops issued in the current cycle.
unsigned getCurrCycle() const
Number of cycles to issue the instructions scheduled in this zone.
LLVM_ABI bool checkHazard(SUnit *SU)
Does this SU have a hazard within the current instruction group.
LLVM_ABI std::pair< unsigned, unsigned > getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx, unsigned ReleaseAtCycle, unsigned AcquireAtCycle)
Compute the next cycle at which the given processor resource can be scheduled.
A ScheduleDAG for scheduling lists of MachineInstr.
bool ScheduleSingleMIRegions
True if regions with a single MI should be scheduled.
MachineBasicBlock::iterator RegionEnd
The end of the range to be scheduled.
virtual void finalizeSchedule()
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
virtual void exitRegion()
Called when the scheduler has finished scheduling the current region.
const MachineLoopInfo * MLI
bool RemoveKillFlags
True if the DAG builder should remove kill flags (in preparation for rescheduling).
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
ScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S)
RegPressureTracker RPTracker
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
ScheduleDAGMI(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S, bool RemoveKillFlags)
std::vector< std::unique_ptr< ScheduleDAGMutation > > Mutations
Ordered list of DAG postprocessing steps.
MachineRegisterInfo & MRI
Virtual/real register map.
const TargetInstrInfo * TII
Target instruction information.
const TargetRegisterInfo * TRI
Target processor register info.
MachineFunction & MF
Machine function.
static const unsigned ScaleFactor
unsigned getMetric() const
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
SlotIndex - An opaque wrapper around machine indexes.
static bool isSameInstr(SlotIndex A, SlotIndex B)
isSameInstr - Return true if A and B refer to the same instruction.
static bool isEarlierInstr(SlotIndex A, SlotIndex B)
isEarlierInstr - Return true if A refers to an instruction earlier than B.
SlotIndex getPrevSlot() const
Returns the previous slot in the index list.
SlotIndex getMBBStartIdx(unsigned Num) const
Returns the first index in the given basic block number.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
bool contains(const T &V) const
Check if the SmallSet contains the given element.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
bool isReMaterializable(const MachineInstr &MI) const
Return true if the instruction would be materializable at a point in the containing function where al...
virtual bool isIgnorableUse(const MachineOperand &MO) const
Given MO is a PhysReg use return if it can be ignored for the purpose of instruction rematerializatio...
uint8_t getCopyCost() const
Return the cost of copying a value between two registers in this class.
LaneBitmask getSubRegIndexLaneMask(unsigned SubIdx) const
Return a bitmask representing the parts of a register that are covered by SubIdx.
Provide an instruction scheduling machine model to CodeGen passes.
LLVM_ABI bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model.
unsigned getMicroOpBufferSize() const
Number of micro-ops that may be buffered for OOO execution.
bool initGCNSchedStage() override
bool initGCNRegion() override
void finalizeGCNSchedStage() override
bool shouldRevertScheduling(unsigned WavesAfter) override
VNInfo - Value Number Information.
SlotIndex def
The index of the defining instruction.
bool isPHIDef() const
Returns true if this value is defined by a PHI instruction (or was, PHI instructions may have been el...
static bool allUsesAvailableAt(const MachineInstr *MI, SlotIndex UseIdx, const LiveIntervals &LIS, const MachineRegisterInfo &MRI, const TargetInstrInfo &TII)
std::pair< iterator, bool > insert(const ValueT &V)
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize)
LLVM_READONLY int32_t getMFMASrcCVDstAGPROp(uint32_t Opcode)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
This is an optimization pass for GlobalISel generic memory operations.
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI unsigned getWeakLeft(const SUnit *SU, bool isTop)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
GCNRegPressure getRegPressure(const MachineRegisterInfo &MRI, Range &&LiveRegs)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
std::unique_ptr< ScheduleDAGMutation > createIGroupLPDAGMutation(AMDGPU::SchedulingPhase Phase)
Phase specifes whether or not this is a reentry into the IGroupLPDAGMutation.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
std::pair< MachineBasicBlock::iterator, MachineBasicBlock::iterator > RegionBoundaries
A region's boundaries i.e.
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason, const TargetRegisterInfo *TRI, const MachineFunction &MF)
@ UnclusteredHighRPReschedule
@ MemoryClauseInitialSchedule
@ ClusteredLowOccupancyReschedule
auto reverse(ContainerTy &&C)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
LLVM_ABI cl::opt< bool > VerifyScheduling
LLVM_ABI bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary &Zone)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector&lt;T, 0&gt;).
IterT skipDebugInstructionsBackward(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It until it points to a non-debug instruction or to Begin and return the resulting iterator.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
bool isTheSameCluster(unsigned A, unsigned B)
Return whether the input cluster ID's are the same and valid.
DWARFExpression::Operation Op
LLVM_ABI bool tryGreater(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
DenseMap< MachineInstr *, GCNRPTracker::LiveRegSet > getLiveRegMap(Range &&R, bool After, LiveIntervals &LIS)
Creates a map MachineInstr -> LiveRegSet. R - range of iterators on instructions. After - upon entry or exit of every instruction.
GCNRPTracker::LiveRegSet getLiveRegsBefore(const MachineInstr &MI, const LiveIntervals &LIS)
LLVM_ABI bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Return true if this heuristic determines order.
LLVM_ABI void dumpMaxRegPressure(MachineFunction &MF, GCNRegPressure::RegKind Kind, LiveIntervals &LIS, const MachineLoopInfo *MLI)
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
LLVM_ABI int biasPhysReg(const SUnit *SU, bool isTop)
Minimize physical register live ranges.
Implement std::hash so that hash_code can be used in STL containers.
bool operator()(std::pair< MachineInstr *, unsigned > A, std::pair< MachineInstr *, unsigned > B) const
unsigned getArchVGPRNum() const
unsigned getAGPRNum() const
unsigned getSGPRNum() const
Policy for scheduling the next instruction in the candidate's zone.
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of p...
void setBest(SchedCandidate &Best)
void reset(const CandPolicy &NewPolicy)
LLVM_ABI void initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
SchedResourceDelta ResDelta
Status of an instruction's critical resource consumption.
unsigned DemandedResources
constexpr bool any() const
static constexpr LaneBitmask getNone()
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Identify one of the processor resource kinds consumed by a particular scheduling class for the specified number of cycles.
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instantiate an alternative scheduler.
Execution frequency information required by scoring heuristics.
SmallVector< uint64_t > Regions
Per-region execution frequencies. 0 when unknown.
uint64_t MinFreq
Minimum and maximum observed frequencies.
FreqInfo(MachineFunction &MF, const GCNScheduleDAGMILive &DAG)
PressureChange CriticalMax
PressureChange CurrentMax