46#define DEBUG_TYPE "machine-scheduler"
51 "amdgpu-disable-unclustered-high-rp-reschedule",
cl::Hidden,
52 cl::desc(
"Disable unclustered high register pressure "
53 "reduction scheduling stage."),
57 "amdgpu-disable-clustered-low-occupancy-reschedule",
cl::Hidden,
58 cl::desc(
"Disable clustered low occupancy "
59 "rescheduling for ILP scheduling stage."),
65 "Sets the bias which adds weight to occupancy vs latency. Set it to "
66 "100 to chase the occupancy only."),
71 cl::desc(
"Relax occupancy targets for kernels which are memory "
72 "bound (amdgpu-membound-threshold), or "
73 "Wave Limited (amdgpu-limit-wave-threshold)."),
78 cl::desc(
"Use the AMDGPU specific RPTrackers during scheduling"),
82 "amdgpu-scheduler-pending-queue-limit",
cl::Hidden,
84 "Max (Available+Pending) size to inspect pending queue (0 disables)"),
87#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
88#define DUMP_MAX_REG_PRESSURE
90 "amdgpu-print-max-reg-pressure-regusage-before-scheduler",
cl::Hidden,
91 cl::desc(
"Print a list of live registers along with their def/uses at the "
92 "point of maximum register pressure before scheduling."),
96 "amdgpu-print-max-reg-pressure-regusage-after-scheduler",
cl::Hidden,
97 cl::desc(
"Print a list of live registers along with their def/uses at the "
98 "point of maximum register pressure after scheduling."),
103 "amdgpu-disable-rewrite-mfma-form-sched-stage",
cl::Hidden,
123 Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
125 Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);
147 "VGPRCriticalLimit calculation method.\n");
151 unsigned Addressable =
154 VGPRBudget = std::max(VGPRBudget, Granule);
192 if (!
Op.isReg() ||
Op.isImplicit())
194 if (
Op.getReg().isPhysical() ||
195 (
Op.isDef() &&
Op.getSubReg() != AMDGPU::NoSubRegister))
230 Pressure[AMDGPU::RegisterPressureSets::VGPR_32] =
238 if (!Zone.
isTop() || !SU)
255 if (NextAvail > CurrCycle)
256 Stall = std::max(
Stall, NextAvail - CurrCycle);
276 unsigned SGPRPressure,
277 unsigned VGPRPressure,
bool IsBottomUp) {
281 if (!
DAG->isTrackingPressure())
304 Pressure[AMDGPU::RegisterPressureSets::SReg_32] = SGPRPressure;
305 Pressure[AMDGPU::RegisterPressureSets::VGPR_32] = VGPRPressure;
307 for (
const auto &Diff :
DAG->getPressureDiff(SU)) {
313 (IsBottomUp ? Diff.getUnitInc() : -Diff.getUnitInc());
316#ifdef EXPENSIVE_CHECKS
317 std::vector<unsigned> CheckPressure, CheckMaxPressure;
320 if (
Pressure[AMDGPU::RegisterPressureSets::SReg_32] !=
321 CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] ||
322 Pressure[AMDGPU::RegisterPressureSets::VGPR_32] !=
323 CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32]) {
324 errs() <<
"Register Pressure is inaccurate when calculated through "
326 <<
"SGPR got " <<
Pressure[AMDGPU::RegisterPressureSets::SReg_32]
328 << CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] <<
"\n"
329 <<
"VGPR got " <<
Pressure[AMDGPU::RegisterPressureSets::VGPR_32]
331 << CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32] <<
"\n";
337 unsigned NewSGPRPressure =
Pressure[AMDGPU::RegisterPressureSets::SReg_32];
338 unsigned NewVGPRPressure =
Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
348 const unsigned MaxVGPRPressureInc = 16;
349 bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >=
VGPRExcessLimit;
350 bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >=
SGPRExcessLimit;
381 if (SGPRDelta >= 0 || VGPRDelta >= 0) {
383 if (SGPRDelta > VGPRDelta) {
397 bool HasBufferedModel =
416 dbgs() <<
"Prefer:\t\t";
417 DAG->dumpNode(*Preferred.
SU);
421 DAG->dumpNode(*Current.
SU);
424 dbgs() <<
"Reason:\t\t";
438 unsigned SGPRPressure = 0;
439 unsigned VGPRPressure = 0;
441 if (
DAG->isTrackingPressure()) {
443 SGPRPressure =
Pressure[AMDGPU::RegisterPressureSets::SReg_32];
444 VGPRPressure =
Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
449 SGPRPressure =
T->getPressure().getSGPRNum();
450 VGPRPressure =
T->getPressure().getArchVGPRNum();
455 for (
SUnit *SU : AQ) {
459 VGPRPressure, IsBottomUp);
479 for (
SUnit *SU : PQ) {
483 VGPRPressure, IsBottomUp);
503 bool &PickedPending) {
523 bool BotPending =
false;
543 "Last pick result should correspond to re-picking right now");
548 bool TopPending =
false;
568 "Last pick result should correspond to re-picking right now");
578 PickedPending = BotPending && TopPending;
581 if (BotPending || TopPending) {
588 Cand.setBest(TryCand);
593 IsTopNode = Cand.AtTop;
600 if (
DAG->top() ==
DAG->bottom()) {
602 Bot.Available.empty() &&
Bot.Pending.empty() &&
"ReadyQ garbage");
608 PickedPending =
false;
642 if (ReadyCycle > CurrentCycle)
714 if (
DAG->isTrackingPressure() &&
720 if (
DAG->isTrackingPressure() &&
725 bool SameBoundary = Zone !=
nullptr;
749 if (IsLegacyScheduler)
768 if (
DAG->isTrackingPressure() &&
778 bool SameBoundary = Zone !=
nullptr;
813 bool CandIsClusterSucc =
815 bool TryCandIsClusterSucc =
817 if (
tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
822 if (
DAG->isTrackingPressure() &&
828 if (
DAG->isTrackingPressure() &&
874 if (
DAG->isTrackingPressure()) {
890 bool CandIsClusterSucc =
892 bool TryCandIsClusterSucc =
894 if (
tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
903 bool SameBoundary = Zone !=
nullptr;
920 if (TryMayLoad || CandMayLoad) {
921 bool TryLongLatency =
923 bool CandLongLatency =
927 Zone->
isTop() ? CandLongLatency : TryLongLatency, TryCand,
945 if (
DAG->isTrackingPressure() &&
964 !
Rem.IsAcyclicLatencyLimited &&
tryLatency(TryCand, Cand, *Zone))
982 StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy),
983 RegionLiveOuts(this,
true) {
989 LLVM_DEBUG(
dbgs() <<
"Starting occupancy is " << StartingOccupancy <<
".\n");
991 MinOccupancy = std::min(MFI.getMinAllowedOccupancy(), StartingOccupancy);
992 if (MinOccupancy != StartingOccupancy)
993 LLVM_DEBUG(
dbgs() <<
"Allowing Occupancy drops to " << MinOccupancy
998std::unique_ptr<GCNSchedStage>
1000 switch (SchedStageID) {
1002 return std::make_unique<OccInitialScheduleStage>(SchedStageID, *
this);
1004 return std::make_unique<RewriteMFMAFormStage>(SchedStageID, *
this);
1006 return std::make_unique<UnclusteredHighRPStage>(SchedStageID, *
this);
1008 return std::make_unique<ClusteredLowOccStage>(SchedStageID, *
this);
1010 return std::make_unique<PreRARematStage>(SchedStageID, *
this);
1012 return std::make_unique<ILPInitialScheduleStage>(SchedStageID, *
this);
1014 return std::make_unique<MemoryClauseInitialScheduleStage>(SchedStageID,
1028GCNScheduleDAGMILive::getRealRegPressure(
unsigned RegionIdx)
const {
1029 if (Regions[RegionIdx].first == Regions[RegionIdx].second)
1033 &LiveIns[RegionIdx]);
1039 assert(RegionBegin != RegionEnd &&
"Region must not be empty");
1043void GCNScheduleDAGMILive::computeBlockPressure(
unsigned RegionIdx,
1055 const MachineBasicBlock *OnlySucc =
nullptr;
1058 if (!Candidate->empty() && Candidate->pred_size() == 1) {
1059 SlotIndexes *Ind =
LIS->getSlotIndexes();
1061 OnlySucc = Candidate;
1066 size_t CurRegion = RegionIdx;
1067 for (
size_t E = Regions.size(); CurRegion !=
E; ++CurRegion)
1068 if (Regions[CurRegion].first->getParent() !=
MBB)
1073 auto LiveInIt = MBBLiveIns.find(
MBB);
1074 auto &Rgn = Regions[CurRegion];
1076 if (LiveInIt != MBBLiveIns.end()) {
1077 auto LiveIn = std::move(LiveInIt->second);
1079 MBBLiveIns.erase(LiveInIt);
1082 auto LRS = BBLiveInMap.lookup(NonDbgMI);
1083#ifdef EXPENSIVE_CHECKS
1092 if (Regions[CurRegion].first ==
I || NonDbgMI ==
I) {
1093 LiveIns[CurRegion] =
RPTracker.getLiveRegs();
1097 if (Regions[CurRegion].second ==
I) {
1098 Pressure[CurRegion] =
RPTracker.moveMaxPressure();
1099 if (CurRegion-- == RegionIdx)
1101 auto &Rgn = Regions[CurRegion];
1114 MBBLiveIns[OnlySucc] =
RPTracker.moveLiveRegs();
1119GCNScheduleDAGMILive::getRegionLiveInMap()
const {
1120 assert(!Regions.empty());
1121 std::vector<MachineInstr *> RegionFirstMIs;
1122 RegionFirstMIs.reserve(Regions.size());
1124 RegionFirstMIs.push_back(
1131GCNScheduleDAGMILive::getRegionLiveOutMap()
const {
1132 assert(!Regions.empty());
1133 std::vector<MachineInstr *> RegionLastMIs;
1134 RegionLastMIs.reserve(Regions.size());
1145 IdxToInstruction.clear();
1148 IsLiveOut ? DAG->getRegionLiveOutMap() : DAG->getRegionLiveInMap();
1149 for (
unsigned I = 0;
I < DAG->Regions.size();
I++) {
1150 auto &[RegionBegin, RegionEnd] = DAG->Regions[
I];
1152 if (RegionBegin == RegionEnd)
1156 IdxToInstruction[
I] = RegionKey;
1164 LiveIns.resize(Regions.size());
1165 Pressure.resize(Regions.size());
1166 RegionsWithHighRP.resize(Regions.size());
1167 RegionsWithExcessRP.resize(Regions.size());
1168 RegionsWithIGLPInstrs.resize(Regions.size());
1169 RegionsWithHighRP.reset();
1170 RegionsWithExcessRP.reset();
1171 RegionsWithIGLPInstrs.reset();
1176void GCNScheduleDAGMILive::runSchedStages() {
1177 LLVM_DEBUG(
dbgs() <<
"All regions recorded, starting actual scheduling.\n");
1180 if (!Regions.
empty()) {
1181 BBLiveInMap = getRegionLiveInMap();
1186#ifdef DUMP_MAX_REG_PRESSURE
1196 if (!Stage->initGCNSchedStage())
1199 for (
auto Region : Regions) {
1203 if (!Stage->initGCNRegion()) {
1204 Stage->advanceRegion();
1210 const unsigned RegionIdx = Stage->getRegionIdx();
1213 MRI, RegionLiveOuts.getLiveRegsForRegionIdx(RegionIdx));
1217 Stage->finalizeGCNRegion();
1218 Stage->advanceRegion();
1222 Stage->finalizeGCNSchedStage();
1225#ifdef DUMP_MAX_REG_PRESSURE
1238 OS <<
"Max Occupancy Initial Schedule";
1241 OS <<
"Instruction Rewriting Reschedule";
1244 OS <<
"Unclustered High Register Pressure Reschedule";
1247 OS <<
"Clustered Low Occupancy Reschedule";
1250 OS <<
"Pre-RA Rematerialize";
1253 OS <<
"Max ILP Initial Schedule";
1256 OS <<
"Max memory clause Initial Schedule";
1276void RewriteMFMAFormStage::findReachingDefs(
1298 while (!Worklist.
empty()) {
1313 for (MachineBasicBlock *PredMBB : DefMBB->
predecessors()) {
1314 if (Visited.
insert(PredMBB).second)
1320void RewriteMFMAFormStage::findReachingUses(
1324 for (MachineOperand &UseMO :
1327 findReachingDefs(UseMO, LIS, ReachingDefIndexes);
1331 if (
any_of(ReachingDefIndexes, [DefIdx](SlotIndex RDIdx) {
1343 if (!
ST.hasGFX90AInsts() ||
MFI.getMinWavesPerEU() > 1)
1346 RegionsWithExcessArchVGPR.resize(
DAG.Regions.size());
1347 RegionsWithExcessArchVGPR.reset();
1351 RegionsWithExcessArchVGPR[
Region] =
true;
1354 if (RegionsWithExcessArchVGPR.none())
1357 TII =
ST.getInstrInfo();
1358 SRI =
ST.getRegisterInfo();
1360 std::vector<std::pair<MachineInstr *, unsigned>> RewriteCands;
1364 if (!initHeuristics(RewriteCands, CopyForUse, CopyForDef))
1367 int64_t
Cost = getRewriteCost(RewriteCands, CopyForUse, CopyForDef);
1374 return rewrite(RewriteCands);
1384 if (
DAG.RegionsWithHighRP.none() &&
DAG.RegionsWithExcessRP.none())
1391 InitialOccupancy =
DAG.MinOccupancy;
1394 TempTargetOccupancy =
MFI.getMaxWavesPerEU() >
DAG.MinOccupancy
1395 ? InitialOccupancy + 1
1397 IsAnyRegionScheduled =
false;
1398 S.SGPRLimitBias =
S.HighRPSGPRBias;
1399 S.VGPRLimitBias =
S.HighRPVGPRBias;
1403 <<
"Retrying function scheduling without clustering. "
1404 "Aggressively try to reduce register pressure to achieve occupancy "
1405 << TempTargetOccupancy <<
".\n");
1420 if (
DAG.StartingOccupancy <=
DAG.MinOccupancy)
1424 dbgs() <<
"Retrying function scheduling with lowest recorded occupancy "
1425 <<
DAG.MinOccupancy <<
".\n");
1430#define REMAT_PREFIX "[PreRARemat] "
1431#define REMAT_DEBUG(X) LLVM_DEBUG(dbgs() << REMAT_PREFIX; X;)
1433#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1434Printable PreRARematStage::ScoredRemat::print()
const {
1436 OS <<
'(' << MaxFreq <<
", " << FreqDiff <<
", " << RegionImpact <<
')';
1451 auto PrintTargetRegions = [&]() ->
void {
1452 if (TargetRegions.none()) {
1457 for (
unsigned I : TargetRegions.set_bits())
1464 dbgs() <<
"Analyzing ";
1465 MF.getFunction().printAsOperand(
dbgs(),
false);
1468 if (!setObjective()) {
1469 LLVM_DEBUG(
dbgs() <<
"no objective to achieve, occupancy is maximal at "
1470 <<
MFI.getMaxWavesPerEU() <<
'\n');
1475 dbgs() <<
"increase occupancy from " << *TargetOcc - 1 <<
'\n';
1477 dbgs() <<
"reduce spilling (minimum target occupancy is "
1478 <<
MFI.getMinWavesPerEU() <<
")\n";
1480 PrintTargetRegions();
1485 DAG.RegionLiveOuts.buildLiveRegMap();
1487 if (!Remater.analyze()) {
1501 for (
unsigned RegIdx = 0, E = Remater.getNumRegs(); RegIdx < E; ++RegIdx) {
1505 unsigned NumUsers = 0;
1506 for (
const auto &[
_, RegionUses] : CandReg.
Uses)
1507 NumUsers += RegionUses.size();
1521 return MarkedRegs.contains(MO.getReg());
1528 SlotIndex UseIdx =
DAG.LIS->getInstructionIndex(*UseMI).getRegSlot(
true);
1535 Cand.init(RegIdx, FreqInfo, Remater,
DAG);
1536 Cand.update(TargetRegions, RPTargets, FreqInfo, !TargetOcc);
1537 if (!Cand.hasNullScore())
1548 Rollback = std::make_unique<RollbackSupport>(Remater);
1555 RecomputeRP.
reset();
1558 sort(CandidateOrder, [&](
unsigned LHSIndex,
unsigned RHSIndex) {
1559 return Candidates[LHSIndex] < Candidates[RHSIndex];
1563 dbgs() <<
"==== NEW REMAT ROUND ====\n"
1565 <<
"Candidates with non-null score, in rematerialization order:\n";
1566 for (
const ScoredRemat &Cand :
reverse(Candidates)) {
1568 << Remater.printRematReg(Cand.RegIdx) <<
'\n';
1570 PrintTargetRegions();
1576 while (!CandidateOrder.
empty()) {
1577 const ScoredRemat &Cand = Candidates[CandidateOrder.
back()];
1586 if (!Cand.maybeBeneficial(TargetRegions, RPTargets)) {
1588 << Cand.print() <<
" | "
1589 << Remater.printRematReg(Cand.RegIdx));
1594#ifdef EXPENSIVE_CHECKS
1598 if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
1608 LM =
DAG.TRI->getSubRegIndexLaneMask(MO.getSubReg());
1610 const unsigned UseRegion = Reg.Uses.begin()->first;
1612 LaneBitmask UncoveredLanes = LM & ~(LiveInMask & LM);
1616 if (UncoveredLanes.
any()) {
1619 assert((SR.LaneMask & UncoveredLanes).none());
1626 REMAT_DEBUG(
dbgs() <<
"** REMAT " << Remater.printRematReg(Cand.RegIdx)
1628 removeFromLiveMaps(Reg.getDefReg(), Cand.LiveIn, Cand.LiveOut);
1630 Rollback->LiveMapUpdates.emplace_back(Cand.RegIdx, Cand.LiveIn,
1633 Cand.rematerialize(Remater);
1638 updateRPTargets(Cand.Live, Cand.RPSave);
1639 RecomputeRP |= Cand.UnpredictableRPSave;
1640 RescheduleRegions |= Cand.Live;
1641 if (!TargetRegions.any()) {
1647 if (!updateAndVerifyRPTargets(RecomputeRP) && !TargetRegions.any()) {
1656 unsigned NumUsefulCandidates = 0;
1657 for (
unsigned CandIdx : CandidateOrder) {
1658 ScoredRemat &Candidate = Candidates[CandIdx];
1659 Candidate.update(TargetRegions, RPTargets, FreqInfo, !TargetOcc);
1660 if (!Candidate.hasNullScore())
1661 CandidateOrder[NumUsefulCandidates++] = CandIdx;
1663 if (NumUsefulCandidates == 0) {
1664 REMAT_DEBUG(
dbgs() <<
"Stop on exhausted rematerialization candidates\n");
1667 CandidateOrder.truncate(NumUsefulCandidates);
1670 if (RescheduleRegions.none())
1676 unsigned DynamicVGPRBlockSize =
MFI.getDynamicVGPRBlockSize();
1677 for (
unsigned I : RescheduleRegions.set_bits()) {
1678 DAG.Pressure[
I] = RPTargets[
I].getCurrentRP();
1680 <<
DAG.Pressure[
I].getOccupancy(
ST, DynamicVGPRBlockSize)
1681 <<
" (" << RPTargets[
I] <<
")\n");
1683 AchievedOcc =
MFI.getMaxWavesPerEU();
1686 std::min(AchievedOcc, RP.getOccupancy(
ST, DynamicVGPRBlockSize));
1690 dbgs() <<
"Retrying function scheduling with new min. occupancy of "
1691 << AchievedOcc <<
" from rematerializing (original was "
1692 <<
DAG.MinOccupancy;
1694 dbgs() <<
", target was " << *TargetOcc;
1698 DAG.setTargetOccupancy(getStageTargetOccupancy());
1709 S.SGPRLimitBias =
S.VGPRLimitBias = 0;
1710 if (
DAG.MinOccupancy > InitialOccupancy) {
1711 assert(IsAnyRegionScheduled);
1713 <<
" stage successfully increased occupancy to "
1714 <<
DAG.MinOccupancy <<
'\n');
1715 }
else if (!IsAnyRegionScheduled) {
1716 assert(
DAG.MinOccupancy == InitialOccupancy);
1718 <<
": No regions scheduled, min occupancy stays at "
1719 <<
DAG.MinOccupancy <<
", MFI occupancy stays at "
1720 <<
MFI.getOccupancy() <<
".\n");
1728 if (
DAG.begin() ==
DAG.end())
1735 unsigned NumRegionInstrs = std::distance(
DAG.begin(),
DAG.end());
1739 if (
DAG.begin() == std::prev(
DAG.end()))
1745 <<
"\n From: " << *
DAG.begin() <<
" To: ";
1747 else dbgs() <<
"End";
1748 dbgs() <<
" RegionInstrs: " << NumRegionInstrs <<
'\n');
1756 for (
auto &
I :
DAG) {
1769 dbgs() <<
"Pressure before scheduling:\nRegion live-ins:"
1771 <<
"Region live-in pressure: "
1775 S.HasHighPressure =
false;
1797 unsigned DynamicVGPRBlockSize =
DAG.MFI.getDynamicVGPRBlockSize();
1800 unsigned CurrentTargetOccupancy =
1801 IsAnyRegionScheduled ?
DAG.MinOccupancy : TempTargetOccupancy;
1803 (CurrentTargetOccupancy <= InitialOccupancy ||
1804 DAG.Pressure[
RegionIdx].getOccupancy(
ST, DynamicVGPRBlockSize) !=
1811 if (!IsAnyRegionScheduled && IsSchedulingThisRegion) {
1812 IsAnyRegionScheduled =
true;
1813 if (
MFI.getMaxWavesPerEU() >
DAG.MinOccupancy)
1814 DAG.setTargetOccupancy(TempTargetOccupancy);
1816 return IsSchedulingThisRegion;
1832 return !RevertAllRegions && RescheduleRegions[
RegionIdx] &&
1852 if (
S.HasHighPressure)
1873 if (
DAG.MinOccupancy < *TargetOcc) {
1875 <<
" cannot meet occupancy target, interrupting "
1876 "re-scheduling in all regions\n");
1877 RevertAllRegions =
true;
1888 unsigned DynamicVGPRBlockSize =
DAG.MFI.getDynamicVGPRBlockSize();
1899 unsigned TargetOccupancy = std::min(
1900 S.getTargetOccupancy(),
ST.getOccupancyWithWorkGroupSizes(
MF).second);
1901 unsigned WavesAfter = std::min(
1902 TargetOccupancy,
PressureAfter.getOccupancy(
ST, DynamicVGPRBlockSize));
1903 unsigned WavesBefore = std::min(
1905 LLVM_DEBUG(
dbgs() <<
"Occupancy before scheduling: " << WavesBefore
1906 <<
", after " << WavesAfter <<
".\n");
1912 unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);
1916 if (WavesAfter < WavesBefore && WavesAfter <
DAG.MinOccupancy &&
1917 WavesAfter >=
MFI.getMinAllowedOccupancy()) {
1918 LLVM_DEBUG(
dbgs() <<
"Function is memory bound, allow occupancy drop up to "
1919 <<
MFI.getMinAllowedOccupancy() <<
" waves\n");
1920 NewOccupancy = WavesAfter;
1923 if (NewOccupancy <
DAG.MinOccupancy) {
1924 DAG.MinOccupancy = NewOccupancy;
1925 MFI.limitOccupancy(
DAG.MinOccupancy);
1927 <<
DAG.MinOccupancy <<
".\n");
1931 unsigned MaxVGPRs =
ST.getMaxNumVGPRs(
MF);
1934 unsigned MaxArchVGPRs = std::min(MaxVGPRs,
ST.getAddressableNumArchVGPRs());
1935 unsigned MaxSGPRs =
ST.getMaxNumSGPRs(
MF);
1959 unsigned ReadyCycle = CurrCycle;
1960 for (
auto &
D : SU.
Preds) {
1961 if (
D.isAssignedRegDep()) {
1964 unsigned DefReady = ReadyCycles[
DAG.getSUnit(
DefMI)->NodeNum];
1965 ReadyCycle = std::max(ReadyCycle, DefReady +
Latency);
1968 ReadyCycles[SU.
NodeNum] = ReadyCycle;
1975 std::pair<MachineInstr *, unsigned>
B)
const {
1976 return A.second <
B.second;
1982 if (ReadyCycles.empty())
1984 unsigned BBNum = ReadyCycles.begin()->first->getParent()->getNumber();
1985 dbgs() <<
"\n################## Schedule time ReadyCycles for MBB : " << BBNum
1986 <<
" ##################\n# Cycle #\t\t\tInstruction "
1990 for (
auto &
I : ReadyCycles) {
1991 if (
I.second > IPrev + 1)
1992 dbgs() <<
"****************************** BUBBLE OF " <<
I.second - IPrev
1993 <<
" CYCLES DETECTED ******************************\n\n";
1994 dbgs() <<
"[ " <<
I.second <<
" ] : " << *
I.first <<
"\n";
2007 unsigned SumBubbles = 0;
2009 unsigned CurrCycle = 0;
2010 for (
auto &SU : InputSchedule) {
2011 unsigned ReadyCycle =
2013 SumBubbles += ReadyCycle - CurrCycle;
2015 ReadyCyclesSorted.insert(std::make_pair(SU.getInstr(), ReadyCycle));
2017 CurrCycle = ++ReadyCycle;
2040 unsigned SumBubbles = 0;
2042 unsigned CurrCycle = 0;
2043 for (
auto &
MI :
DAG) {
2047 unsigned ReadyCycle =
2049 SumBubbles += ReadyCycle - CurrCycle;
2051 ReadyCyclesSorted.insert(std::make_pair(SU->
getInstr(), ReadyCycle));
2053 CurrCycle = ++ReadyCycle;
2070 if (WavesAfter <
DAG.MinOccupancy)
2074 if (
DAG.MFI.isDynamicVGPREnabled()) {
2076 ST,
DAG.MFI.getDynamicVGPRBlockSize(),
2080 if (BlocksAfter > BlocksBefore)
2117 <<
"\n\t *** In shouldRevertScheduling ***\n"
2118 <<
" *********** BEFORE UnclusteredHighRPStage ***********\n");
2122 <<
"\n *********** AFTER UnclusteredHighRPStage ***********\n");
2124 unsigned OldMetric = MBefore.
getMetric();
2125 unsigned NewMetric = MAfter.
getMetric();
2126 unsigned WavesBefore = std::min(
2127 S.getTargetOccupancy(),
2134 LLVM_DEBUG(
dbgs() <<
"\tMetric before " << MBefore <<
"\tMetric after "
2135 << MAfter <<
"Profit: " << Profit <<
"\n");
2166 unsigned WavesAfter) {
2173 LLVM_DEBUG(
dbgs() <<
"New pressure will result in more spilling.\n");
2185 "instruction number mismatch");
2186 if (MIOrder.
empty())
2199 if (MII != RegionEnd) {
2201 bool NonDebugReordered =
2202 !
MI->isDebugInstr() &&
2208 if (NonDebugReordered)
2209 DAG.LIS->handleMove(*
MI,
true);
2216 if (!
MI->isDebugInstr()) {
2218 SlotIndex PrevIdx =
DAG.LIS->getSlotIndexes()->getIndexBefore(*
MI);
2219 if (PrevIdx >= MIIdx)
2220 DAG.LIS->handleMove(*
MI,
true);
2224 if (
MI->isDebugInstr()) {
2231 Op.setIsUndef(
false);
2234 if (
DAG.ShouldTrackLaneMasks) {
2236 SlotIndex SlotIdx =
DAG.LIS->getInstructionIndex(*MI).getRegSlot();
2260 if (RD->
getOpcode() == AMDGPU::AV_MOV_B32_IMM_PSEUDO ||
2261 RD->
getOpcode() == AMDGPU::AV_MOV_B64_IMM_PSEUDO)
2268bool RewriteMFMAFormStage::hasUseRequiringVGPR(
2270 const SmallPtrSetImpl<MachineInstr *> &RewriteSet) {
2271 for (SlotIndex RDIdx : Src2ReachingDefs) {
2272 const MachineInstr *RD =
DAG.LIS->getInstructionFromIndex(RDIdx);
2274 findReachingUses(RD,
DAG.LIS, ReachingUses);
2275 for (
const MachineOperand *UseMO : ReachingUses) {
2287void RewriteMFMAFormStage::resetRewriteCandsToVGPR(
2288 ArrayRef<std::pair<MachineInstr *, unsigned>> RewriteCands) {
2289 for (
auto [
MI, OriginalOpcode] : RewriteCands) {
2291 const TargetRegisterClass *ADefRC =
2292 DAG.MRI.getRegClass(
MI->getOperand(0).getReg());
2293 const TargetRegisterClass *VDefRC = SRI->getEquivalentVGPRClass(ADefRC);
2294 DAG.MRI.setRegClass(
MI->getOperand(0).getReg(), VDefRC);
2295 MI->setDesc(
TII->get(OriginalOpcode));
2297 MachineOperand *Src2 =
TII->getNamedOperand(*
MI, AMDGPU::OpName::src2);
2304 const TargetRegisterClass *AUseRC =
DAG.MRI.getRegClass(Src2->
getReg());
2305 const TargetRegisterClass *VUseRC = SRI->getEquivalentVGPRClass(AUseRC);
2306 DAG.MRI.setRegClass(Src2->
getReg(), VUseRC);
2310bool RewriteMFMAFormStage::isRewriteCandidate(MachineInstr *
MI)
const {
2311 if (!
static_cast<const SIInstrInfo *
>(
DAG.TII)->isMAI(*
MI))
2316 Register DstReg =
MI->getOperand(0).getReg();
2317 for (
const MachineOperand &Use :
DAG.MRI.use_nodbg_operands(DstReg)) {
2318 if (!
TII->isMAI(*
Use.getParent()) && !
Use.getParent()->isCopy())
2324bool RewriteMFMAFormStage::initHeuristics(
2325 std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,
2326 DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
2327 SmallPtrSetImpl<MachineInstr *> &CopyForDef) {
2332 SmallPtrSet<MachineInstr *, 16> RewriteSet;
2333 DenseSet<Register> CandSrc2Regs;
2334 for (MachineBasicBlock &
MBB :
MF) {
2335 for (MachineInstr &
MI :
MBB) {
2336 if (!isRewriteCandidate(&
MI))
2339 MachineOperand *Src2 =
TII->getNamedOperand(
MI, AMDGPU::OpName::src2);
2340 if (Src2 && Src2->
isReg())
2346 for (MachineBasicBlock &
MBB :
MF) {
2347 for (MachineInstr &
MI :
MBB) {
2348 if (!isRewriteCandidate(&
MI))
2352 assert(ReplacementOp != -1);
2354 RewriteCands.push_back({&
MI,
MI.getOpcode()});
2355 MI.setDesc(
TII->get(ReplacementOp));
2357 MachineOperand *Src2 =
TII->getNamedOperand(
MI, AMDGPU::OpName::src2);
2358 if (Src2->
isReg()) {
2360 findReachingDefs(*Src2,
DAG.LIS, Src2ReachingDefs);
2364 bool Src2NeedsVGPR = hasUseRequiringVGPR(Src2ReachingDefs, RewriteSet);
2365 Src2NeedsVGPRCache[&
MI] = Src2NeedsVGPR;
2367 for (SlotIndex RDIdx : Src2ReachingDefs) {
2368 MachineInstr *RD =
DAG.LIS->getInstructionFromIndex(RDIdx);
2369 if (!Src2NeedsVGPR &&
2376 MachineOperand &Dst =
MI.getOperand(0);
2379 findReachingUses(&
MI,
DAG.LIS, DstReachingUses);
2381 for (MachineOperand *RUOp : DstReachingUses) {
2382 MachineInstr *UserMI = RUOp->getParent();
2384 if (
TII->isMAI(*UserMI) && RewriteSet.
contains(UserMI))
2390 CopyForUse[UserMI->
getParent()].insert(RUOp->getReg());
2392 if (
TII->isMAI(*UserMI))
2396 findReachingDefs(*RUOp,
DAG.LIS, DstUsesReachingDefs);
2398 for (SlotIndex RDIndex : DstUsesReachingDefs) {
2399 MachineInstr *RD =
DAG.LIS->getInstructionFromIndex(RDIndex);
2400 if (
TII->isMAI(*RD))
2412 const TargetRegisterClass *VDefRC =
DAG.MRI.getRegClass(Dst.getReg());
2413 const TargetRegisterClass *ADefRC = SRI->getEquivalentAGPRClass(VDefRC);
2414 DAG.MRI.setRegClass(Dst.getReg(), ADefRC);
2415 if (Src2->
isReg()) {
2419 const TargetRegisterClass *VUseRC =
DAG.MRI.getRegClass(Src2->
getReg());
2420 const TargetRegisterClass *AUseRC = SRI->getEquivalentAGPRClass(VUseRC);
2421 DAG.MRI.setRegClass(Src2->
getReg(), AUseRC);
2430int64_t RewriteMFMAFormStage::getRewriteCost(
2431 ArrayRef<std::pair<MachineInstr *, unsigned>> RewriteCands,
2432 const DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
2433 const SmallPtrSetImpl<MachineInstr *> &CopyForDef) {
2434 MachineBlockFrequencyInfo *MBFI =
DAG.MBFI;
2436 int64_t BestSpillCost = 0;
2440 std::pair<unsigned, unsigned> MaxVectorRegs =
2441 ST.getMaxNumVectorRegs(
MF.getFunction());
2442 unsigned ArchVGPRThreshold = MaxVectorRegs.first;
2443 unsigned AGPRThreshold = MaxVectorRegs.second;
2444 unsigned CombinedThreshold =
ST.getMaxNumVGPRs(
MF);
2447 if (!RegionsWithExcessArchVGPR[Region])
2452 MF, ArchVGPRThreshold, AGPRThreshold, CombinedThreshold);
2460 MF, ArchVGPRThreshold, AGPRThreshold, CombinedThreshold);
2462 uint64_t BlockFreq =
2466 bool RelativeFreqIsDenom = EntryFreq > BlockFreq;
2467 uint64_t RelativeFreq = EntryFreq && BlockFreq
2468 ? (RelativeFreqIsDenom ? EntryFreq / BlockFreq
2469 : BlockFreq / EntryFreq)
2474 int64_t SpillCost = ((int)SpillCostAfter - (int)SpillCostBefore) * 2;
2477 if (RelativeFreqIsDenom)
2478 SpillCost /= (int64_t)RelativeFreq;
2480 SpillCost *= (int64_t)RelativeFreq;
2483 if (SpillCost > 0) {
2484 resetRewriteCandsToVGPR(RewriteCands);
2488 if (SpillCost < BestSpillCost)
2489 BestSpillCost = SpillCost;
2494 Cost = BestSpillCost;
2497 unsigned CopyCost = 0;
2501 for (MachineInstr *
DefMI : CopyForDef) {
2508 const TargetRegisterClass *RC =
DAG.MRI.getRegClass(DefReg);
2513 for (
auto &[UseBlock, UseRegs] : CopyForUse) {
2518 const TargetRegisterClass *RC =
DAG.MRI.getRegClass(
UseReg);
2527 resetRewriteCandsToVGPR(RewriteCands);
2529 return Cost + CopyCost;
2532bool RewriteMFMAFormStage::rewrite(
2533 ArrayRef<std::pair<MachineInstr *, unsigned>> RewriteCands) {
2534 DenseMap<MachineInstr *, unsigned> FirstMIToRegion;
2535 DenseMap<MachineInstr *, unsigned> LastMIToRegion;
2543 if (
Entry.second !=
Entry.first->getParent()->end())
2586 DenseSet<Register> RewriteRegs;
2589 DenseMap<Register, Register> RedefMap;
2591 DenseMap<Register, DenseSet<MachineOperand *>>
ReplaceMap;
2593 DenseMap<Register, SmallPtrSet<MachineInstr *, 8>> ReachingDefCopyMap;
2596 DenseMap<unsigned, DenseMap<Register, SmallPtrSet<MachineOperand *, 8>>>
2601 SmallPtrSet<MachineInstr *, 16> RewriteCandsSet;
2602 DenseSet<Register> RewriteSrc2Regs;
2603 for (
auto &[
MI, OriginalOpcode] : RewriteCands) {
2605 MachineOperand *Src2 =
TII->getNamedOperand(*
MI, AMDGPU::OpName::src2);
2606 if (Src2 && Src2->
isReg())
2610 for (
auto &[
MI, OriginalOpcode] : RewriteCands) {
2612 if (ReplacementOp == -1)
2614 MI->setDesc(
TII->get(ReplacementOp));
2617 MachineOperand *Src2 =
TII->getNamedOperand(*
MI, AMDGPU::OpName::src2);
2618 if (Src2->
isReg()) {
2625 findReachingDefs(*Src2,
DAG.LIS, Src2ReachingDefs);
2626 SmallSetVector<MachineInstr *, 8> Src2DefsReplace;
2630 bool Src2NeedsVGPR = Src2NeedsVGPRCache.lookup(
MI);
2632 for (SlotIndex RDIndex : Src2ReachingDefs) {
2633 MachineInstr *RD =
DAG.LIS->getInstructionFromIndex(RDIndex);
2634 if (!Src2NeedsVGPR &&
2638 Src2DefsReplace.
insert(RD);
2641 if (!Src2DefsReplace.
empty()) {
2642 auto RI = RedefMap.
find(Src2Reg);
2643 if (RI != RedefMap.
end()) {
2644 MappedReg = RI->second;
2647 const TargetRegisterClass *Src2RC =
DAG.MRI.getRegClass(Src2Reg);
2648 const TargetRegisterClass *VGPRRC =
2649 SRI->getEquivalentVGPRClass(Src2RC);
2652 MappedReg =
DAG.MRI.createVirtualRegister(VGPRRC);
2653 RedefMap[Src2Reg] = MappedReg;
2658 for (MachineInstr *RD : Src2DefsReplace) {
2660 if (ReachingDefCopyMap[Src2Reg].insert(RD).second) {
2661 MachineInstrBuilder VGPRCopy =
2664 .
addDef(MappedReg, {}, 0)
2665 .addUse(Src2Reg, {}, 0);
2666 DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy);
2671 unsigned UpdateRegion = LastMIToRegion[RD];
2672 DAG.Regions[UpdateRegion].second = VGPRCopy;
2673 LastMIToRegion.
erase(RD);
2680 RewriteRegs.
insert(Src2Reg);
2690 MachineOperand *Dst = &
MI->getOperand(0);
2699 SmallVector<MachineInstr *, 8> DstUseDefsReplace;
2701 findReachingUses(
MI,
DAG.LIS, DstReachingUses);
2703 for (MachineOperand *RUOp : DstReachingUses) {
2704 MachineInstr *UserMI = RUOp->
getParent();
2706 if (
TII->isMAI(*UserMI) && RewriteCandsSet.
contains(UserMI))
2710 if (
find(DstReachingUseCopies, RUOp) == DstReachingUseCopies.
end())
2714 if (
TII->isMAI(*UserMI))
2718 findReachingDefs(*RUOp,
DAG.LIS, DstUsesReachingDefs);
2720 for (SlotIndex RDIndex : DstUsesReachingDefs) {
2721 MachineInstr *RD =
DAG.LIS->getInstructionFromIndex(RDIndex);
2722 if (
TII->isMAI(*RD))
2727 if (
find(DstUseDefsReplace, RD) == DstUseDefsReplace.
end())
2732 if (!DstUseDefsReplace.
empty()) {
2733 auto RI = RedefMap.
find(DstReg);
2734 if (RI != RedefMap.
end()) {
2735 MappedReg = RI->second;
2738 const TargetRegisterClass *DstRC =
DAG.MRI.getRegClass(DstReg);
2739 const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(DstRC);
2742 MappedReg =
DAG.MRI.createVirtualRegister(VGPRRC);
2743 RedefMap[DstReg] = MappedReg;
2748 for (MachineInstr *RD : DstUseDefsReplace) {
2750 if (ReachingDefCopyMap[DstReg].insert(RD).second) {
2751 MachineInstrBuilder VGPRCopy =
2754 .
addDef(MappedReg, {}, 0)
2755 .addUse(DstReg, {}, 0);
2756 DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy);
2760 auto LMI = LastMIToRegion.
find(RD);
2761 if (LMI != LastMIToRegion.
end()) {
2762 unsigned UpdateRegion = LMI->second;
2763 DAG.Regions[UpdateRegion].second = VGPRCopy;
2764 LastMIToRegion.
erase(RD);
2770 DenseSet<MachineOperand *> &DstRegSet =
ReplaceMap[DstReg];
2771 for (MachineOperand *RU : DstReachingUseCopies) {
2772 MachineBasicBlock *RUBlock = RU->getParent()->getParent();
2775 if (RUBlock !=
MI->getParent()) {
2782 const TargetRegisterClass *DstRC =
DAG.MRI.getRegClass(DstReg);
2783 const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(DstRC);
2784 Register NewUseReg =
DAG.MRI.createVirtualRegister(VGPRRC);
2785 MachineInstr *UseInst = RU->getParent();
2786 MachineInstrBuilder VGPRCopy =
2789 .
addDef(NewUseReg, {}, 0)
2790 .addUse(DstReg, {}, 0);
2791 DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy);
2794 RU->setReg(NewUseReg);
2800 RewriteRegs.
insert(DstReg);
2810 std::pair<unsigned, DenseMap<Register, SmallPtrSet<MachineOperand *, 8>>>;
2811 for (RUBType RUBlockEntry : ReachingUseTracker) {
2812 using RUDType = std::pair<Register, SmallPtrSet<MachineOperand *, 8>>;
2813 for (RUDType RUDst : RUBlockEntry.second) {
2814 MachineOperand *OpBegin = *RUDst.second.begin();
2815 SlotIndex InstPt =
DAG.LIS->getInstructionIndex(*OpBegin->
getParent());
2818 for (MachineOperand *User : RUDst.second) {
2819 SlotIndex NewInstPt =
DAG.LIS->getInstructionIndex(*
User->getParent());
2824 const TargetRegisterClass *DstRC =
DAG.MRI.getRegClass(RUDst.first);
2825 const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(DstRC);
2826 Register NewUseReg =
DAG.MRI.createVirtualRegister(VGPRRC);
2827 MachineInstr *UseInst =
DAG.LIS->getInstructionFromIndex(InstPt);
2829 MachineInstrBuilder VGPRCopy =
2832 .
addDef(NewUseReg, {}, 0)
2833 .addUse(RUDst.first, {}, 0);
2834 DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy);
2838 auto FI = FirstMIToRegion.
find(UseInst);
2839 if (FI != FirstMIToRegion.
end()) {
2840 unsigned UpdateRegion = FI->second;
2841 DAG.Regions[UpdateRegion].first = VGPRCopy;
2842 FirstMIToRegion.
erase(UseInst);
2846 for (MachineOperand *User : RUDst.second) {
2847 User->setReg(NewUseReg);
2858 for (std::pair<Register, Register> NewDef : RedefMap) {
2863 for (MachineOperand *ReplaceOp :
ReplaceMap[OldReg])
2864 ReplaceOp->setReg(NewReg);
2868 for (
Register RewriteReg : RewriteRegs) {
2869 Register RegToRewrite = RewriteReg;
2872 auto RI = RedefMap.find(RewriteReg);
2873 if (RI != RedefMap.end())
2874 RegToRewrite = RI->second;
2876 const TargetRegisterClass *CurrRC =
DAG.MRI.getRegClass(RegToRewrite);
2877 const TargetRegisterClass *AGPRRC = SRI->getEquivalentAGPRClass(CurrRC);
2879 DAG.MRI.setRegClass(RegToRewrite, AGPRRC);
2883 DAG.LIS->reanalyze(
DAG.MF);
2885 RegionPressureMap LiveInUpdater(&
DAG,
false);
2886 LiveInUpdater.buildLiveRegMap();
2889 DAG.LiveIns[Region] = LiveInUpdater.getLiveRegsForRegionIdx(Region);
2896unsigned PreRARematStage::getStageTargetOccupancy()
const {
2897 return TargetOcc ? *TargetOcc :
MFI.getMinWavesPerEU();
2900bool PreRARematStage::setObjective() {
2904 unsigned MaxSGPRs =
ST.getMaxNumSGPRs(
F);
2905 unsigned MaxVGPRs =
ST.getMaxNumVGPRs(
F);
2906 bool HasVectorRegisterExcess =
false;
2907 for (
unsigned I = 0,
E =
DAG.Regions.size();
I !=
E; ++
I) {
2908 const GCNRegPressure &
RP =
DAG.Pressure[
I];
2909 GCNRPTarget &
Target = RPTargets.emplace_back(MaxSGPRs, MaxVGPRs,
MF, RP);
2911 TargetRegions.set(
I);
2912 HasVectorRegisterExcess |=
Target.hasVectorRegisterExcess();
2915 if (HasVectorRegisterExcess ||
DAG.MinOccupancy >=
MFI.getMaxWavesPerEU()) {
2918 TargetOcc = std::nullopt;
2922 TargetOcc =
DAG.MinOccupancy + 1;
2923 const unsigned VGPRBlockSize =
MFI.getDynamicVGPRBlockSize();
2924 MaxSGPRs =
ST.getMaxNumSGPRs(*TargetOcc,
false);
2925 MaxVGPRs =
ST.getMaxNumVGPRs(*TargetOcc, VGPRBlockSize);
2926 for (
auto [
I, Target] :
enumerate(RPTargets)) {
2927 Target.setTarget(MaxSGPRs, MaxVGPRs);
2929 TargetRegions.set(
I);
2933 return TargetRegions.any();
2936bool PreRARematStage::ScoredRemat::maybeBeneficial(
2938 for (
unsigned I : TargetRegions.set_bits()) {
2939 if (Live[
I] && RPTargets[
I].isSaveBeneficial(RPSave))
2947 assert(
DAG.MLI &&
"MLI not defined in DAG");
2951 const unsigned NumRegions =
DAG.Regions.size();
2955 for (
unsigned I = 0;
I < NumRegions; ++
I) {
2959 if (BlockFreq && BlockFreq <
MinFreq)
2968 if (
MinFreq >= ScaleFactor * ScaleFactor) {
2970 Freq /= ScaleFactor;
2976void PreRARematStage::ScoredRemat::init(RegisterIdx RegIdx,
2980 this->RegIdx = RegIdx;
2981 const unsigned NumRegions =
DAG.Regions.size();
2982 LiveIn.resize(NumRegions);
2983 LiveOut.resize(NumRegions);
2984 Live.resize(NumRegions);
2985 UnpredictableRPSave.resize(NumRegions);
2989 assert(Reg.Uses.size() == 1 &&
"expected users in single region");
2990 const unsigned UseRegion = Reg.Uses.begin()->first;
2993 for (
unsigned I = 0, E = NumRegions;
I != E; ++
I) {
2994 if (
DAG.LiveIns[
I].contains(DefReg))
2996 if (
DAG.RegionLiveOuts.getLiveRegsForRegionIdx(
I).contains(DefReg))
3001 if (!LiveIn[
I] || !LiveOut[
I] ||
I == UseRegion)
3002 UnpredictableRPSave.set(
I);
3011 int64_t DefOrMin = std::max(Freq.
Regions[Reg.DefRegion], Freq.
MinFreq);
3012 int64_t UseOrMax = Freq.
Regions[UseRegion];
3015 FreqDiff = DefOrMin - UseOrMax;
3018void PreRARematStage::ScoredRemat::update(
const BitVector &TargetRegions,
3020 const FreqInfo &FreqInfo,
3024 for (
unsigned I : TargetRegions.
set_bits()) {
3033 if (!NumRegsBenefit)
3037 RegionImpact += (UnpredictableRPSave[
I] ? 1 : 2) * NumRegsBenefit;
3041 if (UnpredictableRPSave[
I]) {
3046 MaxFreq = std::max(MaxFreq, Freq);
3051void PreRARematStage::ScoredRemat::rematerialize(
3052 Rematerializer &Remater)
const {
3053 const Rematerializer::Reg &
Reg = Remater.getReg(RegIdx);
3054 Rematerializer::DependencyReuseInfo DRI;
3055 for (
const Rematerializer::Reg::Dependency &Dep :
Reg.Dependencies)
3057 unsigned UseRegion =
Reg.Uses.begin()->first;
3058 Remater.rematerializeToRegion(RegIdx, UseRegion, DRI);
3061void PreRARematStage::updateRPTargets(
const BitVector &Regions,
3062 const GCNRegPressure &RPSave) {
3064 RPTargets[
I].saveRP(RPSave);
3065 if (TargetRegions[
I] && RPTargets[
I].satisfied()) {
3067 TargetRegions.reset(
I);
3072bool PreRARematStage::updateAndVerifyRPTargets(
const BitVector &Regions) {
3073 bool TooOptimistic =
false;
3075 GCNRPTarget &
Target = RPTargets[
I];
3081 if (!TargetRegions[
I] && !
Target.satisfied()) {
3083 TooOptimistic =
true;
3084 TargetRegions.set(
I);
3087 return TooOptimistic;
3090void PreRARematStage::removeFromLiveMaps(
Register Reg,
const BitVector &LiveIn,
3091 const BitVector &LiveOut) {
3093 LiveOut.
size() ==
DAG.Regions.size() &&
"region num mismatch");
3097 DAG.RegionLiveOuts.getLiveRegsForRegionIdx(
I).erase(
Reg);
3100void PreRARematStage::addToLiveMaps(
Register Reg, LaneBitmask Mask,
3101 const BitVector &LiveIn,
3102 const BitVector &LiveOut) {
3104 LiveOut.
size() ==
DAG.Regions.size() &&
"region num mismatch");
3105 std::pair<Register, LaneBitmask> LiveReg(
Reg, Mask);
3107 DAG.LiveIns[
I].insert(LiveReg);
3109 DAG.RegionLiveOuts.getLiveRegsForRegionIdx(
I).insert(LiveReg);
3121 if (
DAG.MinOccupancy >= *TargetOcc)
3125 for (
const auto &[
RegionIdx, OrigMIOrder, MaxPressure] : RegionReverts) {
3135 if (AchievedOcc >= *TargetOcc) {
3136 DAG.setTargetOccupancy(AchievedOcc);
3141 DAG.setTargetOccupancy(*TargetOcc - 1);
3146 assert(Rollback &&
"rollbacker should be defined");
3147 Rollback->Listener.rollback(Remater);
3148 for (
const auto &[RegIdx, LiveIn, LiveOut] : Rollback->LiveMapUpdates) {
3149 const Rematerializer::Reg &
Reg = Remater.getReg(RegIdx);
3150 addToLiveMaps(
Reg.getDefReg(),
Reg.Mask, LiveIn, LiveOut);
3153#ifdef EXPENSIVE_CHECKS
3158 for (
unsigned I : RescheduleRegions.set_bits())
3159 DAG.Pressure[
I] =
DAG.getRealRegPressure(
I);
3164void GCNScheduleDAGMILive::setTargetOccupancy(
unsigned TargetOccupancy) {
3165 MinOccupancy = TargetOccupancy;
3166 if (
MFI.getOccupancy() < TargetOccupancy)
3167 MFI.increaseOccupancy(
MF, MinOccupancy);
3169 MFI.limitOccupancy(MinOccupancy);
3186 if (HasIGLPInstrs) {
3187 SavedMutations.clear();
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static SUnit * pickOnlyChoice(SchedBoundary &Zone)
This file implements the BitVector class.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file defines the GCNRegPressure class, which tracks register pressure by bookkeeping the number of S...
static cl::opt< bool > GCNTrackers("amdgpu-use-amdgpu-trackers", cl::Hidden, cl::desc("Use the AMDGPU specific RPTrackers during scheduling"), cl::init(false))
static cl::opt< bool > DisableClusteredLowOccupancy("amdgpu-disable-clustered-low-occupancy-reschedule", cl::Hidden, cl::desc("Disable clustered low occupancy " "rescheduling for ILP scheduling stage."), cl::init(false))
#define REMAT_PREFIX
Makes it easy to filter for this stage's debug output.
static MachineInstr * getLastMIForRegion(MachineBasicBlock::iterator RegionBegin, MachineBasicBlock::iterator RegionEnd)
static bool shouldCheckPending(SchedBoundary &Zone, const TargetSchedModel *SchedModel)
static cl::opt< bool > RelaxedOcc("amdgpu-schedule-relaxed-occupancy", cl::Hidden, cl::desc("Relax occupancy targets for kernels which are memory " "bound (amdgpu-membound-threshold), or " "Wave Limited (amdgpu-limit-wave-threshold)."), cl::init(false))
static cl::opt< bool > DisableUnclusterHighRP("amdgpu-disable-unclustered-high-rp-reschedule", cl::Hidden, cl::desc("Disable unclustered high register pressure " "reduction scheduling stage."), cl::init(false))
static void printScheduleModel(std::set< std::pair< MachineInstr *, unsigned >, EarlierIssuingCycle > &ReadyCycles)
static bool isReachingDefAGPRForm(MachineInstr *RD, const SmallPtrSetImpl< MachineInstr * > &RewriteSet, const DenseSet< Register > &CandSrc2Regs, const SIInstrInfo &TII)
Returns true if reaching def RD will be in AGPR form after the rewrite and so needs no bridge copy: a...
static cl::opt< bool > PrintMaxRPRegUsageAfterScheduler("amdgpu-print-max-reg-pressure-regusage-after-scheduler", cl::Hidden, cl::desc("Print a list of live registers along with their def/uses at the " "point of maximum register pressure after scheduling."), cl::init(false))
static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG)
static cl::opt< bool > DisableRewriteMFMAFormSchedStage("amdgpu-disable-rewrite-mfma-form-sched-stage", cl::Hidden, cl::desc("Disable rewrite mfma rewrite scheduling stage"), cl::init(true))
static bool canUsePressureDiffs(const SUnit &SU)
Checks whether SU can use the cached DAG pressure diffs to compute the current register pressure.
static cl::opt< unsigned > PendingQueueLimit("amdgpu-scheduler-pending-queue-limit", cl::Hidden, cl::desc("Max (Available+Pending) size to inspect pending queue (0 disables)"), cl::init(256))
static cl::opt< bool > PrintMaxRPRegUsageBeforeScheduler("amdgpu-print-max-reg-pressure-regusage-before-scheduler", cl::Hidden, cl::desc("Print a list of live registers along with their def/uses at the " "point of maximum register pressure before scheduling."), cl::init(false))
static cl::opt< unsigned > ScheduleMetricBias("amdgpu-schedule-metric-bias", cl::Hidden, cl::desc("Sets the bias which adds weight to occupancy vs latency. Set it to " "100 to chase the occupancy only."), cl::init(10))
static Register UseReg(const MachineOperand &MO)
const HexagonInstrInfo * TII
static constexpr std::pair< StringLiteral, StringLiteral > ReplaceMap[]
A common definition of LaneBitmask for use in TableGen and CodeGen.
Promote Memory to Register
static constexpr unsigned SM(unsigned Version)
MIR-level target-independent rematerialization helpers.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
Get the first element.
size_t size() const
Get the array size.
bool empty() const
Check if the array is empty.
BitVector & reset()
Reset all bits in the bitvector.
iterator_range< const_set_bits_iterator > set_bits() const
size_type size() const
Returns the number of bits in this bitvector.
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
bool initGCNSchedStage() override
bool shouldRevertScheduling(unsigned WavesAfter) override
bool initGCNRegion() override
iterator find(const_arg_type_t< KeyT > Val)
bool erase(const KeyT &Val)
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Implements a dense probed hash-table based set.
bool reset(const MachineInstr &MI, MachineBasicBlock::const_iterator End, const LiveRegSet *LiveRegs=nullptr)
Reset tracker to the point before the MI filling LiveRegs upon this point using LIS.
GCNRegPressure bumpDownwardPressure(const MachineInstr *MI, const SIRegisterInfo *TRI) const
Mostly copy/paste from CodeGen/RegisterPressure.cpp. Calculate the impact MI will have on CurPressure ...
GCNMaxILPSchedStrategy(const MachineSchedContext *C)
bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override
Apply a set of heuristics to a new candidate.
bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override
GCNMaxMemoryClauseSchedStrategy tries best to clause memory instructions as much as possible.
GCNMaxMemoryClauseSchedStrategy(const MachineSchedContext *C)
GCNMaxOccupancySchedStrategy(const MachineSchedContext *C, bool IsLegacyScheduler=false)
void finalizeSchedule() override
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
void schedule() override
Orders nodes according to selected style.
GCNPostScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S, bool RemoveKillFlags)
Models a register pressure target, allowing to evaluate and track register savings against that targe...
unsigned getNumRegsBenefit(const GCNRegPressure &SaveRP) const
Returns the benefit towards achieving the RP target that saving SaveRP represents,...
GCNRegPressure getPressure() const
virtual bool initGCNRegion()
GCNRegPressure PressureBefore
bool isRegionWithExcessRP() const
void modifyRegionSchedule(unsigned RegionIdx, ArrayRef< MachineInstr * > MIOrder)
Sets the schedule of region RegionIdx to MIOrder.
bool mayCauseSpilling(unsigned WavesAfter)
ScheduleMetrics getScheduleMetrics(const std::vector< SUnit > &InputSchedule)
GCNScheduleDAGMILive & DAG
const GCNSchedStageID StageID
std::vector< MachineInstr * > Unsched
GCNRegPressure PressureAfter
virtual void finalizeGCNRegion()
SIMachineFunctionInfo & MFI
unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle, DenseMap< unsigned, unsigned > &ReadyCycles, const TargetSchedModel &SM)
virtual void finalizeGCNSchedStage()
virtual bool initGCNSchedStage()
virtual bool shouldRevertScheduling(unsigned WavesAfter)
std::vector< std::unique_ptr< ScheduleDAGMutation > > SavedMutations
GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
MachineBasicBlock * CurrentMBB
This is a minimal scheduler strategy.
GCNDownwardRPTracker DownwardTracker
bool useGCNTrackers() const
void getRegisterPressures(bool AtTop, const RegPressureTracker &RPTracker, SUnit *SU, std::vector< unsigned > &Pressure, std::vector< unsigned > &MaxPressure, GCNDownwardRPTracker &DownwardTracker, GCNUpwardRPTracker &UpwardTracker, ScheduleDAGMI *DAG, const SIRegisterInfo *SRI)
GCNSchedStrategy(const MachineSchedContext *C)
SmallVector< GCNSchedStageID, 4 > SchedStages
unsigned SGPRCriticalLimit
std::vector< unsigned > MaxPressure
bool hasNextStage() const
SUnit * pickNodeBidirectional(bool &IsTopNode, bool &PickedPending)
GCNSchedStageID getCurrentStage()
bool tryPendingCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Evaluates instructions in the pending queue using a subset of scheduling heuristics.
SmallVectorImpl< GCNSchedStageID >::iterator CurrentStage
unsigned VGPRCriticalLimit
void schedNode(SUnit *SU, bool IsTopNode) override
Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an instruction and updated scheduled/rem...
std::optional< bool > GCNTrackersOverride
GCNDownwardRPTracker * getDownwardTracker()
std::vector< unsigned > Pressure
void initialize(ScheduleDAGMI *DAG) override
Initialize the strategy after building the DAG for a new region.
GCNUpwardRPTracker UpwardTracker
void printCandidateDecision(const SchedCandidate &Current, const SchedCandidate &Preferred)
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Cand, bool &IsPending, bool IsBottomUp)
unsigned getStructuralStallCycles(SchedBoundary &Zone, SUnit *SU) const
Estimate how many cycles SU must wait due to structural hazards at the current boundary cycle.
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, const SIRegisterInfo *SRI, unsigned SGPRPressure, unsigned VGPRPressure, bool IsBottomUp)
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule, or return NULL.
GCNUpwardRPTracker * getUpwardTracker()
GCNSchedStageID getNextStage() const
void finalizeSchedule() override
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
void schedule() override
Orders nodes according to selected style.
GCNScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S)
void recede(const MachineInstr &MI)
Move to the state of RP just before the MI .
void reset(const MachineInstr &MI)
Resets tracker to the point just after MI (in program order), which can be a debug instruction.
void traceCandidate(const SchedCandidate &Cand)
LLVM_ABI void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone)
Set the CandPolicy for a scheduling zone given the current resources and latencies inside and outsi...
MachineSchedPolicy RegionPolicy
const TargetSchedModel * SchedModel
const MachineSchedContext * Context
const TargetRegisterInfo * TRI
SchedCandidate BotCand
Candidate last picked from Bot boundary.
SchedCandidate TopCand
Candidate last picked from Top boundary.
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Apply a set of heuristics to a new candidate.
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
void schedNode(SUnit *SU, bool IsTopNode) override
Update the scheduler's state after scheduling a node.
GenericScheduler(const MachineSchedContext *C)
bool shouldRevertScheduling(unsigned WavesAfter) override
A live range for subregisters.
LiveInterval - This class represents the liveness of a register, or stack slot.
bool hasSubRanges() const
Returns true if subregister liveness information is available.
iterator_range< subrange_iterator > subranges()
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
SlotIndex getMBBEndIdx(const MachineBasicBlock *mbb) const
Return the last index in the given basic block.
LiveInterval & getInterval(Register Reg)
LLVM_ABI void dump() const
MachineBasicBlock * getMBBFromIndex(SlotIndex index) const
VNInfo * getVNInfoAt(SlotIndex Idx) const
getVNInfoAt - Return the VNInfo that is live at Idx, or NULL.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
succ_iterator succ_begin()
unsigned succ_size() const
iterator_range< pred_iterator > predecessors()
MachineInstrBundleIterator< MachineInstr > iterator
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
LLVM_ABI BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const
getBlockFreq - Return block frequency.
LLVM_ABI BlockFrequency getEntryFreq() const
Divide a block's BlockFrequency::getFrequency() value by this value to obtain the entry block - relat...
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
filtered_mop_range all_uses()
Returns an iterator range over all operands that are (explicit or implicit) register uses.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Register getReg() const
getReg - Returns the register number.
bool shouldRevertScheduling(unsigned WavesAfter) override
bool shouldRevertScheduling(unsigned WavesAfter) override
bool shouldRevertScheduling(unsigned WavesAfter) override
void finalizeGCNRegion() override
bool initGCNRegion() override
bool initGCNSchedStage() override
Capture a change in pressure for a single pressure set.
Simple wrapper around std::function<void(raw_ostream&)>.
Helpers for implementing custom MachineSchedStrategy classes.
Track the current register pressure at some position in the instruction stream, and remember the high...
LLVM_ABI void advance()
Advance across the current instruction.
LLVM_ABI void getDownwardPressure(const MachineInstr *MI, std::vector< unsigned > &PressureResult, std::vector< unsigned > &MaxPressureResult)
Get the pressure of each PSet after traversing this instruction top-down.
const std::vector< unsigned > & getRegSetPressureAtPos() const
Get the register set pressure at the current position, which may be less than the pressure across the...
LLVM_ABI void getUpwardPressure(const MachineInstr *MI, std::vector< unsigned > &PressureResult, std::vector< unsigned > &MaxPressureResult)
Get the pressure of each PSet after traversing this instruction bottom-up.
List of registers defined and used by a machine instruction.
LLVM_ABI void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
LLVM_ABI void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the VReg...
LLVM_ABI void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS)
Use liveness information to find dead defs not marked with a dead flag and move them to the DeadDefs ...
Wrapper class representing virtual and physical registers.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
MIR-level target-independent rematerializer.
bool isIGLPMutationOnly(unsigned Opcode) const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
unsigned getOccupancy() const
unsigned getDynamicVGPRBlockSize() const
unsigned getMinAllowedOccupancy() const
Scheduling unit. This is a node in the scheduling DAG.
bool isInstr() const
Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.
unsigned TopReadyCycle
Cycle relative to start when node is ready.
unsigned NodeNum
Entry # of node in the node vector.
unsigned short Latency
Node latency.
bool isScheduled
True once scheduled.
unsigned ParentClusterIdx
The parent cluster id.
unsigned BotReadyCycle
Cycle relative to end when node is ready.
bool hasReservedResource
Uses a reserved resource.
bool isBottomReady() const
SmallVector< SDep, 4 > Preds
All sunit predecessors.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Each Scheduling boundary is associated with ready queues.
LLVM_ABI void releasePending()
Release pending ready nodes in to the available queue.
LLVM_ABI unsigned getLatencyStallCycles(SUnit *SU)
Get the difference between the given SUnit's ready time and the current cycle.
LLVM_ABI SUnit * pickOnlyChoice()
Call this before applying any other heuristics to the Available queue.
ScheduleHazardRecognizer * HazardRec
LLVM_ABI void bumpCycle(unsigned NextCycle)
Move the boundary of scheduled code by one cycle.
unsigned getCurrMOps() const
Micro-ops issued in the current cycle.
unsigned getCurrCycle() const
Number of cycles to issue the instructions scheduled in this zone.
LLVM_ABI bool checkHazard(SUnit *SU)
Does this SU have a hazard within the current instruction group.
LLVM_ABI std::pair< unsigned, unsigned > getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx, unsigned ReleaseAtCycle, unsigned AcquireAtCycle)
Compute the next cycle at which the given processor resource can be scheduled.
A ScheduleDAG for scheduling lists of MachineInstr.
bool ScheduleSingleMIRegions
True if regions with a single MI should be scheduled.
MachineBasicBlock::iterator RegionEnd
The end of the range to be scheduled.
virtual void finalizeSchedule()
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
virtual void exitRegion()
Called when the scheduler has finished scheduling the current region.
const MachineLoopInfo * MLI
bool RemoveKillFlags
True if the DAG builder should remove kill flags (in preparation for rescheduling).
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
ScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S)
RegPressureTracker RPTracker
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
ScheduleDAGMI(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S, bool RemoveKillFlags)
std::vector< std::unique_ptr< ScheduleDAGMutation > > Mutations
Ordered list of DAG postprocessing steps.
MachineRegisterInfo & MRI
Virtual/real register map.
const TargetInstrInfo * TII
Target instruction information.
MachineFunction & MF
Machine function.
static const unsigned ScaleFactor
unsigned getMetric() const
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
SlotIndex - An opaque wrapper around machine indexes.
static bool isSameInstr(SlotIndex A, SlotIndex B)
isSameInstr - Return true if A and B refer to the same instruction.
static bool isEarlierInstr(SlotIndex A, SlotIndex B)
isEarlierInstr - Return true if A refers to an instruction earlier than B.
SlotIndex getPrevSlot() const
Returns the previous slot in the index list.
SlotIndex getMBBStartIdx(unsigned Num) const
Returns the first index in the given basic block number.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
bool contains(const T &V) const
Check if the SmallSet contains the given element.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
uint8_t getCopyCost() const
Return the cost of copying a value between two registers in this class.
Provide an instruction scheduling machine model to CodeGen passes.
LLVM_ABI bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model.
unsigned getMicroOpBufferSize() const
Number of micro-ops that may be buffered for OOO execution.
bool initGCNSchedStage() override
bool initGCNRegion() override
void finalizeGCNSchedStage() override
bool shouldRevertScheduling(unsigned WavesAfter) override
VNInfo - Value Number Information.
SlotIndex def
The index of the defining instruction.
bool isPHIDef() const
Returns true if this value is defined by a PHI instruction (or was, PHI instructions may have been el...
static LLVM_ABI bool allUsesAvailableAt(const MachineInstr *MI, SlotIndex UseIdx, const LiveIntervals &LIS, const MachineRegisterInfo &MRI, const TargetInstrInfo &TII)
std::pair< iterator, bool > insert(const ValueT &V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getAddressableNumVGPRs(const MCSubtargetInfo &STI, unsigned DynamicVGPRBlockSize)
unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo &STI, unsigned NumVGPRs, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
unsigned getVGPRAllocGranule(const MCSubtargetInfo &STI, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
LLVM_READONLY int32_t getMFMASrcCVDstAGPROp(uint32_t Opcode)
@ C
The default llvm calling convention, compatible with C.
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
NodeAddr< UseNode * > Use
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI int biasPhysReg(const SUnit *SU, bool isTop, bool BiasPRegsExtra=false)
Minimize physical register live ranges.
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
LLVM_ABI unsigned getWeakLeft(const SUnit *SU, bool isTop)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
GCNRegPressure getRegPressure(const MachineRegisterInfo &MRI, Range &&LiveRegs)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
std::unique_ptr< ScheduleDAGMutation > createIGroupLPDAGMutation(AMDGPU::SchedulingPhase Phase)
Phase specifies whether or not this is a reentry into the IGroupLPDAGMutation.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
std::pair< MachineBasicBlock::iterator, MachineBasicBlock::iterator > RegionBoundaries
A region's boundaries i.e.
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason, const TargetRegisterInfo *TRI, const MachineFunction &MF)
@ UnclusteredHighRPReschedule
@ MemoryClauseInitialSchedule
@ ClusteredLowOccupancyReschedule
auto reverse(ContainerTy &&C)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
LLVM_ABI cl::opt< bool > VerifyScheduling
LLVM_ABI bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary &Zone)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
IterT skipDebugInstructionsBackward(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It until it points to a non-debug instruction or to Begin and return the resulting iterator...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
bool isTheSameCluster(unsigned A, unsigned B)
Return whether the input cluster ID's are the same and valid.
DWARFExpression::Operation Op
LLVM_ABI bool tryGreater(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
DenseMap< MachineInstr *, GCNRPTracker::LiveRegSet > getLiveRegMap(Range &&R, bool After, LiveIntervals &LIS)
Creates a map MachineInstr -> LiveRegSet. R - range of iterators on instructions. After - upon entry or...
GCNRPTracker::LiveRegSet getLiveRegsBefore(const MachineInstr &MI, const LiveIntervals &LIS)
LLVM_ABI bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Return true if this heuristic determines order.
LLVM_ABI void dumpMaxRegPressure(MachineFunction &MF, GCNRegPressure::RegKind Kind, LiveIntervals &LIS, const MachineLoopInfo *MLI)
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Implement std::hash so that hash_code can be used in STL containers.
bool operator()(std::pair< MachineInstr *, unsigned > A, std::pair< MachineInstr *, unsigned > B) const
unsigned getArchVGPRNum() const
unsigned getAGPRNum() const
unsigned getSGPRNum() const
Policy for scheduling the next instruction in the candidate's zone.
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of p...
void setBest(SchedCandidate &Best)
void reset(const CandPolicy &NewPolicy)
LLVM_ABI void initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
SchedResourceDelta ResDelta
Status of an instruction's critical resource consumption.
unsigned DemandedResources
constexpr bool any() const
static constexpr LaneBitmask getNone()
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
Execution frequency information required by scoring heuristics.
SmallVector< uint64_t > Regions
Per-region execution frequencies. 0 when unknown.
uint64_t MinFreq
Minimum and maximum observed frequencies.
FreqInfo(MachineFunction &MF, const GCNScheduleDAGMILive &DAG)
PressureChange CriticalMax
PressureChange CurrentMax
DependencyReuseInfo & reuse(RegisterIdx DepIdx)
RegisterIdx RegIdx
The corresponding register's index in the rematerializer.
A rematerializable register defined by a single machine instruction.
MachineInstr * DefMI
Single MI defining the rematerializable register.
SmallDenseMap< unsigned, RegionUsers, 2 > Uses
Uses of the register, mapped by region.
Register getDefReg() const
Returns the rematerializable register from its defining instruction.