46#define DEBUG_TYPE "machine-scheduler"
51 "amdgpu-disable-unclustered-high-rp-reschedule",
cl::Hidden,
52 cl::desc(
"Disable unclustered high register pressure "
53 "reduction scheduling stage."),
57 "amdgpu-disable-clustered-low-occupancy-reschedule",
cl::Hidden,
58 cl::desc(
"Disable clustered low occupancy "
59 "rescheduling for ILP scheduling stage."),
65 "Sets the bias which adds weight to occupancy vs latency. Set it to "
66 "100 to chase the occupancy only."),
71 cl::desc(
"Relax occupancy targets for kernels which are memory "
72 "bound (amdgpu-membound-threshold), or "
73 "Wave Limited (amdgpu-limit-wave-threshold)."),
78 cl::desc(
"Use the AMDGPU specific RPTrackers during scheduling"),
82 "amdgpu-scheduler-pending-queue-limit",
cl::Hidden,
84 "Max (Available+Pending) size to inspect pending queue (0 disables)"),
87#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
88#define DUMP_MAX_REG_PRESSURE
90 "amdgpu-print-max-reg-pressure-regusage-before-scheduler",
cl::Hidden,
91 cl::desc(
"Print a list of live registers along with their def/uses at the "
92 "point of maximum register pressure before scheduling."),
96 "amdgpu-print-max-reg-pressure-regusage-after-scheduler",
cl::Hidden,
97 cl::desc(
"Print a list of live registers along with their def/uses at the "
98 "point of maximum register pressure after scheduling."),
103 "amdgpu-disable-rewrite-mfma-form-sched-stage",
cl::Hidden,
123 Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
125 Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);
147 "VGPRCriticalLimit calculation method.\n");
151 unsigned Addressable =
154 VGPRBudget = std::max(VGPRBudget, Granule);
192 if (!
Op.isReg() ||
Op.isImplicit())
194 if (
Op.getReg().isPhysical() ||
195 (
Op.isDef() &&
Op.getSubReg() != AMDGPU::NoSubRegister))
230 Pressure[AMDGPU::RegisterPressureSets::VGPR_32] =
239 unsigned SGPRPressure,
240 unsigned VGPRPressure,
bool IsBottomUp) {
244 if (!
DAG->isTrackingPressure())
267 Pressure[AMDGPU::RegisterPressureSets::SReg_32] = SGPRPressure;
268 Pressure[AMDGPU::RegisterPressureSets::VGPR_32] = VGPRPressure;
270 for (
const auto &Diff :
DAG->getPressureDiff(SU)) {
276 (IsBottomUp ? Diff.getUnitInc() : -Diff.getUnitInc());
279#ifdef EXPENSIVE_CHECKS
280 std::vector<unsigned> CheckPressure, CheckMaxPressure;
283 if (
Pressure[AMDGPU::RegisterPressureSets::SReg_32] !=
284 CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] ||
285 Pressure[AMDGPU::RegisterPressureSets::VGPR_32] !=
286 CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32]) {
287 errs() <<
"Register Pressure is inaccurate when calculated through "
289 <<
"SGPR got " <<
Pressure[AMDGPU::RegisterPressureSets::SReg_32]
291 << CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] <<
"\n"
292 <<
"VGPR got " <<
Pressure[AMDGPU::RegisterPressureSets::VGPR_32]
294 << CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32] <<
"\n";
300 unsigned NewSGPRPressure =
Pressure[AMDGPU::RegisterPressureSets::SReg_32];
301 unsigned NewVGPRPressure =
Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
311 const unsigned MaxVGPRPressureInc = 16;
312 bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >=
VGPRExcessLimit;
313 bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >=
SGPRExcessLimit;
344 if (SGPRDelta >= 0 || VGPRDelta >= 0) {
346 if (SGPRDelta > VGPRDelta) {
360 bool HasBufferedModel =
379 dbgs() <<
"Prefer:\t\t";
380 DAG->dumpNode(*Preferred.
SU);
384 DAG->dumpNode(*Current.
SU);
387 dbgs() <<
"Reason:\t\t";
401 unsigned SGPRPressure = 0;
402 unsigned VGPRPressure = 0;
404 if (
DAG->isTrackingPressure()) {
406 SGPRPressure =
Pressure[AMDGPU::RegisterPressureSets::SReg_32];
407 VGPRPressure =
Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
412 SGPRPressure =
T->getPressure().getSGPRNum();
413 VGPRPressure =
T->getPressure().getArchVGPRNum();
418 for (
SUnit *SU : AQ) {
422 VGPRPressure, IsBottomUp);
442 for (
SUnit *SU : PQ) {
446 VGPRPressure, IsBottomUp);
466 bool &PickedPending) {
486 bool BotPending =
false;
506 "Last pick result should correspond to re-picking right now");
511 bool TopPending =
false;
531 "Last pick result should correspond to re-picking right now");
541 PickedPending = BotPending && TopPending;
544 if (BotPending || TopPending) {
551 Cand.setBest(TryCand);
556 IsTopNode = Cand.AtTop;
563 if (
DAG->top() ==
DAG->bottom()) {
565 Bot.Available.empty() &&
Bot.Pending.empty() &&
"ReadyQ garbage");
571 PickedPending =
false;
605 if (ReadyCycle > CurrentCycle)
677 if (
DAG->isTrackingPressure() &&
683 if (
DAG->isTrackingPressure() &&
688 bool SameBoundary = Zone !=
nullptr;
712 if (IsLegacyScheduler)
731 if (
DAG->isTrackingPressure() &&
741 bool SameBoundary = Zone !=
nullptr;
776 bool CandIsClusterSucc =
778 bool TryCandIsClusterSucc =
780 if (
tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
785 if (
DAG->isTrackingPressure() &&
791 if (
DAG->isTrackingPressure() &&
837 if (
DAG->isTrackingPressure()) {
853 bool CandIsClusterSucc =
855 bool TryCandIsClusterSucc =
857 if (
tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
866 bool SameBoundary = Zone !=
nullptr;
883 if (TryMayLoad || CandMayLoad) {
884 bool TryLongLatency =
886 bool CandLongLatency =
890 Zone->
isTop() ? CandLongLatency : TryLongLatency, TryCand,
908 if (
DAG->isTrackingPressure() &&
927 !
Rem.IsAcyclicLatencyLimited &&
tryLatency(TryCand, Cand, *Zone))
945 StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy),
946 RegionLiveOuts(this,
true) {
952 LLVM_DEBUG(
dbgs() <<
"Starting occupancy is " << StartingOccupancy <<
".\n");
954 MinOccupancy = std::min(MFI.getMinAllowedOccupancy(), StartingOccupancy);
955 if (MinOccupancy != StartingOccupancy)
956 LLVM_DEBUG(
dbgs() <<
"Allowing Occupancy drops to " << MinOccupancy
961std::unique_ptr<GCNSchedStage>
963 switch (SchedStageID) {
965 return std::make_unique<OccInitialScheduleStage>(SchedStageID, *
this);
967 return std::make_unique<RewriteMFMAFormStage>(SchedStageID, *
this);
969 return std::make_unique<UnclusteredHighRPStage>(SchedStageID, *
this);
971 return std::make_unique<ClusteredLowOccStage>(SchedStageID, *
this);
973 return std::make_unique<PreRARematStage>(SchedStageID, *
this);
975 return std::make_unique<ILPInitialScheduleStage>(SchedStageID, *
this);
977 return std::make_unique<MemoryClauseInitialScheduleStage>(SchedStageID,
991GCNScheduleDAGMILive::getRealRegPressure(
unsigned RegionIdx)
const {
992 if (Regions[RegionIdx].first == Regions[RegionIdx].second)
996 &LiveIns[RegionIdx]);
1002 assert(RegionBegin != RegionEnd &&
"Region must not be empty");
1006void GCNScheduleDAGMILive::computeBlockPressure(
unsigned RegionIdx,
1018 const MachineBasicBlock *OnlySucc =
nullptr;
1021 if (!Candidate->empty() && Candidate->pred_size() == 1) {
1022 SlotIndexes *Ind =
LIS->getSlotIndexes();
1024 OnlySucc = Candidate;
1029 size_t CurRegion = RegionIdx;
1030 for (
size_t E = Regions.size(); CurRegion !=
E; ++CurRegion)
1031 if (Regions[CurRegion].first->getParent() !=
MBB)
1036 auto LiveInIt = MBBLiveIns.find(
MBB);
1037 auto &Rgn = Regions[CurRegion];
1039 if (LiveInIt != MBBLiveIns.end()) {
1040 auto LiveIn = std::move(LiveInIt->second);
1042 MBBLiveIns.erase(LiveInIt);
1045 auto LRS = BBLiveInMap.lookup(NonDbgMI);
1046#ifdef EXPENSIVE_CHECKS
1055 if (Regions[CurRegion].first ==
I || NonDbgMI ==
I) {
1056 LiveIns[CurRegion] =
RPTracker.getLiveRegs();
1060 if (Regions[CurRegion].second ==
I) {
1061 Pressure[CurRegion] =
RPTracker.moveMaxPressure();
1062 if (CurRegion-- == RegionIdx)
1064 auto &Rgn = Regions[CurRegion];
1077 MBBLiveIns[OnlySucc] =
RPTracker.moveLiveRegs();
1082GCNScheduleDAGMILive::getRegionLiveInMap()
const {
1083 assert(!Regions.empty());
1084 std::vector<MachineInstr *> RegionFirstMIs;
1085 RegionFirstMIs.reserve(Regions.size());
1087 RegionFirstMIs.push_back(
1094GCNScheduleDAGMILive::getRegionLiveOutMap()
const {
1095 assert(!Regions.empty());
1096 std::vector<MachineInstr *> RegionLastMIs;
1097 RegionLastMIs.reserve(Regions.size());
1108 IdxToInstruction.clear();
1111 IsLiveOut ? DAG->getRegionLiveOutMap() : DAG->getRegionLiveInMap();
1112 for (
unsigned I = 0;
I < DAG->Regions.size();
I++) {
1113 auto &[RegionBegin, RegionEnd] = DAG->Regions[
I];
1115 if (RegionBegin == RegionEnd)
1119 IdxToInstruction[
I] = RegionKey;
1127 LiveIns.resize(Regions.size());
1128 Pressure.resize(Regions.size());
1129 RegionsWithHighRP.resize(Regions.size());
1130 RegionsWithExcessRP.resize(Regions.size());
1131 RegionsWithIGLPInstrs.resize(Regions.size());
1132 RegionsWithHighRP.reset();
1133 RegionsWithExcessRP.reset();
1134 RegionsWithIGLPInstrs.reset();
1139void GCNScheduleDAGMILive::runSchedStages() {
1140 LLVM_DEBUG(
dbgs() <<
"All regions recorded, starting actual scheduling.\n");
1143 if (!Regions.
empty()) {
1144 BBLiveInMap = getRegionLiveInMap();
1149#ifdef DUMP_MAX_REG_PRESSURE
1159 if (!Stage->initGCNSchedStage())
1162 for (
auto Region : Regions) {
1166 if (!Stage->initGCNRegion()) {
1167 Stage->advanceRegion();
1176 &LiveIns[Stage->getRegionIdx()];
1178 reinterpret_cast<GCNRPTracker *
>(DownwardTracker)
1179 ->reset(
MRI, *RegionLiveIns);
1180 reinterpret_cast<GCNRPTracker *
>(UpwardTracker)
1181 ->reset(
MRI, RegionLiveOuts.getLiveRegsForRegionIdx(
1182 Stage->getRegionIdx()));
1186 Stage->finalizeGCNRegion();
1187 Stage->advanceRegion();
1191 Stage->finalizeGCNSchedStage();
1194#ifdef DUMP_MAX_REG_PRESSURE
1207 OS <<
"Max Occupancy Initial Schedule";
1210 OS <<
"Instruction Rewriting Reschedule";
1213 OS <<
"Unclustered High Register Pressure Reschedule";
1216 OS <<
"Clustered Low Occupancy Reschedule";
1219 OS <<
"Pre-RA Rematerialize";
1222 OS <<
"Max ILP Initial Schedule";
1225 OS <<
"Max memory clause Initial Schedule";
1245void RewriteMFMAFormStage::findReachingDefs(
1259 SmallVector<MachineBasicBlock *, 8> Worklist;
1267 while (!Worklist.
empty()) {
1282 for (MachineBasicBlock *PredMBB : DefMBB->
predecessors()) {
1283 if (Visited.
insert(PredMBB).second)
1289void RewriteMFMAFormStage::findReachingUses(
1293 for (MachineOperand &UseMO :
1296 findReachingDefs(UseMO, LIS, ReachingDefIndexes);
1300 if (
any_of(ReachingDefIndexes, [DefIdx](SlotIndex RDIdx) {
1312 if (!
ST.hasGFX90AInsts() ||
MFI.getMinWavesPerEU() > 1)
1315 RegionsWithExcessArchVGPR.resize(
DAG.Regions.size());
1316 RegionsWithExcessArchVGPR.reset();
1320 RegionsWithExcessArchVGPR[
Region] =
true;
1323 if (RegionsWithExcessArchVGPR.none())
1326 TII =
ST.getInstrInfo();
1327 SRI =
ST.getRegisterInfo();
1329 std::vector<std::pair<MachineInstr *, unsigned>> RewriteCands;
1333 if (!initHeuristics(RewriteCands, CopyForUse, CopyForDef))
1336 int64_t
Cost = getRewriteCost(RewriteCands, CopyForUse, CopyForDef);
1343 return rewrite(RewriteCands);
1353 if (
DAG.RegionsWithHighRP.none() &&
DAG.RegionsWithExcessRP.none())
1360 InitialOccupancy =
DAG.MinOccupancy;
1363 TempTargetOccupancy =
MFI.getMaxWavesPerEU() >
DAG.MinOccupancy
1364 ? InitialOccupancy + 1
1366 IsAnyRegionScheduled =
false;
1367 S.SGPRLimitBias =
S.HighRPSGPRBias;
1368 S.VGPRLimitBias =
S.HighRPVGPRBias;
1372 <<
"Retrying function scheduling without clustering. "
1373 "Aggressively try to reduce register pressure to achieve occupancy "
1374 << TempTargetOccupancy <<
".\n");
1389 if (
DAG.StartingOccupancy <=
DAG.MinOccupancy)
1393 dbgs() <<
"Retrying function scheduling with lowest recorded occupancy "
1394 <<
DAG.MinOccupancy <<
".\n");
1399#define REMAT_PREFIX "[PreRARemat] "
1400#define REMAT_DEBUG(X) LLVM_DEBUG(dbgs() << REMAT_PREFIX; X;)
1402#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1403Printable PreRARematStage::ScoredRemat::print()
const {
1405 OS <<
'(' << MaxFreq <<
", " << FreqDiff <<
", " << RegionImpact <<
')';
1426 const unsigned NumRegions =
DAG.Regions.size();
1427 for (
unsigned I = 0;
I < NumRegions; ++
I) {
1434 RegionBB.push_back(ParentMBB);
1438 auto PrintTargetRegions = [&]() ->
void {
1439 if (TargetRegions.none()) {
1444 for (
unsigned I : TargetRegions.set_bits())
1447 auto PrintRematReg = [&](
const RematReg &Remat) ->
Printable {
1451 bool HasLiveThroughRegion =
false;
1452 OS <<
'[' << Remat.DefRegion <<
" -";
1453 for (
unsigned I = 0;
I < NumRegions; ++
I) {
1454 if (Remat.isUnusedLiveThrough(
I)) {
1455 if (HasLiveThroughRegion) {
1459 HasLiveThroughRegion =
true;
1464 if (HasLiveThroughRegion)
1466 OS <<
"-> " << Remat.UseRegion <<
"] ";
1467 Remat.DefMI->print(OS,
true,
false,
1475 dbgs() <<
"Analyzing ";
1476 MF.getFunction().printAsOperand(
dbgs(),
false);
1479 if (!setObjective()) {
1480 LLVM_DEBUG(
dbgs() <<
"no objective to achieve, occupancy is maximal at "
1481 <<
MFI.getMaxWavesPerEU() <<
'\n');
1486 dbgs() <<
"increase occupancy from " << *TargetOcc - 1 <<
'\n';
1488 dbgs() <<
"reduce spilling (minimum target occupancy is "
1489 <<
MFI.getMinWavesPerEU() <<
")\n";
1491 PrintTargetRegions();
1494 if (!collectRematRegs(MIRegion)) {
1500 dbgs() <<
"Rematerializable registers:\n";
1501 for (
const RematReg &Remat : RematRegs)
1509 dbgs() <<
"unknown ";
1510 dbgs() <<
" | " << *
DAG.Regions[
I].first;
1515 for (RematReg &Remat : RematRegs)
1521 unsigned RoundNum = 0;
1525 assert(!ScoredRemats.empty() &&
"no more remat candidates");
1528 for (ScoredRemat &Remat : ScoredRemats)
1529 Remat.update(TargetRegions, RPTargets, FreqInfo, !TargetOcc);
1533 dbgs() <<
"==== ROUND " << RoundNum++ <<
" ====\n"
1535 <<
"Candidates with non-null score, in rematerialization order:\n";
1536 for (
const ScoredRemat &RematDecision :
reverse(ScoredRemats)) {
1537 if (RematDecision.hasNullScore())
1540 << *RematDecision.Remat->DefMI;
1542 PrintTargetRegions();
1545 RecomputeRP.
reset();
1546 unsigned RematIdx = ScoredRemats.
size();
1551 for (; RematIdx && TargetRegions.any(); --RematIdx) {
1552 const ScoredRemat &Candidate = ScoredRemats[RematIdx - 1];
1556 if (Candidate.hasNullScore()) {
1561 RematReg &Remat = *Candidate.Remat;
1568 if (!Remat.maybeBeneficial(TargetRegions, RPTargets))
1573 Candidate.rematerialize(RecomputeRP, RPTargets,
DAG);
1574 RescheduleRegions |= Remat.Live;
1583 RollbackInfo &Rollback = Rollbacks.emplace_back(&Remat);
1584 Rollback.RematMI = RematMI;
1590 Remat.DefMI->
setDesc(
DAG.TII->get(TargetOpcode::DBG_VALUE));
1592 if (MO.isReg() && MO.readsReg()) {
1593 Rollback.RegMap.insert({Idx, MO.getReg()});
1599 DAG.deleteMI(Remat.DefRegion, Remat.DefMI);
1602 unsetSatisfiedRPTargets(Remat.Live);
1606 if (!TargetRegions.any()) {
1607 dbgs() <<
"** Interrupt round on all targets achieved\n";
1608 }
else if (RematIdx) {
1609 dbgs() <<
"** Interrupt round on stale score for "
1610 << *ScoredRemats[RematIdx - 1].Remat->DefMI;
1612 dbgs() <<
"** Stop on exhausted rematerialization candidates\n";
1617 ScoredRemats.truncate(RematIdx);
1618 }
while ((updateAndVerifyRPTargets(RecomputeRP) || TargetRegions.any()) &&
1619 !ScoredRemats.empty());
1620 if (RescheduleRegions.none())
1626 unsigned DynamicVGPRBlockSize =
MFI.getDynamicVGPRBlockSize();
1627 for (
unsigned I : RescheduleRegions.set_bits()) {
1628 DAG.Pressure[
I] = RPTargets[
I].getCurrentRP();
1630 <<
DAG.Pressure[
I].getOccupancy(
ST, DynamicVGPRBlockSize)
1631 <<
" (" << RPTargets[
I] <<
")\n");
1633 AchievedOcc =
MFI.getMaxWavesPerEU();
1636 std::min(AchievedOcc, RP.getOccupancy(
ST, DynamicVGPRBlockSize));
1640 dbgs() <<
"Retrying function scheduling with new min. occupancy of "
1641 << AchievedOcc <<
" from rematerializing (original was "
1642 <<
DAG.MinOccupancy;
1644 dbgs() <<
", target was " << *TargetOcc;
1648 DAG.setTargetOccupancy(getStageTargetOccupancy());
1659 S.SGPRLimitBias =
S.VGPRLimitBias = 0;
1660 if (
DAG.MinOccupancy > InitialOccupancy) {
1661 assert(IsAnyRegionScheduled);
1663 <<
" stage successfully increased occupancy to "
1664 <<
DAG.MinOccupancy <<
'\n');
1665 }
else if (!IsAnyRegionScheduled) {
1666 assert(
DAG.MinOccupancy == InitialOccupancy);
1668 <<
": No regions scheduled, min occupancy stays at "
1669 <<
DAG.MinOccupancy <<
", MFI occupancy stays at "
1670 <<
MFI.getOccupancy() <<
".\n");
1678 if (
DAG.begin() ==
DAG.end())
1685 unsigned NumRegionInstrs = std::distance(
DAG.begin(),
DAG.end());
1689 if (
DAG.begin() == std::prev(
DAG.end()))
1695 <<
"\n From: " << *
DAG.begin() <<
" To: ";
1697 else dbgs() <<
"End";
1698 dbgs() <<
" RegionInstrs: " << NumRegionInstrs <<
'\n');
1706 for (
auto &
I :
DAG) {
1719 dbgs() <<
"Pressure before scheduling:\nRegion live-ins:"
1721 <<
"Region live-in pressure: "
1725 S.HasHighPressure =
false;
1747 unsigned DynamicVGPRBlockSize =
DAG.MFI.getDynamicVGPRBlockSize();
1750 unsigned CurrentTargetOccupancy =
1751 IsAnyRegionScheduled ?
DAG.MinOccupancy : TempTargetOccupancy;
1753 (CurrentTargetOccupancy <= InitialOccupancy ||
1754 DAG.Pressure[
RegionIdx].getOccupancy(
ST, DynamicVGPRBlockSize) !=
1761 if (!IsAnyRegionScheduled && IsSchedulingThisRegion) {
1762 IsAnyRegionScheduled =
true;
1763 if (
MFI.getMaxWavesPerEU() >
DAG.MinOccupancy)
1764 DAG.setTargetOccupancy(TempTargetOccupancy);
1766 return IsSchedulingThisRegion;
1801 if (
S.HasHighPressure)
1822 if (
DAG.MinOccupancy < *TargetOcc) {
1824 <<
" cannot meet occupancy target, interrupting "
1825 "re-scheduling in all regions\n");
1826 RescheduleRegions.reset();
1837 unsigned DynamicVGPRBlockSize =
DAG.MFI.getDynamicVGPRBlockSize();
1848 unsigned TargetOccupancy = std::min(
1849 S.getTargetOccupancy(),
ST.getOccupancyWithWorkGroupSizes(
MF).second);
1850 unsigned WavesAfter = std::min(
1851 TargetOccupancy,
PressureAfter.getOccupancy(
ST, DynamicVGPRBlockSize));
1852 unsigned WavesBefore = std::min(
1854 LLVM_DEBUG(
dbgs() <<
"Occupancy before scheduling: " << WavesBefore
1855 <<
", after " << WavesAfter <<
".\n");
1861 unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);
1865 if (WavesAfter < WavesBefore && WavesAfter <
DAG.MinOccupancy &&
1866 WavesAfter >=
MFI.getMinAllowedOccupancy()) {
1867 LLVM_DEBUG(
dbgs() <<
"Function is memory bound, allow occupancy drop up to "
1868 <<
MFI.getMinAllowedOccupancy() <<
" waves\n");
1869 NewOccupancy = WavesAfter;
1872 if (NewOccupancy <
DAG.MinOccupancy) {
1873 DAG.MinOccupancy = NewOccupancy;
1874 MFI.limitOccupancy(
DAG.MinOccupancy);
1876 <<
DAG.MinOccupancy <<
".\n");
1880 unsigned MaxVGPRs =
ST.getMaxNumVGPRs(
MF);
1883 unsigned MaxArchVGPRs = std::min(MaxVGPRs,
ST.getAddressableNumArchVGPRs());
1884 unsigned MaxSGPRs =
ST.getMaxNumSGPRs(
MF);
1908 unsigned ReadyCycle = CurrCycle;
1909 for (
auto &
D : SU.
Preds) {
1910 if (
D.isAssignedRegDep()) {
1913 unsigned DefReady = ReadyCycles[
DAG.getSUnit(
DefMI)->NodeNum];
1914 ReadyCycle = std::max(ReadyCycle, DefReady +
Latency);
1917 ReadyCycles[SU.
NodeNum] = ReadyCycle;
1924 std::pair<MachineInstr *, unsigned>
B)
const {
1925 return A.second <
B.second;
1931 if (ReadyCycles.empty())
1933 unsigned BBNum = ReadyCycles.begin()->first->getParent()->getNumber();
1934 dbgs() <<
"\n################## Schedule time ReadyCycles for MBB : " << BBNum
1935 <<
" ##################\n# Cycle #\t\t\tInstruction "
1939 for (
auto &
I : ReadyCycles) {
1940 if (
I.second > IPrev + 1)
1941 dbgs() <<
"****************************** BUBBLE OF " <<
I.second - IPrev
1942 <<
" CYCLES DETECTED ******************************\n\n";
1943 dbgs() <<
"[ " <<
I.second <<
" ] : " << *
I.first <<
"\n";
1956 unsigned SumBubbles = 0;
1958 unsigned CurrCycle = 0;
1959 for (
auto &SU : InputSchedule) {
1960 unsigned ReadyCycle =
1962 SumBubbles += ReadyCycle - CurrCycle;
1964 ReadyCyclesSorted.insert(std::make_pair(SU.getInstr(), ReadyCycle));
1966 CurrCycle = ++ReadyCycle;
1989 unsigned SumBubbles = 0;
1991 unsigned CurrCycle = 0;
1992 for (
auto &
MI :
DAG) {
1996 unsigned ReadyCycle =
1998 SumBubbles += ReadyCycle - CurrCycle;
2000 ReadyCyclesSorted.insert(std::make_pair(SU->
getInstr(), ReadyCycle));
2002 CurrCycle = ++ReadyCycle;
2019 if (WavesAfter <
DAG.MinOccupancy)
2023 if (
DAG.MFI.isDynamicVGPREnabled()) {
2025 &
ST,
DAG.MFI.getDynamicVGPRBlockSize(),
2028 &
ST,
DAG.MFI.getDynamicVGPRBlockSize(),
2030 if (BlocksAfter > BlocksBefore)
2067 <<
"\n\t *** In shouldRevertScheduling ***\n"
2068 <<
" *********** BEFORE UnclusteredHighRPStage ***********\n");
2072 <<
"\n *********** AFTER UnclusteredHighRPStage ***********\n");
2074 unsigned OldMetric = MBefore.
getMetric();
2075 unsigned NewMetric = MAfter.
getMetric();
2076 unsigned WavesBefore = std::min(
2077 S.getTargetOccupancy(),
2084 LLVM_DEBUG(
dbgs() <<
"\tMetric before " << MBefore <<
"\tMetric after "
2085 << MAfter <<
"Profit: " << Profit <<
"\n");
2116 unsigned WavesAfter) {
2123 LLVM_DEBUG(
dbgs() <<
"New pressure will result in more spilling.\n");
2136 "instruction number mismatch");
2137 if (MIOrder.
empty())
2149 if (MII != RegionEnd) {
2151 bool NonDebugReordered =
2152 !
MI->isDebugInstr() &&
2158 if (NonDebugReordered)
2159 DAG.LIS->handleMove(*
MI,
true);
2163 if (
MI->isDebugInstr()) {
2170 Op.setIsUndef(
false);
2173 if (
DAG.ShouldTrackLaneMasks) {
2175 SlotIndex SlotIdx =
DAG.LIS->getInstructionIndex(*MI).getRegSlot();
2190bool RewriteMFMAFormStage::isRewriteCandidate(
MachineInstr *
MI)
const {
2197bool RewriteMFMAFormStage::initHeuristics(
2198 std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,
2206 if (!isRewriteCandidate(&
MI))
2210 assert(ReplacementOp != -1);
2212 RewriteCands.push_back({&
MI,
MI.getOpcode()});
2213 MI.setDesc(
TII->get(ReplacementOp));
2215 MachineOperand *Src2 =
TII->getNamedOperand(
MI, AMDGPU::OpName::src2);
2216 if (Src2->
isReg()) {
2218 findReachingDefs(*Src2,
DAG.LIS, Src2ReachingDefs);
2222 for (SlotIndex RDIdx : Src2ReachingDefs) {
2223 MachineInstr *RD =
DAG.LIS->getInstructionFromIndex(RDIdx);
2224 if (!
TII->isMAI(*RD))
2229 MachineOperand &Dst =
MI.getOperand(0);
2232 findReachingUses(&
MI,
DAG.LIS, DstReachingUses);
2234 for (MachineOperand *RUOp : DstReachingUses) {
2235 if (
TII->isMAI(*RUOp->getParent()))
2241 CopyForUse[RUOp->getParent()->getParent()].insert(RUOp->getReg());
2244 findReachingDefs(*RUOp,
DAG.LIS, DstUsesReachingDefs);
2246 for (SlotIndex RDIndex : DstUsesReachingDefs) {
2247 MachineInstr *RD =
DAG.LIS->getInstructionFromIndex(RDIndex);
2248 if (
TII->isMAI(*RD))
2260 const TargetRegisterClass *VDefRC =
DAG.MRI.getRegClass(Dst.getReg());
2261 const TargetRegisterClass *ADefRC = SRI->getEquivalentAGPRClass(VDefRC);
2262 DAG.MRI.setRegClass(Dst.getReg(), ADefRC);
2263 if (Src2->
isReg()) {
2267 const TargetRegisterClass *VUseRC =
DAG.MRI.getRegClass(Src2->
getReg());
2268 const TargetRegisterClass *AUseRC = SRI->getEquivalentAGPRClass(VUseRC);
2269 DAG.MRI.setRegClass(Src2->
getReg(), AUseRC);
2278int64_t RewriteMFMAFormStage::getRewriteCost(
2279 const std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,
2280 const DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
2281 const SmallPtrSetImpl<MachineInstr *> &CopyForDef) {
2282 MachineBlockFrequencyInfo *MBFI =
DAG.MBFI;
2284 int64_t BestSpillCost = 0;
2288 std::pair<unsigned, unsigned> MaxVectorRegs =
2289 ST.getMaxNumVectorRegs(
MF.getFunction());
2290 unsigned ArchVGPRThreshold = MaxVectorRegs.first;
2291 unsigned AGPRThreshold = MaxVectorRegs.second;
2292 unsigned CombinedThreshold =
ST.getMaxNumVGPRs(
MF);
2295 if (!RegionsWithExcessArchVGPR[Region])
2300 MF, ArchVGPRThreshold, AGPRThreshold, CombinedThreshold);
2308 MF, ArchVGPRThreshold, AGPRThreshold, CombinedThreshold);
2310 uint64_t BlockFreq =
2314 bool RelativeFreqIsDenom = EntryFreq > BlockFreq;
2315 uint64_t RelativeFreq = EntryFreq && BlockFreq
2316 ? (RelativeFreqIsDenom ? EntryFreq / BlockFreq
2317 : BlockFreq / EntryFreq)
2322 int64_t SpillCost = ((int)SpillCostAfter - (int)SpillCostBefore) * 2;
2325 if (RelativeFreqIsDenom)
2326 SpillCost /= (int64_t)RelativeFreq;
2328 SpillCost *= (int64_t)RelativeFreq;
2334 if (SpillCost < BestSpillCost)
2335 BestSpillCost = SpillCost;
2340 Cost = BestSpillCost;
2343 unsigned CopyCost = 0;
2347 for (MachineInstr *
DefMI : CopyForDef) {
2354 const TargetRegisterClass *RC =
DAG.MRI.getRegClass(DefReg);
2359 for (
auto &[UseBlock, UseRegs] : CopyForUse) {
2364 const TargetRegisterClass *RC =
DAG.MRI.getRegClass(
UseReg);
2373 for (
auto &[
MI, OriginalOpcode] : RewriteCands) {
2375 const TargetRegisterClass *ADefRC =
2376 DAG.MRI.getRegClass(
MI->getOperand(0).getReg());
2377 const TargetRegisterClass *VDefRC = SRI->getEquivalentVGPRClass(ADefRC);
2378 DAG.MRI.setRegClass(
MI->getOperand(0).getReg(), VDefRC);
2379 MI->setDesc(
TII->get(OriginalOpcode));
2381 MachineOperand *Src2 =
TII->getNamedOperand(*
MI, AMDGPU::OpName::src2);
2389 const TargetRegisterClass *AUseRC =
DAG.MRI.getRegClass(Src2->
getReg());
2390 const TargetRegisterClass *VUseRC = SRI->getEquivalentVGPRClass(AUseRC);
2391 DAG.MRI.setRegClass(Src2->
getReg(), VUseRC);
2394 return Cost + CopyCost;
2397bool RewriteMFMAFormStage::rewrite(
2398 const std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands) {
2399 DenseMap<MachineInstr *, unsigned> FirstMIToRegion;
2400 DenseMap<MachineInstr *, unsigned> LastMIToRegion;
2408 if (
Entry.second !=
Entry.first->getParent()->end())
2451 DenseSet<Register> RewriteRegs;
2454 DenseMap<Register, Register> RedefMap;
2456 DenseMap<Register, DenseSet<MachineOperand *>>
ReplaceMap;
2458 DenseMap<Register, SmallPtrSet<MachineInstr *, 8>> ReachingDefCopyMap;
2461 DenseMap<unsigned, DenseMap<Register, SmallPtrSet<MachineOperand *, 8>>>
2464 for (
auto &[
MI, OriginalOpcode] : RewriteCands) {
2466 if (ReplacementOp == -1)
2468 MI->setDesc(
TII->get(ReplacementOp));
2471 MachineOperand *Src2 =
TII->getNamedOperand(*
MI, AMDGPU::OpName::src2);
2472 if (Src2->
isReg()) {
2479 findReachingDefs(*Src2,
DAG.LIS, Src2ReachingDefs);
2480 SmallSetVector<MachineInstr *, 8> Src2DefsReplace;
2482 for (SlotIndex RDIndex : Src2ReachingDefs) {
2483 MachineInstr *RD =
DAG.LIS->getInstructionFromIndex(RDIndex);
2484 if (
TII->isMAI(*RD))
2488 Src2DefsReplace.
insert(RD);
2491 if (!Src2DefsReplace.
empty()) {
2493 if (RI != RedefMap.
end()) {
2494 MappedReg = RI->second;
2497 const TargetRegisterClass *Src2RC =
DAG.MRI.getRegClass(Src2Reg);
2498 const TargetRegisterClass *VGPRRC =
2499 SRI->getEquivalentVGPRClass(Src2RC);
2502 MappedReg =
DAG.MRI.createVirtualRegister(VGPRRC);
2503 RedefMap[Src2Reg] = MappedReg;
2508 for (MachineInstr *RD : Src2DefsReplace) {
2510 if (ReachingDefCopyMap[Src2Reg].insert(RD).second) {
2511 MachineInstrBuilder VGPRCopy =
2514 .
addDef(MappedReg, {}, 0)
2515 .addUse(Src2Reg, {}, 0);
2516 DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy);
2521 unsigned UpdateRegion = LastMIToRegion[RD];
2522 DAG.Regions[UpdateRegion].second = VGPRCopy;
2523 LastMIToRegion.
erase(RD);
2530 RewriteRegs.
insert(Src2Reg);
2540 MachineOperand *Dst = &
MI->getOperand(0);
2549 SmallVector<MachineInstr *, 8> DstUseDefsReplace;
2551 findReachingUses(
MI,
DAG.LIS, DstReachingUses);
2553 for (MachineOperand *RUOp : DstReachingUses) {
2554 if (
TII->isMAI(*RUOp->getParent()))
2558 if (
find(DstReachingUseCopies, RUOp) == DstReachingUseCopies.
end())
2561 findReachingDefs(*RUOp,
DAG.LIS, DstUsesReachingDefs);
2563 for (SlotIndex RDIndex : DstUsesReachingDefs) {
2564 MachineInstr *RD =
DAG.LIS->getInstructionFromIndex(RDIndex);
2565 if (
TII->isMAI(*RD))
2570 if (
find(DstUseDefsReplace, RD) == DstUseDefsReplace.
end())
2575 if (!DstUseDefsReplace.
empty()) {
2577 if (RI != RedefMap.
end()) {
2578 MappedReg = RI->second;
2581 const TargetRegisterClass *DstRC =
DAG.MRI.getRegClass(DstReg);
2582 const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(DstRC);
2585 MappedReg =
DAG.MRI.createVirtualRegister(VGPRRC);
2586 RedefMap[DstReg] = MappedReg;
2591 for (MachineInstr *RD : DstUseDefsReplace) {
2593 if (ReachingDefCopyMap[DstReg].insert(RD).second) {
2594 MachineInstrBuilder VGPRCopy =
2597 .
addDef(MappedReg, {}, 0)
2598 .addUse(DstReg, {}, 0);
2599 DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy);
2604 LastMIToRegion.
find(RD);
2605 if (LMI != LastMIToRegion.
end()) {
2606 unsigned UpdateRegion = LMI->second;
2607 DAG.Regions[UpdateRegion].second = VGPRCopy;
2608 LastMIToRegion.
erase(RD);
2614 DenseSet<MachineOperand *> &DstRegSet =
ReplaceMap[DstReg];
2615 for (MachineOperand *RU : DstReachingUseCopies) {
2616 MachineBasicBlock *RUBlock = RU->getParent()->getParent();
2619 if (RUBlock !=
MI->getParent()) {
2626 const TargetRegisterClass *DstRC =
DAG.MRI.getRegClass(DstReg);
2627 const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(DstRC);
2628 Register NewUseReg =
DAG.MRI.createVirtualRegister(VGPRRC);
2629 MachineInstr *UseInst = RU->getParent();
2630 MachineInstrBuilder VGPRCopy =
2633 .
addDef(NewUseReg, {}, 0)
2634 .addUse(DstReg, {}, 0);
2635 DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy);
2638 RU->setReg(NewUseReg);
2644 RewriteRegs.
insert(DstReg);
2654 std::pair<unsigned, DenseMap<Register, SmallPtrSet<MachineOperand *, 8>>>;
2655 for (RUBType RUBlockEntry : ReachingUseTracker) {
2656 using RUDType = std::pair<Register, SmallPtrSet<MachineOperand *, 8>>;
2657 for (RUDType RUDst : RUBlockEntry.second) {
2658 MachineOperand *OpBegin = *RUDst.second.begin();
2659 SlotIndex InstPt =
DAG.LIS->getInstructionIndex(*OpBegin->
getParent());
2662 for (MachineOperand *User : RUDst.second) {
2663 SlotIndex NewInstPt =
DAG.LIS->getInstructionIndex(*
User->getParent());
2668 const TargetRegisterClass *DstRC =
DAG.MRI.getRegClass(RUDst.first);
2669 const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(DstRC);
2670 Register NewUseReg =
DAG.MRI.createVirtualRegister(VGPRRC);
2671 MachineInstr *UseInst =
DAG.LIS->getInstructionFromIndex(InstPt);
2673 MachineInstrBuilder VGPRCopy =
2676 .
addDef(NewUseReg, {}, 0)
2677 .addUse(RUDst.first, {}, 0);
2678 DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy);
2683 FirstMIToRegion.
find(UseInst);
2684 if (FI != FirstMIToRegion.
end()) {
2685 unsigned UpdateRegion = FI->second;
2686 DAG.Regions[UpdateRegion].first = VGPRCopy;
2687 FirstMIToRegion.
erase(UseInst);
2691 for (MachineOperand *User : RUDst.second) {
2692 User->setReg(NewUseReg);
2703 for (std::pair<Register, Register> NewDef : RedefMap) {
2708 for (MachineOperand *ReplaceOp :
ReplaceMap[OldReg])
2709 ReplaceOp->setReg(NewReg);
2713 for (
Register RewriteReg : RewriteRegs) {
2714 Register RegToRewrite = RewriteReg;
2718 if (RI != RedefMap.end())
2719 RegToRewrite = RI->second;
2721 const TargetRegisterClass *CurrRC =
DAG.MRI.getRegClass(RegToRewrite);
2722 const TargetRegisterClass *AGPRRC = SRI->getEquivalentAGPRClass(CurrRC);
2724 DAG.MRI.setRegClass(RegToRewrite, AGPRRC);
2728 DAG.LIS->reanalyze(
DAG.MF);
2730 RegionPressureMap LiveInUpdater(&
DAG,
false);
2731 LiveInUpdater.buildLiveRegMap();
2734 DAG.LiveIns[Region] = LiveInUpdater.getLiveRegsForRegionIdx(Region);
2741unsigned PreRARematStage::getStageTargetOccupancy()
const {
2742 return TargetOcc ? *TargetOcc :
MFI.getMinWavesPerEU();
2745bool PreRARematStage::setObjective() {
2749 unsigned MaxSGPRs =
ST.getMaxNumSGPRs(
F);
2750 unsigned MaxVGPRs =
ST.getMaxNumVGPRs(
F);
2751 bool HasVectorRegisterExcess =
false;
2752 for (
unsigned I = 0,
E =
DAG.Regions.size();
I !=
E; ++
I) {
2753 const GCNRegPressure &
RP =
DAG.Pressure[
I];
2754 GCNRPTarget &
Target = RPTargets.emplace_back(MaxSGPRs, MaxVGPRs,
MF, RP);
2756 TargetRegions.set(
I);
2757 HasVectorRegisterExcess |=
Target.hasVectorRegisterExcess();
2760 if (HasVectorRegisterExcess ||
DAG.MinOccupancy >=
MFI.getMaxWavesPerEU()) {
2763 TargetOcc = std::nullopt;
2767 TargetOcc =
DAG.MinOccupancy + 1;
2768 const unsigned VGPRBlockSize =
MFI.getDynamicVGPRBlockSize();
2769 MaxSGPRs =
ST.getMaxNumSGPRs(*TargetOcc,
false);
2770 MaxVGPRs =
ST.getMaxNumVGPRs(*TargetOcc, VGPRBlockSize);
2771 for (
auto [
I, Target] :
enumerate(RPTargets)) {
2772 Target.setTarget(MaxSGPRs, MaxVGPRs);
2774 TargetRegions.set(
I);
2778 return TargetRegions.any();
2781bool PreRARematStage::collectRematRegs(
2782 const DenseMap<MachineInstr *, unsigned> &MIRegion) {
2785 DAG.RegionLiveOuts.buildLiveRegMap();
2789 SmallSet<Register, 4> MarkedRegs;
2790 auto IsMarkedForRemat = [&MarkedRegs](
const MachineOperand &MO) ->
bool {
2791 return MO.isReg() && MarkedRegs.
contains(MO.getReg());
2795 for (
unsigned I = 0,
E =
DAG.Regions.size();
I !=
E; ++
I) {
2797 for (
auto MI = Bounds.first;
MI != Bounds.second; ++
MI) {
2800 if (!isReMaterializable(
DefMI))
2813 MachineInstr *
UseMI =
DAG.MRI.getOneNonDBGUser(
Reg);
2817 if (UseRegion == MIRegion.
end() || UseRegion->second ==
I)
2828 if (IsMarkedForRemat(UseMO) ||
2835 SlotIndex UseIdx =
DAG.LIS->getInstructionIndex(*UseMI).getRegSlot(
true);
2846 return !RematRegs.empty();
2849PreRARematStage::RematReg::RematReg(
2850 MachineInstr *
DefMI, MachineInstr *
UseMI, GCNScheduleDAGMILive &
DAG,
2851 const DenseMap<MachineInstr *, unsigned> &MIRegion)
2854 DefRegion(MIRegion.at(
DefMI)), UseRegion(MIRegion.at(
UseMI)) {
2858 for (
unsigned I = 0,
E =
DAG.Regions.size();
I !=
E; ++
I) {
2859 auto LiveInIt =
DAG.LiveIns[
I].find(
Reg);
2860 if (LiveInIt !=
DAG.LiveIns[
I].end())
2862 const auto &LiveOuts =
DAG.RegionLiveOuts.getLiveRegsForRegionIdx(
I);
2863 if (
auto LiveOutIt = LiveOuts.find(
Reg); LiveOutIt != LiveOuts.end())
2868 Mask =
DAG.RegionLiveOuts.getLiveRegsForRegionIdx(DefRegion).at(
Reg);
2871bool PreRARematStage::RematReg::maybeBeneficial(
2874 for (
unsigned I : TargetRegions.
set_bits()) {
2875 if (Live[
I] && RPTargets[
I].isSaveBeneficial(
Reg))
2881void PreRARematStage::RematReg::insertMI(
unsigned RegionIdx,
2882 MachineInstr *RematMI,
2883 GCNScheduleDAGMILive &
DAG)
const {
2886 Bounds.first = RematMI;
2887 DAG.LIS->InsertMachineInstrInMaps(*RematMI);
2893 assert(
DAG.MLI &&
"MLI not defined in DAG");
2897 const unsigned NumRegions =
DAG.Regions.size();
2901 for (
unsigned I = 0;
I < NumRegions; ++
I) {
2905 if (BlockFreq && BlockFreq <
MinFreq)
2914 if (
MinFreq >= ScaleFactor * ScaleFactor) {
2916 Freq /= ScaleFactor;
2922PreRARematStage::ScoredRemat::ScoredRemat(RematReg *Remat,
const FreqInfo &Freq,
2924 : Remat(Remat), FreqDiff(getFreqDiff(Freq)) {
2928int64_t PreRARematStage::ScoredRemat::getFreqDiff(
const FreqInfo &Freq)
const {
2936 int64_t DefOrMin = std::max(Freq.Regions[Remat->DefRegion], Freq.MinFreq);
2937 int64_t UseOrMax = Freq.Regions[Remat->UseRegion];
2939 UseOrMax = Freq.MaxFreq;
2940 return DefOrMin - UseOrMax;
2943void PreRARematStage::ScoredRemat::update(
const BitVector &TargetRegions,
2945 const FreqInfo &FreqInfo,
2949 for (
unsigned I : TargetRegions.
set_bits()) {
2950 if (!Remat->Live[
I])
2958 if (!NumRegsBenefit)
2961 bool UnusedLT = Remat->isUnusedLiveThrough(
I);
2964 RegionImpact += (UnusedLT ? 2 : 1) * NumRegsBenefit;
2973 MaxFreq = std::max(MaxFreq, Freq);
2978MachineInstr *PreRARematStage::ScoredRemat::rematerialize(
2979 BitVector &RecomputeRP, SmallVectorImpl<GCNRPTarget> &RPTargets,
2980 GCNScheduleDAGMILive &
DAG)
const {
2982 MachineInstr &
DefMI = *Remat->DefMI;
2988 TII->reMaterialize(*InsertPos->getParent(), InsertPos, NewReg, 0,
DefMI);
2989 MachineInstr *RematMI = &*std::prev(InsertPos);
2990 Remat->UseMI->substituteRegister(
Reg, NewReg, 0, *
DAG.
TRI);
2991 Remat->insertMI(Remat->UseRegion, RematMI,
DAG);
2993#ifdef EXPENSIVE_CHECKS
2996 for (MachineOperand &MO :
DefMI.operands()) {
2997 if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
3009 LaneBitmask LiveInMask =
DAG.LiveIns[Remat->UseRegion].at(
UseReg);
3010 LaneBitmask UncoveredLanes = LM & ~(LiveInMask & LM);
3014 if (UncoveredLanes.
any()) {
3016 for (LiveInterval::SubRange &SR : LI.
subranges())
3017 assert((SR.LaneMask & UncoveredLanes).none());
3026 for (
unsigned I : Remat->Live.set_bits()) {
3027 RPTargets[
I].saveRP(RPSave);
3030 if (!Remat->isUnusedLiveThrough(
I))
3037void PreRARematStage::commitRematerializations()
const {
3039 for (
const RollbackInfo &Rollback : Rollbacks)
3040 DAG.deleteMI(Rollback.Remat->DefRegion, Rollback.Remat->DefMI);
3043void PreRARematStage::unsetSatisfiedRPTargets(
const BitVector &Regions) {
3045 if (TargetRegions[
I] && RPTargets[
I].satisfied()) {
3052bool PreRARematStage::updateAndVerifyRPTargets(
const BitVector &Regions) {
3053 bool TooOptimistic =
false;
3055 GCNRPTarget &
Target = RPTargets[
I];
3061 if (!TargetRegions[
I] && !
Target.satisfied()) {
3063 TooOptimistic =
true;
3064 TargetRegions.
set(
I);
3067 return TooOptimistic;
3071bool PreRARematStage::isReMaterializable(
const MachineInstr &
MI) {
3075 for (
const MachineOperand &MO :
MI.all_uses()) {
3078 if (MO.getReg().isPhysical()) {
3097 if (
DAG.MinOccupancy >= *TargetOcc) {
3098 commitRematerializations();
3105 const bool ShouldRollbackRemats = AchievedOcc < *TargetOcc;
3110 if (ShouldRollbackRemats) {
3111 for (
const RollbackInfo &Rollback : Rollbacks) {
3112 const auto &[Remat, RematMI, RegMap] = Rollback;
3113 Remat->DefMI->setDesc(
DAG.
TII->
get(RematMI->getOpcode()));
3114 for (
const auto &[MOIdx,
Reg] : RegMap)
3115 Remat->DefMI->getOperand(MOIdx).setReg(
Reg);
3120 for (
const auto &[
RegionIdx, OrigMIOrder, MaxPressure] : RegionReverts) {
3127 if (!ShouldRollbackRemats) {
3128 commitRematerializations();
3129 DAG.setTargetOccupancy(AchievedOcc);
3134 DAG.setTargetOccupancy(*TargetOcc - 1);
3139 BitVector RecomputeRP(
DAG.Regions.
size());
3140 DenseSet<Register> RecomputeLI;
3141 for (
const RollbackInfo &Rollback : Rollbacks) {
3142 const auto &[Remat, RematMI, RegMap] = Rollback;
3147 Register OriginalReg = Remat->DefMI->getOperand(0).getReg();
3148 Remat->UseMI->substituteRegister(
Reg, OriginalReg, 0, *
DAG.
TRI);
3150 <<
"] Deleting rematerialization " << *RematMI);
3151 DAG.deleteMI(Remat->UseRegion, RematMI);
3155 std::pair<Register, LaneBitmask> LiveReg(OriginalReg, Remat->Mask);
3156 for (
unsigned I : Remat->LiveIn.set_bits())
3157 DAG.LiveIns[
I].insert(LiveReg);
3158 for (
unsigned I : Remat->LiveOut.set_bits())
3161 RecomputeRP |= Rollback.Remat->Live;
3164 for (MachineOperand &MO : Rollback.Remat->DefMI->operands()) {
3165 if (MO.isReg() && MO.getReg().isVirtual())
3166 RecomputeLI.
insert(MO.getReg());
3173#ifdef EXPENSIVE_CHECKS
3178 for (
unsigned I : RecomputeRP.
set_bits())
3179 DAG.Pressure[
I] =
DAG.getRealRegPressure(
I);
3184void GCNScheduleDAGMILive::deleteMI(
unsigned RegionIdx, MachineInstr *
MI) {
3191 MI->eraseFromParent();
3194void GCNScheduleDAGMILive::setTargetOccupancy(
unsigned TargetOccupancy) {
3195 MinOccupancy = TargetOccupancy;
3216 if (HasIGLPInstrs) {
3217 SavedMutations.clear();
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static SUnit * pickOnlyChoice(SchedBoundary &Zone)
This file implements the BitVector class.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file defines the GCNRegPressure class, which tracks register pressure by bookkeeping number of S...
static cl::opt< bool > GCNTrackers("amdgpu-use-amdgpu-trackers", cl::Hidden, cl::desc("Use the AMDGPU specific RPTrackers during scheduling"), cl::init(false))
static cl::opt< bool > DisableClusteredLowOccupancy("amdgpu-disable-clustered-low-occupancy-reschedule", cl::Hidden, cl::desc("Disable clustered low occupancy " "rescheduling for ILP scheduling stage."), cl::init(false))
#define REMAT_PREFIX
Makes it easy to filter for this stage's debug output.
static MachineInstr * getLastMIForRegion(MachineBasicBlock::iterator RegionBegin, MachineBasicBlock::iterator RegionEnd)
static bool shouldCheckPending(SchedBoundary &Zone, const TargetSchedModel *SchedModel)
static cl::opt< bool > RelaxedOcc("amdgpu-schedule-relaxed-occupancy", cl::Hidden, cl::desc("Relax occupancy targets for kernels which are memory " "bound (amdgpu-membound-threshold), or " "Wave Limited (amdgpu-limit-wave-threshold)."), cl::init(false))
static cl::opt< bool > DisableUnclusterHighRP("amdgpu-disable-unclustered-high-rp-reschedule", cl::Hidden, cl::desc("Disable unclustered high register pressure " "reduction scheduling stage."), cl::init(false))
static void printScheduleModel(std::set< std::pair< MachineInstr *, unsigned >, EarlierIssuingCycle > &ReadyCycles)
static cl::opt< bool > PrintMaxRPRegUsageAfterScheduler("amdgpu-print-max-reg-pressure-regusage-after-scheduler", cl::Hidden, cl::desc("Print a list of live registers along with their def/uses at the " "point of maximum register pressure after scheduling."), cl::init(false))
static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG)
static cl::opt< bool > DisableRewriteMFMAFormSchedStage("amdgpu-disable-rewrite-mfma-form-sched-stage", cl::Hidden, cl::desc("Disable rewrite mfma rewrite scheduling stage"), cl::init(true))
static bool canUsePressureDiffs(const SUnit &SU)
Checks whether SU can use the cached DAG pressure diffs to compute the current register pressure.
static cl::opt< unsigned > PendingQueueLimit("amdgpu-scheduler-pending-queue-limit", cl::Hidden, cl::desc("Max (Available+Pending) size to inspect pending queue (0 disables)"), cl::init(256))
static cl::opt< bool > PrintMaxRPRegUsageBeforeScheduler("amdgpu-print-max-reg-pressure-regusage-before-scheduler", cl::Hidden, cl::desc("Print a list of live registers along with their def/uses at the " "point of maximum register pressure before scheduling."), cl::init(false))
static cl::opt< unsigned > ScheduleMetricBias("amdgpu-schedule-metric-bias", cl::Hidden, cl::desc("Sets the bias which adds weight to occupancy vs latency. Set it to " "100 to chase the occupancy only."), cl::init(10))
static Register UseReg(const MachineOperand &MO)
const HexagonInstrInfo * TII
static constexpr std::pair< StringLiteral, StringLiteral > ReplaceMap[]
A common definition of LaneBitmask for use in TableGen and CodeGen.
Promote Memory to Register
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static constexpr unsigned SM(unsigned Version)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
iterator_range< const_set_bits_iterator > set_bits() const
size_type size() const
size - Returns the number of bits in this bitvector.
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
bool initGCNSchedStage() override
bool shouldRevertScheduling(unsigned WavesAfter) override
bool initGCNRegion() override
iterator find(const_arg_type_t< KeyT > Val)
bool erase(const KeyT &Val)
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
GCNRegPressure bumpDownwardPressure(const MachineInstr *MI, const SIRegisterInfo *TRI) const
Mostly copy/paste from CodeGen/RegisterPressure.cpp. Calculate the impact MI will have on CurPressure ...
GCNMaxILPSchedStrategy(const MachineSchedContext *C)
bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override
Apply a set of heuristics to a new candidate.
bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override
GCNMaxMemoryClauseSchedStrategy tries best to clause memory instructions as much as possible.
GCNMaxMemoryClauseSchedStrategy(const MachineSchedContext *C)
GCNMaxOccupancySchedStrategy(const MachineSchedContext *C, bool IsLegacyScheduler=false)
void finalizeSchedule() override
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
void schedule() override
Orders nodes according to selected style.
GCNPostScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S, bool RemoveKillFlags)
Models a register pressure target, allowing to evaluate and track register savings against that targe...
unsigned getNumRegsBenefit(const GCNRegPressure &SaveRP) const
Returns the benefit towards achieving the RP target that saving SaveRP represents,...
GCNRegPressure getPressure() const
DenseMap< unsigned, LaneBitmask > LiveRegSet
virtual bool initGCNRegion()
GCNRegPressure PressureBefore
bool isRegionWithExcessRP() const
bool mayCauseSpilling(unsigned WavesAfter)
ScheduleMetrics getScheduleMetrics(const std::vector< SUnit > &InputSchedule)
GCNScheduleDAGMILive & DAG
const GCNSchedStageID StageID
std::vector< MachineInstr * > Unsched
GCNRegPressure PressureAfter
virtual void finalizeGCNRegion()
SIMachineFunctionInfo & MFI
unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle, DenseMap< unsigned, unsigned > &ReadyCycles, const TargetSchedModel &SM)
virtual void finalizeGCNSchedStage()
virtual bool initGCNSchedStage()
virtual bool shouldRevertScheduling(unsigned WavesAfter)
std::vector< std::unique_ptr< ScheduleDAGMutation > > SavedMutations
GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
void modifyRegionSchedule(unsigned RegionIdx, MachineBasicBlock *MBB, ArrayRef< MachineInstr * > MIOrder)
Sets the schedule of region RegionIdx in block MBB to MIOrder.
MachineBasicBlock * CurrentMBB
This is a minimal scheduler strategy.
GCNDownwardRPTracker DownwardTracker
bool useGCNTrackers() const
void getRegisterPressures(bool AtTop, const RegPressureTracker &RPTracker, SUnit *SU, std::vector< unsigned > &Pressure, std::vector< unsigned > &MaxPressure, GCNDownwardRPTracker &DownwardTracker, GCNUpwardRPTracker &UpwardTracker, ScheduleDAGMI *DAG, const SIRegisterInfo *SRI)
GCNSchedStrategy(const MachineSchedContext *C)
SmallVector< GCNSchedStageID, 4 > SchedStages
unsigned SGPRCriticalLimit
std::vector< unsigned > MaxPressure
bool hasNextStage() const
SUnit * pickNodeBidirectional(bool &IsTopNode, bool &PickedPending)
GCNSchedStageID getCurrentStage()
bool tryPendingCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Evaluates instructions in the pending queue using a subset of scheduling heuristics.
SmallVectorImpl< GCNSchedStageID >::iterator CurrentStage
unsigned VGPRCriticalLimit
void schedNode(SUnit *SU, bool IsTopNode) override
Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an instruction and updated scheduled/rem...
std::optional< bool > GCNTrackersOverride
GCNDownwardRPTracker * getDownwardTracker()
std::vector< unsigned > Pressure
void initialize(ScheduleDAGMI *DAG) override
Initialize the strategy after building the DAG for a new region.
GCNUpwardRPTracker UpwardTracker
void printCandidateDecision(const SchedCandidate &Current, const SchedCandidate &Preferred)
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Cand, bool &IsPending, bool IsBottomUp)
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, const SIRegisterInfo *SRI, unsigned SGPRPressure, unsigned VGPRPressure, bool IsBottomUp)
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule, or return NULL.
GCNUpwardRPTracker * getUpwardTracker()
GCNSchedStageID getNextStage() const
void finalizeSchedule() override
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
void schedule() override
Orders nodes according to selected style.
GCNScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S)
void recede(const MachineInstr &MI)
Move to the state of RP just before the MI .
void traceCandidate(const SchedCandidate &Cand)
LLVM_ABI void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone)
Set the CandPolicy given a scheduling zone given the current resources and latencies inside and outsi...
MachineSchedPolicy RegionPolicy
const TargetSchedModel * SchedModel
const MachineSchedContext * Context
const TargetRegisterInfo * TRI
SchedCandidate BotCand
Candidate last picked from Bot boundary.
SchedCandidate TopCand
Candidate last picked from Top boundary.
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Apply a set of heuristics to a new candidate.
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
void schedNode(SUnit *SU, bool IsTopNode) override
Update the scheduler's state after scheduling a node.
GenericScheduler(const MachineSchedContext *C)
bool shouldRevertScheduling(unsigned WavesAfter) override
LiveInterval - This class represents the liveness of a register, or stack slot.
bool hasSubRanges() const
Returns true if subregister liveness information is available.
iterator_range< subrange_iterator > subranges()
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
void RemoveMachineInstrFromMaps(MachineInstr &MI)
SlotIndex getMBBEndIdx(const MachineBasicBlock *mbb) const
Return the last index in the given basic block.
LiveInterval & getInterval(Register Reg)
void removeInterval(Register Reg)
Interval removal.
LLVM_ABI void dump() const
MachineBasicBlock * getMBBFromIndex(SlotIndex index) const
LiveInterval & createAndComputeVirtRegInterval(Register Reg)
VNInfo * getVNInfoAt(SlotIndex Idx) const
getVNInfoAt - Return the VNInfo that is live at Idx, or NULL.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
succ_iterator succ_begin()
unsigned succ_size() const
iterator_range< pred_iterator > predecessors()
MachineInstrBundleIterator< MachineInstr > iterator
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
LLVM_ABI BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const
getBlockFreq - Return block frequency.
LLVM_ABI BlockFrequency getEntryFreq() const
Divide a block's BlockFrequency::getFrequency() value by this value to obtain the entry block - relat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
bool verify(Pass *p=nullptr, const char *Banner=nullptr, raw_ostream *OS=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier, useful for debugger use.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Register getReg() const
getReg - Returns the register number.
LLVM_ABI LaneBitmask getMaxLaneMaskForVReg(Register Reg) const
Returns a mask covering all bits that can appear in lane masks of subregisters of the virtual registe...
LLVM_ABI bool isConstantPhysReg(MCRegister PhysReg) const
Returns true if PhysReg is unallocatable and constant throughout the function.
LLVM_ABI Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
bool shouldRevertScheduling(unsigned WavesAfter) override
bool shouldRevertScheduling(unsigned WavesAfter) override
bool shouldRevertScheduling(unsigned WavesAfter) override
void finalizeGCNRegion() override
bool initGCNRegion() override
bool initGCNSchedStage() override
Capture a change in pressure for a single pressure set.
Simple wrapper around std::function<void(raw_ostream&)>.
Helpers for implementing custom MachineSchedStrategy classes.
Track the current register pressure at some position in the instruction stream, and remember the high...
LLVM_ABI void advance()
Advance across the current instruction.
LLVM_ABI void getDownwardPressure(const MachineInstr *MI, std::vector< unsigned > &PressureResult, std::vector< unsigned > &MaxPressureResult)
Get the pressure of each PSet after traversing this instruction top-down.
const std::vector< unsigned > & getRegSetPressureAtPos() const
Get the register set pressure at the current position, which may be less than the pressure across the...
LLVM_ABI void getUpwardPressure(const MachineInstr *MI, std::vector< unsigned > &PressureResult, std::vector< unsigned > &MaxPressureResult)
Get the pressure of each PSet after traversing this instruction bottom-up.
RegionT * getParent() const
Get the parent of the Region.
GCNRPTracker::LiveRegSet & getLiveRegsForRegionIdx(unsigned RegionIdx)
List of registers defined and used by a machine instruction.
LLVM_ABI void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
LLVM_ABI void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the VReg...
LLVM_ABI void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS)
Use liveness information to find dead defs not marked with a dead flag and move them to the DeadDefs ...
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
bool isIGLPMutationOnly(unsigned Opcode) const
static bool isMAI(const MCInstrDesc &Desc)
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
unsigned getOccupancy() const
void increaseOccupancy(const MachineFunction &MF, unsigned Limit)
unsigned getDynamicVGPRBlockSize() const
unsigned getMinAllowedOccupancy() const
void limitOccupancy(const MachineFunction &MF)
Scheduling unit. This is a node in the scheduling DAG.
bool isInstr() const
Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.
unsigned TopReadyCycle
Cycle relative to start when node is ready.
unsigned NodeNum
Entry # of node in the node vector.
unsigned short Latency
Node latency.
bool isScheduled
True once scheduled.
unsigned ParentClusterIdx
The parent cluster id.
unsigned BotReadyCycle
Cycle relative to end when node is ready.
bool isBottomReady() const
SmallVector< SDep, 4 > Preds
All sunit predecessors.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Each Scheduling boundary is associated with ready queues.
LLVM_ABI void releasePending()
Release pending ready nodes in to the available queue.
LLVM_ABI unsigned getLatencyStallCycles(SUnit *SU)
Get the difference between the given SUnit's ready time and the current cycle.
LLVM_ABI SUnit * pickOnlyChoice()
Call this before applying any other heuristics to the Available queue.
LLVM_ABI void bumpCycle(unsigned NextCycle)
Move the boundary of scheduled code by one cycle.
unsigned getCurrMOps() const
Micro-ops issued in the current cycle.
unsigned getCurrCycle() const
Number of cycles to issue the instructions scheduled in this zone.
LLVM_ABI bool checkHazard(SUnit *SU)
Does this SU have a hazard within the current instruction group.
A ScheduleDAG for scheduling lists of MachineInstr.
bool ScheduleSingleMIRegions
True if regions with a single MI should be scheduled.
MachineBasicBlock::iterator RegionEnd
The end of the range to be scheduled.
virtual void finalizeSchedule()
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
virtual void exitRegion()
Called when the scheduler has finished scheduling the current region.
const MachineLoopInfo * MLI
bool RemoveKillFlags
True if the DAG builder should remove kill flags (in preparation for rescheduling).
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
ScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S)
RegPressureTracker RPTracker
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
ScheduleDAGMI(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S, bool RemoveKillFlags)
std::vector< std::unique_ptr< ScheduleDAGMutation > > Mutations
Ordered list of DAG postprocessing steps.
MachineRegisterInfo & MRI
Virtual/real register map.
const TargetInstrInfo * TII
Target instruction information.
const TargetRegisterInfo * TRI
Target processor register info.
MachineFunction & MF
Machine function.
static const unsigned ScaleFactor
unsigned getMetric() const
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
SlotIndex - An opaque wrapper around machine indexes.
static bool isSameInstr(SlotIndex A, SlotIndex B)
isSameInstr - Return true if A and B refer to the same instruction.
static bool isEarlierInstr(SlotIndex A, SlotIndex B)
isEarlierInstr - Return true if A refers to an instruction earlier than B.
SlotIndex getPrevSlot() const
Returns the previous slot in the index list.
SlotIndex getMBBStartIdx(unsigned Num) const
Returns the first index in the given basic block number.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
bool contains(const T &V) const
Check if the SmallSet contains the given element.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
bool isReMaterializable(const MachineInstr &MI) const
Return true if the instruction would be materializable at a point in the containing function where al...
virtual bool isIgnorableUse(const MachineOperand &MO) const
Given MO is a PhysReg use return if it can be ignored for the purpose of instruction rematerializatio...
uint8_t getCopyCost() const
Return the cost of copying a value between two registers in this class.
LaneBitmask getSubRegIndexLaneMask(unsigned SubIdx) const
Return a bitmask representing the parts of a register that are covered by SubIdx.
Provide an instruction scheduling machine model to CodeGen passes.
LLVM_ABI bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model.
unsigned getMicroOpBufferSize() const
Number of micro-ops that may be buffered for OOO execution.
bool initGCNSchedStage() override
bool initGCNRegion() override
void finalizeGCNSchedStage() override
bool shouldRevertScheduling(unsigned WavesAfter) override
VNInfo - Value Number Information.
SlotIndex def
The index of the defining instruction.
bool isPHIDef() const
Returns true if this value is defined by a PHI instruction (or was, PHI instructions may have been el...
static bool allUsesAvailableAt(const MachineInstr *MI, SlotIndex UseIdx, const LiveIntervals &LIS, const MachineRegisterInfo &MRI, const TargetInstrInfo &TII)
std::pair< iterator, bool > insert(const ValueT &V)
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize)
LLVM_READONLY int32_t getMFMASrcCVDstAGPROp(uint32_t Opcode)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
This is an optimization pass for GlobalISel generic memory operations.
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI unsigned getWeakLeft(const SUnit *SU, bool isTop)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
GCNRegPressure getRegPressure(const MachineRegisterInfo &MRI, Range &&LiveRegs)
std::unique_ptr< ScheduleDAGMutation > createIGroupLPDAGMutation(AMDGPU::SchedulingPhase Phase)
Phase specifies whether or not this is a reentry into the IGroupLPDAGMutation.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
std::pair< MachineBasicBlock::iterator, MachineBasicBlock::iterator > RegionBoundaries
A region's boundaries i.e.
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason, const TargetRegisterInfo *TRI, const MachineFunction &MF)
@ UnclusteredHighRPReschedule
@ MemoryClauseInitialSchedule
@ ClusteredLowOccupancyReschedule
auto reverse(ContainerTy &&C)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
LLVM_ABI cl::opt< bool > VerifyScheduling
LLVM_ABI bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary &Zone)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
IterT skipDebugInstructionsBackward(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It until it points to a non-debug instruction or to Begin and return the resulting iterator...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
bool isTheSameCluster(unsigned A, unsigned B)
Return whether the input cluster IDs are the same and valid.
DWARFExpression::Operation Op
LLVM_ABI bool tryGreater(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
DenseMap< MachineInstr *, GCNRPTracker::LiveRegSet > getLiveRegMap(Range &&R, bool After, LiveIntervals &LIS)
Creates a map MachineInstr -> LiveRegSet. R - range of iterators on instructions; After - upon entry or...
GCNRPTracker::LiveRegSet getLiveRegsBefore(const MachineInstr &MI, const LiveIntervals &LIS)
LLVM_ABI bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Return true if this heuristic determines order.
LLVM_ABI void dumpMaxRegPressure(MachineFunction &MF, GCNRegPressure::RegKind Kind, LiveIntervals &LIS, const MachineLoopInfo *MLI)
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
LLVM_ABI int biasPhysReg(const SUnit *SU, bool isTop)
Minimize physical register live ranges.
Implement std::hash so that hash_code can be used in STL containers.
bool operator()(std::pair< MachineInstr *, unsigned > A, std::pair< MachineInstr *, unsigned > B) const
unsigned getArchVGPRNum() const
unsigned getAGPRNum() const
unsigned getSGPRNum() const
Policy for scheduling the next instruction in the candidate's zone.
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of p...
void setBest(SchedCandidate &Best)
void reset(const CandPolicy &NewPolicy)
LLVM_ABI void initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
SchedResourceDelta ResDelta
Status of an instruction's critical resource consumption.
unsigned DemandedResources
constexpr bool any() const
static constexpr LaneBitmask getNone()
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
Execution frequency information required by scoring heuristics.
SmallVector< uint64_t > Regions
Per-region execution frequencies. 0 when unknown.
uint64_t MinFreq
Minimum and maximum observed frequencies.
FreqInfo(MachineFunction &MF, const GCNScheduleDAGMILive &DAG)
PressureChange CriticalMax
PressureChange CurrentMax