46#define DEBUG_TYPE "machine-scheduler"
51 "amdgpu-disable-unclustered-high-rp-reschedule",
cl::Hidden,
52 cl::desc(
"Disable unclustered high register pressure "
53 "reduction scheduling stage."),
57 "amdgpu-disable-clustered-low-occupancy-reschedule",
cl::Hidden,
58 cl::desc(
"Disable clustered low occupancy "
59 "rescheduling for ILP scheduling stage."),
65 "Sets the bias which adds weight to occupancy vs latency. Set it to "
66 "100 to chase the occupancy only."),
71 cl::desc(
"Relax occupancy targets for kernels which are memory "
72 "bound (amdgpu-membound-threshold), or "
73 "Wave Limited (amdgpu-limit-wave-threshold)."),
78 cl::desc(
"Use the AMDGPU specific RPTrackers during scheduling"),
82 "amdgpu-scheduler-pending-queue-limit",
cl::Hidden,
84 "Max (Available+Pending) size to inspect pending queue (0 disables)"),
87#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
88#define DUMP_MAX_REG_PRESSURE
90 "amdgpu-print-max-reg-pressure-regusage-before-scheduler",
cl::Hidden,
91 cl::desc(
"Print a list of live registers along with their def/uses at the "
92 "point of maximum register pressure before scheduling."),
96 "amdgpu-print-max-reg-pressure-regusage-after-scheduler",
cl::Hidden,
97 cl::desc(
"Print a list of live registers along with their def/uses at the "
98 "point of maximum register pressure after scheduling."),
103 "amdgpu-disable-rewrite-mfma-form-sched-stage",
cl::Hidden,
104 cl::desc(
"Disable rewrite mfma rewrite scheduling stage"),
cl::init(
false));
121 Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
123 Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);
145 "VGPRCriticalLimit calculation method.\n");
149 unsigned Addressable =
152 VGPRBudget = std::max(VGPRBudget, Granule);
190 if (!
Op.isReg() ||
Op.isImplicit())
192 if (
Op.getReg().isPhysical() ||
193 (
Op.isDef() &&
Op.getSubReg() != AMDGPU::NoSubRegister))
201 std::vector<unsigned> &Pressure, std::vector<unsigned> &MaxPressure,
216 Pressure.resize(4, 0);
227 Pressure[AMDGPU::RegisterPressureSets::SReg_32] = NewPressure.
getSGPRNum();
228 Pressure[AMDGPU::RegisterPressureSets::VGPR_32] =
230 Pressure[AMDGPU::RegisterPressureSets::AGPR_32] = NewPressure.
getAGPRNum();
237 unsigned SGPRPressure,
238 unsigned VGPRPressure,
bool IsBottomUp) {
242 if (!
DAG->isTrackingPressure())
265 Pressure[AMDGPU::RegisterPressureSets::SReg_32] = SGPRPressure;
266 Pressure[AMDGPU::RegisterPressureSets::VGPR_32] = VGPRPressure;
268 for (
const auto &Diff :
DAG->getPressureDiff(SU)) {
274 (IsBottomUp ? Diff.getUnitInc() : -Diff.getUnitInc());
277#ifdef EXPENSIVE_CHECKS
278 std::vector<unsigned> CheckPressure, CheckMaxPressure;
281 if (
Pressure[AMDGPU::RegisterPressureSets::SReg_32] !=
282 CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] ||
283 Pressure[AMDGPU::RegisterPressureSets::VGPR_32] !=
284 CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32]) {
285 errs() <<
"Register Pressure is inaccurate when calculated through "
287 <<
"SGPR got " <<
Pressure[AMDGPU::RegisterPressureSets::SReg_32]
289 << CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] <<
"\n"
290 <<
"VGPR got " <<
Pressure[AMDGPU::RegisterPressureSets::VGPR_32]
292 << CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32] <<
"\n";
298 unsigned NewSGPRPressure =
Pressure[AMDGPU::RegisterPressureSets::SReg_32];
299 unsigned NewVGPRPressure =
Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
309 const unsigned MaxVGPRPressureInc = 16;
310 bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >=
VGPRExcessLimit;
311 bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >=
SGPRExcessLimit;
342 if (SGPRDelta >= 0 || VGPRDelta >= 0) {
344 if (SGPRDelta > VGPRDelta) {
358 bool HasBufferedModel =
377 dbgs() <<
"Prefer:\t\t";
378 DAG->dumpNode(*Preferred.
SU);
382 DAG->dumpNode(*Current.
SU);
385 dbgs() <<
"Reason:\t\t";
399 unsigned SGPRPressure = 0;
400 unsigned VGPRPressure = 0;
402 if (
DAG->isTrackingPressure()) {
404 SGPRPressure =
Pressure[AMDGPU::RegisterPressureSets::SReg_32];
405 VGPRPressure =
Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
410 SGPRPressure =
T->getPressure().getSGPRNum();
411 VGPRPressure =
T->getPressure().getArchVGPRNum();
416 for (
SUnit *SU : AQ) {
420 VGPRPressure, IsBottomUp);
440 for (
SUnit *SU : PQ) {
444 VGPRPressure, IsBottomUp);
464 bool &PickedPending) {
484 bool BotPending =
false;
504 "Last pick result should correspond to re-picking right now");
509 bool TopPending =
false;
529 "Last pick result should correspond to re-picking right now");
539 PickedPending = BotPending && TopPending;
542 if (BotPending || TopPending) {
549 Cand.setBest(TryCand);
554 IsTopNode = Cand.AtTop;
561 if (
DAG->top() ==
DAG->bottom()) {
563 Bot.Available.empty() &&
Bot.Pending.empty() &&
"ReadyQ garbage");
569 PickedPending =
false;
603 if (ReadyCycle > CurrentCycle)
675 if (
DAG->isTrackingPressure() &&
681 if (
DAG->isTrackingPressure() &&
686 bool SameBoundary = Zone !=
nullptr;
728 if (
DAG->isTrackingPressure() &&
738 bool SameBoundary = Zone !=
nullptr;
773 bool CandIsClusterSucc =
775 bool TryCandIsClusterSucc =
777 if (
tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
782 if (
DAG->isTrackingPressure() &&
788 if (
DAG->isTrackingPressure() &&
834 if (
DAG->isTrackingPressure()) {
850 bool CandIsClusterSucc =
852 bool TryCandIsClusterSucc =
854 if (
tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
863 bool SameBoundary = Zone !=
nullptr;
880 if (TryMayLoad || CandMayLoad) {
881 bool TryLongLatency =
883 bool CandLongLatency =
887 Zone->
isTop() ? CandLongLatency : TryLongLatency, TryCand,
905 if (
DAG->isTrackingPressure() &&
924 !
Rem.IsAcyclicLatencyLimited &&
tryLatency(TryCand, Cand, *Zone))
942 StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy),
943 RegionLiveOuts(this,
true) {
949 LLVM_DEBUG(
dbgs() <<
"Starting occupancy is " << StartingOccupancy <<
".\n");
951 MinOccupancy = std::min(MFI.getMinAllowedOccupancy(), StartingOccupancy);
952 if (MinOccupancy != StartingOccupancy)
953 LLVM_DEBUG(
dbgs() <<
"Allowing Occupancy drops to " << MinOccupancy
958std::unique_ptr<GCNSchedStage>
960 switch (SchedStageID) {
962 return std::make_unique<OccInitialScheduleStage>(SchedStageID, *
this);
964 return std::make_unique<RewriteMFMAFormStage>(SchedStageID, *
this);
966 return std::make_unique<UnclusteredHighRPStage>(SchedStageID, *
this);
968 return std::make_unique<ClusteredLowOccStage>(SchedStageID, *
this);
970 return std::make_unique<PreRARematStage>(SchedStageID, *
this);
972 return std::make_unique<ILPInitialScheduleStage>(SchedStageID, *
this);
974 return std::make_unique<MemoryClauseInitialScheduleStage>(SchedStageID,
988GCNScheduleDAGMILive::getRealRegPressure(
unsigned RegionIdx)
const {
989 if (Regions[RegionIdx].first == Regions[RegionIdx].second)
993 &LiveIns[RegionIdx]);
999 assert(RegionBegin != RegionEnd &&
"Region must not be empty");
1003void GCNScheduleDAGMILive::computeBlockPressure(
unsigned RegionIdx,
1015 const MachineBasicBlock *OnlySucc =
nullptr;
1018 if (!Candidate->empty() && Candidate->pred_size() == 1) {
1019 SlotIndexes *Ind =
LIS->getSlotIndexes();
1021 OnlySucc = Candidate;
1026 size_t CurRegion = RegionIdx;
1027 for (
size_t E = Regions.size(); CurRegion !=
E; ++CurRegion)
1028 if (Regions[CurRegion].first->getParent() !=
MBB)
1033 auto LiveInIt = MBBLiveIns.find(
MBB);
1034 auto &Rgn = Regions[CurRegion];
1036 if (LiveInIt != MBBLiveIns.end()) {
1037 auto LiveIn = std::move(LiveInIt->second);
1039 MBBLiveIns.erase(LiveInIt);
1042 auto LRS = BBLiveInMap.lookup(NonDbgMI);
1043#ifdef EXPENSIVE_CHECKS
1052 if (Regions[CurRegion].first ==
I || NonDbgMI ==
I) {
1053 LiveIns[CurRegion] =
RPTracker.getLiveRegs();
1057 if (Regions[CurRegion].second ==
I) {
1058 Pressure[CurRegion] =
RPTracker.moveMaxPressure();
1059 if (CurRegion-- == RegionIdx)
1061 auto &Rgn = Regions[CurRegion];
1074 MBBLiveIns[OnlySucc] =
RPTracker.moveLiveRegs();
1079GCNScheduleDAGMILive::getRegionLiveInMap()
const {
1080 assert(!Regions.empty());
1081 std::vector<MachineInstr *> RegionFirstMIs;
1082 RegionFirstMIs.reserve(Regions.size());
1084 RegionFirstMIs.push_back(
1091GCNScheduleDAGMILive::getRegionLiveOutMap()
const {
1092 assert(!Regions.empty());
1093 std::vector<MachineInstr *> RegionLastMIs;
1094 RegionLastMIs.reserve(Regions.size());
1105 IdxToInstruction.clear();
1108 IsLiveOut ? DAG->getRegionLiveOutMap() : DAG->getRegionLiveInMap();
1109 for (
unsigned I = 0;
I < DAG->Regions.size();
I++) {
1110 auto &[RegionBegin, RegionEnd] = DAG->Regions[
I];
1112 if (RegionBegin == RegionEnd)
1116 IdxToInstruction[
I] = RegionKey;
1124 LiveIns.resize(Regions.size());
1125 Pressure.resize(Regions.size());
1126 RegionsWithHighRP.resize(Regions.size());
1127 RegionsWithExcessRP.resize(Regions.size());
1128 RegionsWithIGLPInstrs.resize(Regions.size());
1129 RegionsWithHighRP.reset();
1130 RegionsWithExcessRP.reset();
1131 RegionsWithIGLPInstrs.reset();
1136void GCNScheduleDAGMILive::runSchedStages() {
1137 LLVM_DEBUG(
dbgs() <<
"All regions recorded, starting actual scheduling.\n");
1139 if (!Regions.
empty()) {
1140 BBLiveInMap = getRegionLiveInMap();
1145#ifdef DUMP_MAX_REG_PRESSURE
1153 GCNSchedStrategy &S =
static_cast<GCNSchedStrategy &
>(*SchedImpl);
1156 if (!Stage->initGCNSchedStage())
1159 for (
auto Region : Regions) {
1163 if (!Stage->initGCNRegion()) {
1164 Stage->advanceRegion();
1173 &LiveIns[Stage->getRegionIdx()];
1175 reinterpret_cast<GCNRPTracker *
>(DownwardTracker)
1176 ->reset(
MRI, *RegionLiveIns);
1177 reinterpret_cast<GCNRPTracker *
>(UpwardTracker)
1178 ->reset(
MRI, RegionLiveOuts.getLiveRegsForRegionIdx(
1179 Stage->getRegionIdx()));
1183 Stage->finalizeGCNRegion();
1184 Stage->advanceRegion();
1188 Stage->finalizeGCNSchedStage();
1191#ifdef DUMP_MAX_REG_PRESSURE
1204 OS <<
"Max Occupancy Initial Schedule";
1207 OS <<
"Instruction Rewriting Reschedule";
1210 OS <<
"Unclustered High Register Pressure Reschedule";
1213 OS <<
"Clustered Low Occupancy Reschedule";
1216 OS <<
"Pre-RA Rematerialize";
1219 OS <<
"Max ILP Initial Schedule";
1222 OS <<
"Max memory clause Initial Schedule";
1242void RewriteMFMAFormStage::findReachingDefs(
1256 SmallVector<MachineBasicBlock *, 8> Worklist;
1264 while (!Worklist.
empty()) {
1279 for (MachineBasicBlock *PredMBB : DefMBB->
predecessors()) {
1280 if (Visited.
insert(PredMBB).second)
1286void RewriteMFMAFormStage::findReachingUses(
1290 for (MachineOperand &UseMO :
1293 findReachingDefs(UseMO, LIS, ReachingDefIndexes);
1297 if (
any_of(ReachingDefIndexes, [DefIdx](SlotIndex RDIdx) {
1309 if (!
ST.hasGFX90AInsts() ||
MFI.getMinWavesPerEU() > 1)
1312 RegionsWithExcessArchVGPR.resize(
DAG.Regions.size());
1313 RegionsWithExcessArchVGPR.reset();
1317 RegionsWithExcessArchVGPR[
Region] =
true;
1320 if (RegionsWithExcessArchVGPR.none())
1323 TII =
ST.getInstrInfo();
1324 SRI =
ST.getRegisterInfo();
1326 std::vector<std::pair<MachineInstr *, unsigned>> RewriteCands;
1330 if (!initHeuristics(RewriteCands, CopyForUse, CopyForDef))
1333 int64_t
Cost = getRewriteCost(RewriteCands, CopyForUse, CopyForDef);
1340 return rewrite(RewriteCands);
1350 if (
DAG.RegionsWithHighRP.none() &&
DAG.RegionsWithExcessRP.none())
1357 InitialOccupancy =
DAG.MinOccupancy;
1360 TempTargetOccupancy =
MFI.getMaxWavesPerEU() >
DAG.MinOccupancy
1361 ? InitialOccupancy + 1
1363 IsAnyRegionScheduled =
false;
1364 S.SGPRLimitBias =
S.HighRPSGPRBias;
1365 S.VGPRLimitBias =
S.HighRPVGPRBias;
1369 <<
"Retrying function scheduling without clustering. "
1370 "Aggressively try to reduce register pressure to achieve occupancy "
1371 << TempTargetOccupancy <<
".\n");
1386 if (
DAG.StartingOccupancy <=
DAG.MinOccupancy)
1390 dbgs() <<
"Retrying function scheduling with lowest recorded occupancy "
1391 <<
DAG.MinOccupancy <<
".\n");
1396#define REMAT_PREFIX "[PreRARemat] "
1397#define REMAT_DEBUG(X) LLVM_DEBUG(dbgs() << REMAT_PREFIX; X;)
1399#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1400Printable PreRARematStage::ScoredRemat::print()
const {
1402 OS <<
'(' << MaxFreq <<
", " << FreqDiff <<
", " << RegionImpact <<
')';
1423 const unsigned NumRegions =
DAG.Regions.size();
1424 for (
unsigned I = 0;
I < NumRegions; ++
I) {
1431 RegionBB.push_back(ParentMBB);
1435 auto PrintTargetRegions = [&]() ->
void {
1436 if (TargetRegions.none()) {
1441 for (
unsigned I : TargetRegions.set_bits())
1444 auto PrintRematReg = [&](
const RematReg &Remat) ->
Printable {
1448 bool HasLiveThroughRegion =
false;
1449 OS <<
'[' << Remat.DefRegion <<
" -";
1450 for (
unsigned I = 0;
I < NumRegions; ++
I) {
1451 if (Remat.isUnusedLiveThrough(
I)) {
1452 if (HasLiveThroughRegion) {
1456 HasLiveThroughRegion =
true;
1461 if (HasLiveThroughRegion)
1463 OS <<
"-> " << Remat.UseRegion <<
"] ";
1464 Remat.DefMI->print(OS,
true,
false,
1472 dbgs() <<
"Analyzing ";
1473 MF.getFunction().printAsOperand(
dbgs(),
false);
1476 if (!setObjective()) {
1477 LLVM_DEBUG(
dbgs() <<
"no objective to achieve, occupancy is maximal at "
1478 <<
MFI.getMaxWavesPerEU() <<
'\n');
1483 dbgs() <<
"increase occupancy from " << *TargetOcc - 1 <<
'\n';
1485 dbgs() <<
"reduce spilling (minimum target occupancy is "
1486 <<
MFI.getMinWavesPerEU() <<
")\n";
1488 PrintTargetRegions();
1491 if (!collectRematRegs(MIRegion)) {
1497 dbgs() <<
"Rematerializable registers:\n";
1498 for (
const RematReg &Remat : RematRegs)
1506 dbgs() <<
"unknown ";
1507 dbgs() <<
" | " << *
DAG.Regions[
I].first;
1512 for (RematReg &Remat : RematRegs)
1518 unsigned RoundNum = 0;
1522 assert(!ScoredRemats.empty() &&
"no more remat candidates");
1525 for (ScoredRemat &Remat : ScoredRemats)
1526 Remat.update(TargetRegions, RPTargets, FreqInfo, !TargetOcc);
1530 dbgs() <<
"==== ROUND " << RoundNum++ <<
" ====\n"
1532 <<
"Candidates with non-null score, in rematerialization order:\n";
1533 for (
const ScoredRemat &RematDecision :
reverse(ScoredRemats)) {
1534 if (RematDecision.hasNullScore())
1537 << *RematDecision.Remat->DefMI;
1539 PrintTargetRegions();
1542 RecomputeRP.
reset();
1543 unsigned RematIdx = ScoredRemats.
size();
1548 for (; RematIdx && TargetRegions.any(); --RematIdx) {
1549 const ScoredRemat &Candidate = ScoredRemats[RematIdx - 1];
1553 if (Candidate.hasNullScore()) {
1558 RematReg &Remat = *Candidate.Remat;
1565 if (!Remat.maybeBeneficial(TargetRegions, RPTargets))
1570 Candidate.rematerialize(RecomputeRP, RPTargets,
DAG);
1571 RescheduleRegions |= Remat.Live;
1580 RollbackInfo &Rollback = Rollbacks.emplace_back(&Remat);
1581 Rollback.RematMI = RematMI;
1587 Remat.DefMI->
setDesc(
DAG.TII->get(TargetOpcode::DBG_VALUE));
1589 if (MO.isReg() && MO.readsReg()) {
1590 Rollback.RegMap.insert({Idx, MO.getReg()});
1596 DAG.deleteMI(Remat.DefRegion, Remat.DefMI);
1599 unsetSatisfiedRPTargets(Remat.Live);
1603 if (!TargetRegions.any()) {
1604 dbgs() <<
"** Interrupt round on all targets achieved\n";
1605 }
else if (RematIdx) {
1606 dbgs() <<
"** Interrupt round on stale score for "
1607 << *ScoredRemats[RematIdx - 1].Remat->DefMI;
1609 dbgs() <<
"** Stop on exhausted rematerialization candidates\n";
1614 ScoredRemats.truncate(RematIdx);
1615 }
while ((updateAndVerifyRPTargets(RecomputeRP) || TargetRegions.any()) &&
1616 !ScoredRemats.empty());
1617 if (RescheduleRegions.none())
1623 unsigned DynamicVGPRBlockSize =
MFI.getDynamicVGPRBlockSize();
1624 for (
unsigned I : RescheduleRegions.set_bits()) {
1625 DAG.Pressure[
I] = RPTargets[
I].getCurrentRP();
1627 <<
DAG.Pressure[
I].getOccupancy(
ST, DynamicVGPRBlockSize)
1628 <<
" (" << RPTargets[
I] <<
")\n");
1630 AchievedOcc =
MFI.getMaxWavesPerEU();
1633 std::min(AchievedOcc, RP.getOccupancy(
ST, DynamicVGPRBlockSize));
1637 dbgs() <<
"Retrying function scheduling with new min. occupancy of "
1638 << AchievedOcc <<
" from rematerializing (original was "
1639 <<
DAG.MinOccupancy;
1641 dbgs() <<
", target was " << *TargetOcc;
1645 DAG.setTargetOccupancy(getStageTargetOccupancy());
1656 S.SGPRLimitBias =
S.VGPRLimitBias = 0;
1657 if (
DAG.MinOccupancy > InitialOccupancy) {
1658 assert(IsAnyRegionScheduled);
1660 <<
" stage successfully increased occupancy to "
1661 <<
DAG.MinOccupancy <<
'\n');
1662 }
else if (!IsAnyRegionScheduled) {
1663 assert(
DAG.MinOccupancy == InitialOccupancy);
1665 <<
": No regions scheduled, min occupancy stays at "
1666 <<
DAG.MinOccupancy <<
", MFI occupancy stays at "
1667 <<
MFI.getOccupancy() <<
".\n");
1675 if (
DAG.begin() ==
DAG.end())
1682 unsigned NumRegionInstrs = std::distance(
DAG.begin(),
DAG.end());
1686 if (
DAG.begin() == std::prev(
DAG.end()))
1692 <<
"\n From: " << *
DAG.begin() <<
" To: ";
1694 else dbgs() <<
"End";
1695 dbgs() <<
" RegionInstrs: " << NumRegionInstrs <<
'\n');
1703 for (
auto &
I :
DAG) {
1716 dbgs() <<
"Pressure before scheduling:\nRegion live-ins:"
1718 <<
"Region live-in pressure: "
1722 S.HasHighPressure =
false;
1744 unsigned DynamicVGPRBlockSize =
DAG.MFI.getDynamicVGPRBlockSize();
1747 unsigned CurrentTargetOccupancy =
1748 IsAnyRegionScheduled ?
DAG.MinOccupancy : TempTargetOccupancy;
1750 (CurrentTargetOccupancy <= InitialOccupancy ||
1751 DAG.Pressure[
RegionIdx].getOccupancy(
ST, DynamicVGPRBlockSize) !=
1758 if (!IsAnyRegionScheduled && IsSchedulingThisRegion) {
1759 IsAnyRegionScheduled =
true;
1760 if (
MFI.getMaxWavesPerEU() >
DAG.MinOccupancy)
1761 DAG.setTargetOccupancy(TempTargetOccupancy);
1763 return IsSchedulingThisRegion;
1798 if (
S.HasHighPressure)
1819 if (
DAG.MinOccupancy < *TargetOcc) {
1821 <<
" cannot meet occupancy target, interrupting "
1822 "re-scheduling in all regions\n");
1823 RescheduleRegions.reset();
1834 unsigned DynamicVGPRBlockSize =
DAG.MFI.getDynamicVGPRBlockSize();
1845 unsigned TargetOccupancy = std::min(
1846 S.getTargetOccupancy(),
ST.getOccupancyWithWorkGroupSizes(
MF).second);
1847 unsigned WavesAfter = std::min(
1848 TargetOccupancy,
PressureAfter.getOccupancy(
ST, DynamicVGPRBlockSize));
1849 unsigned WavesBefore = std::min(
1851 LLVM_DEBUG(
dbgs() <<
"Occupancy before scheduling: " << WavesBefore
1852 <<
", after " << WavesAfter <<
".\n");
1858 unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);
1862 if (WavesAfter < WavesBefore && WavesAfter <
DAG.MinOccupancy &&
1863 WavesAfter >=
MFI.getMinAllowedOccupancy()) {
1864 LLVM_DEBUG(
dbgs() <<
"Function is memory bound, allow occupancy drop up to "
1865 <<
MFI.getMinAllowedOccupancy() <<
" waves\n");
1866 NewOccupancy = WavesAfter;
1869 if (NewOccupancy <
DAG.MinOccupancy) {
1870 DAG.MinOccupancy = NewOccupancy;
1871 MFI.limitOccupancy(
DAG.MinOccupancy);
1873 <<
DAG.MinOccupancy <<
".\n");
1877 unsigned MaxVGPRs =
ST.getMaxNumVGPRs(
MF);
1880 unsigned MaxArchVGPRs = std::min(MaxVGPRs,
ST.getAddressableNumArchVGPRs());
1881 unsigned MaxSGPRs =
ST.getMaxNumSGPRs(
MF);
1905 unsigned ReadyCycle = CurrCycle;
1906 for (
auto &
D : SU.
Preds) {
1907 if (
D.isAssignedRegDep()) {
1910 unsigned DefReady = ReadyCycles[
DAG.getSUnit(
DefMI)->NodeNum];
1911 ReadyCycle = std::max(ReadyCycle, DefReady +
Latency);
1914 ReadyCycles[SU.
NodeNum] = ReadyCycle;
1921 std::pair<MachineInstr *, unsigned>
B)
const {
1922 return A.second <
B.second;
1928 if (ReadyCycles.empty())
1930 unsigned BBNum = ReadyCycles.begin()->first->getParent()->getNumber();
1931 dbgs() <<
"\n################## Schedule time ReadyCycles for MBB : " << BBNum
1932 <<
" ##################\n# Cycle #\t\t\tInstruction "
1936 for (
auto &
I : ReadyCycles) {
1937 if (
I.second > IPrev + 1)
1938 dbgs() <<
"****************************** BUBBLE OF " <<
I.second - IPrev
1939 <<
" CYCLES DETECTED ******************************\n\n";
1940 dbgs() <<
"[ " <<
I.second <<
" ] : " << *
I.first <<
"\n";
1953 unsigned SumBubbles = 0;
1955 unsigned CurrCycle = 0;
1956 for (
auto &SU : InputSchedule) {
1957 unsigned ReadyCycle =
1959 SumBubbles += ReadyCycle - CurrCycle;
1961 ReadyCyclesSorted.insert(std::make_pair(SU.getInstr(), ReadyCycle));
1963 CurrCycle = ++ReadyCycle;
1986 unsigned SumBubbles = 0;
1988 unsigned CurrCycle = 0;
1989 for (
auto &
MI :
DAG) {
1993 unsigned ReadyCycle =
1995 SumBubbles += ReadyCycle - CurrCycle;
1997 ReadyCyclesSorted.insert(std::make_pair(SU->
getInstr(), ReadyCycle));
1999 CurrCycle = ++ReadyCycle;
2016 if (WavesAfter <
DAG.MinOccupancy)
2020 if (
DAG.MFI.isDynamicVGPREnabled()) {
2022 &
ST,
DAG.MFI.getDynamicVGPRBlockSize(),
2025 &
ST,
DAG.MFI.getDynamicVGPRBlockSize(),
2027 if (BlocksAfter > BlocksBefore)
2064 <<
"\n\t *** In shouldRevertScheduling ***\n"
2065 <<
" *********** BEFORE UnclusteredHighRPStage ***********\n");
2069 <<
"\n *********** AFTER UnclusteredHighRPStage ***********\n");
2071 unsigned OldMetric = MBefore.
getMetric();
2072 unsigned NewMetric = MAfter.
getMetric();
2073 unsigned WavesBefore = std::min(
2074 S.getTargetOccupancy(),
2081 LLVM_DEBUG(
dbgs() <<
"\tMetric before " << MBefore <<
"\tMetric after "
2082 << MAfter <<
"Profit: " << Profit <<
"\n");
2113 unsigned WavesAfter) {
2120 LLVM_DEBUG(
dbgs() <<
"New pressure will result in more spilling.\n");
2133 "instruction number mismatch");
2134 if (MIOrder.
empty())
2146 if (MII != RegionEnd) {
2148 bool NonDebugReordered =
2149 !
MI->isDebugInstr() &&
2155 if (NonDebugReordered)
2156 DAG.LIS->handleMove(*
MI,
true);
2160 if (
MI->isDebugInstr()) {
2167 Op.setIsUndef(
false);
2170 if (
DAG.ShouldTrackLaneMasks) {
2172 SlotIndex SlotIdx =
DAG.LIS->getInstructionIndex(*MI).getRegSlot();
2187bool RewriteMFMAFormStage::isRewriteCandidate(
MachineInstr *
MI)
const {
2194bool RewriteMFMAFormStage::initHeuristics(
2195 std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,
2203 if (!isRewriteCandidate(&
MI))
2207 assert(ReplacementOp != -1);
2209 RewriteCands.push_back({&
MI,
MI.getOpcode()});
2210 MI.setDesc(
TII->get(ReplacementOp));
2212 MachineOperand *Src2 =
TII->getNamedOperand(
MI, AMDGPU::OpName::src2);
2213 if (Src2->
isReg()) {
2215 findReachingDefs(*Src2,
DAG.LIS, Src2ReachingDefs);
2219 for (SlotIndex RDIdx : Src2ReachingDefs) {
2220 MachineInstr *RD =
DAG.LIS->getInstructionFromIndex(RDIdx);
2221 if (!
TII->isMAI(*RD))
2226 MachineOperand &Dst =
MI.getOperand(0);
2229 findReachingUses(&
MI,
DAG.LIS, DstReachingUses);
2231 for (MachineOperand *RUOp : DstReachingUses) {
2232 if (
TII->isMAI(*RUOp->getParent()))
2238 CopyForUse[RUOp->getParent()->getParent()].insert(RUOp->getReg());
2241 findReachingDefs(*RUOp,
DAG.LIS, DstUsesReachingDefs);
2243 for (SlotIndex RDIndex : DstUsesReachingDefs) {
2244 MachineInstr *RD =
DAG.LIS->getInstructionFromIndex(RDIndex);
2245 if (
TII->isMAI(*RD))
2257 const TargetRegisterClass *VDefRC =
DAG.MRI.getRegClass(Dst.getReg());
2258 const TargetRegisterClass *ADefRC = SRI->getEquivalentAGPRClass(VDefRC);
2259 DAG.MRI.setRegClass(Dst.getReg(), ADefRC);
2260 if (Src2->
isReg()) {
2264 const TargetRegisterClass *VUseRC =
DAG.MRI.getRegClass(Src2->
getReg());
2265 const TargetRegisterClass *AUseRC = SRI->getEquivalentAGPRClass(VUseRC);
2266 DAG.MRI.setRegClass(Src2->
getReg(), AUseRC);
2275int64_t RewriteMFMAFormStage::getRewriteCost(
2276 const std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,
2277 const DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
2278 const SmallPtrSetImpl<MachineInstr *> &CopyForDef) {
2279 MachineBlockFrequencyInfo *MBFI =
DAG.MBFI;
2281 int64_t BestSpillCost = 0;
2285 std::pair<unsigned, unsigned> MaxVectorRegs =
2286 ST.getMaxNumVectorRegs(
MF.getFunction());
2287 unsigned ArchVGPRThreshold = MaxVectorRegs.first;
2288 unsigned AGPRThreshold = MaxVectorRegs.second;
2289 unsigned CombinedThreshold =
ST.getMaxNumVGPRs(
MF);
2292 if (!RegionsWithExcessArchVGPR[Region])
2297 MF, ArchVGPRThreshold, AGPRThreshold, CombinedThreshold);
2305 MF, ArchVGPRThreshold, AGPRThreshold, CombinedThreshold);
2307 uint64_t BlockFreq =
2311 bool RelativeFreqIsDenom = EntryFreq > BlockFreq;
2312 uint64_t RelativeFreq = EntryFreq && BlockFreq
2313 ? (RelativeFreqIsDenom ? EntryFreq / BlockFreq
2314 : BlockFreq / EntryFreq)
2319 int64_t SpillCost = ((int)SpillCostAfter - (int)SpillCostBefore) * 2;
2322 if (RelativeFreqIsDenom)
2323 SpillCost /= (int64_t)RelativeFreq;
2325 SpillCost *= (int64_t)RelativeFreq;
2331 if (SpillCost < BestSpillCost)
2332 BestSpillCost = SpillCost;
2337 Cost = BestSpillCost;
2340 unsigned CopyCost = 0;
2344 for (MachineInstr *
DefMI : CopyForDef) {
2351 const TargetRegisterClass *RC =
DAG.MRI.getRegClass(DefReg);
2356 for (
auto &[UseBlock, UseRegs] : CopyForUse) {
2361 const TargetRegisterClass *RC =
DAG.MRI.getRegClass(
UseReg);
2370 for (
auto &[
MI, OriginalOpcode] : RewriteCands) {
2372 const TargetRegisterClass *ADefRC =
2373 DAG.MRI.getRegClass(
MI->getOperand(0).getReg());
2374 const TargetRegisterClass *VDefRC = SRI->getEquivalentVGPRClass(ADefRC);
2375 DAG.MRI.setRegClass(
MI->getOperand(0).getReg(), VDefRC);
2376 MI->setDesc(
TII->get(OriginalOpcode));
2378 MachineOperand *Src2 =
TII->getNamedOperand(*
MI, AMDGPU::OpName::src2);
2386 const TargetRegisterClass *AUseRC =
DAG.MRI.getRegClass(Src2->
getReg());
2387 const TargetRegisterClass *VUseRC = SRI->getEquivalentVGPRClass(AUseRC);
2388 DAG.MRI.setRegClass(Src2->
getReg(), VUseRC);
2391 return Cost + CopyCost;
2394bool RewriteMFMAFormStage::rewrite(
2395 const std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands) {
2396 DenseMap<MachineInstr *, unsigned> FirstMIToRegion;
2397 DenseMap<MachineInstr *, unsigned> LastMIToRegion;
2405 if (
Entry.second !=
Entry.first->getParent()->end())
2448 DenseSet<Register> RewriteRegs;
2451 DenseMap<Register, Register> RedefMap;
2453 DenseMap<Register, DenseSet<MachineOperand *>>
ReplaceMap;
2455 DenseMap<Register, SmallPtrSet<MachineInstr *, 8>> ReachingDefCopyMap;
2458 DenseMap<unsigned, DenseMap<Register, SmallPtrSet<MachineOperand *, 8>>>
2461 for (
auto &[
MI, OriginalOpcode] : RewriteCands) {
2463 if (ReplacementOp == -1)
2465 MI->setDesc(
TII->get(ReplacementOp));
2468 MachineOperand *Src2 =
TII->getNamedOperand(*
MI, AMDGPU::OpName::src2);
2469 if (Src2->
isReg()) {
2476 findReachingDefs(*Src2,
DAG.LIS, Src2ReachingDefs);
2477 SmallSetVector<MachineInstr *, 8> Src2DefsReplace;
2479 for (SlotIndex RDIndex : Src2ReachingDefs) {
2480 MachineInstr *RD =
DAG.LIS->getInstructionFromIndex(RDIndex);
2481 if (
TII->isMAI(*RD))
2485 Src2DefsReplace.
insert(RD);
2488 if (!Src2DefsReplace.
empty()) {
2490 if (RI != RedefMap.
end()) {
2491 MappedReg = RI->second;
2494 const TargetRegisterClass *Src2RC =
DAG.MRI.getRegClass(Src2Reg);
2495 const TargetRegisterClass *VGPRRC =
2496 SRI->getEquivalentVGPRClass(Src2RC);
2499 MappedReg =
DAG.MRI.createVirtualRegister(VGPRRC);
2500 RedefMap[Src2Reg] = MappedReg;
2505 for (MachineInstr *RD : Src2DefsReplace) {
2507 if (ReachingDefCopyMap[Src2Reg].insert(RD).second) {
2508 MachineInstrBuilder VGPRCopy =
2511 .
addDef(MappedReg, {}, 0)
2512 .addUse(Src2Reg, {}, 0);
2513 DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy);
2518 unsigned UpdateRegion = LastMIToRegion[RD];
2519 DAG.Regions[UpdateRegion].second = VGPRCopy;
2520 LastMIToRegion.
erase(RD);
2527 RewriteRegs.
insert(Src2Reg);
2537 MachineOperand *Dst = &
MI->getOperand(0);
2546 SmallVector<MachineInstr *, 8> DstUseDefsReplace;
2548 findReachingUses(
MI,
DAG.LIS, DstReachingUses);
2550 for (MachineOperand *RUOp : DstReachingUses) {
2551 if (
TII->isMAI(*RUOp->getParent()))
2555 if (
find(DstReachingUseCopies, RUOp) == DstReachingUseCopies.
end())
2558 findReachingDefs(*RUOp,
DAG.LIS, DstUsesReachingDefs);
2560 for (SlotIndex RDIndex : DstUsesReachingDefs) {
2561 MachineInstr *RD =
DAG.LIS->getInstructionFromIndex(RDIndex);
2562 if (
TII->isMAI(*RD))
2567 if (
find(DstUseDefsReplace, RD) == DstUseDefsReplace.
end())
2572 if (!DstUseDefsReplace.
empty()) {
2574 if (RI != RedefMap.
end()) {
2575 MappedReg = RI->second;
2578 const TargetRegisterClass *DstRC =
DAG.MRI.getRegClass(DstReg);
2579 const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(DstRC);
2582 MappedReg =
DAG.MRI.createVirtualRegister(VGPRRC);
2583 RedefMap[DstReg] = MappedReg;
2588 for (MachineInstr *RD : DstUseDefsReplace) {
2590 if (ReachingDefCopyMap[DstReg].insert(RD).second) {
2591 MachineInstrBuilder VGPRCopy =
2594 .
addDef(MappedReg, {}, 0)
2595 .addUse(DstReg, {}, 0);
2596 DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy);
2601 LastMIToRegion.
find(RD);
2602 if (LMI != LastMIToRegion.
end()) {
2603 unsigned UpdateRegion = LMI->second;
2604 DAG.Regions[UpdateRegion].second = VGPRCopy;
2605 LastMIToRegion.
erase(RD);
2611 DenseSet<MachineOperand *> &DstRegSet =
ReplaceMap[DstReg];
2612 for (MachineOperand *RU : DstReachingUseCopies) {
2613 MachineBasicBlock *RUBlock = RU->getParent()->getParent();
2616 if (RUBlock !=
MI->getParent()) {
2623 const TargetRegisterClass *DstRC =
DAG.MRI.getRegClass(DstReg);
2624 const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(DstRC);
2625 Register NewUseReg =
DAG.MRI.createVirtualRegister(VGPRRC);
2626 MachineInstr *UseInst = RU->getParent();
2627 MachineInstrBuilder VGPRCopy =
2630 .
addDef(NewUseReg, {}, 0)
2631 .addUse(DstReg, {}, 0);
2632 DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy);
2635 RU->setReg(NewUseReg);
2641 RewriteRegs.
insert(DstReg);
2651 std::pair<unsigned, DenseMap<Register, SmallPtrSet<MachineOperand *, 8>>>;
2652 for (RUBType RUBlockEntry : ReachingUseTracker) {
2653 using RUDType = std::pair<Register, SmallPtrSet<MachineOperand *, 8>>;
2654 for (RUDType RUDst : RUBlockEntry.second) {
2655 MachineOperand *OpBegin = *RUDst.second.begin();
2656 SlotIndex InstPt =
DAG.LIS->getInstructionIndex(*OpBegin->
getParent());
2659 for (MachineOperand *User : RUDst.second) {
2660 SlotIndex NewInstPt =
DAG.LIS->getInstructionIndex(*
User->getParent());
2665 const TargetRegisterClass *DstRC =
DAG.MRI.getRegClass(RUDst.first);
2666 const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(DstRC);
2667 Register NewUseReg =
DAG.MRI.createVirtualRegister(VGPRRC);
2668 MachineInstr *UseInst =
DAG.LIS->getInstructionFromIndex(InstPt);
2670 MachineInstrBuilder VGPRCopy =
2673 .
addDef(NewUseReg, {}, 0)
2674 .addUse(RUDst.first, {}, 0);
2675 DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy);
2680 FirstMIToRegion.
find(UseInst);
2681 if (FI != FirstMIToRegion.
end()) {
2682 unsigned UpdateRegion = FI->second;
2683 DAG.Regions[UpdateRegion].first = VGPRCopy;
2684 FirstMIToRegion.
erase(UseInst);
2688 for (MachineOperand *User : RUDst.second) {
2689 User->setReg(NewUseReg);
2700 for (std::pair<Register, Register> NewDef : RedefMap) {
2705 for (MachineOperand *ReplaceOp :
ReplaceMap[OldReg])
2706 ReplaceOp->setReg(NewReg);
2710 for (
Register RewriteReg : RewriteRegs) {
2711 Register RegToRewrite = RewriteReg;
2715 if (RI != RedefMap.end())
2716 RegToRewrite = RI->second;
2718 const TargetRegisterClass *CurrRC =
DAG.MRI.getRegClass(RegToRewrite);
2719 const TargetRegisterClass *AGPRRC = SRI->getEquivalentAGPRClass(CurrRC);
2721 DAG.MRI.setRegClass(RegToRewrite, AGPRRC);
2725 DAG.LIS->reanalyze(
DAG.MF);
2727 RegionPressureMap LiveInUpdater(&
DAG,
false);
2728 LiveInUpdater.buildLiveRegMap();
2731 DAG.LiveIns[Region] = LiveInUpdater.getLiveRegsForRegionIdx(Region);
2738unsigned PreRARematStage::getStageTargetOccupancy()
const {
2739 return TargetOcc ? *TargetOcc :
MFI.getMinWavesPerEU();
2742bool PreRARematStage::setObjective() {
2746 unsigned MaxSGPRs =
ST.getMaxNumSGPRs(
F);
2747 unsigned MaxVGPRs =
ST.getMaxNumVGPRs(
F);
2748 bool HasVectorRegisterExcess =
false;
2749 for (
unsigned I = 0,
E =
DAG.Regions.size();
I !=
E; ++
I) {
2750 const GCNRegPressure &
RP =
DAG.Pressure[
I];
2751 GCNRPTarget &
Target = RPTargets.emplace_back(MaxSGPRs, MaxVGPRs,
MF, RP);
2753 TargetRegions.set(
I);
2754 HasVectorRegisterExcess |=
Target.hasVectorRegisterExcess();
2757 if (HasVectorRegisterExcess ||
DAG.MinOccupancy >=
MFI.getMaxWavesPerEU()) {
2760 TargetOcc = std::nullopt;
2764 TargetOcc =
DAG.MinOccupancy + 1;
2765 const unsigned VGPRBlockSize =
MFI.getDynamicVGPRBlockSize();
2766 MaxSGPRs =
ST.getMaxNumSGPRs(*TargetOcc,
false);
2767 MaxVGPRs =
ST.getMaxNumVGPRs(*TargetOcc, VGPRBlockSize);
2768 for (
auto [
I, Target] :
enumerate(RPTargets)) {
2769 Target.setTarget(MaxSGPRs, MaxVGPRs);
2771 TargetRegions.set(
I);
2775 return TargetRegions.any();
2778bool PreRARematStage::collectRematRegs(
2779 const DenseMap<MachineInstr *, unsigned> &MIRegion) {
2782 DAG.RegionLiveOuts.buildLiveRegMap();
2786 SmallSet<Register, 4> MarkedRegs;
2787 auto IsMarkedForRemat = [&MarkedRegs](
const MachineOperand &MO) ->
bool {
2788 return MO.isReg() && MarkedRegs.
contains(MO.getReg());
2792 for (
unsigned I = 0,
E =
DAG.Regions.size();
I !=
E; ++
I) {
2794 for (
auto MI = Bounds.first;
MI != Bounds.second; ++
MI) {
2797 if (!isReMaterializable(
DefMI))
2810 MachineInstr *
UseMI =
DAG.MRI.getOneNonDBGUser(
Reg);
2814 if (UseRegion == MIRegion.
end() || UseRegion->second ==
I)
2825 if (IsMarkedForRemat(UseMO) ||
2832 SlotIndex UseIdx =
DAG.LIS->getInstructionIndex(*UseMI).getRegSlot(
true);
2843 return !RematRegs.empty();
2846PreRARematStage::RematReg::RematReg(
2847 MachineInstr *
DefMI, MachineInstr *
UseMI, GCNScheduleDAGMILive &
DAG,
2848 const DenseMap<MachineInstr *, unsigned> &MIRegion)
2851 DefRegion(MIRegion.at(
DefMI)), UseRegion(MIRegion.at(
UseMI)) {
2855 for (
unsigned I = 0,
E =
DAG.Regions.size();
I !=
E; ++
I) {
2856 auto LiveInIt =
DAG.LiveIns[
I].find(
Reg);
2857 if (LiveInIt !=
DAG.LiveIns[
I].end())
2859 const auto &LiveOuts =
DAG.RegionLiveOuts.getLiveRegsForRegionIdx(
I);
2860 if (
auto LiveOutIt = LiveOuts.find(
Reg); LiveOutIt != LiveOuts.end())
2865 Mask =
DAG.RegionLiveOuts.getLiveRegsForRegionIdx(DefRegion).at(
Reg);
2868bool PreRARematStage::RematReg::maybeBeneficial(
2871 for (
unsigned I : TargetRegions.
set_bits()) {
2872 if (Live[
I] && RPTargets[
I].isSaveBeneficial(
Reg))
2878void PreRARematStage::RematReg::insertMI(
unsigned RegionIdx,
2879 MachineInstr *RematMI,
2880 GCNScheduleDAGMILive &
DAG)
const {
2883 Bounds.first = RematMI;
2884 DAG.LIS->InsertMachineInstrInMaps(*RematMI);
2890 assert(
DAG.MLI &&
"MLI not defined in DAG");
2894 const unsigned NumRegions =
DAG.Regions.size();
2898 for (
unsigned I = 0;
I < NumRegions; ++
I) {
2902 if (BlockFreq && BlockFreq <
MinFreq)
2911 if (
MinFreq >= ScaleFactor * ScaleFactor) {
2913 Freq /= ScaleFactor;
2919PreRARematStage::ScoredRemat::ScoredRemat(RematReg *Remat,
const FreqInfo &Freq,
2921 : Remat(Remat), FreqDiff(getFreqDiff(Freq)) {
2925int64_t PreRARematStage::ScoredRemat::getFreqDiff(
const FreqInfo &Freq)
const {
2933 int64_t DefOrMin = std::max(Freq.Regions[Remat->DefRegion], Freq.MinFreq);
2934 int64_t UseOrMax = Freq.Regions[Remat->UseRegion];
2936 UseOrMax = Freq.MaxFreq;
2937 return DefOrMin - UseOrMax;
2940void PreRARematStage::ScoredRemat::update(
const BitVector &TargetRegions,
2942 const FreqInfo &FreqInfo,
2946 for (
unsigned I : TargetRegions.
set_bits()) {
2947 if (!Remat->Live[
I])
2955 if (!NumRegsBenefit)
2958 bool UnusedLT = Remat->isUnusedLiveThrough(
I);
2961 RegionImpact += (UnusedLT ? 2 : 1) * NumRegsBenefit;
2970 MaxFreq = std::max(MaxFreq, Freq);
2975MachineInstr *PreRARematStage::ScoredRemat::rematerialize(
2976 BitVector &RecomputeRP, SmallVectorImpl<GCNRPTarget> &RPTargets,
2977 GCNScheduleDAGMILive &
DAG)
const {
2979 MachineInstr &
DefMI = *Remat->DefMI;
2985 TII->reMaterialize(*InsertPos->getParent(), InsertPos, NewReg, 0,
DefMI);
2986 MachineInstr *RematMI = &*std::prev(InsertPos);
2987 Remat->UseMI->substituteRegister(
Reg, NewReg, 0, *
DAG.
TRI);
2988 Remat->insertMI(Remat->UseRegion, RematMI,
DAG);
2990#ifdef EXPENSIVE_CHECKS
2993 for (MachineOperand &MO :
DefMI.operands()) {
2994 if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
3006 LaneBitmask LiveInMask =
DAG.LiveIns[Remat->UseRegion].at(
UseReg);
3007 LaneBitmask UncoveredLanes = LM & ~(LiveInMask & LM);
3011 if (UncoveredLanes.
any()) {
3013 for (LiveInterval::SubRange &SR : LI.
subranges())
3014 assert((SR.LaneMask & UncoveredLanes).none());
3023 for (
unsigned I : Remat->Live.set_bits()) {
3024 RPTargets[
I].saveRP(RPSave);
3027 if (!Remat->isUnusedLiveThrough(
I))
3034void PreRARematStage::commitRematerializations()
const {
3036 for (
const RollbackInfo &Rollback : Rollbacks)
3037 DAG.deleteMI(Rollback.Remat->DefRegion, Rollback.Remat->DefMI);
3040void PreRARematStage::unsetSatisfiedRPTargets(
const BitVector &Regions) {
3042 if (TargetRegions[
I] && RPTargets[
I].satisfied()) {
3049bool PreRARematStage::updateAndVerifyRPTargets(
const BitVector &Regions) {
3050 bool TooOptimistic =
false;
3052 GCNRPTarget &
Target = RPTargets[
I];
3058 if (!TargetRegions[
I] && !
Target.satisfied()) {
3060 TooOptimistic =
true;
3061 TargetRegions.
set(
I);
3064 return TooOptimistic;
3068bool PreRARematStage::isReMaterializable(
const MachineInstr &
MI) {
3072 for (
const MachineOperand &MO :
MI.all_uses()) {
3075 if (MO.getReg().isPhysical()) {
3094 if (
DAG.MinOccupancy >= *TargetOcc) {
3095 commitRematerializations();
3102 const bool ShouldRollbackRemats = AchievedOcc < *TargetOcc;
3107 if (ShouldRollbackRemats) {
3108 for (
const RollbackInfo &Rollback : Rollbacks) {
3109 const auto &[Remat, RematMI, RegMap] = Rollback;
3110 Remat->DefMI->setDesc(
DAG.
TII->
get(RematMI->getOpcode()));
3111 for (
const auto &[MOIdx,
Reg] : RegMap)
3112 Remat->DefMI->getOperand(MOIdx).setReg(
Reg);
3117 for (
const auto &[
RegionIdx, OrigMIOrder, MaxPressure] : RegionReverts) {
3124 if (!ShouldRollbackRemats) {
3125 commitRematerializations();
3126 DAG.setTargetOccupancy(AchievedOcc);
3131 DAG.setTargetOccupancy(*TargetOcc - 1);
3136 BitVector RecomputeRP(
DAG.Regions.
size());
3137 DenseSet<Register> RecomputeLI;
3138 for (
const RollbackInfo &Rollback : Rollbacks) {
3139 const auto &[Remat, RematMI, RegMap] = Rollback;
3144 Register OriginalReg = Remat->DefMI->getOperand(0).getReg();
3145 Remat->UseMI->substituteRegister(
Reg, OriginalReg, 0, *
DAG.
TRI);
3147 <<
"] Deleting rematerialization " << *RematMI);
3148 DAG.deleteMI(Remat->UseRegion, RematMI);
3152 std::pair<Register, LaneBitmask> LiveReg(OriginalReg, Remat->Mask);
3153 for (
unsigned I : Remat->LiveIn.set_bits())
3154 DAG.LiveIns[
I].insert(LiveReg);
3155 for (
unsigned I : Remat->LiveOut.set_bits())
3158 RecomputeRP |= Rollback.Remat->Live;
3161 for (MachineOperand &MO : Rollback.Remat->DefMI->operands()) {
3162 if (MO.isReg() && MO.getReg().isVirtual())
3163 RecomputeLI.
insert(MO.getReg());
3170#ifdef EXPENSIVE_CHECKS
3175 for (
unsigned I : RecomputeRP.
set_bits())
3176 DAG.Pressure[
I] =
DAG.getRealRegPressure(
I);
3181void GCNScheduleDAGMILive::deleteMI(
unsigned RegionIdx, MachineInstr *
MI) {
3188 MI->eraseFromParent();
3191void GCNScheduleDAGMILive::setTargetOccupancy(
unsigned TargetOccupancy) {
3192 MinOccupancy = TargetOccupancy;
3213 if (HasIGLPInstrs) {
3214 SavedMutations.clear();
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements the BitVector class.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file defines the GCNRegPressure class, which tracks registry pressure by bookkeeping number of S...
static cl::opt< bool > GCNTrackers("amdgpu-use-amdgpu-trackers", cl::Hidden, cl::desc("Use the AMDGPU specific RPTrackers during scheduling"), cl::init(false))
static cl::opt< bool > DisableClusteredLowOccupancy("amdgpu-disable-clustered-low-occupancy-reschedule", cl::Hidden, cl::desc("Disable clustered low occupancy " "rescheduling for ILP scheduling stage."), cl::init(false))
#define REMAT_PREFIX
Allows to easily filter for this stage's debug output.
static cl::opt< bool > DisableRewriteMFMAFormSchedStage("amdgpu-disable-rewrite-mfma-form-sched-stage", cl::Hidden, cl::desc("Disable rewrite mfma rewrite scheduling stage"), cl::init(false))
static MachineInstr * getLastMIForRegion(MachineBasicBlock::iterator RegionBegin, MachineBasicBlock::iterator RegionEnd)
static bool shouldCheckPending(SchedBoundary &Zone, const TargetSchedModel *SchedModel)
static cl::opt< bool > RelaxedOcc("amdgpu-schedule-relaxed-occupancy", cl::Hidden, cl::desc("Relax occupancy targets for kernels which are memory " "bound (amdgpu-membound-threshold), or " "Wave Limited (amdgpu-limit-wave-threshold)."), cl::init(false))
static cl::opt< bool > DisableUnclusterHighRP("amdgpu-disable-unclustered-high-rp-reschedule", cl::Hidden, cl::desc("Disable unclustered high register pressure " "reduction scheduling stage."), cl::init(false))
static void printScheduleModel(std::set< std::pair< MachineInstr *, unsigned >, EarlierIssuingCycle > &ReadyCycles)
static cl::opt< bool > PrintMaxRPRegUsageAfterScheduler("amdgpu-print-max-reg-pressure-regusage-after-scheduler", cl::Hidden, cl::desc("Print a list of live registers along with their def/uses at the " "point of maximum register pressure after scheduling."), cl::init(false))
static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG)
static bool canUsePressureDiffs(const SUnit &SU)
Checks whether SU can use the cached DAG pressure diffs to compute the current register pressure.
static cl::opt< unsigned > PendingQueueLimit("amdgpu-scheduler-pending-queue-limit", cl::Hidden, cl::desc("Max (Available+Pending) size to inspect pending queue (0 disables)"), cl::init(256))
static void getRegisterPressures(bool AtTop, const RegPressureTracker &RPTracker, SUnit *SU, std::vector< unsigned > &Pressure, std::vector< unsigned > &MaxPressure, GCNDownwardRPTracker &DownwardTracker, GCNUpwardRPTracker &UpwardTracker, ScheduleDAGMI *DAG, const SIRegisterInfo *SRI)
static cl::opt< bool > PrintMaxRPRegUsageBeforeScheduler("amdgpu-print-max-reg-pressure-regusage-before-scheduler", cl::Hidden, cl::desc("Print a list of live registers along with their def/uses at the " "point of maximum register pressure before scheduling."), cl::init(false))
static SUnit * pickOnlyChoice(SchedBoundary &Zone, const TargetSchedModel *SchedModel)
static cl::opt< unsigned > ScheduleMetricBias("amdgpu-schedule-metric-bias", cl::Hidden, cl::desc("Sets the bias which adds weight to occupancy vs latency. Set it to " "100 to chase the occupancy only."), cl::init(10))
static Register UseReg(const MachineOperand &MO)
const HexagonInstrInfo * TII
static constexpr std::pair< StringLiteral, StringLiteral > ReplaceMap[]
A common definition of LaneBitmask for use in TableGen and CodeGen.
Promote Memory to Register
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static constexpr unsigned SM(unsigned Version)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
iterator_range< const_set_bits_iterator > set_bits() const
size_type size() const
size - Returns the number of bits in this bitvector.
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
bool initGCNSchedStage() override
bool shouldRevertScheduling(unsigned WavesAfter) override
bool initGCNRegion() override
iterator find(const_arg_type_t< KeyT > Val)
bool erase(const KeyT &Val)
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
GCNRegPressure bumpDownwardPressure(const MachineInstr *MI, const SIRegisterInfo *TRI) const
Mostly copy/paste from CodeGen/RegisterPressure.cpp Calculate the impact MI will have on CurPressure ...
GCNMaxILPSchedStrategy(const MachineSchedContext *C)
bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override
Apply a set of heuristics to a new candidate.
bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override
GCNMaxMemoryClauseSchedStrategy tries best to clause memory instructions as much as possible.
GCNMaxMemoryClauseSchedStrategy(const MachineSchedContext *C)
GCNMaxOccupancySchedStrategy(const MachineSchedContext *C, bool IsLegacyScheduler=false)
void finalizeSchedule() override
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
void schedule() override
Orders nodes according to selected style.
GCNPostScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S, bool RemoveKillFlags)
Models a register pressure target, allowing to evaluate and track register savings against that targe...
unsigned getNumRegsBenefit(const GCNRegPressure &SaveRP) const
Returns the benefit towards achieving the RP target that saving SaveRP represents,...
GCNRegPressure getPressure() const
DenseMap< unsigned, LaneBitmask > LiveRegSet
virtual bool initGCNRegion()
GCNRegPressure PressureBefore
bool isRegionWithExcessRP() const
bool mayCauseSpilling(unsigned WavesAfter)
ScheduleMetrics getScheduleMetrics(const std::vector< SUnit > &InputSchedule)
GCNScheduleDAGMILive & DAG
const GCNSchedStageID StageID
std::vector< MachineInstr * > Unsched
GCNRegPressure PressureAfter
virtual void finalizeGCNRegion()
SIMachineFunctionInfo & MFI
unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle, DenseMap< unsigned, unsigned > &ReadyCycles, const TargetSchedModel &SM)
virtual void finalizeGCNSchedStage()
virtual bool initGCNSchedStage()
virtual bool shouldRevertScheduling(unsigned WavesAfter)
std::vector< std::unique_ptr< ScheduleDAGMutation > > SavedMutations
GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
void modifyRegionSchedule(unsigned RegionIdx, MachineBasicBlock *MBB, ArrayRef< MachineInstr * > MIOrder)
Sets the schedule of region RegionIdx in block MBB to MIOrder.
MachineBasicBlock * CurrentMBB
This is a minimal scheduler strategy.
GCNDownwardRPTracker DownwardTracker
GCNSchedStrategy(const MachineSchedContext *C)
SmallVector< GCNSchedStageID, 4 > SchedStages
unsigned SGPRCriticalLimit
std::vector< unsigned > MaxPressure
bool hasNextStage() const
SUnit * pickNodeBidirectional(bool &IsTopNode, bool &PickedPending)
GCNSchedStageID getCurrentStage()
bool tryPendingCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Evaluates instructions in the pending queue using a subset of scheduling heuristics.
SmallVectorImpl< GCNSchedStageID >::iterator CurrentStage
unsigned VGPRCriticalLimit
void schedNode(SUnit *SU, bool IsTopNode) override
Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an instruction and updated scheduled/rem...
GCNDownwardRPTracker * getDownwardTracker()
std::vector< unsigned > Pressure
void initialize(ScheduleDAGMI *DAG) override
Initialize the strategy after building the DAG for a new region.
GCNUpwardRPTracker UpwardTracker
void printCandidateDecision(const SchedCandidate &Current, const SchedCandidate &Preferred)
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Cand, bool &IsPending, bool IsBottomUp)
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, const SIRegisterInfo *SRI, unsigned SGPRPressure, unsigned VGPRPressure, bool IsBottomUp)
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule, or return NULL.
GCNUpwardRPTracker * getUpwardTracker()
GCNSchedStageID getNextStage() const
void finalizeSchedule() override
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
void schedule() override
Orders nodes according to selected style.
GCNScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S)
void recede(const MachineInstr &MI)
Move to the state of RP just before the MI .
void traceCandidate(const SchedCandidate &Cand)
LLVM_ABI void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone)
Set the CandPolicy given a scheduling zone given the current resources and latencies inside and outsi...
MachineSchedPolicy RegionPolicy
const TargetSchedModel * SchedModel
const MachineSchedContext * Context
const TargetRegisterInfo * TRI
SchedCandidate BotCand
Candidate last picked from Bot boundary.
SchedCandidate TopCand
Candidate last picked from Top boundary.
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Apply a set of heuristics to a new candidate.
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
void schedNode(SUnit *SU, bool IsTopNode) override
Update the scheduler's state after scheduling a node.
GenericScheduler(const MachineSchedContext *C)
bool shouldRevertScheduling(unsigned WavesAfter) override
LiveInterval - This class represents the liveness of a register, or stack slot.
bool hasSubRanges() const
Returns true if subregister liveness information is available.
iterator_range< subrange_iterator > subranges()
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
void RemoveMachineInstrFromMaps(MachineInstr &MI)
SlotIndex getMBBEndIdx(const MachineBasicBlock *mbb) const
Return the last index in the given basic block.
LiveInterval & getInterval(Register Reg)
void removeInterval(Register Reg)
Interval removal.
LLVM_ABI void dump() const
MachineBasicBlock * getMBBFromIndex(SlotIndex index) const
LiveInterval & createAndComputeVirtRegInterval(Register Reg)
VNInfo * getVNInfoAt(SlotIndex Idx) const
getVNInfoAt - Return the VNInfo that is live at Idx, or NULL.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
succ_iterator succ_begin()
unsigned succ_size() const
iterator_range< pred_iterator > predecessors()
MachineInstrBundleIterator< MachineInstr > iterator
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
LLVM_ABI BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const
getblockFreq - Return block frequency.
LLVM_ABI BlockFrequency getEntryFreq() const
Divide a block's BlockFrequency::getFrequency() value by this value to obtain the entry block - relat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
bool verify(Pass *p=nullptr, const char *Banner=nullptr, raw_ostream *OS=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier, useful for debugger use.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Register getReg() const
getReg - Returns the register number.
LLVM_ABI LaneBitmask getMaxLaneMaskForVReg(Register Reg) const
Returns a mask covering all bits that can appear in lane masks of subregisters of the virtual registe...
LLVM_ABI bool isConstantPhysReg(MCRegister PhysReg) const
Returns true if PhysReg is unallocatable and constant throughout the function.
LLVM_ABI Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
bool shouldRevertScheduling(unsigned WavesAfter) override
bool shouldRevertScheduling(unsigned WavesAfter) override
bool shouldRevertScheduling(unsigned WavesAfter) override
void finalizeGCNRegion() override
bool initGCNRegion() override
bool initGCNSchedStage() override
Capture a change in pressure for a single pressure set.
Simple wrapper around std::function<void(raw_ostream&)>.
Helpers for implementing custom MachineSchedStrategy classes.
Track the current register pressure at some position in the instruction stream, and remember the high...
LLVM_ABI void advance()
Advance across the current instruction.
LLVM_ABI void getDownwardPressure(const MachineInstr *MI, std::vector< unsigned > &PressureResult, std::vector< unsigned > &MaxPressureResult)
Get the pressure of each PSet after traversing this instruction top-down.
const std::vector< unsigned > & getRegSetPressureAtPos() const
Get the register set pressure at the current position, which may be less than the pressure across the...
LLVM_ABI void getUpwardPressure(const MachineInstr *MI, std::vector< unsigned > &PressureResult, std::vector< unsigned > &MaxPressureResult)
Get the pressure of each PSet after traversing this instruction bottom-up.
RegionT * getParent() const
Get the parent of the Region.
GCNRPTracker::LiveRegSet & getLiveRegsForRegionIdx(unsigned RegionIdx)
List of registers defined and used by a machine instruction.
LLVM_ABI void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
LLVM_ABI void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the VReg...
LLVM_ABI void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS)
Use liveness information to find dead defs not marked with a dead flag and move them to the DeadDefs ...
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
bool isIGLPMutationOnly(unsigned Opcode) const
static bool isMAI(const MCInstrDesc &Desc)
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
unsigned getOccupancy() const
void increaseOccupancy(const MachineFunction &MF, unsigned Limit)
unsigned getDynamicVGPRBlockSize() const
unsigned getMinAllowedOccupancy() const
void limitOccupancy(const MachineFunction &MF)
Scheduling unit. This is a node in the scheduling DAG.
bool isInstr() const
Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.
unsigned TopReadyCycle
Cycle relative to start when node is ready.
unsigned NodeNum
Entry # of node in the node vector.
unsigned short Latency
Node latency.
bool isScheduled
True once scheduled.
unsigned ParentClusterIdx
The parent cluster id.
unsigned BotReadyCycle
Cycle relative to end when node is ready.
bool isBottomReady() const
SmallVector< SDep, 4 > Preds
All sunit predecessors.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Each Scheduling boundary is associated with ready queues.
LLVM_ABI void releasePending()
Release pending ready nodes in to the available queue.
LLVM_ABI unsigned getLatencyStallCycles(SUnit *SU)
Get the difference between the given SUnit's ready time and the current cycle.
LLVM_ABI SUnit * pickOnlyChoice()
Call this before applying any other heuristics to the Available queue.
LLVM_ABI void bumpCycle(unsigned NextCycle)
Move the boundary of scheduled code by one cycle.
unsigned getCurrMOps() const
Micro-ops issued in the current cycle.
unsigned getCurrCycle() const
Number of cycles to issue the instructions scheduled in this zone.
LLVM_ABI bool checkHazard(SUnit *SU)
Does this SU have a hazard within the current instruction group.
A ScheduleDAG for scheduling lists of MachineInstr.
bool ScheduleSingleMIRegions
True if regions with a single MI should be scheduled.
MachineBasicBlock::iterator RegionEnd
The end of the range to be scheduled.
virtual void finalizeSchedule()
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
virtual void exitRegion()
Called when the scheduler has finished scheduling the current region.
const MachineLoopInfo * MLI
bool RemoveKillFlags
True if the DAG builder should remove kill flags (in preparation for rescheduling).
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
ScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S)
RegPressureTracker RPTracker
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
ScheduleDAGMI(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S, bool RemoveKillFlags)
std::vector< std::unique_ptr< ScheduleDAGMutation > > Mutations
Ordered list of DAG postprocessing steps.
MachineRegisterInfo & MRI
Virtual/real register map.
const TargetInstrInfo * TII
Target instruction information.
const TargetRegisterInfo * TRI
Target processor register info.
MachineFunction & MF
Machine function.
static const unsigned ScaleFactor
unsigned getMetric() const
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
SlotIndex - An opaque wrapper around machine indexes.
static bool isSameInstr(SlotIndex A, SlotIndex B)
isSameInstr - Return true if A and B refer to the same instruction.
static bool isEarlierInstr(SlotIndex A, SlotIndex B)
isEarlierInstr - Return true if A refers to an instruction earlier than B.
SlotIndex getPrevSlot() const
Returns the previous slot in the index list.
SlotIndex getMBBStartIdx(unsigned Num) const
Returns the first index in the given basic block number.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
bool contains(const T &V) const
Check if the SmallSet contains the given element.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
bool isReMaterializable(const MachineInstr &MI) const
Return true if the instruction would be materializable at a point in the containing function where al...
virtual bool isIgnorableUse(const MachineOperand &MO) const
Given MO is a PhysReg use return if it can be ignored for the purpose of instruction rematerializatio...
uint8_t getCopyCost() const
Return the cost of copying a value between two registers in this class.
LaneBitmask getSubRegIndexLaneMask(unsigned SubIdx) const
Return a bitmask representing the parts of a register that are covered by SubIdx.
Provide an instruction scheduling machine model to CodeGen passes.
LLVM_ABI bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model.
unsigned getMicroOpBufferSize() const
Number of micro-ops that may be buffered for OOO execution.
bool initGCNSchedStage() override
bool initGCNRegion() override
void finalizeGCNSchedStage() override
bool shouldRevertScheduling(unsigned WavesAfter) override
VNInfo - Value Number Information.
SlotIndex def
The index of the defining instruction.
bool isPHIDef() const
Returns true if this value is defined by a PHI instruction (or was, PHI instructions may have been el...
static bool allUsesAvailableAt(const MachineInstr *MI, SlotIndex UseIdx, const LiveIntervals &LIS, const MachineRegisterInfo &MRI, const TargetInstrInfo &TII)
std::pair< iterator, bool > insert(const ValueT &V)
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize)
LLVM_READONLY int32_t getMFMASrcCVDstAGPROp(uint32_t Opcode)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
This is an optimization pass for GlobalISel generic memory operations.
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI unsigned getWeakLeft(const SUnit *SU, bool isTop)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
GCNRegPressure getRegPressure(const MachineRegisterInfo &MRI, Range &&LiveRegs)
std::unique_ptr< ScheduleDAGMutation > createIGroupLPDAGMutation(AMDGPU::SchedulingPhase Phase)
Phase specifes whether or not this is a reentry into the IGroupLPDAGMutation.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
std::pair< MachineBasicBlock::iterator, MachineBasicBlock::iterator > RegionBoundaries
A region's boundaries i.e.
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason, const TargetRegisterInfo *TRI, const MachineFunction &MF)
@ UnclusteredHighRPReschedule
@ MemoryClauseInitialSchedule
@ ClusteredLowOccupancyReschedule
auto reverse(ContainerTy &&C)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
LLVM_ABI cl::opt< bool > VerifyScheduling
LLVM_ABI bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary &Zone)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
IterT skipDebugInstructionsBackward(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It until it points to a non-debug instruction or to Begin and return the resulting iterator.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
bool isTheSameCluster(unsigned A, unsigned B)
Return whether the input cluster ID's are the same and valid.
DWARFExpression::Operation Op
LLVM_ABI bool tryGreater(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
DenseMap< MachineInstr *, GCNRPTracker::LiveRegSet > getLiveRegMap(Range &&R, bool After, LiveIntervals &LIS)
creates a map MachineInstr -> LiveRegSet. R - range of iterators on instructions. After - upon entry or exit of every instruction.
GCNRPTracker::LiveRegSet getLiveRegsBefore(const MachineInstr &MI, const LiveIntervals &LIS)
LLVM_ABI bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Return true if this heuristic determines order.
LLVM_ABI void dumpMaxRegPressure(MachineFunction &MF, GCNRegPressure::RegKind Kind, LiveIntervals &LIS, const MachineLoopInfo *MLI)
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
LLVM_ABI int biasPhysReg(const SUnit *SU, bool isTop)
Minimize physical register live ranges.
Implement std::hash so that hash_code can be used in STL containers.
bool operator()(std::pair< MachineInstr *, unsigned > A, std::pair< MachineInstr *, unsigned > B) const
unsigned getArchVGPRNum() const
unsigned getAGPRNum() const
unsigned getSGPRNum() const
Policy for scheduling the next instruction in the candidate's zone.
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of pickNode().
void setBest(SchedCandidate &Best)
void reset(const CandPolicy &NewPolicy)
LLVM_ABI void initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
SchedResourceDelta ResDelta
Status of an instruction's critical resource consumption.
unsigned DemandedResources
constexpr bool any() const
static constexpr LaneBitmask getNone()
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instantiate a scheduler.
Execution frequency information required by scoring heuristics.
SmallVector< uint64_t > Regions
Per-region execution frequencies. 0 when unknown.
uint64_t MinFreq
Minimum and maximum observed frequencies.
FreqInfo(MachineFunction &MF, const GCNScheduleDAGMILive &DAG)
PressureChange CriticalMax
PressureChange CurrentMax