GCNSchedStrategy.cpp
1//===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This contains a MachineSchedStrategy implementation for maximizing wave
11/// occupancy on GCN hardware.
12///
13/// This pass will apply multiple scheduling stages to the same function.
14/// Regions are first recorded in GCNScheduleDAGMILive::schedule. The actual
15/// entry point for the scheduling of those regions is
16/// GCNScheduleDAGMILive::runSchedStages.
17
18/// Generally, the reason for having multiple scheduling stages is to account
19/// for the kernel-wide effect of register usage on occupancy. Usually, only a
20/// few scheduling regions will have register pressure high enough to limit
21/// occupancy for the kernel, so constraints can be relaxed to improve ILP in
22/// other regions.
23///
24//===----------------------------------------------------------------------===//
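// Illustrative overview (assumed from the stage IDs handled below in
// GCNScheduleDAGMILive::createSchedStage, not normative): the
// occupancy-oriented strategy applies roughly this pipeline, and individual
// stages may bail out early:
//
//   OccInitialSchedule    - schedule every region for maximum occupancy
//   UnclusteredHighRP     - re-schedule only high-RP regions without clustering
//   ClusteredLowOccupancy - re-schedule with clustering at the lowered target
//   PreRARematerialize    - sink trivially rematerializable defs to their uses
//
// ILPInitialSchedule corresponds to the max-ILP strategy.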
25
26#include "GCNSchedStrategy.h"
27#include "AMDGPUIGroupLP.h"
30
31#define DEBUG_TYPE "machine-scheduler"
32
33using namespace llvm;
34
36 "amdgpu-disable-unclustered-high-rp-reschedule", cl::Hidden,
37 cl::desc("Disable unclustered high register pressure "
38 "reduction scheduling stage."),
39 cl::init(false));
40
42 "amdgpu-disable-clustered-low-occupancy-reschedule", cl::Hidden,
43 cl::desc("Disable clustered low occupancy "
44 "rescheduling for ILP scheduling stage."),
45 cl::init(false));
46
48 "amdgpu-schedule-metric-bias", cl::Hidden,
50 "Sets the bias which adds weight to occupancy vs latency. Set it to "
51 "100 to chase the occupancy only."),
52 cl::init(10));
53
54static cl::opt<bool>
55 RelaxedOcc("amdgpu-schedule-relaxed-occupancy", cl::Hidden,
56 cl::desc("Relax occupancy targets for kernels which are memory "
57 "bound (amdgpu-membound-threshold), or "
58 "Wave Limited (amdgpu-limit-wave-threshold)."),
59 cl::init(false));
60
61const unsigned ScheduleMetrics::ScaleFactor = 100;
62
63GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)
64 : GenericScheduler(C), TargetOccupancy(0), MF(nullptr),
65 HasHighPressure(false) {}
66
69
70 MF = &DAG->MF;
71
73
75 Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
77 Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);
78
80 // Set the initial TargetOccupancy to the maximum occupancy that we can
81 // achieve for this function. This effectively sets a lower bound on the
82 // 'Critical' register limits in the scheduler.
83 // Allow for lower occupancy targets if the kernel is wave limited or memory
84 // bound, and using the relaxed occupancy feature.
88 std::min(ST.getMaxNumSGPRs(TargetOccupancy, true), SGPRExcessLimit);
89
90 if (!KnownExcessRP) {
92 std::min(ST.getMaxNumVGPRs(TargetOccupancy), VGPRExcessLimit);
93 } else {
94 // This is similar to the ST.getMaxNumVGPRs(TargetOccupancy) result except
95 // it returns a reasonably small number for targets with lots of VGPRs, such
96 // as GFX10 and GFX11.
97 LLVM_DEBUG(dbgs() << "Region is known to spill, use alternative "
98 "VGPRCriticalLimit calculation method.\n");
99
100 unsigned Granule = AMDGPU::IsaInfo::getVGPRAllocGranule(&ST);
101 unsigned Addressable = AMDGPU::IsaInfo::getAddressableNumVGPRs(&ST);
102 unsigned VGPRBudget = alignDown(Addressable / TargetOccupancy, Granule);
103 VGPRBudget = std::max(VGPRBudget, Granule);
104 VGPRCriticalLimit = std::min(VGPRBudget, VGPRExcessLimit);
105 }
106
107 // Subtract error margin and bias from register limits and avoid overflow.
112
113 LLVM_DEBUG(dbgs() << "VGPRCriticalLimit = " << VGPRCriticalLimit
114 << ", VGPRExcessLimit = " << VGPRExcessLimit
115 << ", SGPRCriticalLimit = " << SGPRCriticalLimit
116 << ", SGPRExcessLimit = " << SGPRExcessLimit << "\n\n");
117}
118
120 bool AtTop,
121 const RegPressureTracker &RPTracker,
122 const SIRegisterInfo *SRI,
123 unsigned SGPRPressure,
124 unsigned VGPRPressure) {
125 Cand.SU = SU;
126 Cand.AtTop = AtTop;
127
128 if (!DAG->isTrackingPressure())
129 return;
130
131 // getDownwardPressure() and getUpwardPressure() make temporary changes to
132 // the tracker, so we need to pass those functions a non-const copy.
133 RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
134
135 Pressure.clear();
136 MaxPressure.clear();
137
138 if (AtTop)
140 else {
141 // FIXME: I think for bottom-up scheduling, the register pressure is cached
142 // and can be retrieved by DAG->getPressureDiff(SU).
143 TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
144 }
145
146 unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
147 unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
148
149 // If two instructions increase the pressure of different register sets
150 // by the same amount, the generic scheduler will prefer to schedule the
151 // instruction that increases the set with the least amount of registers,
152 // which in our case would be SGPRs. This is rarely what we want, so
153 // when we report excess/critical register pressure, we do it either
154 // only for VGPRs or only for SGPRs.
155
156 // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
157 const unsigned MaxVGPRPressureInc = 16;
158 bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
159 bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;
160
161
162 // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
163 // to increase the likelihood we don't go over the limits. We should improve
164 // the analysis to look through dependencies to find the path with the least
165 // register pressure.
166
167 // We only need to update the RPDelta for instructions that increase register
168 // pressure. Instructions that decrease or keep reg pressure the same will be
169 // marked as RegExcess in tryCandidate() when they are compared with
170 // instructions that increase the register pressure.
171 if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
172 HasHighPressure = true;
173 Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
174 Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
175 }
176
177 if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
178 HasHighPressure = true;
179 Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
180 Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
181 }
182
183 // Register pressure is considered 'CRITICAL' if it is approaching a value
184 // that would reduce the wave occupancy for the execution unit. When
185 // register pressure is 'CRITICAL', increasing SGPR and VGPR pressure both
186 // have the same cost, so we don't need to prefer one over the other.
187
188 int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
189 int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;
190
191 if (SGPRDelta >= 0 || VGPRDelta >= 0) {
192 HasHighPressure = true;
193 if (SGPRDelta > VGPRDelta) {
194 Cand.RPDelta.CriticalMax =
195 PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
196 Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
197 } else {
198 Cand.RPDelta.CriticalMax =
199 PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
200 Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
201 }
202 }
203}
204
205// This function is mostly cut and pasted from
206// GenericScheduler::pickNodeFromQueue()
208 const CandPolicy &ZonePolicy,
209 const RegPressureTracker &RPTracker,
210 SchedCandidate &Cand) {
211 const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
213 unsigned SGPRPressure = 0;
214 unsigned VGPRPressure = 0;
215 if (DAG->isTrackingPressure()) {
216 SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
217 VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
218 }
219 ReadyQueue &Q = Zone.Available;
220 for (SUnit *SU : Q) {
221
222 SchedCandidate TryCand(ZonePolicy);
223 initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI,
224 SGPRPressure, VGPRPressure);
225 // Pass SchedBoundary only when comparing nodes from the same boundary.
226 SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
227 tryCandidate(Cand, TryCand, ZoneArg);
228 if (TryCand.Reason != NoCand) {
229 // Initialize resource delta if needed in case future heuristics query it.
230 if (TryCand.ResDelta == SchedResourceDelta())
231 TryCand.initResourceDelta(Zone.DAG, SchedModel);
232 Cand.setBest(TryCand);
234 }
235 }
236}
237
238// This function is mostly cut and pasted from
239// GenericScheduler::pickNodeBidirectional()
241 // Schedule as far as possible in the direction of no choice. This is most
242 // efficient, but also provides the best heuristics for CriticalPSets.
243 if (SUnit *SU = Bot.pickOnlyChoice()) {
244 IsTopNode = false;
245 return SU;
246 }
247 if (SUnit *SU = Top.pickOnlyChoice()) {
248 IsTopNode = true;
249 return SU;
250 }
251 // Set the bottom-up policy based on the state of the current bottom zone and
252 // the instructions outside the zone, including the top zone.
253 CandPolicy BotPolicy;
254 setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
255 // Set the top-down policy based on the state of the current top zone and
256 // the instructions outside the zone, including the bottom zone.
257 CandPolicy TopPolicy;
258 setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
259
260 // See if BotCand is still valid (because we previously scheduled from Top).
261 LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
262 if (!BotCand.isValid() || BotCand.SU->isScheduled ||
263 BotCand.Policy != BotPolicy) {
264 BotCand.reset(CandPolicy());
265 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
266 assert(BotCand.Reason != NoCand && "failed to find the first candidate");
267 } else {
269#ifndef NDEBUG
270 if (VerifyScheduling) {
271 SchedCandidate TCand;
272 TCand.reset(CandPolicy());
273 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
274 assert(TCand.SU == BotCand.SU &&
275 "Last pick result should correspond to re-picking right now");
276 }
277#endif
278 }
279
280 // Check if the top Q has a better candidate.
281 LLVM_DEBUG(dbgs() << "Picking from Top:\n");
282 if (!TopCand.isValid() || TopCand.SU->isScheduled ||
283 TopCand.Policy != TopPolicy) {
284 TopCand.reset(CandPolicy());
285 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
286 assert(TopCand.Reason != NoCand && "failed to find the first candidate");
287 } else {
289#ifndef NDEBUG
290 if (VerifyScheduling) {
291 SchedCandidate TCand;
292 TCand.reset(CandPolicy());
293 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
294 assert(TCand.SU == TopCand.SU &&
295 "Last pick result should correspond to re-picking right now");
296 }
297#endif
298 }
299
300 // Pick best from BotCand and TopCand.
301 LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand);
302 dbgs() << "Bot Cand: "; traceCandidate(BotCand););
303 SchedCandidate Cand = BotCand;
305 tryCandidate(Cand, TopCand, nullptr);
306 if (TopCand.Reason != NoCand) {
307 Cand.setBest(TopCand);
308 }
309 LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand););
310
311 IsTopNode = Cand.AtTop;
312 return Cand.SU;
313}
314
315// This function is mostly cut and pasted from
316// GenericScheduler::pickNode()
318 if (DAG->top() == DAG->bottom()) {
319 assert(Top.Available.empty() && Top.Pending.empty() &&
320 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
321 return nullptr;
322 }
323 SUnit *SU;
324 do {
325 if (RegionPolicy.OnlyTopDown) {
326 SU = Top.pickOnlyChoice();
327 if (!SU) {
328 CandPolicy NoPolicy;
329 TopCand.reset(NoPolicy);
330 pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
331 assert(TopCand.Reason != NoCand && "failed to find a candidate");
332 SU = TopCand.SU;
333 }
334 IsTopNode = true;
335 } else if (RegionPolicy.OnlyBottomUp) {
336 SU = Bot.pickOnlyChoice();
337 if (!SU) {
338 CandPolicy NoPolicy;
339 BotCand.reset(NoPolicy);
340 pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
341 assert(BotCand.Reason != NoCand && "failed to find a candidate");
342 SU = BotCand.SU;
343 }
344 IsTopNode = false;
345 } else {
346 SU = pickNodeBidirectional(IsTopNode);
347 }
348 } while (SU->isScheduled);
349
350 if (SU->isTopReady())
351 Top.removeReady(SU);
352 if (SU->isBottomReady())
353 Bot.removeReady(SU);
354
355 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
356 << *SU->getInstr());
357 return SU;
358}
359
362 return *CurrentStage;
363}
364
367 if (!CurrentStage)
369 else
370 CurrentStage++;
371
372 return CurrentStage != SchedStages.end();
373}
374
377 return std::next(CurrentStage) != SchedStages.end();
378}
379
381 assert(CurrentStage && std::next(CurrentStage) != SchedStages.end());
382 return *std::next(CurrentStage);
383}
384
386 const MachineSchedContext *C)
392}
393
397}
398
400 SchedCandidate &TryCand,
401 SchedBoundary *Zone) const {
402 // Initialize the candidate if needed.
403 if (!Cand.isValid()) {
404 TryCand.Reason = NodeOrder;
405 return true;
406 }
407
408 // Avoid spilling by exceeding the register limit.
409 if (DAG->isTrackingPressure() &&
410 tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
411 RegExcess, TRI, DAG->MF))
412 return TryCand.Reason != NoCand;
413
414 // Bias PhysReg defs and copies to their uses and defined registers, respectively.
415 if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
416 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
417 return TryCand.Reason != NoCand;
418
419 bool SameBoundary = Zone != nullptr;
420 if (SameBoundary) {
421 // Prioritize instructions that read unbuffered resources by stall cycles.
422 if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
423 Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
424 return TryCand.Reason != NoCand;
425
426 // Avoid critical resource consumption and balance the schedule.
429 TryCand, Cand, ResourceReduce))
430 return TryCand.Reason != NoCand;
432 Cand.ResDelta.DemandedResources, TryCand, Cand,
434 return TryCand.Reason != NoCand;
435
436 // Unconditionally try to reduce latency.
437 if (tryLatency(TryCand, Cand, *Zone))
438 return TryCand.Reason != NoCand;
439
440 // Weak edges are for clustering and other constraints.
441 if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
442 getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak))
443 return TryCand.Reason != NoCand;
444 }
445
446 // Keep clustered nodes together to encourage downstream peephole
447 // optimizations which may reduce resource requirements.
448 //
449 // This is a best effort to set things up for a post-RA pass. Optimizations
450 // like generating loads of multiple registers should ideally be done within
451 // the scheduler pass by combining the loads during DAG postprocessing.
452 const SUnit *CandNextClusterSU =
454 const SUnit *TryCandNextClusterSU =
456 if (tryGreater(TryCand.SU == TryCandNextClusterSU,
457 Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster))
458 return TryCand.Reason != NoCand;
459
460 // Avoid increasing the max critical pressure in the scheduled region.
461 if (DAG->isTrackingPressure() &&
463 TryCand, Cand, RegCritical, TRI, DAG->MF))
464 return TryCand.Reason != NoCand;
465
466 // Avoid increasing the max pressure of the entire region.
467 if (DAG->isTrackingPressure() &&
468 tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
469 Cand, RegMax, TRI, DAG->MF))
470 return TryCand.Reason != NoCand;
471
472 if (SameBoundary) {
473 // Fall through to original instruction order.
474 if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) ||
475 (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
476 TryCand.Reason = NodeOrder;
477 return true;
478 }
479 }
480 return false;
481}
482
484 MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S)
485 : ScheduleDAGMILive(C, std::move(S)), ST(MF.getSubtarget<GCNSubtarget>()),
486 MFI(*MF.getInfo<SIMachineFunctionInfo>()),
487 StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy) {
488
489 LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
490 if (RelaxedOcc) {
491 MinOccupancy = std::min(MFI.getMinAllowedOccupancy(), StartingOccupancy);
492 if (MinOccupancy != StartingOccupancy)
493 LLVM_DEBUG(dbgs() << "Allowing Occupancy drops to " << MinOccupancy
494 << ".\n");
495 }
496}
497
498std::unique_ptr<GCNSchedStage>
499GCNScheduleDAGMILive::createSchedStage(GCNSchedStageID SchedStageID) {
500 switch (SchedStageID) {
502 return std::make_unique<OccInitialScheduleStage>(SchedStageID, *this);
504 return std::make_unique<UnclusteredHighRPStage>(SchedStageID, *this);
506 return std::make_unique<ClusteredLowOccStage>(SchedStageID, *this);
508 return std::make_unique<PreRARematStage>(SchedStageID, *this);
510 return std::make_unique<ILPInitialScheduleStage>(SchedStageID, *this);
511 }
512
513 llvm_unreachable("Unknown SchedStageID.");
514}
515
517 // Collect all scheduling regions. The actual scheduling is performed in
518 // GCNScheduleDAGMILive::finalizeSchedule.
519 Regions.push_back(std::pair(RegionBegin, RegionEnd));
520}
521
523GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {
525 RPTracker.advance(begin(), end(), &LiveIns[RegionIdx]);
526 return RPTracker.moveMaxPressure();
527}
528
529void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
530 const MachineBasicBlock *MBB) {
532
533 // If the block has only one successor then the live-ins of that successor
534 // are the live-outs of the current block. We can reuse the calculated live
535 // set if the successor will be sent to scheduling past the current block.
536
537 // However, due to a bug in LiveInterval analysis it may happen that two
538 // predecessors of the same successor block have different lane bitmasks for
539 // a live-out register. Work around that by sticking to a one-to-one
540 // relationship, i.e. one predecessor with one successor block.
541 const MachineBasicBlock *OnlySucc = nullptr;
542 if (MBB->succ_size() == 1) {
543 auto *Candidate = *MBB->succ_begin();
544 if (!Candidate->empty() && Candidate->pred_size() == 1) {
546 if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(Candidate))
547 OnlySucc = Candidate;
548 }
549 }
550
551 // Scheduler sends regions from the end of the block upwards.
552 size_t CurRegion = RegionIdx;
553 for (size_t E = Regions.size(); CurRegion != E; ++CurRegion)
554 if (Regions[CurRegion].first->getParent() != MBB)
555 break;
556 --CurRegion;
557
558 auto I = MBB->begin();
559 auto LiveInIt = MBBLiveIns.find(MBB);
560 auto &Rgn = Regions[CurRegion];
561 auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
562 if (LiveInIt != MBBLiveIns.end()) {
563 auto LiveIn = std::move(LiveInIt->second);
564 RPTracker.reset(*MBB->begin(), &LiveIn);
565 MBBLiveIns.erase(LiveInIt);
566 } else {
567 I = Rgn.first;
568 auto LRS = BBLiveInMap.lookup(NonDbgMI);
569#ifdef EXPENSIVE_CHECKS
570 assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
571#endif
572 RPTracker.reset(*I, &LRS);
573 }
574
575 for (;;) {
576 I = RPTracker.getNext();
577
578 if (Regions[CurRegion].first == I || NonDbgMI == I) {
579 LiveIns[CurRegion] = RPTracker.getLiveRegs();
580 RPTracker.clearMaxPressure();
581 }
582
583 if (Regions[CurRegion].second == I) {
584 Pressure[CurRegion] = RPTracker.moveMaxPressure();
585 if (CurRegion-- == RegionIdx)
586 break;
587 }
588 RPTracker.advanceToNext();
589 RPTracker.advanceBeforeNext();
590 }
591
592 if (OnlySucc) {
593 if (I != MBB->end()) {
594 RPTracker.advanceToNext();
596 }
597 RPTracker.advanceBeforeNext();
598 MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();
599 }
600}
601
603GCNScheduleDAGMILive::getBBLiveInMap() const {
604 assert(!Regions.empty());
605 std::vector<MachineInstr *> BBStarters;
606 BBStarters.reserve(Regions.size());
607 auto I = Regions.rbegin(), E = Regions.rend();
608 auto *BB = I->first->getParent();
609 do {
610 auto *MI = &*skipDebugInstructionsForward(I->first, I->second);
611 BBStarters.push_back(MI);
612 do {
613 ++I;
614 } while (I != E && I->first->getParent() == BB);
615 } while (I != E);
616 return getLiveRegMap(BBStarters, false /*After*/, *LIS);
617}
618
620 // Start actual scheduling here. This function is called by the base
621 // MachineScheduler after all regions have been recorded by
622 // GCNScheduleDAGMILive::schedule().
623 LiveIns.resize(Regions.size());
624 Pressure.resize(Regions.size());
625 RescheduleRegions.resize(Regions.size());
626 RegionsWithHighRP.resize(Regions.size());
627 RegionsWithExcessRP.resize(Regions.size());
628 RegionsWithMinOcc.resize(Regions.size());
629 RegionsWithIGLPInstrs.resize(Regions.size());
630 RescheduleRegions.set();
631 RegionsWithHighRP.reset();
632 RegionsWithExcessRP.reset();
633 RegionsWithMinOcc.reset();
634 RegionsWithIGLPInstrs.reset();
635
636 runSchedStages();
637}
638
639void GCNScheduleDAGMILive::runSchedStages() {
640 LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");
641
642 if (!Regions.empty())
643 BBLiveInMap = getBBLiveInMap();
644
645 GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl);
646 while (S.advanceStage()) {
647 auto Stage = createSchedStage(S.getCurrentStage());
648 if (!Stage->initGCNSchedStage())
649 continue;
650
651 for (auto Region : Regions) {
652 RegionBegin = Region.first;
653 RegionEnd = Region.second;
654 // Setup for scheduling the region and check whether it should be skipped.
655 if (!Stage->initGCNRegion()) {
656 Stage->advanceRegion();
657 exitRegion();
658 continue;
659 }
660
662 Stage->finalizeGCNRegion();
663 }
664
665 Stage->finalizeGCNSchedStage();
666 }
667}
668
669#ifndef NDEBUG
671 switch (StageID) {
673 OS << "Max Occupancy Initial Schedule";
674 break;
676 OS << "Unclustered High Register Pressure Reschedule";
677 break;
679 OS << "Clustered Low Occupancy Reschedule";
680 break;
682 OS << "Pre-RA Rematerialize";
683 break;
685 OS << "Max ILP Initial Schedule";
686 break;
687 }
688
689 return OS;
690}
691#endif
692
694 : DAG(DAG), S(static_cast<GCNSchedStrategy &>(*DAG.SchedImpl)), MF(DAG.MF),
695 MFI(DAG.MFI), ST(DAG.ST), StageID(StageID) {}
696
698 if (!DAG.LIS)
699 return false;
700
701 LLVM_DEBUG(dbgs() << "Starting scheduling stage: " << StageID << "\n");
702 return true;
703}
704
707 return false;
708
710 return false;
711
712 if (DAG.RegionsWithHighRP.none() && DAG.RegionsWithExcessRP.none())
713 return false;
714
718
719 InitialOccupancy = DAG.MinOccupancy;
720 // Aggressively try to reduce register pressure in the unclustered high RP
721 // stage. Temporarily increase occupancy target in the region.
724 if (MFI.getMaxWavesPerEU() > DAG.MinOccupancy)
725 MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);
726
728 dbgs()
729 << "Retrying function scheduling without clustering. "
730 "Aggressivly try to reduce register pressure to achieve occupancy "
731 << DAG.MinOccupancy << ".\n");
732
733 return true;
734}
735
738 return false;
739
741 return false;
742
743 // Don't bother trying to improve ILP in lower RP regions if occupancy has not
744 // been dropped. All regions will have already been scheduled with the ideal
745 // occupancy targets.
746 if (DAG.StartingOccupancy <= DAG.MinOccupancy)
747 return false;
748
750 dbgs() << "Retrying function scheduling with lowest recorded occupancy "
751 << DAG.MinOccupancy << ".\n");
752 return true;
753}
754
757 return false;
758
759 if (DAG.RegionsWithMinOcc.none() || DAG.Regions.size() == 1)
760 return false;
761
763 // Check maximum occupancy
765 DAG.MinOccupancy)
766 return false;
767
768 // FIXME: This pass will invalidate cached MBBLiveIns for regions
769 // in between the defs and the region we sink the def to. Cached pressure
770 // for regions where a def is sunk from will also be invalidated. Will
771 // need to be fixed if there is another pass after this pass.
773
774 collectRematerializableInstructions();
775 if (RematerializableInsts.empty() || !sinkTriviallyRematInsts(ST, TII))
776 return false;
777
779 dbgs() << "Retrying function scheduling with improved occupancy of "
780 << DAG.MinOccupancy << " from rematerializing\n");
781 return true;
782}
783
786 LLVM_DEBUG(dbgs() << "Ending scheduling stage: " << StageID << "\n");
787}
788
792 if (DAG.MinOccupancy > InitialOccupancy) {
793 for (unsigned IDX = 0; IDX < DAG.Pressure.size(); ++IDX)
794 DAG.RegionsWithMinOcc[IDX] =
795 DAG.Pressure[IDX].getOccupancy(DAG.ST) == DAG.MinOccupancy;
796
798 << " stage successfully increased occupancy to "
799 << DAG.MinOccupancy << '\n');
800 }
801
803}
804
806 // Check whether this new region is also a new block.
807 if (DAG.RegionBegin->getParent() != CurrentMBB)
809
810 unsigned NumRegionInstrs = std::distance(DAG.begin(), DAG.end());
811 DAG.enterRegion(CurrentMBB, DAG.begin(), DAG.end(), NumRegionInstrs);
812
813 // Skip empty scheduling regions (0 or 1 schedulable instructions).
814 if (DAG.begin() == DAG.end() || DAG.begin() == std::prev(DAG.end()))
815 return false;
816
817 LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
819 << " " << CurrentMBB->getName()
820 << "\n From: " << *DAG.begin() << " To: ";
822 else dbgs() << "End";
823 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
824
825 // Save original instruction order before scheduling for possible revert.
826 Unsched.clear();
827 Unsched.reserve(DAG.NumRegionInstrs);
830 for (auto &I : DAG) {
831 Unsched.push_back(&I);
832 if (I.getOpcode() == AMDGPU::SCHED_GROUP_BARRIER ||
833 I.getOpcode() == AMDGPU::IGLP_OPT)
834 DAG.RegionsWithIGLPInstrs[RegionIdx] = true;
835 }
836 } else {
837 for (auto &I : DAG)
838 Unsched.push_back(&I);
839 }
840
841 PressureBefore = DAG.Pressure[RegionIdx];
842
844 dbgs() << "Pressure before scheduling:\nRegion live-ins:"
845 << print(DAG.LiveIns[RegionIdx], DAG.MRI)
846 << "Region live-in pressure: "
848 << "Region register pressure: " << print(PressureBefore));
849
850 S.HasHighPressure = false;
852
853 if (DAG.RegionsWithIGLPInstrs[RegionIdx] &&
855 SavedMutations.clear();
857 bool IsInitialStage = StageID == GCNSchedStageID::OccInitialSchedule ||
860 IsInitialStage ? AMDGPU::SchedulingPhase::Initial
862 }
863
864 return true;
865}
866
868 // Only reschedule regions with the minimum occupancy or regions that may have
869 // spilling (excess register pressure).
870 if ((!DAG.RegionsWithMinOcc[RegionIdx] ||
871 DAG.MinOccupancy <= InitialOccupancy) &&
872 !DAG.RegionsWithExcessRP[RegionIdx])
873 return false;
874
876}
877
879 // We may need to reschedule this region if it wasn't rescheduled in the last
880 // stage, or if we found it was testing critical register pressure limits in
881 // the unclustered reschedule stage. The latter is because we may not have been
882 // able to raise the min occupancy in the previous stage so the region may be
883 // overly constrained even if it was already rescheduled.
884 if (!DAG.RegionsWithHighRP[RegionIdx])
885 return false;
886
888}
889
891 if (!DAG.RescheduleRegions[RegionIdx])
892 return false;
893
895}
896
898 if (CurrentMBB)
900
901 CurrentMBB = DAG.RegionBegin->getParent();
903 // Get real RP for the region if it hasn't been calculated before. After the
904 // initial schedule stage real RP will be collected after scheduling.
907 DAG.computeBlockPressure(RegionIdx, CurrentMBB);
908}
909
911 DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
912 DAG.RescheduleRegions[RegionIdx] = false;
913 if (S.HasHighPressure)
914 DAG.RegionsWithHighRP[RegionIdx] = true;
915
916 // Revert scheduling if we have dropped occupancy or there is some other
917 // reason that the original schedule is better.
919
920 if (DAG.RegionsWithIGLPInstrs[RegionIdx] &&
923
924 DAG.exitRegion();
925 RegionIdx++;
926}
927
929 // Check the results of scheduling.
930 PressureAfter = DAG.getRealRegPressure(RegionIdx);
931 LLVM_DEBUG(dbgs() << "Pressure after scheduling: " << print(PressureAfter));
932 LLVM_DEBUG(dbgs() << "Region: " << RegionIdx << ".\n");
933
936 DAG.Pressure[RegionIdx] = PressureAfter;
937 DAG.RegionsWithMinOcc[RegionIdx] =
938 PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;
939
940 // Early out if we have achieved the occupancy target.
941 LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
942 return;
943 }
944
945 unsigned TargetOccupancy =
947 unsigned WavesAfter =
948 std::min(TargetOccupancy, PressureAfter.getOccupancy(ST));
949 unsigned WavesBefore =
950 std::min(TargetOccupancy, PressureBefore.getOccupancy(ST));
951 LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
952 << ", after " << WavesAfter << ".\n");
953
954 // We may not be able to keep the current target occupancy because of the just
955 // scheduled region. We might still be able to revert scheduling if the
956 // occupancy before was higher, or if the current schedule has register
957 // pressure higher than the excess limits which could lead to more spilling.
958 unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);
959
960 // Allow memory bound functions to drop to 4 waves if not limited by an
961 // attribute.
962 if (WavesAfter < WavesBefore && WavesAfter < DAG.MinOccupancy &&
963 WavesAfter >= MFI.getMinAllowedOccupancy()) {
964 LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
965 << MFI.getMinAllowedOccupancy() << " waves\n");
966 NewOccupancy = WavesAfter;
967 }
968
969 if (NewOccupancy < DAG.MinOccupancy) {
970 DAG.MinOccupancy = NewOccupancy;
971 MFI.limitOccupancy(DAG.MinOccupancy);
972 DAG.RegionsWithMinOcc.reset();
973 LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
974 << DAG.MinOccupancy << ".\n");
975 }
977 // The maximum number of arch VGPRs on a non-unified register file, or the
978 // maximum VGPRs + AGPRs in the unified register file case.
978 unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
980 // The maximum number of arch VGPRs for both unified and non-unified register
981 // files.
981 unsigned MaxArchVGPRs = std::min(MaxVGPRs, ST.getAddressableNumArchVGPRs());
982 unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
983
984 if (PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) > MaxVGPRs ||
985 PressureAfter.getVGPRNum(false) > MaxArchVGPRs ||
986 PressureAfter.getAGPRNum() > MaxArchVGPRs ||
987 PressureAfter.getSGPRNum() > MaxSGPRs) {
988 DAG.RescheduleRegions[RegionIdx] = true;
989 DAG.RegionsWithHighRP[RegionIdx] = true;
990 DAG.RegionsWithExcessRP[RegionIdx] = true;
991 }
992
993 // Revert if this region's schedule would cause a drop in occupancy or
994 // spilling.
995 if (shouldRevertScheduling(WavesAfter)) {
997 } else {
998 DAG.Pressure[RegionIdx] = PressureAfter;
999 DAG.RegionsWithMinOcc[RegionIdx] =
1000 PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;
1001 }
1002}
1003
1004unsigned
1005GCNSchedStage::computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
1006 DenseMap<unsigned, unsigned> &ReadyCycles,
1007 const TargetSchedModel &SM) {
1008 unsigned ReadyCycle = CurrCycle;
1009 for (auto &D : SU.Preds) {
1010 if (D.isAssignedRegDep()) {
1011 MachineInstr *DefMI = D.getSUnit()->getInstr();
1012 unsigned Latency = SM.computeInstrLatency(DefMI);
1013 unsigned DefReady = ReadyCycles[DAG.getSUnit(DefMI)->NodeNum];
1014 ReadyCycle = std::max(ReadyCycle, DefReady + Latency);
1015 }
1016 }
1017 ReadyCycles[SU.NodeNum] = ReadyCycle;
1018 return ReadyCycle;
1019}
1020
1021#ifndef NDEBUG
1023 bool operator()(std::pair<MachineInstr *, unsigned> A,
1024 std::pair<MachineInstr *, unsigned> B) const {
1025 return A.second < B.second;
1026 }
1027};
1028
1029static void printScheduleModel(std::set<std::pair<MachineInstr *, unsigned>,
1030 EarlierIssuingCycle> &ReadyCycles) {
1031 if (ReadyCycles.empty())
1032 return;
1033 unsigned BBNum = ReadyCycles.begin()->first->getParent()->getNumber();
1034 dbgs() << "\n################## Schedule time ReadyCycles for MBB : " << BBNum
1035 << " ##################\n# Cycle #\t\t\tInstruction "
1036 " "
1037 " \n";
1038 unsigned IPrev = 1;
1039 for (auto &I : ReadyCycles) {
1040 if (I.second > IPrev + 1)
1041 dbgs() << "****************************** BUBBLE OF " << I.second - IPrev
1042 << " CYCLES DETECTED ******************************\n\n";
1043 dbgs() << "[ " << I.second << " ] : " << *I.first << "\n";
1044 IPrev = I.second;
1045 }
1046}
1047#endif
1048
1050GCNSchedStage::getScheduleMetrics(const std::vector<SUnit> &InputSchedule) {
1051#ifndef NDEBUG
1052 std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>
1053 ReadyCyclesSorted;
1054#endif
1056 unsigned SumBubbles = 0;
1057 DenseMap<unsigned, unsigned> ReadyCycles;
1058 unsigned CurrCycle = 0;
1059 for (auto &SU : InputSchedule) {
1060 unsigned ReadyCycle =
1061 computeSUnitReadyCycle(SU, CurrCycle, ReadyCycles, SM);
1062 SumBubbles += ReadyCycle - CurrCycle;
1063#ifndef NDEBUG
1064 ReadyCyclesSorted.insert(std::make_pair(SU.getInstr(), ReadyCycle));
1065#endif
1066 CurrCycle = ++ReadyCycle;
1067 }
1068#ifndef NDEBUG
1069 LLVM_DEBUG(
1070 printScheduleModel(ReadyCyclesSorted);
1071 dbgs() << "\n\t"
1072 << "Metric: "
1073 << (SumBubbles
1074 ? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
1075 : 1)
1076 << "\n\n");
1077#endif
1078
1079 return ScheduleMetrics(CurrCycle, SumBubbles);
1080}
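// Illustrative worked example for the bubble metric above (made-up latencies):
// consider a three instruction schedule A, B, C where B has a register
// dependence on A with a latency of 4 cycles and C has no stalls. A issues at
// cycle 0 and CurrCycle becomes 1. B is only ready at cycle 4, contributing
// 4 - 1 = 3 bubble cycles, and CurrCycle becomes 5. C issues immediately,
// adding nothing, and CurrCycle becomes 6. The metric shown by the debug
// output is then SumBubbles * ScaleFactor / CurrCycle = 3 * 100 / 6 = 50.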
1081
1084#ifndef NDEBUG
1085 std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>
1086 ReadyCyclesSorted;
1087#endif
1089 unsigned SumBubbles = 0;
1090 DenseMap<unsigned, unsigned> ReadyCycles;
1091 unsigned CurrCycle = 0;
1092 for (auto &MI : DAG) {
1093 SUnit *SU = DAG.getSUnit(&MI);
1094 if (!SU)
1095 continue;
1096 unsigned ReadyCycle =
1097 computeSUnitReadyCycle(*SU, CurrCycle, ReadyCycles, SM);
1098 SumBubbles += ReadyCycle - CurrCycle;
1099#ifndef NDEBUG
1100 ReadyCyclesSorted.insert(std::make_pair(SU->getInstr(), ReadyCycle));
1101#endif
1102 CurrCycle = ++ReadyCycle;
1103 }
1104#ifndef NDEBUG
1105 LLVM_DEBUG(
1106 printScheduleModel(ReadyCyclesSorted);
1107 dbgs() << "\n\t"
1108 << "Metric: "
1109 << (SumBubbles
1110 ? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
1111 : 1)
1112 << "\n\n");
1113#endif
1114
1115 return ScheduleMetrics(CurrCycle, SumBubbles);
1116}
1117
1118bool GCNSchedStage::shouldRevertScheduling(unsigned WavesAfter) {
1119 if (WavesAfter < DAG.MinOccupancy)
1120 return true;
1121
1122 return false;
1123}
1124
1127 return false;
1128
1130 return true;
1131
1132 if (mayCauseSpilling(WavesAfter))
1133 return true;
1134
1135 return false;
1136}
1137
1139 // If RP is not reduced in the unclustered reschedule stage, revert to the
1140 // old schedule.
1141 if ((WavesAfter <= PressureBefore.getOccupancy(ST) &&
1142 mayCauseSpilling(WavesAfter)) ||
1144 LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n");
1145 return true;
1146 }
1147
1148 // Do not attempt to relax schedule even more if we are already spilling.
1150 return false;
1151
1152 LLVM_DEBUG(
1153 dbgs()
1154 << "\n\t *** In shouldRevertScheduling ***\n"
1155 << " *********** BEFORE UnclusteredHighRPStage ***********\n");
1156 ScheduleMetrics MBefore =
1158 LLVM_DEBUG(
1159 dbgs()
1160 << "\n *********** AFTER UnclusteredHighRPStage ***********\n");
1162 unsigned OldMetric = MBefore.getMetric();
1163 unsigned NewMetric = MAfter.getMetric();
1164 unsigned WavesBefore =
1165 std::min(S.getTargetOccupancy(), PressureBefore.getOccupancy(ST));
1166 unsigned Profit =
1167 ((WavesAfter * ScheduleMetrics::ScaleFactor) / WavesBefore *
1168 ((OldMetric + ScheduleMetricBias) * ScheduleMetrics::ScaleFactor) /
1169 NewMetric) /
1170 ScheduleMetrics::ScaleFactor;
1171 LLVM_DEBUG(dbgs() << "\tMetric before " << MBefore << "\tMetric after "
1172 << MAfter << "Profit: " << Profit << "\n");
1173 return Profit < ScheduleMetrics::ScaleFactor;
1174}
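// Rough reading of the Profit check above: with ScaleFactor acting as a
// fixed-point 1.0, the unclustered schedule is kept only when approximately
//   (WavesAfter / WavesBefore) * (OldMetric + ScheduleMetricBias) / NewMetric >= 1,
// i.e. the latency metric must improve enough to pay for any loss in waves.
// A larger ScheduleMetricBias effectively makes it easier to keep the
// occupancy-improving schedule, which matches the option description above
// ("Set it to 100 to chase the occupancy only").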
1175
1178 return false;
1179
1181 return true;
1182
1183 if (mayCauseSpilling(WavesAfter))
1184 return true;
1185
1186 return false;
1187}
1188
1191 return true;
1192
1193 if (mayCauseSpilling(WavesAfter))
1194 return true;
1195
1196 return false;
1197}
1198
1200 if (mayCauseSpilling(WavesAfter))
1201 return true;
1202
1203 return false;
1204}
1205
1206bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) {
1207 if (WavesAfter <= MFI.getMinWavesPerEU() && isRegionWithExcessRP() &&
1209 LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
1210 return true;
1211 }
1212
1213 return false;
1214}
1215
1217 DAG.RegionsWithMinOcc[RegionIdx] =
1218 PressureBefore.getOccupancy(ST) == DAG.MinOccupancy;
1219 LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
1220 DAG.RescheduleRegions[RegionIdx] =
1221 S.hasNextStage() &&
1224 int SkippedDebugInstr = 0;
1225 for (MachineInstr *MI : Unsched) {
1226 if (MI->isDebugInstr()) {
1227 ++SkippedDebugInstr;
1228 continue;
1229 }
1230
1231 if (MI->getIterator() != DAG.RegionEnd) {
1232 DAG.BB->remove(MI);
1234 if (!MI->isDebugInstr())
1235 DAG.LIS->handleMove(*MI, true);
1236 }
1237
1238 // Reset read-undef flags and update them later.
1239 for (auto &Op : MI->all_defs())
1240 Op.setIsUndef(false);
1241 RegisterOperands RegOpers;
1242 RegOpers.collect(*MI, *DAG.TRI, DAG.MRI, DAG.ShouldTrackLaneMasks, false);
1243 if (!MI->isDebugInstr()) {
1245 // Adjust liveness and add missing dead+read-undef flags.
1247 RegOpers.adjustLaneLiveness(*DAG.LIS, DAG.MRI, SlotIdx, MI);
1248 } else {
1249 // Adjust for missing dead-def flags.
1250 RegOpers.detectDeadDefs(*MI, *DAG.LIS);
1251 }
1252 }
1253 DAG.RegionEnd = MI->getIterator();
1254 ++DAG.RegionEnd;
1255 LLVM_DEBUG(dbgs() << "Scheduling " << *MI);
1256 }
1257
1258 // After reverting the schedule, debug instrs will now be at the end of the
1259 // block and RegionEnd will point to the first debug instr. Increment
1260 // RegionEnd past debug instrs to the actual end of the scheduling region.
1261 while (SkippedDebugInstr-- > 0)
1262 ++DAG.RegionEnd;
1263
1264 // If Unsched.front() instruction is a debug instruction, this will actually
1265 // shrink the region since we moved all debug instructions to the end of the
1266 // block. Find the first instruction that is not a debug instruction.
1267 DAG.RegionBegin = Unsched.front()->getIterator();
1268 if (DAG.RegionBegin->isDebugInstr()) {
1269 for (MachineInstr *MI : Unsched) {
1270 if (MI->isDebugInstr())
1271 continue;
1272 DAG.RegionBegin = MI->getIterator();
1273 break;
1274 }
1275 }
1276
1277 // Then move the debug instructions back into their correct place and set
1278 // RegionBegin and RegionEnd if needed.
1280
1281 DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
1282}
1283
1284void PreRARematStage::collectRematerializableInstructions() {
1285 const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
1286 for (unsigned I = 0, E = DAG.MRI.getNumVirtRegs(); I != E; ++I) {
1288 if (!DAG.LIS->hasInterval(Reg))
1289 continue;
1290
1291 // TODO: Handle AGPR and SGPR rematerialization
1292 if (!SRI->isVGPRClass(DAG.MRI.getRegClass(Reg)) ||
1293 !DAG.MRI.hasOneDef(Reg) || !DAG.MRI.hasOneNonDBGUse(Reg))
1294 continue;
1295
1297 MachineInstr *Def = Op->getParent();
1298 if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(*Def))
1299 continue;
1300
1302 if (Def->getParent() == UseI->getParent())
1303 continue;
1304
1305 // We are only collecting defs that are defined in another block and are
1306 // live-through or used inside regions at MinOccupancy. This means that the
1307 // register must be in the live-in set for the region.
1308 bool AddedToRematList = false;
1309 for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
1310 auto It = DAG.LiveIns[I].find(Reg);
1311 if (It != DAG.LiveIns[I].end() && !It->second.none()) {
1312 if (DAG.RegionsWithMinOcc[I]) {
1313 RematerializableInsts[I][Def] = UseI;
1314 AddedToRematList = true;
1315 }
1316
1317 // Collect regions with rematerializable reg as live-in to avoid
1318 // searching later when updating RP.
1319 RematDefToLiveInRegions[Def].push_back(I);
1320 }
1321 }
1322 if (!AddedToRematList)
1323 RematDefToLiveInRegions.erase(Def);
1324 }
1325}
1326
1327bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
1328 const TargetInstrInfo *TII) {
1329 // Temporary copies of cached variables we will be modifying and replacing if
1330 // sinking succeeds.
1332 std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>, 32>
1333 NewRegions;
1336 BitVector NewRescheduleRegions;
1337 LiveIntervals *LIS = DAG.LIS;
1338
1339 NewRegions.resize(DAG.Regions.size());
1340 NewRescheduleRegions.resize(DAG.Regions.size());
1341
1342 // Collect only regions that have a rematerializable def as a live-in.
1343 SmallSet<unsigned, 16> ImpactedRegions;
1344 for (const auto &It : RematDefToLiveInRegions)
1345 ImpactedRegions.insert(It.second.begin(), It.second.end());
1346
1347 // Make copies of register pressure and live-ins cache that will be updated
1348 // as we rematerialize.
1349 for (auto Idx : ImpactedRegions) {
1350 NewPressure[Idx] = DAG.Pressure[Idx];
1351 NewLiveIns[Idx] = DAG.LiveIns[Idx];
1352 }
1353 NewRegions = DAG.Regions;
1354 NewRescheduleRegions.reset();
1355
1357 bool Improved = false;
1358 for (auto I : ImpactedRegions) {
1359 if (!DAG.RegionsWithMinOcc[I])
1360 continue;
1361
1362 Improved = false;
1363 int VGPRUsage = NewPressure[I].getVGPRNum(ST.hasGFX90AInsts());
1364 int SGPRUsage = NewPressure[I].getSGPRNum();
1365
1366 // TODO: Handle occupancy drop due to AGPR and SGPR.
1367 // Check if cause of occupancy drop is due to VGPR usage and not SGPR.
1368 if (ST.getOccupancyWithNumSGPRs(SGPRUsage) == DAG.MinOccupancy)
1369 break;
1370
1371 // The occupancy of this region could have been improved by a previous
1372 // iteration's sinking of defs.
1373 if (NewPressure[I].getOccupancy(ST) > DAG.MinOccupancy) {
1374 NewRescheduleRegions[I] = true;
1375 Improved = true;
1376 continue;
1377 }
1378
1379 // First check if we have enough trivially rematerializable instructions to
1380 // improve occupancy. Optimistically assume all instructions we are able to
1381 // sink decrease RP.
1382 int TotalSinkableRegs = 0;
1383 for (const auto &It : RematerializableInsts[I]) {
1384 MachineInstr *Def = It.first;
1385 Register DefReg = Def->getOperand(0).getReg();
1386 TotalSinkableRegs +=
1387 SIRegisterInfo::getNumCoveredRegs(NewLiveIns[I][DefReg]);
1388 }
1389 int VGPRsAfterSink = VGPRUsage - TotalSinkableRegs;
1390 unsigned OptimisticOccupancy = ST.getOccupancyWithNumVGPRs(VGPRsAfterSink);
1391 // If in the most optimistic scenario, we cannot improve occupancy, then do
1392 // not attempt to sink any instructions.
1393 if (OptimisticOccupancy <= DAG.MinOccupancy)
1394 break;
1395
1396 unsigned ImproveOccupancy = 0;
1398 for (auto &It : RematerializableInsts[I]) {
1399 MachineInstr *Def = It.first;
1400 MachineBasicBlock::iterator InsertPos =
1401 MachineBasicBlock::iterator(It.second);
1402 Register Reg = Def->getOperand(0).getReg();
1403 // Rematerialize MI to its use block. Since we are only rematerializing
1404 // instructions that do not have any virtual reg uses, we do not need to
1405 // call LiveRangeEdit::allUsesAvailableAt() and
1406 // LiveRangeEdit::canRematerializeAt().
1407 TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg,
1408 Def->getOperand(0).getSubReg(), *Def, *DAG.TRI);
1409 MachineInstr *NewMI = &*std::prev(InsertPos);
1410 LIS->InsertMachineInstrInMaps(*NewMI);
1411 LIS->removeInterval(Reg);
1413 InsertedMIToOldDef[NewMI] = Def;
1414
1415 // Update region boundaries in the scheduling region we sank from, since we
1416 // may sink an instruction that was at the beginning or end of its region.
1417 DAG.updateRegionBoundaries(NewRegions, Def, /*NewMI =*/nullptr,
1418 /*Removing =*/true);
1419
1420 // Update region boundaries in the region we sank to.
1421 DAG.updateRegionBoundaries(NewRegions, InsertPos, NewMI);
1422
1423 LaneBitmask PrevMask = NewLiveIns[I][Reg];
1424 // FIXME: Also update cached pressure for where the def was sunk from.
1425 // Update RP for all regions that have this reg as a live-in and remove
1426 // the reg from all regions as a live-in.
1427 for (auto Idx : RematDefToLiveInRegions[Def]) {
1428 NewLiveIns[Idx].erase(Reg);
1429 if (InsertPos->getParent() != DAG.Regions[Idx].first->getParent()) {
1430 // Def is live-through and not used in this block.
1431 NewPressure[Idx].inc(Reg, PrevMask, LaneBitmask::getNone(), DAG.MRI);
1432 } else {
1433 // Def is used and rematerialized into this block.
1434 GCNDownwardRPTracker RPT(*LIS);
1435 auto *NonDbgMI = &*skipDebugInstructionsForward(
1436 NewRegions[Idx].first, NewRegions[Idx].second);
1437 RPT.reset(*NonDbgMI, &NewLiveIns[Idx]);
1438 RPT.advance(NewRegions[Idx].second);
1439 NewPressure[Idx] = RPT.moveMaxPressure();
1440 }
1441 }
1442
1443 SinkedDefs.push_back(Def);
1444 ImproveOccupancy = NewPressure[I].getOccupancy(ST);
1445 if (ImproveOccupancy > DAG.MinOccupancy)
1446 break;
1447 }
1448
1449 // Remove defs we just sank from all regions' lists of sinkable defs.
1450 for (auto &Def : SinkedDefs)
1451 for (auto TrackedIdx : RematDefToLiveInRegions[Def])
1452 RematerializableInsts[TrackedIdx].erase(Def);
1453
1454 if (ImproveOccupancy <= DAG.MinOccupancy)
1455 break;
1456
1457 NewRescheduleRegions[I] = true;
1458 Improved = true;
1459 }
1460
1461 if (!Improved) {
1462 // Occupancy was not improved for all regions that were at MinOccupancy.
1463 // Undo sinking and remove newly rematerialized instructions.
1464 for (auto &Entry : InsertedMIToOldDef) {
1465 MachineInstr *MI = Entry.first;
1466 MachineInstr *OldMI = Entry.second;
1467 Register Reg = MI->getOperand(0).getReg();
1469 MI->eraseFromParent();
1470 OldMI->clearRegisterDeads(Reg);
1471 LIS->removeInterval(Reg);
1473 }
1474 return false;
1475 }
1476
1477 // Occupancy was improved for all regions.
1478 for (auto &Entry : InsertedMIToOldDef) {
1479 MachineInstr *MI = Entry.first;
1480 MachineInstr *OldMI = Entry.second;
1481
1482 // Remove OldMI from BBLiveInMap since we are sinking it from its MBB.
1483 DAG.BBLiveInMap.erase(OldMI);
1484
1485 // Remove OldMI and update LIS
1486 Register Reg = MI->getOperand(0).getReg();
1487 LIS->RemoveMachineInstrFromMaps(*OldMI);
1488 OldMI->eraseFromParent();
1489 LIS->removeInterval(Reg);
1491 }
1492
1493 // Update live-ins, register pressure, and regions caches.
1494 for (auto Idx : ImpactedRegions) {
1495 DAG.LiveIns[Idx] = NewLiveIns[Idx];
1496 DAG.Pressure[Idx] = NewPressure[Idx];
1497 DAG.MBBLiveIns.erase(DAG.Regions[Idx].first->getParent());
1498 }
1499 DAG.Regions = NewRegions;
1500 DAG.RescheduleRegions = NewRescheduleRegions;
1501
1503 MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);
1504
1505 return true;
1506}
1507
1508// Copied from MachineLICM
1509bool PreRARematStage::isTriviallyReMaterializable(const MachineInstr &MI) {
1511 return false;
1512
1513 for (const MachineOperand &MO : MI.all_uses())
1514 if (MO.getReg().isVirtual())
1515 return false;
1516
1517 return true;
1518}
1519
1520// When removing, we will have to check both the beginning and the end of the
1521// region. When inserting, we only have to check whether we are inserting NewMI
1522// in front of a scheduling region and do not need to check the end, since we
1523// will only ever be inserting before an already existing MI.
1524void GCNScheduleDAGMILive::updateRegionBoundaries(
1526 MachineBasicBlock::iterator>> &RegionBoundaries,
1527 MachineBasicBlock::iterator MI, MachineInstr *NewMI, bool Removing) {
1528 unsigned I = 0, E = RegionBoundaries.size();
1529 // Search for first region of the block where MI is located
1530 while (I != E && MI->getParent() != RegionBoundaries[I].first->getParent())
1531 ++I;
1532
1533 for (; I != E; ++I) {
1534 if (MI->getParent() != RegionBoundaries[I].first->getParent())
1535 return;
1536
1537 if (Removing && MI == RegionBoundaries[I].first &&
1538 MI == RegionBoundaries[I].second) {
1539 // MI is in a region with size 1; after removing it, the region will be
1540 // size 0. Set RegionBegin and RegionEnd to the end-of-block iterator.
1541 RegionBoundaries[I] =
1542 std::pair(MI->getParent()->end(), MI->getParent()->end());
1543 return;
1544 }
1545 if (MI == RegionBoundaries[I].first) {
1546 if (Removing)
1547 RegionBoundaries[I] =
1548 std::pair(std::next(MI), RegionBoundaries[I].second);
1549 else
1550 // Inserted NewMI in front of region, set new RegionBegin to NewMI
1551 RegionBoundaries[I] = std::pair(MachineBasicBlock::iterator(NewMI),
1552 RegionBoundaries[I].second);
1553 return;
1554 }
1555 if (Removing && MI == RegionBoundaries[I].second) {
1556 RegionBoundaries[I] = std::pair(RegionBoundaries[I].first, std::prev(MI));
1557 return;
1558 }
1559 }
1560}
1561
1563 return std::any_of(
1564 DAG->begin(), DAG->end(), [](MachineBasicBlock::iterator MI) {
1565 unsigned Opc = MI->getOpcode();
1566 return Opc == AMDGPU::SCHED_GROUP_BARRIER || Opc == AMDGPU::IGLP_OPT;
1567 });
1568}
1569
1571 MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S,
1572 bool RemoveKillFlags)
1573 : ScheduleDAGMI(C, std::move(S), RemoveKillFlags) {}
1574
1576 HasIGLPInstrs = hasIGLPInstrs(this);
1577 if (HasIGLPInstrs) {
1578 SavedMutations.clear();
1579 SavedMutations.swap(Mutations);
1581 }
1582
1584}
1585
1587 if (HasIGLPInstrs)
1588 SavedMutations.swap(Mutations);
1589
1591}
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock & MBB
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
static cl::opt< bool > DisableClusteredLowOccupancy("amdgpu-disable-clustered-low-occupancy-reschedule", cl::Hidden, cl::desc("Disable clustered low occupancy " "rescheduling for ILP scheduling stage."), cl::init(false))
static cl::opt< bool > RelaxedOcc("amdgpu-schedule-relaxed-occupancy", cl::Hidden, cl::desc("Relax occupancy targets for kernels which are memory " "bound (amdgpu-membound-threshold), or " "Wave Limited (amdgpu-limit-wave-threshold)."), cl::init(false))
static cl::opt< bool > DisableUnclusterHighRP("amdgpu-disable-unclustered-high-rp-reschedule", cl::Hidden, cl::desc("Disable unclustered high register pressure " "reduction scheduling stage."), cl::init(false))
static void printScheduleModel(std::set< std::pair< MachineInstr *, unsigned >, EarlierIssuingCycle > &ReadyCycles)
static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG)
static cl::opt< unsigned > ScheduleMetricBias("amdgpu-schedule-metric-bias", cl::Hidden, cl::desc("Sets the bias which adds weight to occupancy vs latency. Set it to " "100 to chase the occupancy only."), cl::init(10))
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
if(VerifyEach)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemWithWaveCount.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
BitVector & reset()
Definition: BitVector.h:392
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
Definition: BitVector.h:341
BitVector & set()
Definition: BitVector.h:351
bool none() const
none - Returns true if none of the bits are set.
Definition: BitVector.h:188
bool shouldRevertScheduling(unsigned WavesAfter) override
This class represents an Operation in the Expression.
bool erase(const KeyT &Val)
Definition: DenseMap.h:329
GCNMaxILPSchedStrategy(const MachineSchedContext *C)
bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override
Apply a set of heuristics to a new candidate.
GCNMaxOccupancySchedStrategy(const MachineSchedContext *C)
void finalizeSchedule() override
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
GCNPostScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S, bool RemoveKillFlags)
virtual bool initGCNRegion()
GCNSchedStrategy & S
GCNRegPressure PressureBefore
bool isRegionWithExcessRP() const
bool mayCauseSpilling(unsigned WavesAfter)
ScheduleMetrics getScheduleMetrics(const std::vector< SUnit > &InputSchedule)
GCNScheduleDAGMILive & DAG
const GCNSchedStageID StageID
std::vector< MachineInstr * > Unsched
GCNRegPressure PressureAfter
MachineFunction & MF
SIMachineFunctionInfo & MFI
unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle, DenseMap< unsigned, unsigned > &ReadyCycles, const TargetSchedModel &SM)
virtual void finalizeGCNSchedStage()
virtual bool initGCNSchedStage()
virtual bool shouldRevertScheduling(unsigned WavesAfter)
std::vector< std::unique_ptr< ScheduleDAGMutation > > SavedMutations
GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
MachineBasicBlock * CurrentMBB
const GCNSubtarget & ST
This is a minimal scheduler strategy.
const unsigned HighRPSGPRBias
GCNSchedStrategy(const MachineSchedContext *C)
SmallVector< GCNSchedStageID, 4 > SchedStages
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Cand)
SUnit * pickNodeBidirectional(bool &IsTopNode)
std::vector< unsigned > MaxPressure
GCNSchedStageID getCurrentStage()
MachineFunction * MF
SmallVectorImpl< GCNSchedStageID >::iterator CurrentStage
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, const SIRegisterInfo *SRI, unsigned SGPRPressure, unsigned VGPRPressure)
std::vector< unsigned > Pressure
void initialize(ScheduleDAGMI *DAG) override
Initialize the strategy after building the DAG for a new region.
const unsigned HighRPVGPRBias
SUnit * pickNode(bool &IsTopNode) override
Pick the best node to balance the schedule. Implements MachineSchedStrategy.
GCNSchedStageID getNextStage() const
void finalizeSchedule() override
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
GCNScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S)
unsigned getAddressableNumArchVGPRs() const
bool hasGFX90AInsts() const
unsigned computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Return occupancy for the given function.
const SIInstrInfo * getInstrInfo() const override
Definition: GCNSubtarget.h:251
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const
Return the maximum number of waves per SIMD for kernels using VGPRs VGPRs.
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const
Return the maximum number of waves per SIMD for kernels using SGPRs SGPRs.
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
void traceCandidate(const SchedCandidate &Cand)
void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone)
Set the CandPolicy given a scheduling zone given the current resources and latencies inside and outsi...
const TargetSchedModel * SchedModel
const MachineSchedContext * Context
const TargetRegisterInfo * TRI
GenericScheduler shrinks the unscheduled zone using heuristics to balance the schedule.
SchedCandidate BotCand
Candidate last picked from Bot boundary.
SchedCandidate TopCand
Candidate last picked from Top boundary.
MachineSchedPolicy RegionPolicy
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Apply a set of heuristics to a new candidate.
ScheduleDAGMILive * DAG
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
bool shouldRevertScheduling(unsigned WavesAfter) override
bool hasInterval(Register Reg) const
SlotIndex InsertMachineInstrInMaps(MachineInstr &MI)
void handleMove(MachineInstr &MI, bool UpdateFlags=false)
Call this method to notify LiveIntervals that instruction MI has been moved within a basic block.
SlotIndexes * getSlotIndexes() const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
void RemoveMachineInstrFromMaps(MachineInstr &MI)
void removeInterval(Register Reg)
Interval removal.
LiveInterval & createAndComputeVirtRegInterval(Register Reg)
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
MachineInstr * remove(MachineInstr *I)
Remove the unbundled instruction from the instruction list without deleting it.
unsigned succ_size() const
MachineInstrBundleIterator< MachineInstr > iterator
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do so.
Representation of each machine instruction.
Definition: MachineInstr.h:69
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:329
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void clearRegisterDeads(Register Reg)
Clear all dead flags on operands defining register Reg.
MachineOperand class - Representation of each machine instruction operand.
bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
MachineOperand * getOneDef(Register Reg) const
Returns the defining operand if there is exactly one operand defining the specified register, otherwise nullptr.
use_instr_nodbg_iterator use_instr_nodbg_begin(Register RegNo) const
bool hasOneDef(Register RegNo) const
Return true if there is exactly one operand defining the specified register.
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
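Together, these MachineRegisterInfo queries answer the common question "is this virtual register a simple single-def, single-use value?". A hedged sketch that walks every virtual register (the callback-based helper is hypothetical):

  // Sketch: visit vregs with exactly one def and one non-debug use;
  // such values are the easiest to sink or rematerialize.
  void forEachSimpleVReg(const MachineRegisterInfo &MRI,
                         function_ref<void(Register, MachineOperand *)> Fn) {
    for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
      Register Reg = Register::index2VirtReg(I);
      if (!MRI.hasOneDef(Reg) || !MRI.hasOneNonDBGUse(Reg))
        continue;
      Fn(Reg, MRI.getOneDef(Reg)); // the unique defining operand
    }
  }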
bool shouldRevertScheduling(unsigned WavesAfter) override
bool shouldRevertScheduling(unsigned WavesAfter) override
bool initGCNRegion() override
bool initGCNSchedStage() override
Capture a change in pressure for a single pressure set.
void setUnitInc(int Inc)
Helpers for implementing custom MachineSchedStrategy classes.
bool empty() const
Track the current register pressure at some position in the instruction stream, and remember the high water mark.
void advance()
Advance across the current instruction.
void getDownwardPressure(const MachineInstr *MI, std::vector< unsigned > &PressureResult, std::vector< unsigned > &MaxPressureResult)
Get the pressure of each PSet after traversing this instruction top-down.
const std::vector< unsigned > & getRegSetPressureAtPos() const
Get the register set pressure at the current position, which may be less than the pressure across the...
void getUpwardPressure(const MachineInstr *MI, std::vector< unsigned > &PressureResult, std::vector< unsigned > &MaxPressureResult)
Get the pressure of each PSet after traversing this instruction bottom-up.
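getUpwardPressure/getDownwardPressure let a strategy preview an instruction's pressure impact without moving the tracker, which is how per-candidate SGPR/VGPR numbers are typically obtained. A hedged sketch, assuming RPTracker is positioned at the candidate and MI points to the candidate's instruction; the AMDGPU pressure-set indices shown are assumed to be the ones of interest:

  // Sketch: preview the bottom-up pressure effect of scheduling MI.
  std::vector<unsigned> Pressure, MaxPressure;
  RPTracker.getUpwardPressure(MI, Pressure, MaxPressure);
  unsigned CandVGPRs = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
  unsigned CandSGPRs = Pressure[AMDGPU::RegisterPressureSets::SGPR_32];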
unsigned getNumAllocatableRegs(const TargetRegisterClass *RC) const
getNumAllocatableRegs - Returns the number of actually allocatable registers in RC in the current function.
List of registers defined and used by a machine instruction.
void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the Regi...
void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS)
Use liveness information to find dead defs not marked with a dead flag and move them to the DeadDefs ...
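The usual call sequence for RegisterOperands is collect, then detectDeadDefs, then adjustLaneLiveness, so that later pressure updates see lane-accurate defs and uses. A hedged sketch, assuming TRI, MRI, LIS and SlotIdx are valid for the instruction MI:

  // Sketch: gather lane-accurate defs/uses for a single instruction.
  RegisterOperands RegOpers;
  RegOpers.collect(MI, *TRI, MRI, /*TrackLaneMasks=*/true, /*IgnoreDead=*/false);
  RegOpers.detectDeadDefs(MI, *LIS);                    // find unmarked dead defs
  RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, &MI); // trim undef/dead lanes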
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition: Register.h:84
const TargetSchedModel & getSchedModel() const
Definition: SIInstrInfo.h:1374
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
void increaseOccupancy(const MachineFunction &MF, unsigned Limit)
void limitOccupancy(const MachineFunction &MF)
static unsigned getNumCoveredRegs(LaneBitmask LM)
static bool isVGPRClass(const TargetRegisterClass *RC)
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:264
bool isScheduled
True once scheduled.
Definition: ScheduleDAG.h:284
bool isBottomReady() const
Definition: ScheduleDAG.h:449
bool isTopReady() const
Definition: ScheduleDAG.h:446
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:256
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:373
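An SUnit exposes both the DAG structure (Preds) and the underlying MachineInstr (getInstr()), so simple structural checks are just loops over its edges. A hedged sketch counting unscheduled predecessors (the helper name is hypothetical):

  // Sketch: how many predecessors of SU are still unscheduled?
  unsigned numUnscheduledPreds(const SUnit &SU) {
    unsigned N = 0;
    for (const SDep &Pred : SU.Preds)
      if (!Pred.getSUnit()->isScheduled)
        ++N;
    return N;
  }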
Each Scheduling boundary is associated with ready queues.
unsigned getLatencyStallCycles(SUnit *SU)
Get the difference between the given SUnit's ready time and the current cycle.
ScheduleDAGMI * DAG
SUnit * pickOnlyChoice()
Call this before applying any other heuristics to the Available queue.
void removeReady(SUnit *SU)
Remove SU from the ready set for this boundary.
A ScheduleDAG for scheduling lists of MachineInstr.
MachineBasicBlock::iterator end() const
Returns an iterator to the bottom of the current scheduling region.
MachineBasicBlock * BB
The block in which to insert instructions.
MachineBasicBlock::iterator RegionEnd
The end of the range to be scheduled.
virtual void finalizeSchedule()
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
virtual void exitRegion()
Called when the scheduler has finished scheduling the current region.
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
unsigned NumRegionInstrs
Instructions in this region (distance(RegionBegin, RegionEnd)).
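A scheduling region is just the half-open range [RegionBegin, RegionEnd) within BB, with NumRegionInstrs as its precomputed length. A hedged sketch that walks the current region while skipping debug instructions (DAG is assumed to be the live ScheduleDAGInstrs):

  // Sketch: visit the non-debug instructions of the current region.
  for (MachineBasicBlock::iterator I = DAG.begin(), E = DAG.end(); I != E;) {
    I = skipDebugInstructionsForward(I, E);
    if (I == E)
      break;
    MachineInstr &MI = *I++;
    // ... inspect MI ...
  }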
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while updating LiveIntervals and tracking register pressure.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
const RegPressureTracker & getBotRPTracker() const
bool isTrackingPressure() const
Return true if register pressure tracking is enabled.
const RegPressureTracker & getTopRPTracker() const
RegPressureTracker RPTracker
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
void startBlock(MachineBasicBlock *bb) override
Prepares to perform scheduling in the given block.
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
MachineBasicBlock::iterator top() const
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
MachineBasicBlock::iterator bottom() const
void finishBlock() override
Cleans up after scheduling in the given block.
LiveIntervals * LIS
const SUnit * getNextClusterPred() const
void placeDebugValues()
Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
const SUnit * getNextClusterSucc() const
std::vector< std::unique_ptr< ScheduleDAGMutation > > Mutations
Ordered list of DAG postprocessing steps.
MachineRegisterInfo & MRI
Virtual/real register map.
Definition: ScheduleDAG.h:560
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:557
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:561
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:558
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:559
static const unsigned ScaleFactor
unsigned getMetric() const
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:68
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
Definition: SlotIndexes.h:240
SlotIndexes pass.
Definition: SlotIndexes.h:300
SlotIndex getMBBStartIdx(unsigned Num) const
Returns the first index in the given basic block number.
Definition: SlotIndexes.h:452
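Because SlotIndex values are totally ordered within a function, questions like "was this register defined before block N begins?" reduce to an index comparison. A hedged sketch, assuming Indexes is a valid SlotIndexes pointer and DefMI defines the register of interest:

  // Sketch: does DefMI's def slot precede the start of basic block MBBNum?
  SlotIndex DefIdx = Indexes->getInstructionIndex(DefMI).getRegSlot();
  bool DefinedBefore = DefIdx < Indexes->getMBBStartIdx(MBBNum);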
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less than N).
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
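insert() returns a pair whose bool is true only when the element was newly added, which makes SmallSet a convenient de-duplicator for small work lists. A hedged sketch (Regs and process() are illustrative, not part of this file):

  // Sketch: handle each register at most once.
  SmallSet<Register, 8> Seen;
  for (Register Reg : Regs)
    if (Seen.insert(Reg).second)
      process(Reg);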
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
TargetInstrInfo - Interface to description of machine instruction set.
bool isTriviallyReMaterializable(const MachineInstr &MI) const
Return true if the instruction is trivially rematerializable, meaning it has no side effects and requ...
Provide an instruction scheduling machine model to CodeGen passes.
virtual const TargetInstrInfo * getInstrInfo() const
bool shouldRevertScheduling(unsigned WavesAfter) override
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
NodeAddr< DefNode * > Def
Definition: RDFGraph.h:384
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)
unsigned getWeakLeft(const SUnit *SU, bool isTop)
GCNRegPressure getRegPressure(const MachineRegisterInfo &MRI, Range &&LiveRegs)
std::unique_ptr< ScheduleDAGMutation > createIGroupLPDAGMutation(AMDGPU::SchedulingPhase Phase)
Phase specifies whether or not this is a reentry into the IGroupLPDAGMutation.
cl::opt< bool > VerifyScheduling
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr)
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason, const TargetRegisterInfo *TRI, const MachineFunction &MF)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary &Zone)
bool tryGreater(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:293
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1858
DenseMap< MachineInstr *, GCNRPTracker::LiveRegSet > getLiveRegMap(Range &&R, bool After, LiveIntervals &LIS)
Creates a map MachineInstr -> LiveRegSet. R is a range of iterators over instructions; After selects whether liveness is taken upon entry to or exit from each instruction.
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value that is Skew mod Align.
Definition: MathExtras.h:428
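alignDown rounds toward zero onto a multiple of Align (optionally offset by Skew); this is the rounding used when fitting a per-wave register budget onto the hardware's allocation granule. A hedged numeric example:

  // Sketch: alignDown clamps to the next lower multiple of the granule.
  unsigned A = alignDown(100, 8); // == 96
  unsigned B = alignDown(96, 8);  // == 96, already aligned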
GCNRPTracker::LiveRegSet getLiveRegsBefore(const MachineInstr &MI, const LiveIntervals &LIS)
bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Return true if this heuristic determines order.
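tryLess, tryGreater, tryLatency and tryPressure all share the same contract: they return true as soon as a comparison decides the order, recording the winning reason in the candidate, so candidate comparison code is written as a chain of early-outs. A hedged sketch of such a chain (the particular priorities compared are illustrative, not this file's policy):

  // Sketch: a heuristic chain in the style of tryCandidate.
  bool orderCandidates(GenericSchedulerBase::SchedCandidate &Cand,
                       GenericSchedulerBase::SchedCandidate &TryCand,
                       SchedBoundary &Zone) {
    // Prefer the candidate with fewer weak edges left (illustrative).
    if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()),
                getWeakLeft(Cand.SU, Zone.isTop()), TryCand, Cand,
                GenericSchedulerBase::Weak))
      return true;
    // Otherwise fall back to a latency comparison.
    return tryLatency(TryCand, Cand, Zone);
  }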
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
int biasPhysReg(const SUnit *SU, bool isTop)
Minimize physical register live ranges.
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
bool operator()(std::pair< MachineInstr *, unsigned > A, std::pair< MachineInstr *, unsigned > B) const
unsigned getOccupancy(const GCNSubtarget &ST) const
unsigned getVGPRNum(bool UnifiedVGPRFile) const
unsigned getAGPRNum() const
unsigned getSGPRNum() const
bool less(const MachineFunction &MF, const GCNRegPressure &O, unsigned MaxOccupancy=std::numeric_limits< unsigned >::max()) const
Compares this GCNRegPressure to O, returning true if this is less.
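GCNRegPressure bundles the per-kind register counts and can turn them into either an occupancy figure or an ordering. A hedged sketch deciding whether a rescheduled region regressed (ST, MF and the pressure snapshots are assumed valid; the helper name is hypothetical):

  // Sketch: did the new pressure cost occupancy, or merely grow?
  bool pressureRegressed(const GCNSubtarget &ST, const MachineFunction &MF,
                         const GCNRegPressure &Before,
                         const GCNRegPressure &After) {
    if (After.getOccupancy(ST) < Before.getOccupancy(ST))
      return true;                 // fewer waves fit: a clear regression
    return Before.less(MF, After); // same waves, but strictly higher pressure
  }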
Policy for scheduling the next instruction in the candidate's zone.
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of p...
void reset(const CandPolicy &NewPolicy)
void initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
Status of an instruction's critical resource consumption.
static constexpr LaneBitmask getNone()
Definition: LaneBitmask.h:81
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instantiate a scheduler.
RegisterClassInfo * RegClassInfo
PressureChange CriticalMax