1//===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This contains a MachineSchedStrategy implementation for maximizing wave
11/// occupancy on GCN hardware.
12///
13/// This pass will apply multiple scheduling stages to the same function.
14/// Regions are first recorded in GCNScheduleDAGMILive::schedule. The actual
15/// entry point for the scheduling of those regions is
16/// GCNScheduleDAGMILive::runSchedStages.
17///
18/// Generally, the reason for having multiple scheduling stages is to account
19/// for the kernel-wide effect of register usage on occupancy. Usually, only a
20/// few scheduling regions will have register pressure high enough to limit
21/// occupancy for the kernel, so constraints can be relaxed to improve ILP in
22/// other regions.
23///
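/// As of this version, the occupancy-oriented strategy applies, in order: an
/// initial max-occupancy schedule, an unclustered high-register-pressure
/// reschedule, a clustered low-occupancy reschedule, and a pre-RA
/// rematerialization stage; the ILP-oriented strategy runs a single max-ILP
/// initial schedule (see createSchedStage below).
///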
24//===----------------------------------------------------------------------===//
25
26#include "GCNSchedStrategy.h"
27#include "AMDGPUIGroupLP.h"
28#include "SIMachineFunctionInfo.h"
29#include "llvm/CodeGen/RegisterClassInfo.h"
30
31#define DEBUG_TYPE "machine-scheduler"
32
33using namespace llvm;
34
35static cl::opt<bool>
36    DisableUnclusterHighRP("amdgpu-disable-unclustred-high-rp-reschedule",
37                           cl::Hidden,
38                           cl::desc("Disable unclustred high register pressure "
39                                    "reduction scheduling stage."),
40                           cl::init(false));
41static cl::opt<unsigned> ScheduleMetricBias(
42    "amdgpu-schedule-metric-bias", cl::Hidden,
43    cl::desc(
44        "Sets the bias which adds weight to occupancy vs latency. Set it to "
45        "100 to chase the occupancy only."),
46    cl::init(10));
47
48const unsigned ScheduleMetrics::ScaleFactor = 100;
49
50GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)
51    : GenericScheduler(C), TargetOccupancy(0), MF(nullptr),
52      HasHighPressure(false) {}
53
54void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
55  GenericScheduler::initialize(DAG);
56
57 MF = &DAG->MF;
58
59  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
60
61  SGPRExcessLimit =
62      Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
63  VGPRExcessLimit =
64      Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);
65
66  SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
67  // Set the initial TargetOccupancy to the maximum occupancy that we can
68  // achieve for this function. This effectively sets a lower bound on the
69  // 'Critical' register limits in the scheduler.
70  TargetOccupancy = MFI.getOccupancy();
71  SGPRCriticalLimit =
72      std::min(ST.getMaxNumSGPRs(TargetOccupancy, true), SGPRExcessLimit);
73
74 if (!KnownExcessRP) {
75    VGPRCriticalLimit =
76        std::min(ST.getMaxNumVGPRs(TargetOccupancy), VGPRExcessLimit);
77 } else {
78 // This is similar to ST.getMaxNumVGPRs(TargetOccupancy) result except
79 // returns a reasonably small number for targets with lots of VGPRs, such
80 // as GFX10 and GFX11.
81 LLVM_DEBUG(dbgs() << "Region is known to spill, use alternative "
82 "VGPRCriticalLimit calculation method.\n");
83
84 unsigned Granule = AMDGPU::IsaInfo::getVGPRAllocGranule(&ST);
85 unsigned Addressable = AMDGPU::IsaInfo::getAddressableNumVGPRs(&ST);
86 unsigned VGPRBudget = alignDown(Addressable / TargetOccupancy, Granule);
87 VGPRBudget = std::max(VGPRBudget, Granule);
88 VGPRCriticalLimit = std::min(VGPRBudget, VGPRExcessLimit);
89 }
90
91 // Subtract error margin and bias from register limits and avoid overflow.
96
97 LLVM_DEBUG(dbgs() << "VGPRCriticalLimit = " << VGPRCriticalLimit
98 << ", VGPRExcessLimit = " << VGPRExcessLimit
99 << ", SGPRCriticalLimit = " << SGPRCriticalLimit
100 << ", SGPRExcessLimit = " << SGPRExcessLimit << "\n\n");
101}
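// In short: the Excess limits above are the raw allocatable-register counts
// (exceeding them implies spilling), while the Critical limits are the tighter
// per-occupancy bounds derived from TargetOccupancy; tryCandidate() compares
// candidates against both.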
102
103void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
104                                     bool AtTop,
105 const RegPressureTracker &RPTracker,
106 const SIRegisterInfo *SRI,
107 unsigned SGPRPressure,
108 unsigned VGPRPressure) {
109 Cand.SU = SU;
110 Cand.AtTop = AtTop;
111
112 if (!DAG->isTrackingPressure())
113 return;
114
115 // getDownwardPressure() and getUpwardPressure() make temporary changes to
116 // the tracker, so we need to pass those function a non-const copy.
117 RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
118
119 Pressure.clear();
120 MaxPressure.clear();
121
122  if (AtTop)
123    TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
124  else {
125 // FIXME: I think for bottom up scheduling, the register pressure is cached
126    // and can be retrieved by DAG->getPressureDiff(SU).
127 TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
128 }
129
130 unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
131 unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
132
133 // If two instructions increase the pressure of different register sets
134 // by the same amount, the generic scheduler will prefer to schedule the
135 // instruction that increases the set with the least amount of registers,
136 // which in our case would be SGPRs. This is rarely what we want, so
137 // when we report excess/critical register pressure, we do it either
138 // only for VGPRs or only for SGPRs.
139
140 // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
141 const unsigned MaxVGPRPressureInc = 16;
142 bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
143 bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;
144
145
146 // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
147 // to increase the likelihood we don't go over the limits. We should improve
148 // the analysis to look through dependencies to find the path with the least
149 // register pressure.
150
151 // We only need to update the RPDelta for instructions that increase register
152 // pressure. Instructions that decrease or keep reg pressure the same will be
153 // marked as RegExcess in tryCandidate() when they are compared with
154 // instructions that increase the register pressure.
155 if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
156 HasHighPressure = true;
157 Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
158 Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
159 }
160
161 if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
162 HasHighPressure = true;
163 Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
164 Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
165 }
166
167 // Register pressure is considered 'CRITICAL' if it is approaching a value
168 // that would reduce the wave occupancy for the execution unit. When
169 // register pressure is 'CRITICAL', increasing SGPR and VGPR pressure both
170  // have the same cost, so we don't need to prefer one over the other.
171
172 int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
173 int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;
174
175 if (SGPRDelta >= 0 || VGPRDelta >= 0) {
176 HasHighPressure = true;
177 if (SGPRDelta > VGPRDelta) {
178 Cand.RPDelta.CriticalMax =
179 PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
180 Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
181 } else {
182 Cand.RPDelta.CriticalMax =
183 PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
184 Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
185 }
186 }
187}
188
189// This function is mostly cut and pasted from
190// GenericScheduler::pickNodeFromQueue()
191void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
192                                         const CandPolicy &ZonePolicy,
193 const RegPressureTracker &RPTracker,
194 SchedCandidate &Cand) {
195 const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
196  ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
197  unsigned SGPRPressure = 0;
198 unsigned VGPRPressure = 0;
199 if (DAG->isTrackingPressure()) {
200 SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
201 VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
202 }
203 ReadyQueue &Q = Zone.Available;
204 for (SUnit *SU : Q) {
205
206 SchedCandidate TryCand(ZonePolicy);
207 initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI,
208 SGPRPressure, VGPRPressure);
209 // Pass SchedBoundary only when comparing nodes from the same boundary.
210 SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
211 tryCandidate(Cand, TryCand, ZoneArg);
212 if (TryCand.Reason != NoCand) {
213 // Initialize resource delta if needed in case future heuristics query it.
214 if (TryCand.ResDelta == SchedResourceDelta())
215 TryCand.initResourceDelta(Zone.DAG, SchedModel);
216 Cand.setBest(TryCand);
217      LLVM_DEBUG(traceCandidate(Cand));
218    }
219 }
220}
221
222// This function is mostly cut and pasted from
223// GenericScheduler::pickNodeBidirectional()
224SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
225 // Schedule as far as possible in the direction of no choice. This is most
226 // efficient, but also provides the best heuristics for CriticalPSets.
227 if (SUnit *SU = Bot.pickOnlyChoice()) {
228 IsTopNode = false;
229 return SU;
230 }
231 if (SUnit *SU = Top.pickOnlyChoice()) {
232 IsTopNode = true;
233 return SU;
234 }
235 // Set the bottom-up policy based on the state of the current bottom zone and
236 // the instructions outside the zone, including the top zone.
237 CandPolicy BotPolicy;
238 setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
239 // Set the top-down policy based on the state of the current top zone and
240 // the instructions outside the zone, including the bottom zone.
241 CandPolicy TopPolicy;
242 setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
243
244 // See if BotCand is still valid (because we previously scheduled from Top).
245 LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
246 if (!BotCand.isValid() || BotCand.SU->isScheduled ||
247 BotCand.Policy != BotPolicy) {
248    BotCand.reset(CandPolicy());
249    pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
250    assert(BotCand.Reason != NoCand && "failed to find the first candidate");
251  } else {
252    LLVM_DEBUG(traceCandidate(BotCand));
253#ifndef NDEBUG
254 if (VerifyScheduling) {
255 SchedCandidate TCand;
256 TCand.reset(CandPolicy());
257 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
258 assert(TCand.SU == BotCand.SU &&
259 "Last pick result should correspond to re-picking right now");
260 }
261#endif
262 }
263
264 // Check if the top Q has a better candidate.
265 LLVM_DEBUG(dbgs() << "Picking from Top:\n");
266 if (!TopCand.isValid() || TopCand.SU->isScheduled ||
267 TopCand.Policy != TopPolicy) {
268    TopCand.reset(CandPolicy());
269    pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
270    assert(TopCand.Reason != NoCand && "failed to find the first candidate");
271  } else {
272    LLVM_DEBUG(traceCandidate(TopCand));
273#ifndef NDEBUG
274 if (VerifyScheduling) {
275 SchedCandidate TCand;
276 TCand.reset(CandPolicy());
277 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
278 assert(TCand.SU == TopCand.SU &&
279 "Last pick result should correspond to re-picking right now");
280 }
281#endif
282 }
283
284 // Pick best from BotCand and TopCand.
285 LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand);
286 dbgs() << "Bot Cand: "; traceCandidate(BotCand););
287 SchedCandidate Cand = BotCand;
288  TopCand.Reason = NoCand;
289  tryCandidate(Cand, TopCand, nullptr);
290 if (TopCand.Reason != NoCand) {
291 Cand.setBest(TopCand);
292 }
293 LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand););
294
295 IsTopNode = Cand.AtTop;
296 return Cand.SU;
297}
298
299// This function is mostly cut and pasted from
300// GenericScheduler::pickNode()
301SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
302  if (DAG->top() == DAG->bottom()) {
303    assert(Top.Available.empty() && Top.Pending.empty() &&
304           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
305 return nullptr;
306 }
307 SUnit *SU;
308 do {
309    if (RegionPolicy.OnlyTopDown) {
310      SU = Top.pickOnlyChoice();
311 if (!SU) {
312 CandPolicy NoPolicy;
313 TopCand.reset(NoPolicy);
314        pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
315        assert(TopCand.Reason != NoCand && "failed to find a candidate");
316 SU = TopCand.SU;
317 }
318 IsTopNode = true;
319 } else if (RegionPolicy.OnlyBottomUp) {
320 SU = Bot.pickOnlyChoice();
321 if (!SU) {
322 CandPolicy NoPolicy;
323 BotCand.reset(NoPolicy);
324        pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
325        assert(BotCand.Reason != NoCand && "failed to find a candidate");
326 SU = BotCand.SU;
327 }
328 IsTopNode = false;
329 } else {
330 SU = pickNodeBidirectional(IsTopNode);
331 }
332 } while (SU->isScheduled);
333
334 if (SU->isTopReady())
335 Top.removeReady(SU);
336 if (SU->isBottomReady())
337 Bot.removeReady(SU);
338
339 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
340 << *SU->getInstr());
341 return SU;
342}
343
344GCNSchedStageID GCNSchedStrategy::getCurrentStage() {
345  assert(CurrentStage && CurrentStage != SchedStages.end());
346  return *CurrentStage;
347}
348
349bool GCNSchedStrategy::advanceStage() {
350  assert(CurrentStage != SchedStages.end());
351  if (!CurrentStage)
352    CurrentStage = SchedStages.begin();
353  else
354 CurrentStage++;
355
356 return CurrentStage != SchedStages.end();
357}
358
359bool GCNSchedStrategy::hasNextStage() const {
360  assert(CurrentStage);
361  return std::next(CurrentStage) != SchedStages.end();
362}
363
364GCNSchedStageID GCNSchedStrategy::getNextStage() const {
365  assert(CurrentStage && std::next(CurrentStage) != SchedStages.end());
366 return *std::next(CurrentStage);
367}
368
369GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
370    const MachineSchedContext *C)
371    : GCNSchedStrategy(C) {
372  SchedStages.push_back(GCNSchedStageID::OccInitialSchedule);
373  SchedStages.push_back(GCNSchedStageID::UnclusteredHighRPReschedule);
374  SchedStages.push_back(GCNSchedStageID::ClusteredLowOccupancyReschedule);
375  SchedStages.push_back(GCNSchedStageID::PreRARematerialize);
376}
377
378GCNMaxILPSchedStrategy::GCNMaxILPSchedStrategy(const MachineSchedContext *C)
379    : GCNSchedStrategy(C) {
380  SchedStages.push_back(GCNSchedStageID::ILPInitialSchedule);
381}
382
383bool GCNMaxILPSchedStrategy::tryCandidate(SchedCandidate &Cand,
384                                          SchedCandidate &TryCand,
385 SchedBoundary *Zone) const {
386 // Initialize the candidate if needed.
387 if (!Cand.isValid()) {
388 TryCand.Reason = NodeOrder;
389 return true;
390 }
391
392 // Avoid spilling by exceeding the register limit.
393 if (DAG->isTrackingPressure() &&
394 tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
395 RegExcess, TRI, DAG->MF))
396 return TryCand.Reason != NoCand;
397
398  // Bias PhysReg Defs and copies to their uses and definitions, respectively.
399 if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
400 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
401 return TryCand.Reason != NoCand;
402
403 bool SameBoundary = Zone != nullptr;
404 if (SameBoundary) {
405 // Prioritize instructions that read unbuffered resources by stall cycles.
406 if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
407 Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
408 return TryCand.Reason != NoCand;
409
410 // Avoid critical resource consumption and balance the schedule.
411    TryCand.initResourceDelta(DAG, SchedModel);
412    if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
413                TryCand, Cand, ResourceReduce))
414      return TryCand.Reason != NoCand;
415    if (tryGreater(TryCand.ResDelta.DemandedResources,
416                   Cand.ResDelta.DemandedResources, TryCand, Cand,
417                   ResourceDemand))
418      return TryCand.Reason != NoCand;
419
420 // Unconditionally try to reduce latency.
421 if (tryLatency(TryCand, Cand, *Zone))
422 return TryCand.Reason != NoCand;
423
424 // Weak edges are for clustering and other constraints.
425 if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
426 getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak))
427 return TryCand.Reason != NoCand;
428 }
429
430 // Keep clustered nodes together to encourage downstream peephole
431 // optimizations which may reduce resource requirements.
432 //
433 // This is a best effort to set things up for a post-RA pass. Optimizations
434 // like generating loads of multiple registers should ideally be done within
435 // the scheduler pass by combining the loads during DAG postprocessing.
436  const SUnit *CandNextClusterSU =
437      Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
438  const SUnit *TryCandNextClusterSU =
439      TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
440 if (tryGreater(TryCand.SU == TryCandNextClusterSU,
441 Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster))
442 return TryCand.Reason != NoCand;
443
444 // Avoid increasing the max critical pressure in the scheduled region.
445 if (DAG->isTrackingPressure() &&
446      tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
447                  TryCand, Cand, RegCritical, TRI, DAG->MF))
448 return TryCand.Reason != NoCand;
449
450 // Avoid increasing the max pressure of the entire region.
451 if (DAG->isTrackingPressure() &&
452 tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
453 Cand, RegMax, TRI, DAG->MF))
454 return TryCand.Reason != NoCand;
455
456 if (SameBoundary) {
457 // Fall through to original instruction order.
458 if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) ||
459 (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
460 TryCand.Reason = NodeOrder;
461 return true;
462 }
463 }
464 return false;
465}
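// Note the ordering above: the ILP variant evaluates stalls, resource balance,
// and latency inside the zone before the critical/maximum register pressure
// heuristics, accepting higher pressure in exchange for a shorter critical
// path, whereas the occupancy-oriented strategy keeps the generic ordering.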
466
467GCNScheduleDAGMILive::GCNScheduleDAGMILive(
468    MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S)
469 : ScheduleDAGMILive(C, std::move(S)), ST(MF.getSubtarget<GCNSubtarget>()),
470 MFI(*MF.getInfo<SIMachineFunctionInfo>()),
471 StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy) {
472
473 LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
474}
475
476std::unique_ptr<GCNSchedStage>
477GCNScheduleDAGMILive::createSchedStage(GCNSchedStageID SchedStageID) {
478 switch (SchedStageID) {
479  case GCNSchedStageID::OccInitialSchedule:
480    return std::make_unique<OccInitialScheduleStage>(SchedStageID, *this);
481  case GCNSchedStageID::UnclusteredHighRPReschedule:
482    return std::make_unique<UnclusteredHighRPStage>(SchedStageID, *this);
483  case GCNSchedStageID::ClusteredLowOccupancyReschedule:
484    return std::make_unique<ClusteredLowOccStage>(SchedStageID, *this);
485  case GCNSchedStageID::PreRARematerialize:
486    return std::make_unique<PreRARematStage>(SchedStageID, *this);
487  case GCNSchedStageID::ILPInitialSchedule:
488    return std::make_unique<ILPInitialScheduleStage>(SchedStageID, *this);
489 }
490
491 llvm_unreachable("Unknown SchedStageID.");
492}
493
494void GCNScheduleDAGMILive::schedule() {
495  // Collect all scheduling regions. The actual scheduling is performed in
496 // GCNScheduleDAGMILive::finalizeSchedule.
497 Regions.push_back(std::pair(RegionBegin, RegionEnd));
498}
499
501GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {
502  GCNDownwardRPTracker RPTracker(*LIS);
503  RPTracker.advance(begin(), end(), &LiveIns[RegionIdx]);
504 return RPTracker.moveMaxPressure();
505}
506
507void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
508 const MachineBasicBlock *MBB) {
509  GCNDownwardRPTracker RPTracker(*LIS);
510
511  // If the block has only one successor then live-ins of that successor are
512  // live-outs of the current block. We can reuse the calculated live set if
513  // the successor will be sent to scheduling past the current block.
514
515  // However, due to a bug in LiveInterval analysis it may happen that two
516  // predecessors of the same successor block have different lane bitmasks for
517  // a live-out register. Work around that by sticking to a one-to-one
518  // relationship, i.e. one predecessor with one successor block.
519 const MachineBasicBlock *OnlySucc = nullptr;
520 if (MBB->succ_size() == 1) {
521 auto *Candidate = *MBB->succ_begin();
522 if (!Candidate->empty() && Candidate->pred_size() == 1) {
523      SlotIndexes *Ind = LIS->getSlotIndexes();
524      if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(Candidate))
525 OnlySucc = Candidate;
526 }
527 }
528
529 // Scheduler sends regions from the end of the block upwards.
530 size_t CurRegion = RegionIdx;
531 for (size_t E = Regions.size(); CurRegion != E; ++CurRegion)
532 if (Regions[CurRegion].first->getParent() != MBB)
533 break;
534 --CurRegion;
535
536 auto I = MBB->begin();
537 auto LiveInIt = MBBLiveIns.find(MBB);
538 auto &Rgn = Regions[CurRegion];
539 auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
540 if (LiveInIt != MBBLiveIns.end()) {
541 auto LiveIn = std::move(LiveInIt->second);
542 RPTracker.reset(*MBB->begin(), &LiveIn);
543 MBBLiveIns.erase(LiveInIt);
544 } else {
545 I = Rgn.first;
546 auto LRS = BBLiveInMap.lookup(NonDbgMI);
547#ifdef EXPENSIVE_CHECKS
548 assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
549#endif
550 RPTracker.reset(*I, &LRS);
551 }
552
553 for (;;) {
554 I = RPTracker.getNext();
555
556 if (Regions[CurRegion].first == I || NonDbgMI == I) {
557 LiveIns[CurRegion] = RPTracker.getLiveRegs();
558 RPTracker.clearMaxPressure();
559 }
560
561 if (Regions[CurRegion].second == I) {
562 Pressure[CurRegion] = RPTracker.moveMaxPressure();
563 if (CurRegion-- == RegionIdx)
564 break;
565 }
566 RPTracker.advanceToNext();
567 RPTracker.advanceBeforeNext();
568 }
569
570 if (OnlySucc) {
571 if (I != MBB->end()) {
572 RPTracker.advanceToNext();
573      RPTracker.advance(MBB->end());
574    }
575 RPTracker.advanceBeforeNext();
576 MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();
577 }
578}
579
580DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
581GCNScheduleDAGMILive::getBBLiveInMap() const {
582 assert(!Regions.empty());
583 std::vector<MachineInstr *> BBStarters;
584 BBStarters.reserve(Regions.size());
585 auto I = Regions.rbegin(), E = Regions.rend();
586 auto *BB = I->first->getParent();
587 do {
588 auto *MI = &*skipDebugInstructionsForward(I->first, I->second);
589 BBStarters.push_back(MI);
590 do {
591 ++I;
592 } while (I != E && I->first->getParent() == BB);
593 } while (I != E);
594 return getLiveRegMap(BBStarters, false /*After*/, *LIS);
595}
596
597void GCNScheduleDAGMILive::finalizeSchedule() {
598  // Start actual scheduling here. This function is called by the base
599 // MachineScheduler after all regions have been recorded by
600 // GCNScheduleDAGMILive::schedule().
601 LiveIns.resize(Regions.size());
602 Pressure.resize(Regions.size());
603 RescheduleRegions.resize(Regions.size());
604 RegionsWithHighRP.resize(Regions.size());
605 RegionsWithExcessRP.resize(Regions.size());
606 RegionsWithMinOcc.resize(Regions.size());
607 RegionsWithIGLPInstrs.resize(Regions.size());
608 RescheduleRegions.set();
609 RegionsWithHighRP.reset();
610 RegionsWithExcessRP.reset();
611 RegionsWithMinOcc.reset();
612 RegionsWithIGLPInstrs.reset();
613
614 runSchedStages();
615}
616
617void GCNScheduleDAGMILive::runSchedStages() {
618 LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");
619
620 if (!Regions.empty())
621 BBLiveInMap = getBBLiveInMap();
622
623 GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl);
624 while (S.advanceStage()) {
625 auto Stage = createSchedStage(S.getCurrentStage());
626 if (!Stage->initGCNSchedStage())
627 continue;
628
629 for (auto Region : Regions) {
630 RegionBegin = Region.first;
631 RegionEnd = Region.second;
632 // Setup for scheduling the region and check whether it should be skipped.
633 if (!Stage->initGCNRegion()) {
634 Stage->advanceRegion();
635 exitRegion();
636 continue;
637 }
638
639      ScheduleDAGMILive::schedule();
640      Stage->finalizeGCNRegion();
641 }
642
643 Stage->finalizeGCNSchedStage();
644 }
645}
646
647#ifndef NDEBUG
648raw_ostream &llvm::operator<<(raw_ostream &OS, const GCNSchedStageID &StageID) {
649  switch (StageID) {
650  case GCNSchedStageID::OccInitialSchedule:
651    OS << "Max Occupancy Initial Schedule";
652    break;
653  case GCNSchedStageID::UnclusteredHighRPReschedule:
654    OS << "Unclustered High Register Pressure Reschedule";
655    break;
656  case GCNSchedStageID::ClusteredLowOccupancyReschedule:
657    OS << "Clustered Low Occupancy Reschedule";
658    break;
659  case GCNSchedStageID::PreRARematerialize:
660    OS << "Pre-RA Rematerialize";
661    break;
662  case GCNSchedStageID::ILPInitialSchedule:
663    OS << "Max ILP Initial Schedule";
664    break;
665 }
666
667 return OS;
668}
669#endif
670
671GCNSchedStage::GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
672    : DAG(DAG), S(static_cast<GCNSchedStrategy &>(*DAG.SchedImpl)), MF(DAG.MF),
673 MFI(DAG.MFI), ST(DAG.ST), StageID(StageID) {}
674
675bool GCNSchedStage::initGCNSchedStage() {
676  if (!DAG.LIS)
677 return false;
678
679 LLVM_DEBUG(dbgs() << "Starting scheduling stage: " << StageID << "\n");
680 return true;
681}
682
683bool UnclusteredHighRPStage::initGCNSchedStage() {
684  if (DisableUnclusterHighRP)
685    return false;
686
687  if (!GCNSchedStage::initGCNSchedStage())
688    return false;
689
690 if (DAG.RegionsWithHighRP.none() && DAG.RegionsWithExcessRP.none())
691 return false;
692
695
696 InitialOccupancy = DAG.MinOccupancy;
697  // Aggressively try to reduce register pressure in the unclustered high RP
698 // stage. Temporarily increase occupancy target in the region.
699  S.SGPRLimitBias = S.HighRPSGPRBias;
700  S.VGPRLimitBias = S.HighRPVGPRBias;
701  if (MFI.getMaxWavesPerEU() > DAG.MinOccupancy)
702 MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);
703
704  LLVM_DEBUG(
705      dbgs()
706      << "Retrying function scheduling without clustering. "
707         "Aggressively try to reduce register pressure to achieve occupancy "
708 << DAG.MinOccupancy << ".\n");
709
710 return true;
711}
712
713bool ClusteredLowOccStage::initGCNSchedStage() {
714  if (!GCNSchedStage::initGCNSchedStage())
715    return false;
716
717 // Don't bother trying to improve ILP in lower RP regions if occupancy has not
718 // been dropped. All regions will have already been scheduled with the ideal
719 // occupancy targets.
720 if (DAG.StartingOccupancy <= DAG.MinOccupancy)
721 return false;
722
724 dbgs() << "Retrying function scheduling with lowest recorded occupancy "
725 << DAG.MinOccupancy << ".\n");
726 return true;
727}
728
729bool PreRARematStage::initGCNSchedStage() {
730  if (!GCNSchedStage::initGCNSchedStage())
731    return false;
732
733 if (DAG.RegionsWithMinOcc.none() || DAG.Regions.size() == 1)
734 return false;
735
737 // Check maximum occupancy
738  if (ST.computeOccupancy(MF.getFunction(), MFI.getLDSSize()) ==
739      DAG.MinOccupancy)
740 return false;
741
742 // FIXME: This pass will invalidate cached MBBLiveIns for regions
743  // in between the defs and the region we sink the def to. Cached pressure
744  // for regions where a def is sunk from will also be invalidated. This will
745  // need to be fixed if there is another pass after this pass.
747
748 collectRematerializableInstructions();
749 if (RematerializableInsts.empty() || !sinkTriviallyRematInsts(ST, TII))
750 return false;
751
753 dbgs() << "Retrying function scheduling with improved occupancy of "
754 << DAG.MinOccupancy << " from rematerializing\n");
755 return true;
756}
757
760 LLVM_DEBUG(dbgs() << "Ending scheduling stage: " << StageID << "\n");
761}
762
763void UnclusteredHighRPStage::finalizeGCNSchedStage() {
764  SavedMutations.swap(DAG.Mutations);
765  S.SGPRLimitBias = S.VGPRLimitBias = 0;
766  if (DAG.MinOccupancy > InitialOccupancy) {
767 for (unsigned IDX = 0; IDX < DAG.Pressure.size(); ++IDX)
768 DAG.RegionsWithMinOcc[IDX] =
769 DAG.Pressure[IDX].getOccupancy(DAG.ST) == DAG.MinOccupancy;
770
772 << " stage successfully increased occupancy to "
773 << DAG.MinOccupancy << '\n');
774 }
775
776  GCNSchedStage::finalizeGCNSchedStage();
777}
778
779bool GCNSchedStage::initGCNRegion() {
780  // Check whether this new region is also a new block.
781  if (DAG.RegionBegin->getParent() != CurrentMBB)
782    setupNewBlock();
783
784 unsigned NumRegionInstrs = std::distance(DAG.begin(), DAG.end());
785 DAG.enterRegion(CurrentMBB, DAG.begin(), DAG.end(), NumRegionInstrs);
786
787 // Skip empty scheduling regions (0 or 1 schedulable instructions).
788 if (DAG.begin() == DAG.end() || DAG.begin() == std::prev(DAG.end()))
789 return false;
790
791 LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
793 << " " << CurrentMBB->getName()
794 << "\n From: " << *DAG.begin() << " To: ";
796 else dbgs() << "End";
797 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
798
799 // Save original instruction order before scheduling for possible revert.
800 Unsched.clear();
801 Unsched.reserve(DAG.NumRegionInstrs);
802  if (StageID == GCNSchedStageID::OccInitialSchedule ||
803      StageID == GCNSchedStageID::ILPInitialSchedule) {
804    for (auto &I : DAG) {
805 Unsched.push_back(&I);
806 if (I.getOpcode() == AMDGPU::SCHED_GROUP_BARRIER ||
807 I.getOpcode() == AMDGPU::IGLP_OPT)
808 DAG.RegionsWithIGLPInstrs[RegionIdx] = true;
809 }
810 } else {
811 for (auto &I : DAG)
812 Unsched.push_back(&I);
813 }
814
815 PressureBefore = DAG.Pressure[RegionIdx];
816
818 dbgs() << "Pressure before scheduling:\nRegion live-ins:"
819 << print(DAG.LiveIns[RegionIdx], DAG.MRI)
820 << "Region live-in pressure: "
822 << "Region register pressure: " << print(PressureBefore));
823
824 S.HasHighPressure = false;
825  S.KnownExcessRP = isRegionWithExcessRP();
826
827 if (DAG.RegionsWithIGLPInstrs[RegionIdx] &&
828      StageID != GCNSchedStageID::UnclusteredHighRPReschedule) {
829    SavedMutations.clear();
832 }
833
834 return true;
835}
836
837bool UnclusteredHighRPStage::initGCNRegion() {
838  // Only reschedule regions with the minimum occupancy or regions that may have
839 // spilling (excess register pressure).
840 if ((!DAG.RegionsWithMinOcc[RegionIdx] ||
841 DAG.MinOccupancy <= InitialOccupancy) &&
842 !DAG.RegionsWithExcessRP[RegionIdx])
843 return false;
844
845  return GCNSchedStage::initGCNRegion();
846}
847
848bool ClusteredLowOccStage::initGCNRegion() {
849  // We may need to reschedule this region if it wasn't rescheduled in the last
850  // stage, or if we found it was testing critical register pressure limits in
851  // the unclustered reschedule stage. The latter is because we may not have
852  // been able to raise the min occupancy in the previous stage, so the region
853  // may be overly constrained even if it was already rescheduled.
854 if (!DAG.RegionsWithHighRP[RegionIdx])
855 return false;
856
857  return GCNSchedStage::initGCNRegion();
858}
859
860bool PreRARematStage::initGCNRegion() {
861  if (!DAG.RescheduleRegions[RegionIdx])
862 return false;
863
864  return GCNSchedStage::initGCNRegion();
865}
866
867void GCNSchedStage::setupNewBlock() {
868  if (CurrentMBB)
869    DAG.finishBlock();
870
871  CurrentMBB = DAG.RegionBegin->getParent();
872  DAG.startBlock(CurrentMBB);
873  // Get real RP for the region if it hasn't been calculated before. After the
874  // initial schedule stage real RP will be collected after scheduling.
875  if (StageID == GCNSchedStageID::OccInitialSchedule ||
876      StageID == GCNSchedStageID::ILPInitialSchedule)
877    DAG.computeBlockPressure(RegionIdx, CurrentMBB);
878}
879
880void GCNSchedStage::finalizeGCNRegion() {
881  DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
882 DAG.RescheduleRegions[RegionIdx] = false;
883 if (S.HasHighPressure)
884 DAG.RegionsWithHighRP[RegionIdx] = true;
885
886 // Revert scheduling if we have dropped occupancy or there is some other
887 // reason that the original schedule is better.
888  checkScheduling();
889
890 if (DAG.RegionsWithIGLPInstrs[RegionIdx] &&
891      StageID != GCNSchedStageID::UnclusteredHighRPReschedule)
892    SavedMutations.swap(DAG.Mutations);
893
894 DAG.exitRegion();
895 RegionIdx++;
896}
897
898void GCNSchedStage::checkScheduling() {
899  // Check the results of scheduling.
900 PressureAfter = DAG.getRealRegPressure(RegionIdx);
901 LLVM_DEBUG(dbgs() << "Pressure after scheduling: " << print(PressureAfter));
902 LLVM_DEBUG(dbgs() << "Region: " << RegionIdx << ".\n");
903
904  if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
905      PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) <= S.VGPRCriticalLimit) {
906    DAG.Pressure[RegionIdx] = PressureAfter;
907 DAG.RegionsWithMinOcc[RegionIdx] =
908 PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;
909
910    // Early out if we have achieved the occupancy target.
911 LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
912 return;
913 }
914
915 unsigned TargetOccupancy =
917 unsigned WavesAfter =
918 std::min(TargetOccupancy, PressureAfter.getOccupancy(ST));
919 unsigned WavesBefore =
920 std::min(TargetOccupancy, PressureBefore.getOccupancy(ST));
921 LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
922 << ", after " << WavesAfter << ".\n");
923
924 // We may not be able to keep the current target occupancy because of the just
925 // scheduled region. We might still be able to revert scheduling if the
926 // occupancy before was higher, or if the current schedule has register
927 // pressure higher than the excess limits which could lead to more spilling.
928 unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);
929
930 // Allow memory bound functions to drop to 4 waves if not limited by an
931 // attribute.
932 if (WavesAfter < WavesBefore && WavesAfter < DAG.MinOccupancy &&
933 WavesAfter >= MFI.getMinAllowedOccupancy()) {
934 LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
935 << MFI.getMinAllowedOccupancy() << " waves\n");
936 NewOccupancy = WavesAfter;
937 }
938
939 if (NewOccupancy < DAG.MinOccupancy) {
940 DAG.MinOccupancy = NewOccupancy;
941 MFI.limitOccupancy(DAG.MinOccupancy);
942 DAG.RegionsWithMinOcc.reset();
943 LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
944 << DAG.MinOccupancy << ".\n");
945 }
946
947 unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
948 unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
949 if (PressureAfter.getVGPRNum(false) > MaxVGPRs ||
950 PressureAfter.getAGPRNum() > MaxVGPRs ||
951 PressureAfter.getSGPRNum() > MaxSGPRs) {
952 DAG.RescheduleRegions[RegionIdx] = true;
953 DAG.RegionsWithHighRP[RegionIdx] = true;
954 DAG.RegionsWithExcessRP[RegionIdx] = true;
955 }
956
957 // Revert if this region's schedule would cause a drop in occupancy or
958 // spilling.
959 if (shouldRevertScheduling(WavesAfter)) {
960    revertScheduling();
961  } else {
962 DAG.Pressure[RegionIdx] = PressureAfter;
963 DAG.RegionsWithMinOcc[RegionIdx] =
964 PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;
965 }
966}
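// checkScheduling() thus either accepts the region (caching PressureAfter),
// lowers the function's minimum occupancy (possibly down to the memory-bound
// minimum), flags the region for rescheduling when it exceeds the absolute
// VGPR/SGPR limits, or reverts to the original instruction order.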
967
968unsigned
969GCNSchedStage::computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
970 DenseMap<unsigned, unsigned> &ReadyCycles,
971 const TargetSchedModel &SM) {
972 unsigned ReadyCycle = CurrCycle;
973 for (auto &D : SU.Preds) {
974 if (D.isAssignedRegDep()) {
975 MachineInstr *DefMI = D.getSUnit()->getInstr();
976 unsigned Latency = SM.computeInstrLatency(DefMI);
977 unsigned DefReady = ReadyCycles[DAG.getSUnit(DefMI)->NodeNum];
978 ReadyCycle = std::max(ReadyCycle, DefReady + Latency);
979 }
980 }
981 ReadyCycles[SU.NodeNum] = ReadyCycle;
982 return ReadyCycle;
983}
984
985#ifndef NDEBUG
986struct EarlierIssuingCycle {
987  bool operator()(std::pair<MachineInstr *, unsigned> A,
988 std::pair<MachineInstr *, unsigned> B) const {
989 return A.second < B.second;
990 }
991};
992
993static void printScheduleModel(std::set<std::pair<MachineInstr *, unsigned>,
994 EarlierIssuingCycle> &ReadyCycles) {
995 if (ReadyCycles.empty())
996 return;
997 unsigned BBNum = ReadyCycles.begin()->first->getParent()->getNumber();
998 dbgs() << "\n################## Schedule time ReadyCycles for MBB : " << BBNum
999 << " ##################\n# Cycle #\t\t\tInstruction "
1000 " "
1001 " \n";
1002 unsigned IPrev = 1;
1003 for (auto &I : ReadyCycles) {
1004 if (I.second > IPrev + 1)
1005 dbgs() << "****************************** BUBBLE OF " << I.second - IPrev
1006 << " CYCLES DETECTED ******************************\n\n";
1007 dbgs() << "[ " << I.second << " ] : " << *I.first << "\n";
1008 IPrev = I.second;
1009 }
1010}
1011#endif
1012
1013ScheduleMetrics
1014GCNSchedStage::getScheduleMetrics(const std::vector<SUnit> &InputSchedule) {
1015#ifndef NDEBUG
1016 std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>
1017 ReadyCyclesSorted;
1018#endif
1019  const TargetSchedModel &SM = ST.getInstrInfo()->getSchedModel();
1020  unsigned SumBubbles = 0;
1021 DenseMap<unsigned, unsigned> ReadyCycles;
1022 unsigned CurrCycle = 0;
1023 for (auto &SU : InputSchedule) {
1024 unsigned ReadyCycle =
1025 computeSUnitReadyCycle(SU, CurrCycle, ReadyCycles, SM);
1026 SumBubbles += ReadyCycle - CurrCycle;
1027#ifndef NDEBUG
1028 ReadyCyclesSorted.insert(std::make_pair(SU.getInstr(), ReadyCycle));
1029#endif
1030 CurrCycle = ++ReadyCycle;
1031 }
1032#ifndef NDEBUG
1033 LLVM_DEBUG(
1034 printScheduleModel(ReadyCyclesSorted);
1035 dbgs() << "\n\t"
1036 << "Metric: "
1037 << (SumBubbles
1038 ? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
1039 : 1)
1040 << "\n\n");
1041#endif
1042
1043 return ScheduleMetrics(CurrCycle, SumBubbles);
1044}
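// The reported metric is (SumBubbles * ScaleFactor) / CurrCycle (or 1 when the
// schedule has no bubbles), i.e. roughly the share of the schedule length
// spent waiting on operand latency; smaller is better. The same computation is
// repeated below for an already-scheduled region.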
1045
1046ScheduleMetrics
1047GCNSchedStage::getScheduleMetrics(const GCNScheduleDAGMILive &DAG) {
1048#ifndef NDEBUG
1049 std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>
1050 ReadyCyclesSorted;
1051#endif
1052  const TargetSchedModel &SM = ST.getInstrInfo()->getSchedModel();
1053  unsigned SumBubbles = 0;
1054 DenseMap<unsigned, unsigned> ReadyCycles;
1055 unsigned CurrCycle = 0;
1056 for (auto &MI : DAG) {
1057 SUnit *SU = DAG.getSUnit(&MI);
1058 if (!SU)
1059 continue;
1060 unsigned ReadyCycle =
1061 computeSUnitReadyCycle(*SU, CurrCycle, ReadyCycles, SM);
1062 SumBubbles += ReadyCycle - CurrCycle;
1063#ifndef NDEBUG
1064 ReadyCyclesSorted.insert(std::make_pair(SU->getInstr(), ReadyCycle));
1065#endif
1066 CurrCycle = ++ReadyCycle;
1067 }
1068#ifndef NDEBUG
1069 LLVM_DEBUG(
1070 printScheduleModel(ReadyCyclesSorted);
1071 dbgs() << "\n\t"
1072 << "Metric: "
1073 << (SumBubbles
1074 ? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
1075 : 1)
1076 << "\n\n");
1077#endif
1078
1079 return ScheduleMetrics(CurrCycle, SumBubbles);
1080}
1081
1082bool GCNSchedStage::shouldRevertScheduling(unsigned WavesAfter) {
1083 if (WavesAfter < DAG.MinOccupancy)
1084 return true;
1085
1086 return false;
1087}
1088
1089bool OccInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
1090  if (PressureAfter == PressureBefore)
1091    return false;
1092
1093  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
1094    return true;
1095
1096 if (mayCauseSpilling(WavesAfter))
1097 return true;
1098
1099 return false;
1100}
1101
1102bool UnclusteredHighRPStage::shouldRevertScheduling(unsigned WavesAfter) {
1103  // If RP is not reduced in the unclustered reschedule stage, revert to the
1104  // old schedule.
1105 if ((WavesAfter <= PressureBefore.getOccupancy(ST) &&
1106 mayCauseSpilling(WavesAfter)) ||
1108 LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n");
1109 return true;
1110 }
1111
1112 // Do not attempt to relax schedule even more if we are already spilling.
1113  if (isRegionWithExcessRP())
1114    return false;
1115
1116 LLVM_DEBUG(
1117 dbgs()
1118 << "\n\t *** In shouldRevertScheduling ***\n"
1119 << " *********** BEFORE UnclusteredHighRPStage ***********\n");
1120 ScheduleMetrics MBefore =
1121      getScheduleMetrics(DAG.SUnits);
1122  LLVM_DEBUG(
1123 dbgs()
1124 << "\n *********** AFTER UnclusteredHighRPStage ***********\n");
1125  ScheduleMetrics MAfter = getScheduleMetrics(DAG);
1126  unsigned OldMetric = MBefore.getMetric();
1127 unsigned NewMetric = MAfter.getMetric();
1128  unsigned WavesBefore =
1129      std::min(S.getTargetOccupancy(), PressureBefore.getOccupancy(ST));
1130  unsigned Profit =
1131      ((WavesAfter * ScheduleMetrics::ScaleFactor) / WavesBefore *
1132       ((OldMetric + ScheduleMetricBias) * ScheduleMetrics::ScaleFactor) /
1133       NewMetric) /
1134      ScheduleMetrics::ScaleFactor;
1135 LLVM_DEBUG(dbgs() << "\tMetric before " << MBefore << "\tMetric after "
1136 << MAfter << "Profit: " << Profit << "\n");
1137 return Profit < ScheduleMetrics::ScaleFactor;
1138}
1139
1140bool ClusteredLowOccStage::shouldRevertScheduling(unsigned WavesAfter) {
1141  if (PressureAfter == PressureBefore)
1142    return false;
1143
1144  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
1145    return true;
1146
1147 if (mayCauseSpilling(WavesAfter))
1148 return true;
1149
1150 return false;
1151}
1152
1153  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
1154    return true;
1156
1157 if (mayCauseSpilling(WavesAfter))
1158 return true;
1159
1160 return false;
1161}
1162
1163bool ILPInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
1164  if (mayCauseSpilling(WavesAfter))
1165 return true;
1166
1167 return false;
1168}
1169
1170bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) {
1171 if (WavesAfter <= MFI.getMinWavesPerEU() &&
1174 LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
1175 return true;
1176 }
1177
1178 return false;
1179}
1180
1181void GCNSchedStage::revertScheduling() {
1182  DAG.RegionsWithMinOcc[RegionIdx] =
1183 PressureBefore.getOccupancy(ST) == DAG.MinOccupancy;
1184 LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
1185 DAG.RescheduleRegions[RegionIdx] =
1186 S.hasNextStage() &&
1187      S.getNextStage() != GCNSchedStageID::UnclusteredHighRPReschedule;
1188  DAG.RegionEnd = DAG.RegionBegin;
1189  int SkippedDebugInstr = 0;
1190 for (MachineInstr *MI : Unsched) {
1191 if (MI->isDebugInstr()) {
1192 ++SkippedDebugInstr;
1193 continue;
1194 }
1195
1196 if (MI->getIterator() != DAG.RegionEnd) {
1197 DAG.BB->remove(MI);
1198      DAG.BB->insert(DAG.RegionEnd, MI);
1199      if (!MI->isDebugInstr())
1200 DAG.LIS->handleMove(*MI, true);
1201 }
1202
1203 // Reset read-undef flags and update them later.
1204 for (auto &Op : MI->operands())
1205 if (Op.isReg() && Op.isDef())
1206 Op.setIsUndef(false);
1207 RegisterOperands RegOpers;
1208 RegOpers.collect(*MI, *DAG.TRI, DAG.MRI, DAG.ShouldTrackLaneMasks, false);
1209 if (!MI->isDebugInstr()) {
1211 // Adjust liveness and add missing dead+read-undef flags.
1212        SlotIndex SlotIdx = DAG.LIS->getInstructionIndex(*MI).getRegSlot();
1213        RegOpers.adjustLaneLiveness(*DAG.LIS, DAG.MRI, SlotIdx, MI);
1214 } else {
1215 // Adjust for missing dead-def flags.
1216 RegOpers.detectDeadDefs(*MI, *DAG.LIS);
1217 }
1218 }
1219 DAG.RegionEnd = MI->getIterator();
1220 ++DAG.RegionEnd;
1221 LLVM_DEBUG(dbgs() << "Scheduling " << *MI);
1222 }
1223
1224 // After reverting schedule, debug instrs will now be at the end of the block
1225 // and RegionEnd will point to the first debug instr. Increment RegionEnd
1226  // past debug instrs to the actual end of the scheduling region.
1227 while (SkippedDebugInstr-- > 0)
1228 ++DAG.RegionEnd;
1229
1230 // If Unsched.front() instruction is a debug instruction, this will actually
1231 // shrink the region since we moved all debug instructions to the end of the
1232 // block. Find the first instruction that is not a debug instruction.
1233 DAG.RegionBegin = Unsched.front()->getIterator();
1234 if (DAG.RegionBegin->isDebugInstr()) {
1235 for (MachineInstr *MI : Unsched) {
1236 if (MI->isDebugInstr())
1237 continue;
1238 DAG.RegionBegin = MI->getIterator();
1239 break;
1240 }
1241 }
1242
1243 // Then move the debug instructions back into their correct place and set
1244 // RegionBegin and RegionEnd if needed.
1245  DAG.placeDebugValues();
1246
1247 DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
1248}
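// Reverting re-inserts every instruction in its original pre-scheduling order
// recorded in Unsched, updates LiveIntervals for each move, re-derives
// undef/dead flags, and finally restores RegionBegin/RegionEnd around any
// debug instructions that ended up at the bottom of the block.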
1249
1250void PreRARematStage::collectRematerializableInstructions() {
1251 const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
1252 for (unsigned I = 0, E = DAG.MRI.getNumVirtRegs(); I != E; ++I) {
1253    Register Reg = Register::index2VirtReg(I);
1254    if (!DAG.LIS->hasInterval(Reg))
1255 continue;
1256
1257 // TODO: Handle AGPR and SGPR rematerialization
1258 if (!SRI->isVGPRClass(DAG.MRI.getRegClass(Reg)) ||
1259 !DAG.MRI.hasOneDef(Reg) || !DAG.MRI.hasOneNonDBGUse(Reg))
1260 continue;
1261
1262 MachineOperand *Op = DAG.MRI.getOneDef(Reg);
1263 MachineInstr *Def = Op->getParent();
1264 if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(*Def))
1265 continue;
1266
1267    MachineInstr *UseI = &*DAG.MRI.use_instr_nodbg_begin(Reg);
1268    if (Def->getParent() == UseI->getParent())
1269 continue;
1270
1271 // We are only collecting defs that are defined in another block and are
1272 // live-through or used inside regions at MinOccupancy. This means that the
1273 // register must be in the live-in set for the region.
1274 bool AddedToRematList = false;
1275 for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
1276 auto It = DAG.LiveIns[I].find(Reg);
1277 if (It != DAG.LiveIns[I].end() && !It->second.none()) {
1278 if (DAG.RegionsWithMinOcc[I]) {
1279 RematerializableInsts[I][Def] = UseI;
1280 AddedToRematList = true;
1281 }
1282
1283 // Collect regions with rematerializable reg as live-in to avoid
1284 // searching later when updating RP.
1285 RematDefToLiveInRegions[Def].push_back(I);
1286 }
1287 }
1288 if (!AddedToRematList)
1289 RematDefToLiveInRegions.erase(Def);
1290 }
1291}
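// A def is collected above only if it is a VGPR with a single def and a single
// non-debug use in a different block, has no subregister def, is trivially
// rematerializable, and is live into at least one region that is currently at
// the minimum occupancy.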
1292
1293bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
1294 const TargetInstrInfo *TII) {
1295 // Temporary copies of cached variables we will be modifying and replacing if
1296 // sinking succeeds.
1297  SmallVector<
1298      std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>, 32>
1299      NewRegions;
1300  DenseMap<unsigned, GCNRegPressure> NewPressure;
1301  DenseMap<unsigned, GCNRPTracker::LiveRegSet> NewLiveIns;
1302  BitVector NewRescheduleRegions;
1303 LiveIntervals *LIS = DAG.LIS;
1304
1305 NewRegions.resize(DAG.Regions.size());
1306 NewRescheduleRegions.resize(DAG.Regions.size());
1307
1308  // Collect only regions that have a rematerializable def as a live-in.
1309 SmallSet<unsigned, 16> ImpactedRegions;
1310 for (const auto &It : RematDefToLiveInRegions)
1311 ImpactedRegions.insert(It.second.begin(), It.second.end());
1312
1313 // Make copies of register pressure and live-ins cache that will be updated
1314 // as we rematerialize.
1315 for (auto Idx : ImpactedRegions) {
1316 NewPressure[Idx] = DAG.Pressure[Idx];
1317 NewLiveIns[Idx] = DAG.LiveIns[Idx];
1318 }
1319 NewRegions = DAG.Regions;
1320 NewRescheduleRegions.reset();
1321  DenseMap<MachineInstr *, MachineInstr *> InsertedMIToOldDef;
1322
1323 bool Improved = false;
1324 for (auto I : ImpactedRegions) {
1325 if (!DAG.RegionsWithMinOcc[I])
1326 continue;
1327
1328 Improved = false;
1329 int VGPRUsage = NewPressure[I].getVGPRNum(ST.hasGFX90AInsts());
1330 int SGPRUsage = NewPressure[I].getSGPRNum();
1331
1332 // TODO: Handle occupancy drop due to AGPR and SGPR.
1333 // Check if cause of occupancy drop is due to VGPR usage and not SGPR.
1334 if (ST.getOccupancyWithNumSGPRs(SGPRUsage) == DAG.MinOccupancy)
1335 break;
1336
1337 // The occupancy of this region could have been improved by a previous
1338 // iteration's sinking of defs.
1339 if (NewPressure[I].getOccupancy(ST) > DAG.MinOccupancy) {
1340 NewRescheduleRegions[I] = true;
1341 Improved = true;
1342 continue;
1343 }
1344
1345 // First check if we have enough trivially rematerializable instructions to
1346 // improve occupancy. Optimistically assume all instructions we are able to
1347 // sink decreased RP.
1348 int TotalSinkableRegs = 0;
1349 for (const auto &It : RematerializableInsts[I]) {
1350 MachineInstr *Def = It.first;
1351 Register DefReg = Def->getOperand(0).getReg();
1352 TotalSinkableRegs +=
1353 SIRegisterInfo::getNumCoveredRegs(NewLiveIns[I][DefReg]);
1354 }
1355 int VGPRsAfterSink = VGPRUsage - TotalSinkableRegs;
1356 unsigned OptimisticOccupancy = ST.getOccupancyWithNumVGPRs(VGPRsAfterSink);
1357 // If in the most optimistic scenario, we cannot improve occupancy, then do
1358 // not attempt to sink any instructions.
1359 if (OptimisticOccupancy <= DAG.MinOccupancy)
1360 break;
1361
1362 unsigned ImproveOccupancy = 0;
1363    SmallVector<MachineInstr *, 4> SinkedDefs;
1364    for (auto &It : RematerializableInsts[I]) {
1365 MachineInstr *Def = It.first;
1366 MachineBasicBlock::iterator InsertPos =
1367 MachineBasicBlock::iterator(It.second);
1368 Register Reg = Def->getOperand(0).getReg();
1369 // Rematerialize MI to its use block. Since we are only rematerializing
1370 // instructions that do not have any virtual reg uses, we do not need to
1371 // call LiveRangeEdit::allUsesAvailableAt() and
1372 // LiveRangeEdit::canRematerializeAt().
1373 TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg,
1374 Def->getOperand(0).getSubReg(), *Def, *DAG.TRI);
1375 MachineInstr *NewMI = &*std::prev(InsertPos);
1376 LIS->InsertMachineInstrInMaps(*NewMI);
1377 LIS->removeInterval(Reg);
1378      LIS->createAndComputeVirtRegInterval(Reg);
1379      InsertedMIToOldDef[NewMI] = Def;
1380
1381      // Update region boundaries in the scheduling region we sank from since
1382      // we may sink an instruction that was at the beginning or end of its region.
1383 DAG.updateRegionBoundaries(NewRegions, Def, /*NewMI =*/nullptr,
1384 /*Removing =*/true);
1385
1386 // Update region boundaries in region we sinked to.
1387 DAG.updateRegionBoundaries(NewRegions, InsertPos, NewMI);
1388
1389 LaneBitmask PrevMask = NewLiveIns[I][Reg];
1390 // FIXME: Also update cached pressure for where the def was sinked from.
1391      // Update RP for all regions that have this reg as a live-in and remove
1392 // the reg from all regions as a live-in.
1393 for (auto Idx : RematDefToLiveInRegions[Def]) {
1394 NewLiveIns[Idx].erase(Reg);
1395 if (InsertPos->getParent() != DAG.Regions[Idx].first->getParent()) {
1396 // Def is live-through and not used in this block.
1397 NewPressure[Idx].inc(Reg, PrevMask, LaneBitmask::getNone(), DAG.MRI);
1398 } else {
1399 // Def is used and rematerialized into this block.
1400 GCNDownwardRPTracker RPT(*LIS);
1401 auto *NonDbgMI = &*skipDebugInstructionsForward(
1402 NewRegions[Idx].first, NewRegions[Idx].second);
1403 RPT.reset(*NonDbgMI, &NewLiveIns[Idx]);
1404 RPT.advance(NewRegions[Idx].second);
1405 NewPressure[Idx] = RPT.moveMaxPressure();
1406 }
1407 }
1408
1409 SinkedDefs.push_back(Def);
1410 ImproveOccupancy = NewPressure[I].getOccupancy(ST);
1411 if (ImproveOccupancy > DAG.MinOccupancy)
1412 break;
1413 }
1414
1415    // Remove defs we just sank from all regions' lists of sinkable defs.
1416 for (auto &Def : SinkedDefs)
1417 for (auto TrackedIdx : RematDefToLiveInRegions[Def])
1418 RematerializableInsts[TrackedIdx].erase(Def);
1419
1420 if (ImproveOccupancy <= DAG.MinOccupancy)
1421 break;
1422
1423 NewRescheduleRegions[I] = true;
1424 Improved = true;
1425 }
1426
1427 if (!Improved) {
1428 // Occupancy was not improved for all regions that were at MinOccupancy.
1429 // Undo sinking and remove newly rematerialized instructions.
1430 for (auto &Entry : InsertedMIToOldDef) {
1431 MachineInstr *MI = Entry.first;
1432 MachineInstr *OldMI = Entry.second;
1433 Register Reg = MI->getOperand(0).getReg();
1434      LIS->RemoveMachineInstrFromMaps(*MI);
1435      MI->eraseFromParent();
1436 OldMI->clearRegisterDeads(Reg);
1437 LIS->removeInterval(Reg);
1438      LIS->createAndComputeVirtRegInterval(Reg);
1439    }
1440 return false;
1441 }
1442
1443 // Occupancy was improved for all regions.
1444 for (auto &Entry : InsertedMIToOldDef) {
1445 MachineInstr *MI = Entry.first;
1446 MachineInstr *OldMI = Entry.second;
1447
1448 // Remove OldMI from BBLiveInMap since we are sinking it from its MBB.
1449 DAG.BBLiveInMap.erase(OldMI);
1450
1451 // Remove OldMI and update LIS
1452 Register Reg = MI->getOperand(0).getReg();
1453 LIS->RemoveMachineInstrFromMaps(*OldMI);
1454 OldMI->eraseFromParent();
1455 LIS->removeInterval(Reg);
1456    LIS->createAndComputeVirtRegInterval(Reg);
1457  }
1458
1459 // Update live-ins, register pressure, and regions caches.
1460 for (auto Idx : ImpactedRegions) {
1461 DAG.LiveIns[Idx] = NewLiveIns[Idx];
1462 DAG.Pressure[Idx] = NewPressure[Idx];
1463 DAG.MBBLiveIns.erase(DAG.Regions[Idx].first->getParent());
1464 }
1465 DAG.Regions = NewRegions;
1466 DAG.RescheduleRegions = NewRescheduleRegions;
1467
1469 MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);
1470
1471 return true;
1472}
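// Sinking is all-or-nothing: if any region that was at minimum occupancy fails
// to improve, every rematerialized copy is erased and the original defs are
// kept, so the cached pressure/live-in data is only replaced on success.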
1473
1474// Copied from MachineLICM
1475bool PreRARematStage::isTriviallyReMaterializable(const MachineInstr &MI) {
1476  if (!DAG.TII->isTriviallyReMaterializable(MI))
1477    return false;
1478
1479 for (const MachineOperand &MO : MI.operands())
1480 if (MO.isReg() && MO.isUse() && MO.getReg().isVirtual())
1481 return false;
1482
1483 return true;
1484}
1485
1486// When removing, we will have to check both beginning and ending of the region.
1487// When inserting, we will only have to check if we are inserting NewMI in front
1488// of a scheduling region and do not need to check the ending since we will only
1489// ever be inserting before an already existing MI.
1490void GCNScheduleDAGMILive::updateRegionBoundaries(
1491    SmallVectorImpl<std::pair<MachineBasicBlock::iterator,
1492                              MachineBasicBlock::iterator>> &RegionBoundaries,
1493 MachineBasicBlock::iterator MI, MachineInstr *NewMI, bool Removing) {
1494 unsigned I = 0, E = RegionBoundaries.size();
1495 // Search for first region of the block where MI is located
1496 while (I != E && MI->getParent() != RegionBoundaries[I].first->getParent())
1497 ++I;
1498
1499 for (; I != E; ++I) {
1500 if (MI->getParent() != RegionBoundaries[I].first->getParent())
1501 return;
1502
1503 if (Removing && MI == RegionBoundaries[I].first &&
1504 MI == RegionBoundaries[I].second) {
1505 // MI is in a region with size 1, after removing, the region will be
1506 // size 0, set RegionBegin and RegionEnd to pass end of block iterator.
1507 RegionBoundaries[I] =
1508 std::pair(MI->getParent()->end(), MI->getParent()->end());
1509 return;
1510 }
1511 if (MI == RegionBoundaries[I].first) {
1512 if (Removing)
1513 RegionBoundaries[I] =
1514 std::pair(std::next(MI), RegionBoundaries[I].second);
1515 else
1516 // Inserted NewMI in front of region, set new RegionBegin to NewMI
1517 RegionBoundaries[I] = std::pair(MachineBasicBlock::iterator(NewMI),
1518 RegionBoundaries[I].second);
1519 return;
1520 }
1521 if (Removing && MI == RegionBoundaries[I].second) {
1522 RegionBoundaries[I] = std::pair(RegionBoundaries[I].first, std::prev(MI));
1523 return;
1524 }
1525 }
1526}
1527
1528static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG) {
1529  return std::any_of(
1530 DAG->begin(), DAG->end(), [](MachineBasicBlock::iterator MI) {
1531 unsigned Opc = MI->getOpcode();
1532 return Opc == AMDGPU::SCHED_GROUP_BARRIER || Opc == AMDGPU::IGLP_OPT;
1533 });
1534}
1535
1536GCNPostScheduleDAGMILive::GCNPostScheduleDAGMILive(
1537    MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S,
1538 bool RemoveKillFlags)
1539 : ScheduleDAGMI(C, std::move(S), RemoveKillFlags) {}
1540
1541void GCNPostScheduleDAGMILive::schedule() {
1542  HasIGLPInstrs = hasIGLPInstrs(this);
1543 if (HasIGLPInstrs) {
1544 SavedMutations.clear();
1545 SavedMutations.swap(Mutations);
1547 }
1548
1549  ScheduleDAGMI::schedule();
1550}
1551
1552void GCNPostScheduleDAGMILive::finalizeSchedule() {
1553  if (HasIGLPInstrs)
1554 SavedMutations.swap(Mutations);
1555
1556  ScheduleDAGMI::finalizeSchedule();
1557}
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock & MBB
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
static void printScheduleModel(std::set< std::pair< MachineInstr *, unsigned >, EarlierIssuingCycle > &ReadyCycles)
static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG)
static cl::opt< bool > DisableUnclusterHighRP("amdgpu-disable-unclustred-high-rp-reschedule", cl::Hidden, cl::desc("Disable unclustred high register pressure " "reduction scheduling stage."), cl::init(false))
static cl::opt< unsigned > ScheduleMetricBias("amdgpu-schedule-metric-bias", cl::Hidden, cl::desc("Sets the bias which adds weight to occupancy vs latency. Set it to " "100 to chase the occupancy only."), cl::init(10))
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
if(VerifyEach)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemWithWaveCount.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
BitVector & reset()
Definition: BitVector.h:392
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
Definition: BitVector.h:341
BitVector & set()
Definition: BitVector.h:351
bool none() const
none - Returns true if none of the bits are set.
Definition: BitVector.h:188
bool shouldRevertScheduling(unsigned WavesAfter) override
bool erase(const KeyT &Val)
Definition: DenseMap.h:329
GCNMaxILPSchedStrategy(const MachineSchedContext *C)
bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override
Apply a set of heuristics to a new candidate.
GCNMaxOccupancySchedStrategy(const MachineSchedContext *C)
void finalizeSchedule() override
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
GCNPostScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S, bool RemoveKillFlags)
virtual bool initGCNRegion()
GCNSchedStrategy & S
GCNRegPressure PressureBefore
bool isRegionWithExcessRP() const
bool mayCauseSpilling(unsigned WavesAfter)
ScheduleMetrics getScheduleMetrics(const std::vector< SUnit > &InputSchedule)
GCNScheduleDAGMILive & DAG
const GCNSchedStageID StageID
std::vector< MachineInstr * > Unsched
GCNRegPressure PressureAfter
MachineFunction & MF
SIMachineFunctionInfo & MFI
unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle, DenseMap< unsigned, unsigned > &ReadyCycles, const TargetSchedModel &SM)
virtual void finalizeGCNSchedStage()
virtual bool initGCNSchedStage()
virtual bool shouldRevertScheduling(unsigned WavesAfter)
std::vector< std::unique_ptr< ScheduleDAGMutation > > SavedMutations
GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
MachineBasicBlock * CurrentMBB
const GCNSubtarget & ST
This is a minimal scheduler strategy.
const unsigned HighRPSGPRBias
GCNSchedStrategy(const MachineSchedContext *C)
SmallVector< GCNSchedStageID, 4 > SchedStages
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Cand)
SUnit * pickNodeBidirectional(bool &IsTopNode)
std::vector< unsigned > MaxPressure
GCNSchedStageID getCurrentStage()
MachineFunction * MF
SmallVectorImpl< GCNSchedStageID >::iterator CurrentStage
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, const SIRegisterInfo *SRI, unsigned SGPRPressure, unsigned VGPRPressure)
std::vector< unsigned > Pressure
void initialize(ScheduleDAGMI *DAG) override
Initialize the strategy after building the DAG for a new region.
const unsigned HighRPVGPRBias
SUnit * pickNode(bool &IsTopNode) override
Pick the best node to balance the schedule. Implements MachineSchedStrategy.
GCNSchedStageID getNextStage() const
void finalizeSchedule() override
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
GCNScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S)
bool hasGFX90AInsts() const
unsigned computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Return occupancy for the given function.
const SIInstrInfo * getInstrInfo() const override
Definition: GCNSubtarget.h:228
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const
Return the maximum number of waves per SIMD for kernels using VGPRs VGPRs.
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const
Return the maximum number of waves per SIMD for kernels using SGPRs SGPRs.
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
void traceCandidate(const SchedCandidate &Cand)
void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone)
Set the CandPolicy given a scheduling zone given the current resources and latencies inside and outsi...
const TargetSchedModel * SchedModel
const MachineSchedContext * Context
const TargetRegisterInfo * TRI
GenericScheduler shrinks the unscheduled zone using heuristics to balance the schedule.
SchedCandidate BotCand
Candidate last picked from Bot boundary.
SchedCandidate TopCand
Candidate last picked from Top boundary.
MachineSchedPolicy RegionPolicy
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Apply a set of heuristics to a new candidate.
ScheduleDAGMILive * DAG
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
bool shouldRevertScheduling(unsigned WavesAfter) override
bool hasInterval(Register Reg) const
SlotIndex InsertMachineInstrInMaps(MachineInstr &MI)
void handleMove(MachineInstr &MI, bool UpdateFlags=false)
Call this method to notify LiveIntervals that instruction MI has been moved within a basic block.
SlotIndexes * getSlotIndexes() const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
void RemoveMachineInstrFromMaps(MachineInstr &MI)
void removeInterval(Register Reg)
Interval removal.
LiveInterval & createAndComputeVirtRegInterval(Register Reg)
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
MachineInstr * remove(MachineInstr *I)
Remove the unbundled instruction from the instruction list without deleting it.
unsigned succ_size() const
MachineInstrBundleIterator< MachineInstr > iterator
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Representation of each machine instruction.
Definition: MachineInstr.h:68
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:320
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void clearRegisterDeads(Register Reg)
Clear all dead flags on operands defining register Reg.
MachineOperand class - Representation of each machine instruction operand.
bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
MachineOperand * getOneDef(Register Reg) const
Returns the defining operand if there is exactly one operand defining the specified register,...
use_instr_nodbg_iterator use_instr_nodbg_begin(Register RegNo) const
bool hasOneDef(Register RegNo) const
Return true if there is exactly one operand defining the specified register.
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
bool shouldRevertScheduling(unsigned WavesAfter) override
bool shouldRevertScheduling(unsigned WavesAfter) override
bool initGCNRegion() override
bool initGCNSchedStage() override
Capture a change in pressure for a single pressure set.
void setUnitInc(int Inc)
Helpers for implementing custom MachineSchedStrategy classes.
bool empty() const
Track the current register pressure at some position in the instruction stream, and remember the high...
void advance()
Advance across the current instruction.
void getDownwardPressure(const MachineInstr *MI, std::vector< unsigned > &PressureResult, std::vector< unsigned > &MaxPressureResult)
Get the pressure of each PSet after traversing this instruction top-down.
const std::vector< unsigned > & getRegSetPressureAtPos() const
Get the register set pressure at the current position, which may be less than the pressure across the...
void getUpwardPressure(const MachineInstr *MI, std::vector< unsigned > &PressureResult, std::vector< unsigned > &MaxPressureResult)
Get the pressure of each PSet after traversing this instruction bottom-up.
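getDownwardPressure and getUpwardPressure above let a strategy ask what the pressure would be if an instruction were scheduled next, without committing to it. A minimal sketch for the top boundary; PressureSetID stands in for a real pressure-set number, and the const_cast reflects how a tracker held by const reference is typically queried.

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include <vector>

using namespace llvm;

// Projected pressure of one pressure set if MI were scheduled next at the
// top boundary. The tracker restores its own state after the query.
static unsigned projectedTopPressure(const RegPressureTracker &TopTracker,
                                     const MachineInstr *MI,
                                     unsigned PressureSetID) {
  std::vector<unsigned> Pressure, MaxPressure;
  auto &Tracker = const_cast<RegPressureTracker &>(TopTracker);
  Tracker.getDownwardPressure(MI, Pressure, MaxPressure);
  return PressureSetID < Pressure.size() ? Pressure[PressureSetID] : 0;
}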
unsigned getNumAllocatableRegs(const TargetRegisterClass *RC) const
getNumAllocatableRegs - Returns the number of actually allocatable registers in RC in the current function.
List of registers defined and used by a machine instruction.
void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOperand flags.
void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the RegisterOperands accordingly.
void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS)
Use liveness information to find dead defs not marked with a dead flag and move them to the DeadDefs vector.
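RegisterOperands is the per-instruction summary those trackers consume: collect uses and defs from the operands, then refine them with liveness so partially undefined or dead lanes do not inflate the estimate. A minimal sketch of that sequence; ShouldTrackLaneMasks is an assumption about how the caller configured pressure tracking.

#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"

using namespace llvm;

// Summarize MI's register effects and refine them with liveness at Pos.
static RegisterOperands summarize(const MachineInstr &MI,
                                  const TargetRegisterInfo &TRI,
                                  const MachineRegisterInfo &MRI,
                                  const LiveIntervals &LIS, SlotIndex Pos,
                                  bool ShouldTrackLaneMasks) {
  RegisterOperands RegOpers;
  RegOpers.collect(MI, TRI, MRI, ShouldTrackLaneMasks, /*IgnoreDead=*/false);
  if (ShouldTrackLaneMasks)
    RegOpers.adjustLaneLiveness(LIS, MRI, Pos);
  else
    RegOpers.detectDeadDefs(MI, LIS);
  return RegOpers;
}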
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition: Register.h:84
const TargetSchedModel & getSchedModel() const
Definition: SIInstrInfo.h:1227
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which interpolation parameters to load.
void increaseOccupancy(const MachineFunction &MF, unsigned Limit)
void limitOccupancy(const MachineFunction &MF)
static unsigned getNumCoveredRegs(LaneBitmask LM)
static bool isVGPRClass(const TargetRegisterClass *RC)
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:264
bool isScheduled
True once scheduled.
Definition: ScheduleDAG.h:284
bool isBottomReady() const
Definition: ScheduleDAG.h:449
bool isTopReady() const
Definition: ScheduleDAG.h:446
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:256
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:373
Each Scheduling boundary is associated with ready queues.
unsigned getLatencyStallCycles(SUnit *SU)
Get the difference between the given SUnit's ready time and the current cycle.
ScheduleDAGMI * DAG
SUnit * pickOnlyChoice()
Call this before applying any other heuristics to the Available queue.
void removeReady(SUnit *SU)
Remove SU from the ready set for this boundary.
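A SchedBoundary owns the ready queue for one scheduling direction. The usual picking pattern, sketched below, is to ask pickOnlyChoice first so the boundary can release pending instructions and advance cycles on its own, and to compare candidates only when several are ready; betterThan is a hypothetical ranking callback, not an LLVM API.

#include "llvm/CodeGen/MachineScheduler.h"

using namespace llvm;

// Pick from one boundary: take the forced choice when there is one,
// otherwise scan the Available queue with a caller-provided comparison.
template <typename Compare>
static SUnit *pickFromBoundary(SchedBoundary &Zone, Compare betterThan) {
  if (SUnit *OnlyChoice = Zone.pickOnlyChoice())
    return OnlyChoice;

  SUnit *Best = nullptr;
  for (SUnit *SU : Zone.Available)
    if (!Best || betterThan(SU, Best))
      Best = SU;
  return Best;
}

Whichever node wins is then handed to removeReady on its boundary, as GenericScheduler::pickNode does.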
A ScheduleDAG for scheduling lists of MachineInstr.
MachineBasicBlock::iterator end() const
Returns an iterator to the bottom of the current scheduling region.
MachineBasicBlock * BB
The block in which to insert instructions.
MachineBasicBlock::iterator RegionEnd
The end of the range to be scheduled.
virtual void finalizeSchedule()
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
virtual void exitRegion()
Called when the scheduler has finished scheduling the current region.
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
unsigned NumRegionInstrs
Instructions in this region (distance(RegionBegin, RegionEnd)).
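RegionBegin, RegionEnd and the begin()/end() accessors above bound the half-open range being scheduled, and NumRegionInstrs is simply the distance between them. A minimal sketch of walking that range while skipping debug instructions, using the skipDebugInstructionsForward helper listed further down.

#include "llvm/CodeGen/MachineBasicBlock.h"
#include <iterator>

using namespace llvm;

// Count every non-debug instruction in the half-open region [Begin, End).
static unsigned countRealInstrs(MachineBasicBlock::iterator Begin,
                                MachineBasicBlock::iterator End) {
  unsigned Count = 0;
  for (auto It = skipDebugInstructionsForward(Begin, End); It != End;
       It = skipDebugInstructionsForward(std::next(It), End))
    ++Count;
  return Count;
}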
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while updating LiveIntervals and tracking regpressure.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
const RegPressureTracker & getBotRPTracker() const
bool isTrackingPressure() const
Return true if register pressure tracking is enabled.
const RegPressureTracker & getTopRPTracker() const
RegPressureTracker RPTracker
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions according to the given MachineSchedStrategy without much extra book-keeping.
void startBlock(MachineBasicBlock *bb) override
Prepares to perform scheduling in the given block.
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
MachineBasicBlock::iterator top() const
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
MachineBasicBlock::iterator bottom() const
void finishBlock() override
Cleans up after scheduling in the given block.
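The ScheduleDAGMI/ScheduleDAGMILive entries above form the per-region protocol a scheduling driver follows. A minimal sketch of the call order for one region, assuming the boundaries were computed beforehand; it shows only the shape of the loop body, not this pass's actual driver.

#include "llvm/CodeGen/MachineScheduler.h"

using namespace llvm;

// Schedule one region [RegionBegin, RegionEnd) inside MBB with an existing
// ScheduleDAGMILive. Real drivers skip schedule() for regions with fewer
// than two instructions.
static void scheduleOneRegion(ScheduleDAGMILive &DAG, MachineBasicBlock *MBB,
                              MachineBasicBlock::iterator RegionBegin,
                              MachineBasicBlock::iterator RegionEnd,
                              unsigned NumRegionInstrs) {
  DAG.startBlock(MBB);
  DAG.enterRegion(MBB, RegionBegin, RegionEnd, NumRegionInstrs);
  DAG.schedule();     // builds the DAG and reorders the instructions
  DAG.exitRegion();
  DAG.finishBlock();
}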
LiveIntervals * LIS
const SUnit * getNextClusterPred() const
void placeDebugValues()
Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
const SUnit * getNextClusterSucc() const
std::vector< std::unique_ptr< ScheduleDAGMutation > > Mutations
Ordered list of DAG postprocessing steps.
MachineRegisterInfo & MRI
Virtual/real register map.
Definition: ScheduleDAG.h:560
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:557
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:561
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:558
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:559
static const unsigned ScaleFactor
unsigned getMetric() const
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:82
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
Definition: SlotIndexes.h:259
SlotIndexes pass.
Definition: SlotIndexes.h:319
SlotIndex getMBBStartIdx(unsigned Num) const
Returns the first index in the given basic block number.
Definition: SlotIndexes.h:471
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less than N).
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
Definition: SmallVector.h:577
void resize(size_type N)
Definition: SmallVector.h:642
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
TargetInstrInfo - Interface to description of machine instruction set.
bool isTriviallyReMaterializable(const MachineInstr &MI) const
Return true if the instruction is trivially rematerializable, meaning it has no side effects and requires no operands that aren't always available.
Provide an instruction scheduling machine model to CodeGen passes.
virtual const TargetInstrInfo * getInstrInfo() const
bool shouldRevertScheduling(unsigned WavesAfter) override
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)
unsigned getWeakLeft(const SUnit *SU, bool isTop)
GCNRegPressure getRegPressure(const MachineRegisterInfo &MRI, Range &&LiveRegs)
cl::opt< bool > VerifyScheduling
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason, const TargetRegisterInfo *TRI, const MachineFunction &MF)
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr)
std::unique_ptr< ScheduleDAGMutation > createIGroupLPDAGMutation()
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary &Zone)
bool tryGreater(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:292
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1946
DenseMap< MachineInstr *, GCNRPTracker::LiveRegSet > getLiveRegMap(Range &&R, bool After, LiveIntervals &LIS)
Creates a map MachineInstr -> LiveRegSet. R is a range of instruction iterators; After selects whether the live set is taken upon entry to or exit from each instruction.
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value that is congruent to Skew modulo Align.
Definition: MathExtras.h:533
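Two quick worked examples of the contract above, using the signature just listed.

#include "llvm/Support/MathExtras.h"
#include <cassert>

void alignDownExamples() {
  assert(llvm::alignDown(37, 8) == 32);    // largest multiple of 8 that is <= 37
  assert(llvm::alignDown(37, 8, 3) == 35); // largest value <= 37 that is 3 mod 8
}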
GCNRPTracker::LiveRegSet getLiveRegsBefore(const MachineInstr &MI, const LiveIntervals &LIS)
bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Return true if this heuristic determines order.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
int biasPhysReg(const SUnit *SU, bool isTop)
Minimize physical register live ranges.
Definition: BitVector.h:858
bool operator()(std::pair< MachineInstr *, unsigned > A, std::pair< MachineInstr *, unsigned > B) const
unsigned getOccupancy(const GCNSubtarget &ST) const
unsigned getVGPRNum(bool UnifiedVGPRFile) const
unsigned getAGPRNum() const
unsigned getSGPRNum() const
bool less(const GCNSubtarget &ST, const GCNRegPressure &O, unsigned MaxOccupancy=std::numeric_limits< unsigned >::max()) const
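The GCNRegPressure accessors above, combined with getLiveRegsBefore and getRegPressure listed earlier, give a point estimate of occupancy anywhere in a block. A minimal sketch that asks whether the live set just before MI still reaches a desired occupancy; the helper and its TargetOccupancy parameter are illustrative.

#include "GCNRegPressure.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

// Would the registers live just before MI still allow at least
// TargetOccupancy waves on this subtarget?
static bool meetsOccupancy(const MachineInstr &MI, const LiveIntervals &LIS,
                           const MachineRegisterInfo &MRI,
                           const GCNSubtarget &ST, unsigned TargetOccupancy) {
  GCNRPTracker::LiveRegSet LiveRegs = getLiveRegsBefore(MI, LIS);
  GCNRegPressure RP = getRegPressure(MRI, LiveRegs);
  return RP.getOccupancy(ST) >= TargetOccupancy;
}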
Policy for scheduling the next instruction in the candidate's zone.
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of pickNode().
void reset(const CandPolicy &NewPolicy)
void initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
Status of an instruction's critical resource consumption.
static constexpr LaneBitmask getNone()
Definition: LaneBitmask.h:81
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instantiate a scheduler.
RegisterClassInfo * RegClassInfo
PressureChange CriticalMax