LLVM 20.0.0git
GCNSchedStrategy.cpp
Go to the documentation of this file.
1//===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This contains a MachineSchedStrategy implementation for maximizing wave
11/// occupancy on GCN hardware.
12///
13/// This pass will apply multiple scheduling stages to the same function.
14/// Regions are first recorded in GCNScheduleDAGMILive::schedule. The actual
15/// entry point for the scheduling of those regions is
16/// GCNScheduleDAGMILive::runSchedStages.
17
18/// Generally, the reason for having multiple scheduling stages is to account
19/// for the kernel-wide effect of register usage on occupancy. Usually, only a
20/// few scheduling regions will have register pressure high enough to limit
21/// occupancy for the kernel, so constraints can be relaxed to improve ILP in
22/// other regions.
23///
24//===----------------------------------------------------------------------===//
25
26#include "GCNSchedStrategy.h"
27#include "AMDGPUIGroupLP.h"
30
31#define DEBUG_TYPE "machine-scheduler"
32
33using namespace llvm;
34
36 "amdgpu-disable-unclustered-high-rp-reschedule", cl::Hidden,
37 cl::desc("Disable unclustered high register pressure "
38 "reduction scheduling stage."),
39 cl::init(false));
40
42 "amdgpu-disable-clustered-low-occupancy-reschedule", cl::Hidden,
43 cl::desc("Disable clustered low occupancy "
44 "rescheduling for ILP scheduling stage."),
45 cl::init(false));
46
48 "amdgpu-schedule-metric-bias", cl::Hidden,
50 "Sets the bias which adds weight to occupancy vs latency. Set it to "
51 "100 to chase the occupancy only."),
52 cl::init(10));
53
54static cl::opt<bool>
55 RelaxedOcc("amdgpu-schedule-relaxed-occupancy", cl::Hidden,
56 cl::desc("Relax occupancy targets for kernels which are memory "
57 "bound (amdgpu-membound-threshold), or "
58 "Wave Limited (amdgpu-limit-wave-threshold)."),
59 cl::init(false));
60
61const unsigned ScheduleMetrics::ScaleFactor = 100;
62
64 : GenericScheduler(C), TargetOccupancy(0), MF(nullptr),
65 HasHighPressure(false) {}
66
69
70 MF = &DAG->MF;
71
73
75 Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
77 Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);
78
80 // Set the initial TargetOccupnacy to the maximum occupancy that we can
81 // achieve for this function. This effectively sets a lower bound on the
82 // 'Critical' register limits in the scheduler.
83 // Allow for lower occupancy targets if kernel is wave limited or memory
84 // bound, and using the relaxed occupancy feature.
88 std::min(ST.getMaxNumSGPRs(TargetOccupancy, true), SGPRExcessLimit);
89
90 if (!KnownExcessRP) {
92 std::min(ST.getMaxNumVGPRs(TargetOccupancy), VGPRExcessLimit);
93 } else {
94 // This is similar to ST.getMaxNumVGPRs(TargetOccupancy) result except
95 // returns a reasonably small number for targets with lots of VGPRs, such
96 // as GFX10 and GFX11.
97 LLVM_DEBUG(dbgs() << "Region is known to spill, use alternative "
98 "VGPRCriticalLimit calculation method.\n");
99
100 unsigned Granule = AMDGPU::IsaInfo::getVGPRAllocGranule(&ST);
101 unsigned Addressable = AMDGPU::IsaInfo::getAddressableNumVGPRs(&ST);
102 unsigned VGPRBudget = alignDown(Addressable / TargetOccupancy, Granule);
103 VGPRBudget = std::max(VGPRBudget, Granule);
104 VGPRCriticalLimit = std::min(VGPRBudget, VGPRExcessLimit);
105 }
106
107 // Subtract error margin and bias from register limits and avoid overflow.
112
113 LLVM_DEBUG(dbgs() << "VGPRCriticalLimit = " << VGPRCriticalLimit
114 << ", VGPRExcessLimit = " << VGPRExcessLimit
115 << ", SGPRCriticalLimit = " << SGPRCriticalLimit
116 << ", SGPRExcessLimit = " << SGPRExcessLimit << "\n\n");
117}
118
119/// Checks whether \p SU can use the cached DAG pressure diffs to compute the
120/// current register pressure.
121///
122/// This works for the common case, but it has a few exceptions that have been
123/// observed through trial and error:
124/// - Explicit physical register operands
125/// - Subregister definitions
126///
127/// In both of those cases, PressureDiff doesn't represent the actual pressure,
128/// and querying LiveIntervals through the RegPressureTracker is needed to get
129/// an accurate value.
130///
131/// We should eventually only use PressureDiff for maximum performance, but this
132/// already allows 80% of SUs to take the fast path without changing scheduling
133/// at all. Further changes would either change scheduling, or require a lot
134/// more logic to recover an accurate pressure estimate from the PressureDiffs.
135static bool canUsePressureDiffs(const SUnit &SU) {
136 if (!SU.isInstr())
137 return false;
138
139 // Cannot use pressure diffs for subregister defs or with physregs, it's
140 // imprecise in both cases.
141 for (const auto &Op : SU.getInstr()->operands()) {
142 if (!Op.isReg() || Op.isImplicit())
143 continue;
144 if (Op.getReg().isPhysical() ||
145 (Op.isDef() && Op.getSubReg() != AMDGPU::NoSubRegister))
146 return false;
147 }
148 return true;
149}
150
151static void getRegisterPressures(bool AtTop,
152 const RegPressureTracker &RPTracker, SUnit *SU,
153 std::vector<unsigned> &Pressure,
154 std::vector<unsigned> &MaxPressure) {
155 // getDownwardPressure() and getUpwardPressure() make temporary changes to
156 // the tracker, so we need to pass those function a non-const copy.
157 RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);
158 if (AtTop)
159 TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
160 else
161 TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
162}
163
165 bool AtTop,
166 const RegPressureTracker &RPTracker,
167 const SIRegisterInfo *SRI,
168 unsigned SGPRPressure,
169 unsigned VGPRPressure, bool IsBottomUp) {
170 Cand.SU = SU;
171 Cand.AtTop = AtTop;
172
173 if (!DAG->isTrackingPressure())
174 return;
175
176 Pressure.clear();
177 MaxPressure.clear();
178
179 // We try to use the cached PressureDiffs in the ScheduleDAG whenever
180 // possible over querying the RegPressureTracker.
181 //
182 // RegPressureTracker will make a lot of LIS queries which are very
183 // expensive, it is considered a slow function in this context.
184 //
185 // PressureDiffs are precomputed and cached, and getPressureDiff is just a
186 // trivial lookup into an array. It is pretty much free.
187 //
188 // In EXPENSIVE_CHECKS, we always query RPTracker to verify the results of
189 // PressureDiffs.
190 if (AtTop || !canUsePressureDiffs(*SU)) {
191 getRegisterPressures(AtTop, RPTracker, SU, Pressure, MaxPressure);
192 } else {
193 // Reserve 4 slots.
194 Pressure.resize(4, 0);
195 Pressure[AMDGPU::RegisterPressureSets::SReg_32] = SGPRPressure;
196 Pressure[AMDGPU::RegisterPressureSets::VGPR_32] = VGPRPressure;
197
198 for (const auto &Diff : DAG->getPressureDiff(SU)) {
199 if (!Diff.isValid())
200 continue;
201 // PressureDiffs is always bottom-up so if we're working top-down we need
202 // to invert its sign.
203 Pressure[Diff.getPSet()] +=
204 (IsBottomUp ? Diff.getUnitInc() : -Diff.getUnitInc());
205 }
206
207#ifdef EXPENSIVE_CHECKS
208 std::vector<unsigned> CheckPressure, CheckMaxPressure;
209 getRegisterPressures(AtTop, RPTracker, SU, CheckPressure, CheckMaxPressure);
210 if (Pressure[AMDGPU::RegisterPressureSets::SReg_32] !=
211 CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] ||
212 Pressure[AMDGPU::RegisterPressureSets::VGPR_32] !=
213 CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32]) {
214 errs() << "Register Pressure is inaccurate when calculated through "
215 "PressureDiff\n"
216 << "SGPR got " << Pressure[AMDGPU::RegisterPressureSets::SReg_32]
217 << ", expected "
218 << CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] << "\n"
219 << "VGPR got " << Pressure[AMDGPU::RegisterPressureSets::VGPR_32]
220 << ", expected "
221 << CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32] << "\n";
222 report_fatal_error("inaccurate register pressure calculation");
223 }
224#endif
225 }
226
227 unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
228 unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
229
230 // If two instructions increase the pressure of different register sets
231 // by the same amount, the generic scheduler will prefer to schedule the
232 // instruction that increases the set with the least amount of registers,
233 // which in our case would be SGPRs. This is rarely what we want, so
234 // when we report excess/critical register pressure, we do it either
235 // only for VGPRs or only for SGPRs.
236
237 // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
238 const unsigned MaxVGPRPressureInc = 16;
239 bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
240 bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;
241
242 // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
243 // to increase the likelihood we don't go over the limits. We should improve
244 // the analysis to look through dependencies to find the path with the least
245 // register pressure.
246
247 // We only need to update the RPDelta for instructions that increase register
248 // pressure. Instructions that decrease or keep reg pressure the same will be
249 // marked as RegExcess in tryCandidate() when they are compared with
250 // instructions that increase the register pressure.
251 if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
252 HasHighPressure = true;
253 Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
254 Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
255 }
256
257 if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
258 HasHighPressure = true;
259 Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
260 Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
261 }
262
263 // Register pressure is considered 'CRITICAL' if it is approaching a value
264 // that would reduce the wave occupancy for the execution unit. When
265 // register pressure is 'CRITICAL', increasing SGPR and VGPR pressure both
266 // has the same cost, so we don't need to prefer one over the other.
267
268 int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
269 int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;
270
271 if (SGPRDelta >= 0 || VGPRDelta >= 0) {
272 HasHighPressure = true;
273 if (SGPRDelta > VGPRDelta) {
274 Cand.RPDelta.CriticalMax =
275 PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
276 Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
277 } else {
278 Cand.RPDelta.CriticalMax =
279 PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
280 Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
281 }
282 }
283}
284
285// This function is mostly cut and pasted from
286// GenericScheduler::pickNodeFromQueue()
288 const CandPolicy &ZonePolicy,
289 const RegPressureTracker &RPTracker,
290 SchedCandidate &Cand,
291 bool IsBottomUp) {
292 const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
294 unsigned SGPRPressure = 0;
295 unsigned VGPRPressure = 0;
296 if (DAG->isTrackingPressure()) {
297 SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
298 VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
299 }
300 ReadyQueue &Q = Zone.Available;
301 for (SUnit *SU : Q) {
302
303 SchedCandidate TryCand(ZonePolicy);
304 initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, SGPRPressure,
305 VGPRPressure, IsBottomUp);
306 // Pass SchedBoundary only when comparing nodes from the same boundary.
307 SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
308 tryCandidate(Cand, TryCand, ZoneArg);
309 if (TryCand.Reason != NoCand) {
310 // Initialize resource delta if needed in case future heuristics query it.
311 if (TryCand.ResDelta == SchedResourceDelta())
312 TryCand.initResourceDelta(Zone.DAG, SchedModel);
313 Cand.setBest(TryCand);
315 }
316 }
317}
318
319// This function is mostly cut and pasted from
320// GenericScheduler::pickNodeBidirectional()
322 // Schedule as far as possible in the direction of no choice. This is most
323 // efficient, but also provides the best heuristics for CriticalPSets.
324 if (SUnit *SU = Bot.pickOnlyChoice()) {
325 IsTopNode = false;
326 return SU;
327 }
328 if (SUnit *SU = Top.pickOnlyChoice()) {
329 IsTopNode = true;
330 return SU;
331 }
332 // Set the bottom-up policy based on the state of the current bottom zone and
333 // the instructions outside the zone, including the top zone.
334 CandPolicy BotPolicy;
335 setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
336 // Set the top-down policy based on the state of the current top zone and
337 // the instructions outside the zone, including the bottom zone.
338 CandPolicy TopPolicy;
339 setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
340
341 // See if BotCand is still valid (because we previously scheduled from Top).
342 LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
343 if (!BotCand.isValid() || BotCand.SU->isScheduled ||
344 BotCand.Policy != BotPolicy) {
347 /*IsBottomUp=*/true);
348 assert(BotCand.Reason != NoCand && "failed to find the first candidate");
349 } else {
351#ifndef NDEBUG
352 if (VerifyScheduling) {
353 SchedCandidate TCand;
354 TCand.reset(CandPolicy());
355 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand,
356 /*IsBottomUp=*/true);
357 assert(TCand.SU == BotCand.SU &&
358 "Last pick result should correspond to re-picking right now");
359 }
360#endif
361 }
362
363 // Check if the top Q has a better candidate.
364 LLVM_DEBUG(dbgs() << "Picking from Top:\n");
365 if (!TopCand.isValid() || TopCand.SU->isScheduled ||
366 TopCand.Policy != TopPolicy) {
369 /*IsBottomUp=*/false);
370 assert(TopCand.Reason != NoCand && "failed to find the first candidate");
371 } else {
373#ifndef NDEBUG
374 if (VerifyScheduling) {
375 SchedCandidate TCand;
376 TCand.reset(CandPolicy());
377 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand,
378 /*IsBottomUp=*/false);
379 assert(TCand.SU == TopCand.SU &&
380 "Last pick result should correspond to re-picking right now");
381 }
382#endif
383 }
384
385 // Pick best from BotCand and TopCand.
386 LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand);
387 dbgs() << "Bot Cand: "; traceCandidate(BotCand););
388 SchedCandidate Cand = BotCand;
390 tryCandidate(Cand, TopCand, nullptr);
391 if (TopCand.Reason != NoCand) {
392 Cand.setBest(TopCand);
393 }
394 LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand););
395
396 IsTopNode = Cand.AtTop;
397 return Cand.SU;
398}
399
400// This function is mostly cut and pasted from
401// GenericScheduler::pickNode()
403 if (DAG->top() == DAG->bottom()) {
405 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
406 return nullptr;
407 }
408 SUnit *SU;
409 do {
411 SU = Top.pickOnlyChoice();
412 if (!SU) {
413 CandPolicy NoPolicy;
414 TopCand.reset(NoPolicy);
416 /*IsBottomUp=*/false);
417 assert(TopCand.Reason != NoCand && "failed to find a candidate");
418 SU = TopCand.SU;
419 }
420 IsTopNode = true;
421 } else if (RegionPolicy.OnlyBottomUp) {
422 SU = Bot.pickOnlyChoice();
423 if (!SU) {
424 CandPolicy NoPolicy;
425 BotCand.reset(NoPolicy);
427 /*IsBottomUp=*/true);
428 assert(BotCand.Reason != NoCand && "failed to find a candidate");
429 SU = BotCand.SU;
430 }
431 IsTopNode = false;
432 } else {
433 SU = pickNodeBidirectional(IsTopNode);
434 }
435 } while (SU->isScheduled);
436
437 if (SU->isTopReady())
438 Top.removeReady(SU);
439 if (SU->isBottomReady())
440 Bot.removeReady(SU);
441
442 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
443 << *SU->getInstr());
444 return SU;
445}
446
449 return *CurrentStage;
450}
451
454 if (!CurrentStage)
456 else
457 CurrentStage++;
458
459 return CurrentStage != SchedStages.end();
460}
461
464 return std::next(CurrentStage) != SchedStages.end();
465}
466
468 assert(CurrentStage && std::next(CurrentStage) != SchedStages.end());
469 return *std::next(CurrentStage);
470}
471
473 const MachineSchedContext *C)
479}
480
484}
485
487 SchedCandidate &TryCand,
488 SchedBoundary *Zone) const {
489 // Initialize the candidate if needed.
490 if (!Cand.isValid()) {
491 TryCand.Reason = NodeOrder;
492 return true;
493 }
494
495 // Avoid spilling by exceeding the register limit.
496 if (DAG->isTrackingPressure() &&
497 tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
498 RegExcess, TRI, DAG->MF))
499 return TryCand.Reason != NoCand;
500
501 // Bias PhysReg Defs and copies to their uses and defined respectively.
502 if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
503 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
504 return TryCand.Reason != NoCand;
505
506 bool SameBoundary = Zone != nullptr;
507 if (SameBoundary) {
508 // Prioritize instructions that read unbuffered resources by stall cycles.
509 if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
510 Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
511 return TryCand.Reason != NoCand;
512
513 // Avoid critical resource consumption and balance the schedule.
516 TryCand, Cand, ResourceReduce))
517 return TryCand.Reason != NoCand;
519 Cand.ResDelta.DemandedResources, TryCand, Cand,
521 return TryCand.Reason != NoCand;
522
523 // Unconditionally try to reduce latency.
524 if (tryLatency(TryCand, Cand, *Zone))
525 return TryCand.Reason != NoCand;
526
527 // Weak edges are for clustering and other constraints.
528 if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
529 getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak))
530 return TryCand.Reason != NoCand;
531 }
532
533 // Keep clustered nodes together to encourage downstream peephole
534 // optimizations which may reduce resource requirements.
535 //
536 // This is a best effort to set things up for a post-RA pass. Optimizations
537 // like generating loads of multiple registers should ideally be done within
538 // the scheduler pass by combining the loads during DAG postprocessing.
539 const SUnit *CandNextClusterSU =
541 const SUnit *TryCandNextClusterSU =
543 if (tryGreater(TryCand.SU == TryCandNextClusterSU,
544 Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster))
545 return TryCand.Reason != NoCand;
546
547 // Avoid increasing the max critical pressure in the scheduled region.
548 if (DAG->isTrackingPressure() &&
550 TryCand, Cand, RegCritical, TRI, DAG->MF))
551 return TryCand.Reason != NoCand;
552
553 // Avoid increasing the max pressure of the entire region.
554 if (DAG->isTrackingPressure() &&
555 tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
556 Cand, RegMax, TRI, DAG->MF))
557 return TryCand.Reason != NoCand;
558
559 if (SameBoundary) {
560 // Fall through to original instruction order.
561 if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) ||
562 (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
563 TryCand.Reason = NodeOrder;
564 return true;
565 }
566 }
567 return false;
568}
569
571 MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S)
572 : ScheduleDAGMILive(C, std::move(S)), ST(MF.getSubtarget<GCNSubtarget>()),
573 MFI(*MF.getInfo<SIMachineFunctionInfo>()),
574 StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy) {
575
576 LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
577 if (RelaxedOcc) {
578 MinOccupancy = std::min(MFI.getMinAllowedOccupancy(), StartingOccupancy);
579 if (MinOccupancy != StartingOccupancy)
580 LLVM_DEBUG(dbgs() << "Allowing Occupancy drops to " << MinOccupancy
581 << ".\n");
582 }
583}
584
585std::unique_ptr<GCNSchedStage>
586GCNScheduleDAGMILive::createSchedStage(GCNSchedStageID SchedStageID) {
587 switch (SchedStageID) {
589 return std::make_unique<OccInitialScheduleStage>(SchedStageID, *this);
591 return std::make_unique<UnclusteredHighRPStage>(SchedStageID, *this);
593 return std::make_unique<ClusteredLowOccStage>(SchedStageID, *this);
595 return std::make_unique<PreRARematStage>(SchedStageID, *this);
597 return std::make_unique<ILPInitialScheduleStage>(SchedStageID, *this);
598 }
599
600 llvm_unreachable("Unknown SchedStageID.");
601}
602
604 // Collect all scheduling regions. The actual scheduling is performed in
605 // GCNScheduleDAGMILive::finalizeSchedule.
606 Regions.push_back(std::pair(RegionBegin, RegionEnd));
607}
608
610GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {
612 RPTracker.advance(begin(), end(), &LiveIns[RegionIdx]);
613 return RPTracker.moveMaxPressure();
614}
615
616void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
617 const MachineBasicBlock *MBB) {
619
620 // If the block has the only successor then live-ins of that successor are
621 // live-outs of the current block. We can reuse calculated live set if the
622 // successor will be sent to scheduling past current block.
623
624 // However, due to the bug in LiveInterval analysis it may happen that two
625 // predecessors of the same successor block have different lane bitmasks for
626 // a live-out register. Workaround that by sticking to one-to-one relationship
627 // i.e. one predecessor with one successor block.
628 const MachineBasicBlock *OnlySucc = nullptr;
629 if (MBB->succ_size() == 1) {
630 auto *Candidate = *MBB->succ_begin();
631 if (!Candidate->empty() && Candidate->pred_size() == 1) {
633 if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(Candidate))
634 OnlySucc = Candidate;
635 }
636 }
637
638 // Scheduler sends regions from the end of the block upwards.
639 size_t CurRegion = RegionIdx;
640 for (size_t E = Regions.size(); CurRegion != E; ++CurRegion)
641 if (Regions[CurRegion].first->getParent() != MBB)
642 break;
643 --CurRegion;
644
645 auto I = MBB->begin();
646 auto LiveInIt = MBBLiveIns.find(MBB);
647 auto &Rgn = Regions[CurRegion];
648 auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
649 if (LiveInIt != MBBLiveIns.end()) {
650 auto LiveIn = std::move(LiveInIt->second);
651 RPTracker.reset(*MBB->begin(), &LiveIn);
652 MBBLiveIns.erase(LiveInIt);
653 } else {
654 I = Rgn.first;
655 auto LRS = BBLiveInMap.lookup(NonDbgMI);
656#ifdef EXPENSIVE_CHECKS
657 assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
658#endif
659 RPTracker.reset(*I, &LRS);
660 }
661
662 for (;;) {
663 I = RPTracker.getNext();
664
665 if (Regions[CurRegion].first == I || NonDbgMI == I) {
666 LiveIns[CurRegion] = RPTracker.getLiveRegs();
667 RPTracker.clearMaxPressure();
668 }
669
670 if (Regions[CurRegion].second == I) {
671 Pressure[CurRegion] = RPTracker.moveMaxPressure();
672 if (CurRegion-- == RegionIdx)
673 break;
674 }
675 RPTracker.advanceToNext();
676 RPTracker.advanceBeforeNext();
677 }
678
679 if (OnlySucc) {
680 if (I != MBB->end()) {
681 RPTracker.advanceToNext();
683 }
684 RPTracker.advanceBeforeNext();
685 MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();
686 }
687}
688
690GCNScheduleDAGMILive::getBBLiveInMap() const {
691 assert(!Regions.empty());
692 std::vector<MachineInstr *> BBStarters;
693 BBStarters.reserve(Regions.size());
694 auto I = Regions.rbegin(), E = Regions.rend();
695 auto *BB = I->first->getParent();
696 do {
697 auto *MI = &*skipDebugInstructionsForward(I->first, I->second);
698 BBStarters.push_back(MI);
699 do {
700 ++I;
701 } while (I != E && I->first->getParent() == BB);
702 } while (I != E);
703 return getLiveRegMap(BBStarters, false /*After*/, *LIS);
704}
705
707 // Start actual scheduling here. This function is called by the base
708 // MachineScheduler after all regions have been recorded by
709 // GCNScheduleDAGMILive::schedule().
710 LiveIns.resize(Regions.size());
711 Pressure.resize(Regions.size());
712 RescheduleRegions.resize(Regions.size());
713 RegionsWithHighRP.resize(Regions.size());
714 RegionsWithExcessRP.resize(Regions.size());
715 RegionsWithMinOcc.resize(Regions.size());
716 RegionsWithIGLPInstrs.resize(Regions.size());
717 RescheduleRegions.set();
718 RegionsWithHighRP.reset();
719 RegionsWithExcessRP.reset();
720 RegionsWithMinOcc.reset();
721 RegionsWithIGLPInstrs.reset();
722
723 runSchedStages();
724}
725
726void GCNScheduleDAGMILive::runSchedStages() {
727 LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");
728
729 if (!Regions.empty())
730 BBLiveInMap = getBBLiveInMap();
731
732 GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl);
733 while (S.advanceStage()) {
734 auto Stage = createSchedStage(S.getCurrentStage());
735 if (!Stage->initGCNSchedStage())
736 continue;
737
738 for (auto Region : Regions) {
739 RegionBegin = Region.first;
740 RegionEnd = Region.second;
741 // Setup for scheduling the region and check whether it should be skipped.
742 if (!Stage->initGCNRegion()) {
743 Stage->advanceRegion();
744 exitRegion();
745 continue;
746 }
747
749 Stage->finalizeGCNRegion();
750 }
751
752 Stage->finalizeGCNSchedStage();
753 }
754}
755
756#ifndef NDEBUG
758 switch (StageID) {
760 OS << "Max Occupancy Initial Schedule";
761 break;
763 OS << "Unclustered High Register Pressure Reschedule";
764 break;
766 OS << "Clustered Low Occupancy Reschedule";
767 break;
769 OS << "Pre-RA Rematerialize";
770 break;
772 OS << "Max ILP Initial Schedule";
773 break;
774 }
775
776 return OS;
777}
778#endif
779
781 : DAG(DAG), S(static_cast<GCNSchedStrategy &>(*DAG.SchedImpl)), MF(DAG.MF),
782 MFI(DAG.MFI), ST(DAG.ST), StageID(StageID) {}
783
785 if (!DAG.LIS)
786 return false;
787
788 LLVM_DEBUG(dbgs() << "Starting scheduling stage: " << StageID << "\n");
789 return true;
790}
791
794 return false;
795
797 return false;
798
799 if (DAG.RegionsWithHighRP.none() && DAG.RegionsWithExcessRP.none())
800 return false;
801
805
806 InitialOccupancy = DAG.MinOccupancy;
807 // Aggressivly try to reduce register pressure in the unclustered high RP
808 // stage. Temporarily increase occupancy target in the region.
811 if (MFI.getMaxWavesPerEU() > DAG.MinOccupancy)
812 MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);
813
815 dbgs()
816 << "Retrying function scheduling without clustering. "
817 "Aggressivly try to reduce register pressure to achieve occupancy "
818 << DAG.MinOccupancy << ".\n");
819
820 return true;
821}
822
825 return false;
826
828 return false;
829
830 // Don't bother trying to improve ILP in lower RP regions if occupancy has not
831 // been dropped. All regions will have already been scheduled with the ideal
832 // occupancy targets.
833 if (DAG.StartingOccupancy <= DAG.MinOccupancy)
834 return false;
835
837 dbgs() << "Retrying function scheduling with lowest recorded occupancy "
838 << DAG.MinOccupancy << ".\n");
839 return true;
840}
841
844 return false;
845
846 if (DAG.RegionsWithMinOcc.none() || DAG.Regions.size() == 1)
847 return false;
848
850 // Check maximum occupancy
852 DAG.MinOccupancy)
853 return false;
854
855 // FIXME: This pass will invalidate cached MBBLiveIns for regions
856 // inbetween the defs and region we sinked the def to. Cached pressure
857 // for regions where a def is sinked from will also be invalidated. Will
858 // need to be fixed if there is another pass after this pass.
860
861 collectRematerializableInstructions();
862 if (RematerializableInsts.empty() || !sinkTriviallyRematInsts(ST, TII))
863 return false;
864
866 dbgs() << "Retrying function scheduling with improved occupancy of "
867 << DAG.MinOccupancy << " from rematerializing\n");
868 return true;
869}
870
873 LLVM_DEBUG(dbgs() << "Ending scheduling stage: " << StageID << "\n");
874}
875
879 if (DAG.MinOccupancy > InitialOccupancy) {
880 for (unsigned IDX = 0; IDX < DAG.Pressure.size(); ++IDX)
881 DAG.RegionsWithMinOcc[IDX] =
882 DAG.Pressure[IDX].getOccupancy(DAG.ST) == DAG.MinOccupancy;
883
885 << " stage successfully increased occupancy to "
886 << DAG.MinOccupancy << '\n');
887 }
888
890}
891
893 // Check whether this new region is also a new block.
894 if (DAG.RegionBegin->getParent() != CurrentMBB)
896
897 unsigned NumRegionInstrs = std::distance(DAG.begin(), DAG.end());
898 DAG.enterRegion(CurrentMBB, DAG.begin(), DAG.end(), NumRegionInstrs);
899
900 // Skip empty scheduling regions (0 or 1 schedulable instructions).
901 if (DAG.begin() == DAG.end() || DAG.begin() == std::prev(DAG.end()))
902 return false;
903
904 LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
906 << " " << CurrentMBB->getName()
907 << "\n From: " << *DAG.begin() << " To: ";
909 else dbgs() << "End";
910 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
911
912 // Save original instruction order before scheduling for possible revert.
913 Unsched.clear();
914 Unsched.reserve(DAG.NumRegionInstrs);
917 for (auto &I : DAG) {
918 Unsched.push_back(&I);
919 if (I.getOpcode() == AMDGPU::SCHED_GROUP_BARRIER ||
920 I.getOpcode() == AMDGPU::IGLP_OPT)
921 DAG.RegionsWithIGLPInstrs[RegionIdx] = true;
922 }
923 } else {
924 for (auto &I : DAG)
925 Unsched.push_back(&I);
926 }
927
928 PressureBefore = DAG.Pressure[RegionIdx];
929
931 dbgs() << "Pressure before scheduling:\nRegion live-ins:"
932 << print(DAG.LiveIns[RegionIdx], DAG.MRI)
933 << "Region live-in pressure: "
935 << "Region register pressure: " << print(PressureBefore));
936
937 S.HasHighPressure = false;
939
940 if (DAG.RegionsWithIGLPInstrs[RegionIdx] &&
942 SavedMutations.clear();
944 bool IsInitialStage = StageID == GCNSchedStageID::OccInitialSchedule ||
947 IsInitialStage ? AMDGPU::SchedulingPhase::Initial
949 }
950
951 return true;
952}
953
955 // Only reschedule regions with the minimum occupancy or regions that may have
956 // spilling (excess register pressure).
957 if ((!DAG.RegionsWithMinOcc[RegionIdx] ||
958 DAG.MinOccupancy <= InitialOccupancy) &&
959 !DAG.RegionsWithExcessRP[RegionIdx])
960 return false;
961
963}
964
966 // We may need to reschedule this region if it wasn't rescheduled in the last
967 // stage, or if we found it was testing critical register pressure limits in
968 // the unclustered reschedule stage. The later is because we may not have been
969 // able to raise the min occupancy in the previous stage so the region may be
970 // overly constrained even if it was already rescheduled.
971 if (!DAG.RegionsWithHighRP[RegionIdx])
972 return false;
973
975}
976
978 if (!DAG.RescheduleRegions[RegionIdx])
979 return false;
980
982}
983
985 if (CurrentMBB)
987
988 CurrentMBB = DAG.RegionBegin->getParent();
990 // Get real RP for the region if it hasn't be calculated before. After the
991 // initial schedule stage real RP will be collected after scheduling.
994 DAG.computeBlockPressure(RegionIdx, CurrentMBB);
995}
996
998 DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
999 DAG.RescheduleRegions[RegionIdx] = false;
1000 if (S.HasHighPressure)
1001 DAG.RegionsWithHighRP[RegionIdx] = true;
1002
1003 // Revert scheduling if we have dropped occupancy or there is some other
1004 // reason that the original schedule is better.
1006
1007 if (DAG.RegionsWithIGLPInstrs[RegionIdx] &&
1010
1011 DAG.exitRegion();
1012 RegionIdx++;
1013}
1014
1016 // Check the results of scheduling.
1017 PressureAfter = DAG.getRealRegPressure(RegionIdx);
1018 LLVM_DEBUG(dbgs() << "Pressure after scheduling: " << print(PressureAfter));
1019 LLVM_DEBUG(dbgs() << "Region: " << RegionIdx << ".\n");
1020
1023 DAG.Pressure[RegionIdx] = PressureAfter;
1024 DAG.RegionsWithMinOcc[RegionIdx] =
1025 PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;
1026
1027 // Early out if we have achieved the occupancy target.
1028 LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
1029 return;
1030 }
1031
1032 unsigned TargetOccupancy =
1034 unsigned WavesAfter =
1035 std::min(TargetOccupancy, PressureAfter.getOccupancy(ST));
1036 unsigned WavesBefore =
1037 std::min(TargetOccupancy, PressureBefore.getOccupancy(ST));
1038 LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
1039 << ", after " << WavesAfter << ".\n");
1040
1041 // We may not be able to keep the current target occupancy because of the just
1042 // scheduled region. We might still be able to revert scheduling if the
1043 // occupancy before was higher, or if the current schedule has register
1044 // pressure higher than the excess limits which could lead to more spilling.
1045 unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);
1046
1047 // Allow memory bound functions to drop to 4 waves if not limited by an
1048 // attribute.
1049 if (WavesAfter < WavesBefore && WavesAfter < DAG.MinOccupancy &&
1050 WavesAfter >= MFI.getMinAllowedOccupancy()) {
1051 LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
1052 << MFI.getMinAllowedOccupancy() << " waves\n");
1053 NewOccupancy = WavesAfter;
1054 }
1055
1056 if (NewOccupancy < DAG.MinOccupancy) {
1057 DAG.MinOccupancy = NewOccupancy;
1058 MFI.limitOccupancy(DAG.MinOccupancy);
1059 DAG.RegionsWithMinOcc.reset();
1060 LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
1061 << DAG.MinOccupancy << ".\n");
1062 }
1063 // The maximum number of arch VGPR on non-unified register file, or the
1064 // maximum VGPR + AGPR in the unified register file case.
1065 unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
1066 // The maximum number of arch VGPR for both unified and non-unified register
1067 // file.
1068 unsigned MaxArchVGPRs = std::min(MaxVGPRs, ST.getAddressableNumArchVGPRs());
1069 unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
1070
1071 if (PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) > MaxVGPRs ||
1072 PressureAfter.getVGPRNum(false) > MaxArchVGPRs ||
1073 PressureAfter.getAGPRNum() > MaxArchVGPRs ||
1074 PressureAfter.getSGPRNum() > MaxSGPRs) {
1075 DAG.RescheduleRegions[RegionIdx] = true;
1076 DAG.RegionsWithHighRP[RegionIdx] = true;
1077 DAG.RegionsWithExcessRP[RegionIdx] = true;
1078 }
1079
1080 // Revert if this region's schedule would cause a drop in occupancy or
1081 // spilling.
1082 if (shouldRevertScheduling(WavesAfter)) {
1084 } else {
1085 DAG.Pressure[RegionIdx] = PressureAfter;
1086 DAG.RegionsWithMinOcc[RegionIdx] =
1087 PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;
1088 }
1089}
1090
1091unsigned
1092GCNSchedStage::computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
1093 DenseMap<unsigned, unsigned> &ReadyCycles,
1094 const TargetSchedModel &SM) {
1095 unsigned ReadyCycle = CurrCycle;
1096 for (auto &D : SU.Preds) {
1097 if (D.isAssignedRegDep()) {
1098 MachineInstr *DefMI = D.getSUnit()->getInstr();
1099 unsigned Latency = SM.computeInstrLatency(DefMI);
1100 unsigned DefReady = ReadyCycles[DAG.getSUnit(DefMI)->NodeNum];
1101 ReadyCycle = std::max(ReadyCycle, DefReady + Latency);
1102 }
1103 }
1104 ReadyCycles[SU.NodeNum] = ReadyCycle;
1105 return ReadyCycle;
1106}
1107
1108#ifndef NDEBUG
1110 bool operator()(std::pair<MachineInstr *, unsigned> A,
1111 std::pair<MachineInstr *, unsigned> B) const {
1112 return A.second < B.second;
1113 }
1114};
1115
1116static void printScheduleModel(std::set<std::pair<MachineInstr *, unsigned>,
1117 EarlierIssuingCycle> &ReadyCycles) {
1118 if (ReadyCycles.empty())
1119 return;
1120 unsigned BBNum = ReadyCycles.begin()->first->getParent()->getNumber();
1121 dbgs() << "\n################## Schedule time ReadyCycles for MBB : " << BBNum
1122 << " ##################\n# Cycle #\t\t\tInstruction "
1123 " "
1124 " \n";
1125 unsigned IPrev = 1;
1126 for (auto &I : ReadyCycles) {
1127 if (I.second > IPrev + 1)
1128 dbgs() << "****************************** BUBBLE OF " << I.second - IPrev
1129 << " CYCLES DETECTED ******************************\n\n";
1130 dbgs() << "[ " << I.second << " ] : " << *I.first << "\n";
1131 IPrev = I.second;
1132 }
1133}
1134#endif
1135
1137GCNSchedStage::getScheduleMetrics(const std::vector<SUnit> &InputSchedule) {
1138#ifndef NDEBUG
1139 std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>
1140 ReadyCyclesSorted;
1141#endif
1143 unsigned SumBubbles = 0;
1144 DenseMap<unsigned, unsigned> ReadyCycles;
1145 unsigned CurrCycle = 0;
1146 for (auto &SU : InputSchedule) {
1147 unsigned ReadyCycle =
1148 computeSUnitReadyCycle(SU, CurrCycle, ReadyCycles, SM);
1149 SumBubbles += ReadyCycle - CurrCycle;
1150#ifndef NDEBUG
1151 ReadyCyclesSorted.insert(std::make_pair(SU.getInstr(), ReadyCycle));
1152#endif
1153 CurrCycle = ++ReadyCycle;
1154 }
1155#ifndef NDEBUG
1156 LLVM_DEBUG(
1157 printScheduleModel(ReadyCyclesSorted);
1158 dbgs() << "\n\t"
1159 << "Metric: "
1160 << (SumBubbles
1161 ? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
1162 : 1)
1163 << "\n\n");
1164#endif
1165
1166 return ScheduleMetrics(CurrCycle, SumBubbles);
1167}
1168
1171#ifndef NDEBUG
1172 std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>
1173 ReadyCyclesSorted;
1174#endif
1176 unsigned SumBubbles = 0;
1177 DenseMap<unsigned, unsigned> ReadyCycles;
1178 unsigned CurrCycle = 0;
1179 for (auto &MI : DAG) {
1180 SUnit *SU = DAG.getSUnit(&MI);
1181 if (!SU)
1182 continue;
1183 unsigned ReadyCycle =
1184 computeSUnitReadyCycle(*SU, CurrCycle, ReadyCycles, SM);
1185 SumBubbles += ReadyCycle - CurrCycle;
1186#ifndef NDEBUG
1187 ReadyCyclesSorted.insert(std::make_pair(SU->getInstr(), ReadyCycle));
1188#endif
1189 CurrCycle = ++ReadyCycle;
1190 }
1191#ifndef NDEBUG
1192 LLVM_DEBUG(
1193 printScheduleModel(ReadyCyclesSorted);
1194 dbgs() << "\n\t"
1195 << "Metric: "
1196 << (SumBubbles
1197 ? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
1198 : 1)
1199 << "\n\n");
1200#endif
1201
1202 return ScheduleMetrics(CurrCycle, SumBubbles);
1203}
1204
1205bool GCNSchedStage::shouldRevertScheduling(unsigned WavesAfter) {
1206 if (WavesAfter < DAG.MinOccupancy)
1207 return true;
1208
1209 return false;
1210}
1211
1214 return false;
1215
1217 return true;
1218
1219 if (mayCauseSpilling(WavesAfter))
1220 return true;
1221
1222 return false;
1223}
1224
1226 // If RP is not reduced in the unclustered reschedule stage, revert to the
1227 // old schedule.
1228 if ((WavesAfter <= PressureBefore.getOccupancy(ST) &&
1229 mayCauseSpilling(WavesAfter)) ||
1231 LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n");
1232 return true;
1233 }
1234
1235 // Do not attempt to relax schedule even more if we are already spilling.
1237 return false;
1238
1239 LLVM_DEBUG(
1240 dbgs()
1241 << "\n\t *** In shouldRevertScheduling ***\n"
1242 << " *********** BEFORE UnclusteredHighRPStage ***********\n");
1243 ScheduleMetrics MBefore =
1245 LLVM_DEBUG(
1246 dbgs()
1247 << "\n *********** AFTER UnclusteredHighRPStage ***********\n");
1249 unsigned OldMetric = MBefore.getMetric();
1250 unsigned NewMetric = MAfter.getMetric();
1251 unsigned WavesBefore =
1253 unsigned Profit =
1254 ((WavesAfter * ScheduleMetrics::ScaleFactor) / WavesBefore *
1256 NewMetric) /
1258 LLVM_DEBUG(dbgs() << "\tMetric before " << MBefore << "\tMetric after "
1259 << MAfter << "Profit: " << Profit << "\n");
1260 return Profit < ScheduleMetrics::ScaleFactor;
1261}
1262
1265 return false;
1266
1268 return true;
1269
1270 if (mayCauseSpilling(WavesAfter))
1271 return true;
1272
1273 return false;
1274}
1275
1278 return true;
1279
1280 if (mayCauseSpilling(WavesAfter))
1281 return true;
1282
1283 return false;
1284}
1285
1287 if (mayCauseSpilling(WavesAfter))
1288 return true;
1289
1290 return false;
1291}
1292
1293bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) {
1294 if (WavesAfter <= MFI.getMinWavesPerEU() && isRegionWithExcessRP() &&
1296 LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
1297 return true;
1298 }
1299
1300 return false;
1301}
1302
1304 DAG.RegionsWithMinOcc[RegionIdx] =
1305 PressureBefore.getOccupancy(ST) == DAG.MinOccupancy;
1306 LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
1307 DAG.RescheduleRegions[RegionIdx] =
1308 S.hasNextStage() &&
1311 int SkippedDebugInstr = 0;
1312 for (MachineInstr *MI : Unsched) {
1313 if (MI->isDebugInstr()) {
1314 ++SkippedDebugInstr;
1315 continue;
1316 }
1317
1318 if (MI->getIterator() != DAG.RegionEnd) {
1319 DAG.BB->remove(MI);
1321 if (!MI->isDebugInstr())
1322 DAG.LIS->handleMove(*MI, true);
1323 }
1324
1325 // Reset read-undef flags and update them later.
1326 for (auto &Op : MI->all_defs())
1327 Op.setIsUndef(false);
1328 RegisterOperands RegOpers;
1329 RegOpers.collect(*MI, *DAG.TRI, DAG.MRI, DAG.ShouldTrackLaneMasks, false);
1330 if (!MI->isDebugInstr()) {
1332 // Adjust liveness and add missing dead+read-undef flags.
1334 RegOpers.adjustLaneLiveness(*DAG.LIS, DAG.MRI, SlotIdx, MI);
1335 } else {
1336 // Adjust for missing dead-def flags.
1337 RegOpers.detectDeadDefs(*MI, *DAG.LIS);
1338 }
1339 }
1340 DAG.RegionEnd = MI->getIterator();
1341 ++DAG.RegionEnd;
1342 LLVM_DEBUG(dbgs() << "Scheduling " << *MI);
1343 }
1344
1345 // After reverting schedule, debug instrs will now be at the end of the block
1346 // and RegionEnd will point to the first debug instr. Increment RegionEnd
1347 // pass debug instrs to the actual end of the scheduling region.
1348 while (SkippedDebugInstr-- > 0)
1349 ++DAG.RegionEnd;
1350
1351 // If Unsched.front() instruction is a debug instruction, this will actually
1352 // shrink the region since we moved all debug instructions to the end of the
1353 // block. Find the first instruction that is not a debug instruction.
1354 DAG.RegionBegin = Unsched.front()->getIterator();
1355 if (DAG.RegionBegin->isDebugInstr()) {
1356 for (MachineInstr *MI : Unsched) {
1357 if (MI->isDebugInstr())
1358 continue;
1359 DAG.RegionBegin = MI->getIterator();
1360 break;
1361 }
1362 }
1363
1364 // Then move the debug instructions back into their correct place and set
1365 // RegionBegin and RegionEnd if needed.
1367
1368 DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
1369}
1370
1371void PreRARematStage::collectRematerializableInstructions() {
1372 const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
1373 for (unsigned I = 0, E = DAG.MRI.getNumVirtRegs(); I != E; ++I) {
1375 if (!DAG.LIS->hasInterval(Reg))
1376 continue;
1377
1378 // TODO: Handle AGPR and SGPR rematerialization
1379 if (!SRI->isVGPRClass(DAG.MRI.getRegClass(Reg)) ||
1380 !DAG.MRI.hasOneDef(Reg) || !DAG.MRI.hasOneNonDBGUse(Reg))
1381 continue;
1382
1384 MachineInstr *Def = Op->getParent();
1385 if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(*Def))
1386 continue;
1387
1389 if (Def->getParent() == UseI->getParent())
1390 continue;
1391
1392 // We are only collecting defs that are defined in another block and are
1393 // live-through or used inside regions at MinOccupancy. This means that the
1394 // register must be in the live-in set for the region.
1395 bool AddedToRematList = false;
1396 for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
1397 auto It = DAG.LiveIns[I].find(Reg);
1398 if (It != DAG.LiveIns[I].end() && !It->second.none()) {
1399 if (DAG.RegionsWithMinOcc[I]) {
1400 RematerializableInsts[I][Def] = UseI;
1401 AddedToRematList = true;
1402 }
1403
1404 // Collect regions with rematerializable reg as live-in to avoid
1405 // searching later when updating RP.
1406 RematDefToLiveInRegions[Def].push_back(I);
1407 }
1408 }
1409 if (!AddedToRematList)
1410 RematDefToLiveInRegions.erase(Def);
1411 }
1412}
1413
1414bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
1415 const TargetInstrInfo *TII) {
1416 // Temporary copies of cached variables we will be modifying and replacing if
1417 // sinking succeeds.
1419 std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>, 32>
1420 NewRegions;
1423 BitVector NewRescheduleRegions;
1424 LiveIntervals *LIS = DAG.LIS;
1425
1426 NewRegions.resize(DAG.Regions.size());
1427 NewRescheduleRegions.resize(DAG.Regions.size());
1428
1429 // Collect only regions that has a rematerializable def as a live-in.
1430 SmallSet<unsigned, 16> ImpactedRegions;
1431 for (const auto &It : RematDefToLiveInRegions)
1432 ImpactedRegions.insert(It.second.begin(), It.second.end());
1433
1434 // Make copies of register pressure and live-ins cache that will be updated
1435 // as we rematerialize.
1436 for (auto Idx : ImpactedRegions) {
1437 NewPressure[Idx] = DAG.Pressure[Idx];
1438 NewLiveIns[Idx] = DAG.LiveIns[Idx];
1439 }
1440 NewRegions = DAG.Regions;
1441 NewRescheduleRegions.reset();
1442
1444 bool Improved = false;
1445 for (auto I : ImpactedRegions) {
1446 if (!DAG.RegionsWithMinOcc[I])
1447 continue;
1448
1449 Improved = false;
1450 int VGPRUsage = NewPressure[I].getVGPRNum(ST.hasGFX90AInsts());
1451 int SGPRUsage = NewPressure[I].getSGPRNum();
1452
1453 // TODO: Handle occupancy drop due to AGPR and SGPR.
1454 // Check if cause of occupancy drop is due to VGPR usage and not SGPR.
1455 if (ST.getOccupancyWithNumSGPRs(SGPRUsage) == DAG.MinOccupancy)
1456 break;
1457
1458 // The occupancy of this region could have been improved by a previous
1459 // iteration's sinking of defs.
1460 if (NewPressure[I].getOccupancy(ST) > DAG.MinOccupancy) {
1461 NewRescheduleRegions[I] = true;
1462 Improved = true;
1463 continue;
1464 }
1465
1466 // First check if we have enough trivially rematerializable instructions to
1467 // improve occupancy. Optimistically assume all instructions we are able to
1468 // sink decreased RP.
1469 int TotalSinkableRegs = 0;
1470 for (const auto &It : RematerializableInsts[I]) {
1471 MachineInstr *Def = It.first;
1472 Register DefReg = Def->getOperand(0).getReg();
1473 TotalSinkableRegs +=
1474 SIRegisterInfo::getNumCoveredRegs(NewLiveIns[I][DefReg]);
1475 }
1476 int VGPRsAfterSink = VGPRUsage - TotalSinkableRegs;
1477 unsigned OptimisticOccupancy = ST.getOccupancyWithNumVGPRs(VGPRsAfterSink);
1478 // If in the most optimistic scenario, we cannot improve occupancy, then do
1479 // not attempt to sink any instructions.
1480 if (OptimisticOccupancy <= DAG.MinOccupancy)
1481 break;
1482
1483 unsigned ImproveOccupancy = 0;
1485 for (auto &It : RematerializableInsts[I]) {
1486 MachineInstr *Def = It.first;
1487 MachineBasicBlock::iterator InsertPos =
1488 MachineBasicBlock::iterator(It.second);
1489 Register Reg = Def->getOperand(0).getReg();
1490 // Rematerialize MI to its use block. Since we are only rematerializing
1491 // instructions that do not have any virtual reg uses, we do not need to
1492 // call LiveRangeEdit::allUsesAvailableAt() and
1493 // LiveRangeEdit::canRematerializeAt().
1494 TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg,
1495 Def->getOperand(0).getSubReg(), *Def, *DAG.TRI);
1496 MachineInstr *NewMI = &*std::prev(InsertPos);
1497 LIS->InsertMachineInstrInMaps(*NewMI);
1498 LIS->removeInterval(Reg);
1500 InsertedMIToOldDef[NewMI] = Def;
1501
1502 // Update region boundaries in scheduling region we sinked from since we
1503 // may sink an instruction that was at the beginning or end of its region
1504 DAG.updateRegionBoundaries(NewRegions, Def, /*NewMI =*/nullptr,
1505 /*Removing =*/true);
1506
1507 // Update region boundaries in region we sinked to.
1508 DAG.updateRegionBoundaries(NewRegions, InsertPos, NewMI);
1509
1510 LaneBitmask PrevMask = NewLiveIns[I][Reg];
1511 // FIXME: Also update cached pressure for where the def was sinked from.
1512 // Update RP for all regions that has this reg as a live-in and remove
1513 // the reg from all regions as a live-in.
1514 for (auto Idx : RematDefToLiveInRegions[Def]) {
1515 NewLiveIns[Idx].erase(Reg);
1516 if (InsertPos->getParent() != DAG.Regions[Idx].first->getParent()) {
1517 // Def is live-through and not used in this block.
1518 NewPressure[Idx].inc(Reg, PrevMask, LaneBitmask::getNone(), DAG.MRI);
1519 } else {
1520 // Def is used and rematerialized into this block.
1521 GCNDownwardRPTracker RPT(*LIS);
1522 auto *NonDbgMI = &*skipDebugInstructionsForward(
1523 NewRegions[Idx].first, NewRegions[Idx].second);
1524 RPT.reset(*NonDbgMI, &NewLiveIns[Idx]);
1525 RPT.advance(NewRegions[Idx].second);
1526 NewPressure[Idx] = RPT.moveMaxPressure();
1527 }
1528 }
1529
1530 SinkedDefs.push_back(Def);
1531 ImproveOccupancy = NewPressure[I].getOccupancy(ST);
1532 if (ImproveOccupancy > DAG.MinOccupancy)
1533 break;
1534 }
1535
1536 // Remove defs we just sinked from all regions' list of sinkable defs
1537 for (auto &Def : SinkedDefs)
1538 for (auto TrackedIdx : RematDefToLiveInRegions[Def])
1539 RematerializableInsts[TrackedIdx].erase(Def);
1540
1541 if (ImproveOccupancy <= DAG.MinOccupancy)
1542 break;
1543
1544 NewRescheduleRegions[I] = true;
1545 Improved = true;
1546 }
1547
1548 if (!Improved) {
1549 // Occupancy was not improved for all regions that were at MinOccupancy.
1550 // Undo sinking and remove newly rematerialized instructions.
1551 for (auto &Entry : InsertedMIToOldDef) {
1552 MachineInstr *MI = Entry.first;
1553 MachineInstr *OldMI = Entry.second;
1554 Register Reg = MI->getOperand(0).getReg();
1556 MI->eraseFromParent();
1557 OldMI->clearRegisterDeads(Reg);
1558 LIS->removeInterval(Reg);
1560 }
1561 return false;
1562 }
1563
1564 // Occupancy was improved for all regions.
1565 for (auto &Entry : InsertedMIToOldDef) {
1566 MachineInstr *MI = Entry.first;
1567 MachineInstr *OldMI = Entry.second;
1568
1569 // Remove OldMI from BBLiveInMap since we are sinking it from its MBB.
1570 DAG.BBLiveInMap.erase(OldMI);
1571
1572 // Remove OldMI and update LIS
1573 Register Reg = MI->getOperand(0).getReg();
1574 LIS->RemoveMachineInstrFromMaps(*OldMI);
1575 OldMI->eraseFromParent();
1576 LIS->removeInterval(Reg);
1578 }
1579
1580 // Update live-ins, register pressure, and regions caches.
1581 for (auto Idx : ImpactedRegions) {
1582 DAG.LiveIns[Idx] = NewLiveIns[Idx];
1583 DAG.Pressure[Idx] = NewPressure[Idx];
1584 DAG.MBBLiveIns.erase(DAG.Regions[Idx].first->getParent());
1585 }
1586 DAG.Regions = NewRegions;
1587 DAG.RescheduleRegions = NewRescheduleRegions;
1588
1590 MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);
1591
1592 return true;
1593}
1594
1595// Copied from MachineLICM
1596bool PreRARematStage::isTriviallyReMaterializable(const MachineInstr &MI) {
1598 return false;
1599
1600 for (const MachineOperand &MO : MI.all_uses())
1601 if (MO.getReg().isVirtual())
1602 return false;
1603
1604 return true;
1605}
1606
1607// When removing, we will have to check both beginning and ending of the region.
1608// When inserting, we will only have to check if we are inserting NewMI in front
1609// of a scheduling region and do not need to check the ending since we will only
1610// ever be inserting before an already existing MI.
1611void GCNScheduleDAGMILive::updateRegionBoundaries(
1613 MachineBasicBlock::iterator>> &RegionBoundaries,
1614 MachineBasicBlock::iterator MI, MachineInstr *NewMI, bool Removing) {
1615 unsigned I = 0, E = RegionBoundaries.size();
1616 // Search for first region of the block where MI is located
1617 while (I != E && MI->getParent() != RegionBoundaries[I].first->getParent())
1618 ++I;
1619
1620 for (; I != E; ++I) {
1621 if (MI->getParent() != RegionBoundaries[I].first->getParent())
1622 return;
1623
1624 if (Removing && MI == RegionBoundaries[I].first &&
1625 MI == RegionBoundaries[I].second) {
1626 // MI is in a region with size 1, after removing, the region will be
1627 // size 0, set RegionBegin and RegionEnd to pass end of block iterator.
1628 RegionBoundaries[I] =
1629 std::pair(MI->getParent()->end(), MI->getParent()->end());
1630 return;
1631 }
1632 if (MI == RegionBoundaries[I].first) {
1633 if (Removing)
1634 RegionBoundaries[I] =
1635 std::pair(std::next(MI), RegionBoundaries[I].second);
1636 else
1637 // Inserted NewMI in front of region, set new RegionBegin to NewMI
1638 RegionBoundaries[I] = std::pair(MachineBasicBlock::iterator(NewMI),
1639 RegionBoundaries[I].second);
1640 return;
1641 }
1642 if (Removing && MI == RegionBoundaries[I].second) {
1643 RegionBoundaries[I] = std::pair(RegionBoundaries[I].first, std::prev(MI));
1644 return;
1645 }
1646 }
1647}
1648
1650 return std::any_of(
1651 DAG->begin(), DAG->end(), [](MachineBasicBlock::iterator MI) {
1652 unsigned Opc = MI->getOpcode();
1653 return Opc == AMDGPU::SCHED_GROUP_BARRIER || Opc == AMDGPU::IGLP_OPT;
1654 });
1655}
1656
1658 MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S,
1659 bool RemoveKillFlags)
1660 : ScheduleDAGMI(C, std::move(S), RemoveKillFlags) {}
1661
1663 HasIGLPInstrs = hasIGLPInstrs(this);
1664 if (HasIGLPInstrs) {
1665 SavedMutations.clear();
1666 SavedMutations.swap(Mutations);
1668 }
1669
1671}
1672
1674 if (HasIGLPInstrs)
1675 SavedMutations.swap(Mutations);
1676
1678}
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock & MBB
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
static cl::opt< bool > DisableClusteredLowOccupancy("amdgpu-disable-clustered-low-occupancy-reschedule", cl::Hidden, cl::desc("Disable clustered low occupancy " "rescheduling for ILP scheduling stage."), cl::init(false))
static cl::opt< bool > RelaxedOcc("amdgpu-schedule-relaxed-occupancy", cl::Hidden, cl::desc("Relax occupancy targets for kernels which are memory " "bound (amdgpu-membound-threshold), or " "Wave Limited (amdgpu-limit-wave-threshold)."), cl::init(false))
static cl::opt< bool > DisableUnclusterHighRP("amdgpu-disable-unclustered-high-rp-reschedule", cl::Hidden, cl::desc("Disable unclustered high register pressure " "reduction scheduling stage."), cl::init(false))
static void printScheduleModel(std::set< std::pair< MachineInstr *, unsigned >, EarlierIssuingCycle > &ReadyCycles)
static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG)
static void getRegisterPressures(bool AtTop, const RegPressureTracker &RPTracker, SUnit *SU, std::vector< unsigned > &Pressure, std::vector< unsigned > &MaxPressure)
static bool canUsePressureDiffs(const SUnit &SU)
Checks whether SU can use the cached DAG pressure diffs to compute the current register pressure.
static cl::opt< unsigned > ScheduleMetricBias("amdgpu-schedule-metric-bias", cl::Hidden, cl::desc("Sets the bias which adds weight to occupancy vs latency. Set it to " "100 to chase the occupancy only."), cl::init(10))
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
if(VerifyEach)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemWithWaveCount.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
BitVector & reset()
Definition: BitVector.h:392
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
Definition: BitVector.h:341
BitVector & set()
Definition: BitVector.h:351
bool none() const
none - Returns true if none of the bits are set.
Definition: BitVector.h:188
bool shouldRevertScheduling(unsigned WavesAfter) override
This class represents an Operation in the Expression.
bool erase(const KeyT &Val)
Definition: DenseMap.h:336
GCNMaxILPSchedStrategy(const MachineSchedContext *C)
bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override
Apply a set of heuristics to a new candidate.
GCNMaxOccupancySchedStrategy(const MachineSchedContext *C)
void finalizeSchedule() override
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
GCNPostScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S, bool RemoveKillFlags)
virtual bool initGCNRegion()
GCNSchedStrategy & S
GCNRegPressure PressureBefore
bool isRegionWithExcessRP() const
bool mayCauseSpilling(unsigned WavesAfter)
ScheduleMetrics getScheduleMetrics(const std::vector< SUnit > &InputSchedule)
GCNScheduleDAGMILive & DAG
const GCNSchedStageID StageID
std::vector< MachineInstr * > Unsched
GCNRegPressure PressureAfter
MachineFunction & MF
SIMachineFunctionInfo & MFI
unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle, DenseMap< unsigned, unsigned > &ReadyCycles, const TargetSchedModel &SM)
virtual void finalizeGCNSchedStage()
virtual bool initGCNSchedStage()
virtual bool shouldRevertScheduling(unsigned WavesAfter)
std::vector< std::unique_ptr< ScheduleDAGMutation > > SavedMutations
GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
MachineBasicBlock * CurrentMBB
const GCNSubtarget & ST
This is a minimal scheduler strategy.
const unsigned HighRPSGPRBias
GCNSchedStrategy(const MachineSchedContext *C)
SmallVector< GCNSchedStageID, 4 > SchedStages
SUnit * pickNodeBidirectional(bool &IsTopNode)
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Cand, bool IsBottomUp)
std::vector< unsigned > MaxPressure
GCNSchedStageID getCurrentStage()
MachineFunction * MF
SmallVectorImpl< GCNSchedStageID >::iterator CurrentStage
std::vector< unsigned > Pressure
void initialize(ScheduleDAGMI *DAG) override
Initialize the strategy after building the DAG for a new region.
const unsigned HighRPVGPRBias
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, const SIRegisterInfo *SRI, unsigned SGPRPressure, unsigned VGPRPressure, bool IsBottomUp)
SUnit * pickNode(bool &IsTopNode) override
Pick the best node to balance the schedule. Implements MachineSchedStrategy.
GCNSchedStageID getNextStage() const
void finalizeSchedule() override
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
GCNScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S)
unsigned getAddressableNumArchVGPRs() const
bool hasGFX90AInsts() const
unsigned computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Return occupancy for the given function.
const SIInstrInfo * getInstrInfo() const override
Definition: GCNSubtarget.h:266
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const
Return the maximum number of waves per SIMD for kernels using VGPRs VGPRs.
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const
Return the maximum number of waves per SIMD for kernels using SGPRs SGPRs.
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
void traceCandidate(const SchedCandidate &Cand)
void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone)
Set the CandPolicy given a scheduling zone given the current resources and latencies inside and outsi...
const TargetSchedModel * SchedModel
const MachineSchedContext * Context
const TargetRegisterInfo * TRI
GenericScheduler shrinks the unscheduled zone using heuristics to balance the schedule.
SchedCandidate BotCand
Candidate last picked from Bot boundary.
SchedCandidate TopCand
Candidate last picked from Top boundary.
MachineSchedPolicy RegionPolicy
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Apply a set of heuristics to a new candidate.
ScheduleDAGMILive * DAG
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
bool shouldRevertScheduling(unsigned WavesAfter) override
bool hasInterval(Register Reg) const
SlotIndex InsertMachineInstrInMaps(MachineInstr &MI)
void handleMove(MachineInstr &MI, bool UpdateFlags=false)
Call this method to notify LiveIntervals that instruction MI has been moved within a basic block.
SlotIndexes * getSlotIndexes() const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
void RemoveMachineInstrFromMaps(MachineInstr &MI)
void removeInterval(Register Reg)
Interval removal.
LiveInterval & createAndComputeVirtRegInterval(Register Reg)
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
MachineInstr * remove(MachineInstr *I)
Remove the unbundled instruction from the instruction list without deleting it.
unsigned succ_size() const
MachineInstrBundleIterator< MachineInstr > iterator
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Representation of each machine instruction.
Definition: MachineInstr.h:69
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:346
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:685
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void clearRegisterDeads(Register Reg)
Clear all dead flags on operands defining register Reg.
MachineOperand class - Representation of each machine instruction operand.
bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
MachineOperand * getOneDef(Register Reg) const
Returns the defining operand if there is exactly one operand defining the specified register,...
use_instr_nodbg_iterator use_instr_nodbg_begin(Register RegNo) const
bool hasOneDef(Register RegNo) const
Return true if there is exactly one operand defining the specified register.
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
bool shouldRevertScheduling(unsigned WavesAfter) override
bool shouldRevertScheduling(unsigned WavesAfter) override
bool initGCNRegion() override
bool initGCNSchedStage() override
Capture a change in pressure for a single pressure set.
void setUnitInc(int Inc)
Helpers for implementing custom MachineSchedStrategy classes.
bool empty() const
Track the current register pressure at some position in the instruction stream, and remember the high...
void advance()
Advance across the current instruction.
void getDownwardPressure(const MachineInstr *MI, std::vector< unsigned > &PressureResult, std::vector< unsigned > &MaxPressureResult)
Get the pressure of each PSet after traversing this instruction top-down.
const std::vector< unsigned > & getRegSetPressureAtPos() const
Get the register set pressure at the current position, which may be less than the pressure across the...
void getUpwardPressure(const MachineInstr *MI, std::vector< unsigned > &PressureResult, std::vector< unsigned > &MaxPressureResult)
Get the pressure of each PSet after traversing this instruction bottom-up.
unsigned getNumAllocatableRegs(const TargetRegisterClass *RC) const
getNumAllocatableRegs - Returns the number of actually allocatable registers in RC in the current fun...
List of registers defined and used by a machine instruction.
void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the Regi...
void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS)
Use liveness information to find dead defs not marked with a dead flag and move them to the DeadDefs ...
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition: Register.h:84
const TargetSchedModel & getSchedModel() const
Definition: SIInstrInfo.h:1433
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
void increaseOccupancy(const MachineFunction &MF, unsigned Limit)
void limitOccupancy(const MachineFunction &MF)
static unsigned getNumCoveredRegs(LaneBitmask LM)
static bool isVGPRClass(const TargetRegisterClass *RC)
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
bool isInstr() const
Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.
Definition: ScheduleDAG.h:378
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:270
bool isScheduled
True once scheduled.
Definition: ScheduleDAG.h:296
bool isBottomReady() const
Definition: ScheduleDAG.h:467
bool isTopReady() const
Definition: ScheduleDAG.h:464
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:262
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:390
Each Scheduling boundary is associated with ready queues.
unsigned getLatencyStallCycles(SUnit *SU)
Get the difference between the given SUnit's ready time and the current cycle.
ScheduleDAGMI * DAG
SUnit * pickOnlyChoice()
Call this before applying any other heuristics to the Available queue.
void removeReady(SUnit *SU)
Remove SU from the ready set for this boundary.
A ScheduleDAG for scheduling lists of MachineInstr.
MachineBasicBlock::iterator end() const
Returns an iterator to the bottom of the current scheduling region.
MachineBasicBlock * BB
The block in which to insert instructions.
MachineBasicBlock::iterator RegionEnd
The end of the range to be scheduled.
virtual void finalizeSchedule()
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
virtual void exitRegion()
Called when the scheduler has finished scheduling the current region.
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
unsigned NumRegionInstrs
Instructions in this region (distance(RegionBegin, RegionEnd)).
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
PressureDiff & getPressureDiff(const SUnit *SU)
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
const RegPressureTracker & getBotRPTracker() const
bool isTrackingPressure() const
Return true if register pressure tracking is enabled.
const RegPressureTracker & getTopRPTracker() const
RegPressureTracker RPTracker
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
void startBlock(MachineBasicBlock *bb) override
Prepares to perform scheduling in the given block.
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
MachineBasicBlock::iterator top() const
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
MachineBasicBlock::iterator bottom() const
void finishBlock() override
Cleans up after scheduling in the given block.
LiveIntervals * LIS
const SUnit * getNextClusterPred() const
void placeDebugValues()
Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
const SUnit * getNextClusterSucc() const
std::vector< std::unique_ptr< ScheduleDAGMutation > > Mutations
Ordered list of DAG postprocessing steps.
MachineRegisterInfo & MRI
Virtual/real register map.
Definition: ScheduleDAG.h:578
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:575
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:579
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:576
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:577
static const unsigned ScaleFactor
unsigned getMetric() const
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:65
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
Definition: SlotIndexes.h:237
SlotIndexes pass.
Definition: SlotIndexes.h:297
SlotIndex getMBBStartIdx(unsigned Num) const
Returns the first index in the given basic block number.
Definition: SlotIndexes.h:460
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:587
void resize(size_type N)
Definition: SmallVector.h:652
void push_back(const T &Elt)
Definition: SmallVector.h:427
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1210
TargetInstrInfo - Interface to description of machine instruction set.
bool isTriviallyReMaterializable(const MachineInstr &MI) const
Return true if the instruction is trivially rematerializable, meaning it has no side effects and requ...
Provide an instruction scheduling machine model to CodeGen passes.
virtual const TargetInstrInfo * getInstrInfo() const
bool shouldRevertScheduling(unsigned WavesAfter) override
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
@ Entry
Definition: COFF.h:826
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
NodeAddr< DefNode * > Def
Definition: RDFGraph.h:384
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)
unsigned getWeakLeft(const SUnit *SU, bool isTop)
GCNRegPressure getRegPressure(const MachineRegisterInfo &MRI, Range &&LiveRegs)
std::unique_ptr< ScheduleDAGMutation > createIGroupLPDAGMutation(AMDGPU::SchedulingPhase Phase)
Phase specifes whether or not this is a reentry into the IGroupLPDAGMutation.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition: MathExtras.h:547
cl::opt< bool > VerifyScheduling
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr)
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason, const TargetRegisterInfo *TRI, const MachineFunction &MF)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary &Zone)
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
bool tryGreater(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:293
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1849
DenseMap< MachineInstr *, GCNRPTracker::LiveRegSet > getLiveRegMap(Range &&R, bool After, LiveIntervals &LIS)
creates a map MachineInstr -> LiveRegSet R - range of iterators on instructions After - upon entry or...
GCNRPTracker::LiveRegSet getLiveRegsBefore(const MachineInstr &MI, const LiveIntervals &LIS)
bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Return true if this heuristic determines order.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
int biasPhysReg(const SUnit *SU, bool isTop)
Minimize physical register live ranges.
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
bool operator()(std::pair< MachineInstr *, unsigned > A, std::pair< MachineInstr *, unsigned > B) const
unsigned getOccupancy(const GCNSubtarget &ST) const
unsigned getVGPRNum(bool UnifiedVGPRFile) const
unsigned getAGPRNum() const
unsigned getSGPRNum() const
bool less(const MachineFunction &MF, const GCNRegPressure &O, unsigned MaxOccupancy=std::numeric_limits< unsigned >::max()) const
Compares this GCNRegpressure to O, returning true if this is less.
Policy for scheduling the next instruction in the candidate's zone.
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of p...
void reset(const CandPolicy &NewPolicy)
void initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
Status of an instruction's critical resource consumption.
static constexpr LaneBitmask getNone()
Definition: LaneBitmask.h:81
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
RegisterClassInfo * RegClassInfo
PressureChange CriticalMax