GCNSchedStrategy.cpp
1//===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This contains a MachineSchedStrategy implementation for maximizing wave
11/// occupancy on GCN hardware.
12///
13/// This pass will apply multiple scheduling stages to the same function.
14/// Regions are first recorded in GCNScheduleDAGMILive::schedule. The actual
15/// entry point for the scheduling of those regions is
16/// GCNScheduleDAGMILive::runSchedStages.
17
18/// Generally, the reason for having multiple scheduling stages is to account
19/// for the kernel-wide effect of register usage on occupancy. Usually, only a
20/// few scheduling regions will have register pressure high enough to limit
21/// occupancy for the kernel, so constraints can be relaxed to improve ILP in
22/// other regions.
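/// As a rough sketch (see GCNSchedStageID and createSchedStage below), an
/// occupancy-oriented run starts with an initial schedule stage and may be
/// followed by an unclustered high-register-pressure reschedule, a clustered
/// low-occupancy reschedule, and a pre-RA rematerialization stage; the ILP-
/// and memory-clause-oriented strategies use their own initial stages.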
23///
24//===----------------------------------------------------------------------===//
25
26#include "GCNSchedStrategy.h"
27#include "AMDGPUIGroupLP.h"
30
31#define DEBUG_TYPE "machine-scheduler"
32
33using namespace llvm;
34
36 "amdgpu-disable-unclustered-high-rp-reschedule", cl::Hidden,
37 cl::desc("Disable unclustered high register pressure "
38 "reduction scheduling stage."),
39 cl::init(false));
40
42 "amdgpu-disable-clustered-low-occupancy-reschedule", cl::Hidden,
43 cl::desc("Disable clustered low occupancy "
44 "rescheduling for ILP scheduling stage."),
45 cl::init(false));
46
48 "amdgpu-schedule-metric-bias", cl::Hidden,
50 "Sets the bias which adds weight to occupancy vs latency. Set it to "
51 "100 to chase the occupancy only."),
52 cl::init(10));
53
54static cl::opt<bool>
55 RelaxedOcc("amdgpu-schedule-relaxed-occupancy", cl::Hidden,
56 cl::desc("Relax occupancy targets for kernels which are memory "
57 "bound (amdgpu-membound-threshold), or "
58 "Wave Limited (amdgpu-limit-wave-threshold)."),
59 cl::init(false));
60
62 "amdgpu-use-amdgpu-trackers", cl::Hidden,
63 cl::desc("Use the AMDGPU specific RPTrackers during scheduling"),
64 cl::init(false));
65
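// ScaleFactor is the fixed-point scale used by the schedule metric and by the
// occupancy-vs-latency profit comparison below: a scaled value of 100
// represents 1.0, so metrics effectively read as percentages.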
66const unsigned ScheduleMetrics::ScaleFactor = 100;
67
69 : GenericScheduler(C), TargetOccupancy(0), MF(nullptr),
70 DownwardTracker(*C->LIS), UpwardTracker(*C->LIS), HasHighPressure(false) {
71}
72
75
76 MF = &DAG->MF;
77
79
81 Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
83 Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);
84
86 // Set the initial TargetOccupancy to the maximum occupancy that we can
87 // achieve for this function. This effectively sets a lower bound on the
88 // 'Critical' register limits in the scheduler.
89 // Allow for lower occupancy targets if the kernel is wave limited or memory
90 // bound and the relaxed occupancy feature is in use.
94 std::min(ST.getMaxNumSGPRs(TargetOccupancy, true), SGPRExcessLimit);
95
96 if (!KnownExcessRP) {
98 std::min(ST.getMaxNumVGPRs(TargetOccupancy), VGPRExcessLimit);
99 } else {
100 // This is similar to the ST.getMaxNumVGPRs(TargetOccupancy) result, except it
101 // returns a reasonably small number for targets with lots of VGPRs, such
102 // as GFX10 and GFX11.
103 LLVM_DEBUG(dbgs() << "Region is known to spill, use alternative "
104 "VGPRCriticalLimit calculation method.\n");
105
106 unsigned Granule = AMDGPU::IsaInfo::getVGPRAllocGranule(&ST);
107 unsigned Addressable = AMDGPU::IsaInfo::getAddressableNumVGPRs(&ST);
108 unsigned VGPRBudget = alignDown(Addressable / TargetOccupancy, Granule);
109 VGPRBudget = std::max(VGPRBudget, Granule);
110 VGPRCriticalLimit = std::min(VGPRBudget, VGPRExcessLimit);
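    // For illustration only (hypothetical numbers): with Addressable = 256,
    // Granule = 8 and TargetOccupancy = 7, the budget is
    // alignDown(256 / 7, 8) = alignDown(36, 8) = 32 VGPRs, clamped below by
    // Granule and above by VGPRExcessLimit.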
111 }
112
113 // Subtract error margin and bias from register limits and avoid overflow.
118
119 LLVM_DEBUG(dbgs() << "VGPRCriticalLimit = " << VGPRCriticalLimit
120 << ", VGPRExcessLimit = " << VGPRExcessLimit
121 << ", SGPRCriticalLimit = " << SGPRCriticalLimit
122 << ", SGPRExcessLimit = " << SGPRExcessLimit << "\n\n");
123}
124
125/// Checks whether \p SU can use the cached DAG pressure diffs to compute the
126/// current register pressure.
127///
128/// This works for the common case, but it has a few exceptions that have been
129/// observed through trial and error:
130/// - Explicit physical register operands
131/// - Subregister definitions
132///
133/// In both of those cases, PressureDiff doesn't represent the actual pressure,
134/// and querying LiveIntervals through the RegPressureTracker is needed to get
135/// an accurate value.
136///
137/// We should eventually only use PressureDiff for maximum performance, but this
138/// already allows 80% of SUs to take the fast path without changing scheduling
139/// at all. Further changes would either change scheduling, or require a lot
140/// more logic to recover an accurate pressure estimate from the PressureDiffs.
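/// For example, an instruction defining only a subregister (e.g. writing
/// %0.sub0 of a wide virtual register) or one with an explicit physical
/// register operand (e.g. a def or use of $m0) takes the slow
/// RegPressureTracker path instead.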
141static bool canUsePressureDiffs(const SUnit &SU) {
142 if (!SU.isInstr())
143 return false;
144
146 // Cannot use pressure diffs for subregister defs or with physregs; they are
147 // imprecise in both cases.
147 for (const auto &Op : SU.getInstr()->operands()) {
148 if (!Op.isReg() || Op.isImplicit())
149 continue;
150 if (Op.getReg().isPhysical() ||
151 (Op.isDef() && Op.getSubReg() != AMDGPU::NoSubRegister))
152 return false;
153 }
154 return true;
155}
156
158 bool AtTop, const RegPressureTracker &RPTracker, SUnit *SU,
159 std::vector<unsigned> &Pressure, std::vector<unsigned> &MaxPressure,
160 GCNDownwardRPTracker &DownwardTracker, GCNUpwardRPTracker &UpwardTracker,
161 ScheduleDAGMI *DAG, const SIRegisterInfo *SRI) {
162 // getDownwardPressure() and getUpwardPressure() make temporary changes to
163 // the tracker, so we need to pass those functions a non-const reference.
164 RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);
165 if (!GCNTrackers) {
166 AtTop
167 ? TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure)
168 : TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
169
170 return;
171 }
172
173 // GCNTrackers
174 Pressure.resize(4, 0);
175 MachineInstr *MI = SU->getInstr();
176 GCNRegPressure NewPressure;
177 if (AtTop) {
178 GCNDownwardRPTracker TempDownwardTracker(DownwardTracker);
179 NewPressure = TempDownwardTracker.bumpDownwardPressure(MI, SRI);
180 } else {
181 GCNUpwardRPTracker TempUpwardTracker(UpwardTracker);
182 TempUpwardTracker.recede(*MI);
183 NewPressure = TempUpwardTracker.getPressure();
184 }
185 Pressure[AMDGPU::RegisterPressureSets::SReg_32] = NewPressure.getSGPRNum();
186 Pressure[AMDGPU::RegisterPressureSets::VGPR_32] =
187 NewPressure.getArchVGPRNum();
188 Pressure[AMDGPU::RegisterPressureSets::AGPR_32] = NewPressure.getAGPRNum();
189}
190
192 bool AtTop,
193 const RegPressureTracker &RPTracker,
194 const SIRegisterInfo *SRI,
195 unsigned SGPRPressure,
196 unsigned VGPRPressure, bool IsBottomUp) {
197 Cand.SU = SU;
198 Cand.AtTop = AtTop;
199
200 if (!DAG->isTrackingPressure())
201 return;
202
203 Pressure.clear();
204 MaxPressure.clear();
205
206 // We try to use the cached PressureDiffs in the ScheduleDAG whenever
207 // possible over querying the RegPressureTracker.
208 //
209 // RegPressureTracker will make a lot of LIS queries, which are very
210 // expensive; it is considered slow in this context.
211 //
212 // PressureDiffs are precomputed and cached, and getPressureDiff is just a
213 // trivial lookup into an array. It is pretty much free.
214 //
215 // In EXPENSIVE_CHECKS, we always query RPTracker to verify the results of
216 // PressureDiffs.
217 if (AtTop || !canUsePressureDiffs(*SU) || GCNTrackers) {
218 getRegisterPressures(AtTop, RPTracker, SU, Pressure, MaxPressure,
220 } else {
221 // Reserve 4 slots.
222 Pressure.resize(4, 0);
223 Pressure[AMDGPU::RegisterPressureSets::SReg_32] = SGPRPressure;
224 Pressure[AMDGPU::RegisterPressureSets::VGPR_32] = VGPRPressure;
225
226 for (const auto &Diff : DAG->getPressureDiff(SU)) {
227 if (!Diff.isValid())
228 continue;
229 // PressureDiffs are always computed bottom-up, so if we are working top-down
230 // we need to invert the sign.
231 Pressure[Diff.getPSet()] +=
232 (IsBottomUp ? Diff.getUnitInc() : -Diff.getUnitInc());
233 }
234
235#ifdef EXPENSIVE_CHECKS
236 std::vector<unsigned> CheckPressure, CheckMaxPressure;
237 getRegisterPressures(AtTop, RPTracker, SU, CheckPressure, CheckMaxPressure,
239 if (Pressure[AMDGPU::RegisterPressureSets::SReg_32] !=
240 CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] ||
241 Pressure[AMDGPU::RegisterPressureSets::VGPR_32] !=
242 CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32]) {
243 errs() << "Register Pressure is inaccurate when calculated through "
244 "PressureDiff\n"
245 << "SGPR got " << Pressure[AMDGPU::RegisterPressureSets::SReg_32]
246 << ", expected "
247 << CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] << "\n"
248 << "VGPR got " << Pressure[AMDGPU::RegisterPressureSets::VGPR_32]
249 << ", expected "
250 << CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32] << "\n";
251 report_fatal_error("inaccurate register pressure calculation");
252 }
253#endif
254 }
255
256 unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
257 unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
258
259 // If two instructions increase the pressure of different register sets
260 // by the same amount, the generic scheduler will prefer to schedule the
261 // instruction that increases the set with the fewest registers,
262 // which in our case would be SGPRs. This is rarely what we want, so
263 // when we report excess/critical register pressure, we do it either
264 // only for VGPRs or only for SGPRs.
265
266 // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
267 const unsigned MaxVGPRPressureInc = 16;
268 bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
269 bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;
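  // For illustration only (hypothetical limits): with VGPRExcessLimit = 24 and
  // a current VGPR pressure of 10, 10 + 16 >= 24, so excess is reported
  // against VGPRs and SGPR excess is ignored for this candidate.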
270
271 // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
272 // to increase the likelihood we don't go over the limits. We should improve
273 // the analysis to look through dependencies to find the path with the least
274 // register pressure.
275
276 // We only need to update the RPDelta for instructions that increase register
277 // pressure. Instructions that decrease or keep reg pressure the same will be
278 // marked as RegExcess in tryCandidate() when they are compared with
279 // instructions that increase the register pressure.
280 if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
281 HasHighPressure = true;
282 Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
283 Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
284 }
285
286 if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
287 HasHighPressure = true;
288 Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
289 Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
290 }
291
292 // Register pressure is considered 'CRITICAL' if it is approaching a value
293 // that would reduce the wave occupancy for the execution unit. When
294 // register pressure is 'CRITICAL', increasing SGPR and VGPR pressure have
295 // the same cost, so we don't need to prefer one over the other.
296
297 int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
298 int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;
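  // For illustration only (hypothetical limits): with SGPRCriticalLimit = 100,
  // VGPRCriticalLimit = 60, NewSGPRPressure = 90 and NewVGPRPressure = 64,
  // SGPRDelta = -10 and VGPRDelta = 4, so CriticalMax is set on the VGPR set
  // with a unit increment of 4.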
299
300 if (SGPRDelta >= 0 || VGPRDelta >= 0) {
301 HasHighPressure = true;
302 if (SGPRDelta > VGPRDelta) {
303 Cand.RPDelta.CriticalMax =
304 PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
305 Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
306 } else {
307 Cand.RPDelta.CriticalMax =
308 PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
309 Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
310 }
311 }
312}
313
314// This function is mostly cut and pasted from
315// GenericScheduler::pickNodeFromQueue()
317 const CandPolicy &ZonePolicy,
318 const RegPressureTracker &RPTracker,
319 SchedCandidate &Cand,
320 bool IsBottomUp) {
321 const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
323 unsigned SGPRPressure = 0;
324 unsigned VGPRPressure = 0;
325 if (DAG->isTrackingPressure()) {
326 if (!GCNTrackers) {
327 SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
328 VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
329 } else {
330 GCNRPTracker *T = IsBottomUp
331 ? static_cast<GCNRPTracker *>(&UpwardTracker)
332 : static_cast<GCNRPTracker *>(&DownwardTracker);
333 SGPRPressure = T->getPressure().getSGPRNum();
334 VGPRPressure = T->getPressure().getArchVGPRNum();
335 }
336 }
337 ReadyQueue &Q = Zone.Available;
338 for (SUnit *SU : Q) {
339
340 SchedCandidate TryCand(ZonePolicy);
341 initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, SGPRPressure,
342 VGPRPressure, IsBottomUp);
343 // Pass SchedBoundary only when comparing nodes from the same boundary.
344 SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
345 tryCandidate(Cand, TryCand, ZoneArg);
346 if (TryCand.Reason != NoCand) {
347 // Initialize resource delta if needed in case future heuristics query it.
348 if (TryCand.ResDelta == SchedResourceDelta())
349 TryCand.initResourceDelta(Zone.DAG, SchedModel);
350 Cand.setBest(TryCand);
352 }
353 }
354}
355
356// This function is mostly cut and pasted from
357// GenericScheduler::pickNodeBidirectional()
359 // Schedule as far as possible in the direction of no choice. This is most
360 // efficient, but also provides the best heuristics for CriticalPSets.
361 if (SUnit *SU = Bot.pickOnlyChoice()) {
362 IsTopNode = false;
363 return SU;
364 }
365 if (SUnit *SU = Top.pickOnlyChoice()) {
366 IsTopNode = true;
367 return SU;
368 }
369 // Set the bottom-up policy based on the state of the current bottom zone and
370 // the instructions outside the zone, including the top zone.
371 CandPolicy BotPolicy;
372 setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
373 // Set the top-down policy based on the state of the current top zone and
374 // the instructions outside the zone, including the bottom zone.
375 CandPolicy TopPolicy;
376 setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
377
378 // See if BotCand is still valid (because we previously scheduled from Top).
379 LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
380 if (!BotCand.isValid() || BotCand.SU->isScheduled ||
381 BotCand.Policy != BotPolicy) {
384 /*IsBottomUp=*/true);
385 assert(BotCand.Reason != NoCand && "failed to find the first candidate");
386 } else {
388#ifndef NDEBUG
389 if (VerifyScheduling) {
390 SchedCandidate TCand;
391 TCand.reset(CandPolicy());
392 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand,
393 /*IsBottomUp=*/true);
394 assert(TCand.SU == BotCand.SU &&
395 "Last pick result should correspond to re-picking right now");
396 }
397#endif
398 }
399
400 // Check if the top Q has a better candidate.
401 LLVM_DEBUG(dbgs() << "Picking from Top:\n");
402 if (!TopCand.isValid() || TopCand.SU->isScheduled ||
403 TopCand.Policy != TopPolicy) {
406 /*IsBottomUp=*/false);
407 assert(TopCand.Reason != NoCand && "failed to find the first candidate");
408 } else {
410#ifndef NDEBUG
411 if (VerifyScheduling) {
412 SchedCandidate TCand;
413 TCand.reset(CandPolicy());
414 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand,
415 /*IsBottomUp=*/false);
416 assert(TCand.SU == TopCand.SU &&
417 "Last pick result should correspond to re-picking right now");
418 }
419#endif
420 }
421
422 // Pick best from BotCand and TopCand.
423 LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand);
424 dbgs() << "Bot Cand: "; traceCandidate(BotCand););
425 SchedCandidate Cand = BotCand;
427 tryCandidate(Cand, TopCand, nullptr);
428 if (TopCand.Reason != NoCand) {
429 Cand.setBest(TopCand);
430 }
431 LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand););
432
433 IsTopNode = Cand.AtTop;
434 return Cand.SU;
435}
436
437// This function is mostly cut and pasted from
438// GenericScheduler::pickNode()
440 if (DAG->top() == DAG->bottom()) {
442 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
443 return nullptr;
444 }
445 SUnit *SU;
446 do {
448 SU = Top.pickOnlyChoice();
449 if (!SU) {
450 CandPolicy NoPolicy;
451 TopCand.reset(NoPolicy);
453 /*IsBottomUp=*/false);
454 assert(TopCand.Reason != NoCand && "failed to find a candidate");
455 SU = TopCand.SU;
456 }
457 IsTopNode = true;
458 } else if (RegionPolicy.OnlyBottomUp) {
459 SU = Bot.pickOnlyChoice();
460 if (!SU) {
461 CandPolicy NoPolicy;
462 BotCand.reset(NoPolicy);
464 /*IsBottomUp=*/true);
465 assert(BotCand.Reason != NoCand && "failed to find a candidate");
466 SU = BotCand.SU;
467 }
468 IsTopNode = false;
469 } else {
470 SU = pickNodeBidirectional(IsTopNode);
471 }
472 } while (SU->isScheduled);
473
474 if (SU->isTopReady())
475 Top.removeReady(SU);
476 if (SU->isBottomReady())
477 Bot.removeReady(SU);
478
479 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
480 << *SU->getInstr());
481 return SU;
482}
483
484void GCNSchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
485 if (GCNTrackers) {
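    // Keep the AMDGPU-specific trackers in sync with the node that was just
    // scheduled: advance the downward tracker on a top-down pick and update
    // the upward tracker otherwise.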
486 MachineInstr *MI = SU->getInstr();
487 IsTopNode ? (void)DownwardTracker.advance(MI, false)
489 }
490
491 return GenericScheduler::schedNode(SU, IsTopNode);
492}
493
496 return *CurrentStage;
497}
498
501 if (!CurrentStage)
503 else
504 CurrentStage++;
505
506 return CurrentStage != SchedStages.end();
507}
508
511 return std::next(CurrentStage) != SchedStages.end();
512}
513
515 assert(CurrentStage && std::next(CurrentStage) != SchedStages.end());
516 return *std::next(CurrentStage);
517}
518
520 const MachineSchedContext *C, bool IsLegacyScheduler)
526 GCNTrackers = GCNTrackers & !IsLegacyScheduler;
527}
528
532}
533
535 SchedCandidate &TryCand,
536 SchedBoundary *Zone) const {
537 // Initialize the candidate if needed.
538 if (!Cand.isValid()) {
539 TryCand.Reason = NodeOrder;
540 return true;
541 }
542
543 // Avoid spilling by exceeding the register limit.
544 if (DAG->isTrackingPressure() &&
545 tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
546 RegExcess, TRI, DAG->MF))
547 return TryCand.Reason != NoCand;
548
549 // Bias physreg defs and copies toward their uses and definitions, respectively.
550 if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
551 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
552 return TryCand.Reason != NoCand;
553
554 bool SameBoundary = Zone != nullptr;
555 if (SameBoundary) {
556 // Prioritize instructions that read unbuffered resources by stall cycles.
557 if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
558 Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
559 return TryCand.Reason != NoCand;
560
561 // Avoid critical resource consumption and balance the schedule.
564 TryCand, Cand, ResourceReduce))
565 return TryCand.Reason != NoCand;
567 Cand.ResDelta.DemandedResources, TryCand, Cand,
569 return TryCand.Reason != NoCand;
570
571 // Unconditionally try to reduce latency.
572 if (tryLatency(TryCand, Cand, *Zone))
573 return TryCand.Reason != NoCand;
574
575 // Weak edges are for clustering and other constraints.
576 if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
577 getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak))
578 return TryCand.Reason != NoCand;
579 }
580
581 // Keep clustered nodes together to encourage downstream peephole
582 // optimizations which may reduce resource requirements.
583 //
584 // This is a best effort to set things up for a post-RA pass. Optimizations
585 // like generating loads of multiple registers should ideally be done within
586 // the scheduler pass by combining the loads during DAG postprocessing.
587 const SUnit *CandNextClusterSU =
589 const SUnit *TryCandNextClusterSU =
591 if (tryGreater(TryCand.SU == TryCandNextClusterSU,
592 Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster))
593 return TryCand.Reason != NoCand;
594
595 // Avoid increasing the max critical pressure in the scheduled region.
596 if (DAG->isTrackingPressure() &&
598 TryCand, Cand, RegCritical, TRI, DAG->MF))
599 return TryCand.Reason != NoCand;
600
601 // Avoid increasing the max pressure of the entire region.
602 if (DAG->isTrackingPressure() &&
603 tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
604 Cand, RegMax, TRI, DAG->MF))
605 return TryCand.Reason != NoCand;
606
607 if (SameBoundary) {
608 // Fall through to original instruction order.
609 if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) ||
610 (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
611 TryCand.Reason = NodeOrder;
612 return true;
613 }
614 }
615 return false;
616}
617
619 const MachineSchedContext *C)
622}
623
624/// GCNMaxMemoryClauseSchedStrategy tries its best to clause memory instructions
625/// as much as possible. This is achieved by:
626/// 1. Prioritizing clustered operations before the stall latency heuristic.
627/// 2. Prioritizing long-latency loads before the stall latency heuristic.
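/// (Roughly speaking, a memory "clause" is a back-to-back run of memory
/// instructions; keeping such instructions adjacent in the schedule makes it
/// easier for later passes and the hardware to issue them as a group.)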
628///
629/// \param Cand provides the policy and current best candidate.
630/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
631/// \param Zone describes the scheduled zone that we are extending, or nullptr
632/// if Cand is from a different zone than TryCand.
633/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
635 SchedCandidate &TryCand,
636 SchedBoundary *Zone) const {
637 // Initialize the candidate if needed.
638 if (!Cand.isValid()) {
639 TryCand.Reason = NodeOrder;
640 return true;
641 }
642
644 // Bias physreg defs and copies toward their uses and definitions, respectively.
644 if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
645 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
646 return TryCand.Reason != NoCand;
647
648 if (DAG->isTrackingPressure()) {
649 // Avoid exceeding the target's limit.
650 if (tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
651 RegExcess, TRI, DAG->MF))
652 return TryCand.Reason != NoCand;
653
654 // Avoid increasing the max critical pressure in the scheduled region.
656 TryCand, Cand, RegCritical, TRI, DAG->MF))
657 return TryCand.Reason != NoCand;
658 }
659
660 // MaxMemoryClause-specific: We prioritize clustered instructions as we would
661 // get more benefit from clausing these memory instructions.
662 const SUnit *CandNextClusterSU =
664 const SUnit *TryCandNextClusterSU =
666 if (tryGreater(TryCand.SU == TryCandNextClusterSU,
667 Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster))
668 return TryCand.Reason != NoCand;
669
670 // We only compare a subset of features when comparing nodes between
671 // the Top and Bottom boundaries. Some properties are simply incomparable; in
672 // many other instances we should only override the other boundary if something
673 // is a clearly good pick on one boundary. Skip heuristics that are more
674 // "tie-breaking" in nature.
675 bool SameBoundary = Zone != nullptr;
676 if (SameBoundary) {
677 // For loops that are acyclic path limited, aggressively schedule for
678 // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
679 // heuristics to take precedence.
680 if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
681 tryLatency(TryCand, Cand, *Zone))
682 return TryCand.Reason != NoCand;
683
684 // MaxMemoryClause-specific: Prioritize long-latency memory load
685 // instructions in top-bottom order to hide more latency. The mayLoad check
686 // is used to exclude store-like instructions, which we do not want to
687 // schedule too early.
688 bool TryMayLoad =
689 TryCand.SU->isInstr() && TryCand.SU->getInstr()->mayLoad();
690 bool CandMayLoad = Cand.SU->isInstr() && Cand.SU->getInstr()->mayLoad();
691
692 if (TryMayLoad || CandMayLoad) {
693 bool TryLongLatency =
694 TryCand.SU->Latency > 10 * Cand.SU->Latency && TryMayLoad;
695 bool CandLongLatency =
696 10 * TryCand.SU->Latency < Cand.SU->Latency && CandMayLoad;
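      // For illustration only (hypothetical latencies): a load with
      // SU->Latency = 40 competing against an ALU op of Latency = 2 satisfies
      // 40 > 10 * 2, so the load is treated as long-latency and prioritized
      // when scheduling top-down.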
697
698 if (tryGreater(Zone->isTop() ? TryLongLatency : CandLongLatency,
699 Zone->isTop() ? CandLongLatency : TryLongLatency, TryCand,
700 Cand, Stall))
701 return TryCand.Reason != NoCand;
702 }
703 // Prioritize instructions that read unbuffered resources by stall cycles.
704 if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
705 Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
706 return TryCand.Reason != NoCand;
707 }
708
709 if (SameBoundary) {
710 // Weak edges are for clustering and other constraints.
711 if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
712 getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak))
713 return TryCand.Reason != NoCand;
714 }
715
716 // Avoid increasing the max pressure of the entire region.
717 if (DAG->isTrackingPressure() &&
718 tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
719 Cand, RegMax, TRI, DAG->MF))
720 return TryCand.Reason != NoCand;
721
722 if (SameBoundary) {
723 // Avoid critical resource consumption and balance the schedule.
726 TryCand, Cand, ResourceReduce))
727 return TryCand.Reason != NoCand;
729 Cand.ResDelta.DemandedResources, TryCand, Cand,
731 return TryCand.Reason != NoCand;
732
733 // Avoid serializing long latency dependence chains.
734 // For acyclic path limited loops, latency was already checked above.
736 !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
737 return TryCand.Reason != NoCand;
738
739 // Fall through to original instruction order.
740 if (Zone->isTop() == (TryCand.SU->NodeNum < Cand.SU->NodeNum)) {
741 assert(TryCand.SU->NodeNum != Cand.SU->NodeNum);
742 TryCand.Reason = NodeOrder;
743 return true;
744 }
745 }
746
747 return false;
748}
749
751 MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S)
752 : ScheduleDAGMILive(C, std::move(S)), ST(MF.getSubtarget<GCNSubtarget>()),
753 MFI(*MF.getInfo<SIMachineFunctionInfo>()),
754 StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy),
755 RegionLiveOuts(this, /*IsLiveOut=*/true) {
756
757 LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
758 if (RelaxedOcc) {
759 MinOccupancy = std::min(MFI.getMinAllowedOccupancy(), StartingOccupancy);
760 if (MinOccupancy != StartingOccupancy)
761 LLVM_DEBUG(dbgs() << "Allowing Occupancy drops to " << MinOccupancy
762 << ".\n");
763 }
764}
765
766std::unique_ptr<GCNSchedStage>
767GCNScheduleDAGMILive::createSchedStage(GCNSchedStageID SchedStageID) {
768 switch (SchedStageID) {
770 return std::make_unique<OccInitialScheduleStage>(SchedStageID, *this);
772 return std::make_unique<UnclusteredHighRPStage>(SchedStageID, *this);
774 return std::make_unique<ClusteredLowOccStage>(SchedStageID, *this);
776 return std::make_unique<PreRARematStage>(SchedStageID, *this);
778 return std::make_unique<ILPInitialScheduleStage>(SchedStageID, *this);
780 return std::make_unique<MemoryClauseInitialScheduleStage>(SchedStageID,
781 *this);
782 }
783
784 llvm_unreachable("Unknown SchedStageID.");
785}
786
788 // Collect all scheduling regions. The actual scheduling is performed in
789 // GCNScheduleDAGMILive::finalizeSchedule.
790 Regions.push_back(std::pair(RegionBegin, RegionEnd));
791}
792
794GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {
796 RPTracker.advance(begin(), end(), &LiveIns[RegionIdx]);
797 return RPTracker.moveMaxPressure();
798}
799
801 MachineBasicBlock::iterator RegionEnd) {
802 auto REnd = RegionEnd == RegionBegin->getParent()->end()
803 ? std::prev(RegionEnd)
804 : RegionEnd;
805 return &*skipDebugInstructionsBackward(REnd, RegionBegin);
806}
807
808void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
809 const MachineBasicBlock *MBB) {
811
812 // If the block has only one successor then the live-ins of that successor are
813 // the live-outs of the current block. We can reuse the calculated live set if
814 // the successor will be sent to scheduling after the current block.
815
816 // However, due to a bug in LiveInterval analysis it may happen that two
817 // predecessors of the same successor block have different lane bitmasks for
818 // a live-out register. Work around that by sticking to a one-to-one
819 // relationship, i.e. one predecessor with one successor block.
820 const MachineBasicBlock *OnlySucc = nullptr;
821 if (MBB->succ_size() == 1) {
822 auto *Candidate = *MBB->succ_begin();
823 if (!Candidate->empty() && Candidate->pred_size() == 1) {
825 if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(Candidate))
826 OnlySucc = Candidate;
827 }
828 }
829
830 // Scheduler sends regions from the end of the block upwards.
831 size_t CurRegion = RegionIdx;
832 for (size_t E = Regions.size(); CurRegion != E; ++CurRegion)
833 if (Regions[CurRegion].first->getParent() != MBB)
834 break;
835 --CurRegion;
836
837 auto I = MBB->begin();
838 auto LiveInIt = MBBLiveIns.find(MBB);
839 auto &Rgn = Regions[CurRegion];
840 auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
841 if (LiveInIt != MBBLiveIns.end()) {
842 auto LiveIn = std::move(LiveInIt->second);
843 RPTracker.reset(*MBB->begin(), &LiveIn);
844 MBBLiveIns.erase(LiveInIt);
845 } else {
846 I = Rgn.first;
847 auto LRS = BBLiveInMap.lookup(NonDbgMI);
848#ifdef EXPENSIVE_CHECKS
849 assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
850#endif
851 RPTracker.reset(*I, &LRS);
852 }
853
854 for (;;) {
855 I = RPTracker.getNext();
856
857 if (Regions[CurRegion].first == I || NonDbgMI == I) {
858 LiveIns[CurRegion] = RPTracker.getLiveRegs();
859 RPTracker.clearMaxPressure();
860 }
861
862 if (Regions[CurRegion].second == I) {
863 Pressure[CurRegion] = RPTracker.moveMaxPressure();
864 if (CurRegion-- == RegionIdx)
865 break;
866 }
867 RPTracker.advanceToNext();
868 RPTracker.advanceBeforeNext();
869 }
870
871 if (OnlySucc) {
872 if (I != MBB->end()) {
873 RPTracker.advanceToNext();
875 }
876 RPTracker.advanceBeforeNext();
877 MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();
878 }
879}
880
882GCNScheduleDAGMILive::getRegionLiveInMap() const {
883 assert(!Regions.empty());
884 std::vector<MachineInstr *> RegionFirstMIs;
885 RegionFirstMIs.reserve(Regions.size());
886 auto I = Regions.rbegin(), E = Regions.rend();
887 auto *BB = I->first->getParent();
888 do {
889 auto *MI = &*skipDebugInstructionsForward(I->first, I->second);
890 RegionFirstMIs.push_back(MI);
891 do {
892 ++I;
893 } while (I != E && I->first->getParent() == BB);
894 } while (I != E);
895 return getLiveRegMap(RegionFirstMIs, /*After=*/false, *LIS);
896}
897
899GCNScheduleDAGMILive::getRegionLiveOutMap() const {
900 assert(!Regions.empty());
901 std::vector<MachineInstr *> RegionLastMIs;
902 RegionLastMIs.reserve(Regions.size());
903 for (auto &[RegionBegin, RegionEnd] : reverse(Regions))
904 RegionLastMIs.push_back(getLastMIForRegion(RegionBegin, RegionEnd));
905
906 return getLiveRegMap(RegionLastMIs, /*After=*/true, *LIS);
907}
908
910 IdxToInstruction.clear();
911
912 RegionLiveRegMap =
913 IsLiveOut ? DAG->getRegionLiveOutMap() : DAG->getRegionLiveInMap();
914 for (unsigned I = 0; I < DAG->Regions.size(); I++) {
915 MachineInstr *RegionKey =
916 IsLiveOut
917 ? getLastMIForRegion(DAG->Regions[I].first, DAG->Regions[I].second)
918 : &*DAG->Regions[I].first;
919 IdxToInstruction[I] = RegionKey;
920 }
921}
922
924 // Start actual scheduling here. This function is called by the base
925 // MachineScheduler after all regions have been recorded by
926 // GCNScheduleDAGMILive::schedule().
927 LiveIns.resize(Regions.size());
928 Pressure.resize(Regions.size());
929 RescheduleRegions.resize(Regions.size());
930 RegionsWithHighRP.resize(Regions.size());
931 RegionsWithExcessRP.resize(Regions.size());
932 RegionsWithMinOcc.resize(Regions.size());
933 RegionsWithIGLPInstrs.resize(Regions.size());
934 RescheduleRegions.set();
935 RegionsWithHighRP.reset();
936 RegionsWithExcessRP.reset();
937 RegionsWithMinOcc.reset();
938 RegionsWithIGLPInstrs.reset();
939
940 runSchedStages();
941}
942
943void GCNScheduleDAGMILive::runSchedStages() {
944 LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");
945
946 if (!Regions.empty()) {
947 BBLiveInMap = getRegionLiveInMap();
948 if (GCNTrackers)
949 RegionLiveOuts.buildLiveRegMap();
950 }
951
952 GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl);
953 while (S.advanceStage()) {
954 auto Stage = createSchedStage(S.getCurrentStage());
955 if (!Stage->initGCNSchedStage())
956 continue;
957
958 for (auto Region : Regions) {
959 RegionBegin = Region.first;
960 RegionEnd = Region.second;
961 // Setup for scheduling the region and check whether it should be skipped.
962 if (!Stage->initGCNRegion()) {
963 Stage->advanceRegion();
964 exitRegion();
965 continue;
966 }
967
968 if (GCNTrackers) {
969 GCNDownwardRPTracker *DownwardTracker = S.getDownwardTracker();
970 GCNUpwardRPTracker *UpwardTracker = S.getUpwardTracker();
971 GCNRPTracker::LiveRegSet *RegionLiveIns =
972 &LiveIns[Stage->getRegionIdx()];
973
974 reinterpret_cast<GCNRPTracker *>(DownwardTracker)
975 ->reset(MRI, *RegionLiveIns);
976 reinterpret_cast<GCNRPTracker *>(UpwardTracker)
977 ->reset(MRI, RegionLiveOuts.getLiveRegsForRegionIdx(
978 Stage->getRegionIdx()));
979 }
980
982 Stage->finalizeGCNRegion();
983 }
984
985 Stage->finalizeGCNSchedStage();
986 }
987}
988
989#ifndef NDEBUG
991 switch (StageID) {
993 OS << "Max Occupancy Initial Schedule";
994 break;
996 OS << "Unclustered High Register Pressure Reschedule";
997 break;
999 OS << "Clustered Low Occupancy Reschedule";
1000 break;
1002 OS << "Pre-RA Rematerialize";
1003 break;
1005 OS << "Max ILP Initial Schedule";
1006 break;
1008 OS << "Max memory clause Initial Schedule";
1009 break;
1010 }
1011
1012 return OS;
1013}
1014#endif
1015
1017 : DAG(DAG), S(static_cast<GCNSchedStrategy &>(*DAG.SchedImpl)), MF(DAG.MF),
1018 MFI(DAG.MFI), ST(DAG.ST), StageID(StageID) {}
1019
1021 if (!DAG.LIS)
1022 return false;
1023
1024 LLVM_DEBUG(dbgs() << "Starting scheduling stage: " << StageID << "\n");
1025 return true;
1026}
1027
1030 return false;
1031
1033 return false;
1034
1035 if (DAG.RegionsWithHighRP.none() && DAG.RegionsWithExcessRP.none())
1036 return false;
1037
1041
1042 InitialOccupancy = DAG.MinOccupancy;
1043 // Aggressively try to reduce register pressure in the unclustered high RP
1044 // stage. Temporarily increase occupancy target in the region.
1047 if (MFI.getMaxWavesPerEU() > DAG.MinOccupancy)
1048 MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);
1049
1050 LLVM_DEBUG(
1051 dbgs()
1052 << "Retrying function scheduling without clustering. "
1053 "Aggressivly try to reduce register pressure to achieve occupancy "
1054 << DAG.MinOccupancy << ".\n");
1055
1056 return true;
1057}
1058
1061 return false;
1062
1064 return false;
1065
1066 // Don't bother trying to improve ILP in lower RP regions if occupancy has not
1067 // been dropped. All regions will have already been scheduled with the ideal
1068 // occupancy targets.
1069 if (DAG.StartingOccupancy <= DAG.MinOccupancy)
1070 return false;
1071
1072 LLVM_DEBUG(
1073 dbgs() << "Retrying function scheduling with lowest recorded occupancy "
1074 << DAG.MinOccupancy << ".\n");
1075 return true;
1076}
1077
1080 return false;
1081
1082 if (DAG.RegionsWithMinOcc.none() || DAG.Regions.size() == 1)
1083 return false;
1084
1086 // Check maximum occupancy
1088 DAG.MinOccupancy)
1089 return false;
1090
1091 // FIXME: This pass will invalidate cached MBBLiveIns for regions in between
1092 // the defs and the regions we sunk the defs to. Cached pressure for regions
1093 // where a def is sunk from will also be invalidated. This will need to be
1094 // fixed if there is another pass after this pass.
1095 assert(!S.hasNextStage());
1096
1097 collectRematerializableInstructions();
1098 if (RematerializableInsts.empty() || !sinkTriviallyRematInsts(ST, TII))
1099 return false;
1100
1101 LLVM_DEBUG(
1102 dbgs() << "Retrying function scheduling with improved occupancy of "
1103 << DAG.MinOccupancy << " from rematerializing\n");
1104 return true;
1105}
1106
1108 DAG.finishBlock();
1109 LLVM_DEBUG(dbgs() << "Ending scheduling stage: " << StageID << "\n");
1110}
1111
1115 if (DAG.MinOccupancy > InitialOccupancy) {
1116 for (unsigned IDX = 0; IDX < DAG.Pressure.size(); ++IDX)
1117 DAG.RegionsWithMinOcc[IDX] =
1118 DAG.Pressure[IDX].getOccupancy(DAG.ST) == DAG.MinOccupancy;
1119
1121 << " stage successfully increased occupancy to "
1122 << DAG.MinOccupancy << '\n');
1123 }
1124
1126}
1127
1129 // Check whether this new region is also a new block.
1130 if (DAG.RegionBegin->getParent() != CurrentMBB)
1131 setupNewBlock();
1132
1133 unsigned NumRegionInstrs = std::distance(DAG.begin(), DAG.end());
1134 DAG.enterRegion(CurrentMBB, DAG.begin(), DAG.end(), NumRegionInstrs);
1135
1136 // Skip empty scheduling regions (0 or 1 schedulable instructions).
1137 if (DAG.begin() == DAG.end() || DAG.begin() == std::prev(DAG.end()))
1138 return false;
1139
1140 LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
1142 << " " << CurrentMBB->getName()
1143 << "\n From: " << *DAG.begin() << " To: ";
1145 else dbgs() << "End";
1146 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
1147
1148 // Save original instruction order before scheduling for possible revert.
1149 Unsched.clear();
1150 Unsched.reserve(DAG.NumRegionInstrs);
1153 for (auto &I : DAG) {
1154 Unsched.push_back(&I);
1155 if (I.getOpcode() == AMDGPU::SCHED_GROUP_BARRIER ||
1156 I.getOpcode() == AMDGPU::IGLP_OPT)
1157 DAG.RegionsWithIGLPInstrs[RegionIdx] = true;
1158 }
1159 } else {
1160 for (auto &I : DAG)
1161 Unsched.push_back(&I);
1162 }
1163
1164 PressureBefore = DAG.Pressure[RegionIdx];
1165
1166 LLVM_DEBUG(
1167 dbgs() << "Pressure before scheduling:\nRegion live-ins:"
1168 << print(DAG.LiveIns[RegionIdx], DAG.MRI)
1169 << "Region live-in pressure: "
1171 << "Region register pressure: " << print(PressureBefore));
1172
1173 S.HasHighPressure = false;
1175
1176 if (DAG.RegionsWithIGLPInstrs[RegionIdx] &&
1178 SavedMutations.clear();
1180 bool IsInitialStage = StageID == GCNSchedStageID::OccInitialSchedule ||
1183 IsInitialStage ? AMDGPU::SchedulingPhase::Initial
1185 }
1186
1187 return true;
1188}
1189
1191 // Only reschedule regions with the minimum occupancy or regions that may have
1192 // spilling (excess register pressure).
1193 if ((!DAG.RegionsWithMinOcc[RegionIdx] ||
1194 DAG.MinOccupancy <= InitialOccupancy) &&
1195 !DAG.RegionsWithExcessRP[RegionIdx])
1196 return false;
1197
1199}
1200
1202 // We may need to reschedule this region if it wasn't rescheduled in the last
1203 // stage, or if we found it was testing critical register pressure limits in
1204 // the unclustered reschedule stage. The latter is because we may not have been
1205 // able to raise the min occupancy in the previous stage, so the region may be
1206 // overly constrained even if it was already rescheduled.
1207 if (!DAG.RegionsWithHighRP[RegionIdx])
1208 return false;
1209
1211}
1212
1214 if (!DAG.RescheduleRegions[RegionIdx])
1215 return false;
1216
1218}
1219
1221 if (CurrentMBB)
1222 DAG.finishBlock();
1223
1224 CurrentMBB = DAG.RegionBegin->getParent();
1226 // Get real RP for the region if it hasn't been calculated before. After the
1227 // initial schedule stage, real RP will be collected after scheduling.
1231 DAG.computeBlockPressure(RegionIdx, CurrentMBB);
1232}
1233
1235 DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
1236 DAG.RescheduleRegions[RegionIdx] = false;
1237 if (S.HasHighPressure)
1238 DAG.RegionsWithHighRP[RegionIdx] = true;
1239
1240 // Revert scheduling if we have dropped occupancy or there is some other
1241 // reason that the original schedule is better.
1243
1244 if (DAG.RegionsWithIGLPInstrs[RegionIdx] &&
1247
1248 DAG.exitRegion();
1249 RegionIdx++;
1250}
1251
1253 // Check the results of scheduling.
1254 PressureAfter = DAG.getRealRegPressure(RegionIdx);
1255
1256 LLVM_DEBUG(dbgs() << "Pressure after scheduling: " << print(PressureAfter));
1257 LLVM_DEBUG(dbgs() << "Region: " << RegionIdx << ".\n");
1258
1261 DAG.Pressure[RegionIdx] = PressureAfter;
1262 DAG.RegionsWithMinOcc[RegionIdx] =
1263 PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;
1264
1265 // Early out if we have achieved the occupancy target.
1266 LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
1267 return;
1268 }
1269
1270 unsigned TargetOccupancy =
1272 unsigned WavesAfter =
1273 std::min(TargetOccupancy, PressureAfter.getOccupancy(ST));
1274 unsigned WavesBefore =
1275 std::min(TargetOccupancy, PressureBefore.getOccupancy(ST));
1276 LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
1277 << ", after " << WavesAfter << ".\n");
1278
1279 // We may not be able to keep the current target occupancy because of the just
1280 // scheduled region. We might still be able to revert scheduling if the
1281 // occupancy before was higher, or if the current schedule has register
1282 // pressure higher than the excess limits which could lead to more spilling.
1283 unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);
1284
1285 // Allow memory bound functions to drop to 4 waves if not limited by an
1286 // attribute.
1287 if (WavesAfter < WavesBefore && WavesAfter < DAG.MinOccupancy &&
1288 WavesAfter >= MFI.getMinAllowedOccupancy()) {
1289 LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
1290 << MFI.getMinAllowedOccupancy() << " waves\n");
1291 NewOccupancy = WavesAfter;
1292 }
1293
1294 if (NewOccupancy < DAG.MinOccupancy) {
1295 DAG.MinOccupancy = NewOccupancy;
1296 MFI.limitOccupancy(DAG.MinOccupancy);
1297 DAG.RegionsWithMinOcc.reset();
1298 LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
1299 << DAG.MinOccupancy << ".\n");
1300 }
1301 // The maximum number of arch VGPRs on a non-unified register file, or the
1302 // maximum VGPRs + AGPRs in the unified register file case.
1303 unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
1304 // The maximum number of arch VGPRs for both unified and non-unified register
1305 // files.
1306 unsigned MaxArchVGPRs = std::min(MaxVGPRs, ST.getAddressableNumArchVGPRs());
1307 unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
1308
1309 if (PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) > MaxVGPRs ||
1310 PressureAfter.getVGPRNum(false) > MaxArchVGPRs ||
1311 PressureAfter.getAGPRNum() > MaxArchVGPRs ||
1312 PressureAfter.getSGPRNum() > MaxSGPRs) {
1313 DAG.RescheduleRegions[RegionIdx] = true;
1314 DAG.RegionsWithHighRP[RegionIdx] = true;
1315 DAG.RegionsWithExcessRP[RegionIdx] = true;
1316 }
1317
1318 // Revert if this region's schedule would cause a drop in occupancy or
1319 // spilling.
1320 if (shouldRevertScheduling(WavesAfter)) {
1322 } else {
1323 DAG.Pressure[RegionIdx] = PressureAfter;
1324 DAG.RegionsWithMinOcc[RegionIdx] =
1325 PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;
1326 }
1327}
1328
1329unsigned
1330GCNSchedStage::computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
1331 DenseMap<unsigned, unsigned> &ReadyCycles,
1332 const TargetSchedModel &SM) {
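  // A node becomes ready at the latest of CurrCycle and, for every register
  // dependence predecessor, that predecessor's ready cycle plus its latency.
  // For illustration only: predecessors ready at cycles 3 and 5 with latencies
  // 4 and 1 give max(CurrCycle, 3 + 4, 5 + 1) = max(CurrCycle, 7).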
1333 unsigned ReadyCycle = CurrCycle;
1334 for (auto &D : SU.Preds) {
1335 if (D.isAssignedRegDep()) {
1336 MachineInstr *DefMI = D.getSUnit()->getInstr();
1337 unsigned Latency = SM.computeInstrLatency(DefMI);
1338 unsigned DefReady = ReadyCycles[DAG.getSUnit(DefMI)->NodeNum];
1339 ReadyCycle = std::max(ReadyCycle, DefReady + Latency);
1340 }
1341 }
1342 ReadyCycles[SU.NodeNum] = ReadyCycle;
1343 return ReadyCycle;
1344}
1345
1346#ifndef NDEBUG
1348 bool operator()(std::pair<MachineInstr *, unsigned> A,
1349 std::pair<MachineInstr *, unsigned> B) const {
1350 return A.second < B.second;
1351 }
1352};
1353
1354static void printScheduleModel(std::set<std::pair<MachineInstr *, unsigned>,
1355 EarlierIssuingCycle> &ReadyCycles) {
1356 if (ReadyCycles.empty())
1357 return;
1358 unsigned BBNum = ReadyCycles.begin()->first->getParent()->getNumber();
1359 dbgs() << "\n################## Schedule time ReadyCycles for MBB : " << BBNum
1360 << " ##################\n# Cycle #\t\t\tInstruction "
1361 " "
1362 " \n";
1363 unsigned IPrev = 1;
1364 for (auto &I : ReadyCycles) {
1365 if (I.second > IPrev + 1)
1366 dbgs() << "****************************** BUBBLE OF " << I.second - IPrev
1367 << " CYCLES DETECTED ******************************\n\n";
1368 dbgs() << "[ " << I.second << " ] : " << *I.first << "\n";
1369 IPrev = I.second;
1370 }
1371}
1372#endif
1373
1375GCNSchedStage::getScheduleMetrics(const std::vector<SUnit> &InputSchedule) {
1376#ifndef NDEBUG
1377 std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>
1378 ReadyCyclesSorted;
1379#endif
1381 unsigned SumBubbles = 0;
1382 DenseMap<unsigned, unsigned> ReadyCycles;
1383 unsigned CurrCycle = 0;
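  // SumBubbles accumulates the idle cycles between the point where the
  // previous instruction issued and the point where the next one becomes
  // ready; the metric reported in the debug output is
  // SumBubbles * ScaleFactor / CurrCycle, i.e. roughly the percentage of the
  // schedule spent waiting.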
1384 for (auto &SU : InputSchedule) {
1385 unsigned ReadyCycle =
1386 computeSUnitReadyCycle(SU, CurrCycle, ReadyCycles, SM);
1387 SumBubbles += ReadyCycle - CurrCycle;
1388#ifndef NDEBUG
1389 ReadyCyclesSorted.insert(std::make_pair(SU.getInstr(), ReadyCycle));
1390#endif
1391 CurrCycle = ++ReadyCycle;
1392 }
1393#ifndef NDEBUG
1394 LLVM_DEBUG(
1395 printScheduleModel(ReadyCyclesSorted);
1396 dbgs() << "\n\t"
1397 << "Metric: "
1398 << (SumBubbles
1399 ? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
1400 : 1)
1401 << "\n\n");
1402#endif
1403
1404 return ScheduleMetrics(CurrCycle, SumBubbles);
1405}
1406
1409#ifndef NDEBUG
1410 std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>
1411 ReadyCyclesSorted;
1412#endif
1414 unsigned SumBubbles = 0;
1415 DenseMap<unsigned, unsigned> ReadyCycles;
1416 unsigned CurrCycle = 0;
1417 for (auto &MI : DAG) {
1418 SUnit *SU = DAG.getSUnit(&MI);
1419 if (!SU)
1420 continue;
1421 unsigned ReadyCycle =
1422 computeSUnitReadyCycle(*SU, CurrCycle, ReadyCycles, SM);
1423 SumBubbles += ReadyCycle - CurrCycle;
1424#ifndef NDEBUG
1425 ReadyCyclesSorted.insert(std::make_pair(SU->getInstr(), ReadyCycle));
1426#endif
1427 CurrCycle = ++ReadyCycle;
1428 }
1429#ifndef NDEBUG
1430 LLVM_DEBUG(
1431 printScheduleModel(ReadyCyclesSorted);
1432 dbgs() << "\n\t"
1433 << "Metric: "
1434 << (SumBubbles
1435 ? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
1436 : 1)
1437 << "\n\n");
1438#endif
1439
1440 return ScheduleMetrics(CurrCycle, SumBubbles);
1441}
1442
1443bool GCNSchedStage::shouldRevertScheduling(unsigned WavesAfter) {
1444 if (WavesAfter < DAG.MinOccupancy)
1445 return true;
1446
1447 return false;
1448}
1449
1452 return false;
1453
1455 return true;
1456
1457 if (mayCauseSpilling(WavesAfter))
1458 return true;
1459
1460 return false;
1461}
1462
1464 // If RP is not reduced in the unclustered reschedule stage, revert to the
1465 // old schedule.
1466 if ((WavesAfter <= PressureBefore.getOccupancy(ST) &&
1467 mayCauseSpilling(WavesAfter)) ||
1469 LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n");
1470 return true;
1471 }
1472
1473 // Do not attempt to relax schedule even more if we are already spilling.
1475 return false;
1476
1477 LLVM_DEBUG(
1478 dbgs()
1479 << "\n\t *** In shouldRevertScheduling ***\n"
1480 << " *********** BEFORE UnclusteredHighRPStage ***********\n");
1481 ScheduleMetrics MBefore =
1483 LLVM_DEBUG(
1484 dbgs()
1485 << "\n *********** AFTER UnclusteredHighRPStage ***********\n");
1487 unsigned OldMetric = MBefore.getMetric();
1488 unsigned NewMetric = MAfter.getMetric();
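  // Weigh the occupancy gained by the unclustered schedule against the latency
  // metric it costs: a fixed-point Profit of at least
  // ScheduleMetrics::ScaleFactor (i.e. 1.0) keeps the new schedule, anything
  // smaller reverts to the old one.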
1489 unsigned WavesBefore =
1491 unsigned Profit =
1492 ((WavesAfter * ScheduleMetrics::ScaleFactor) / WavesBefore *
1494 NewMetric) /
1496 LLVM_DEBUG(dbgs() << "\tMetric before " << MBefore << "\tMetric after "
1497 << MAfter << "Profit: " << Profit << "\n");
1498 return Profit < ScheduleMetrics::ScaleFactor;
1499}
1500
1503 return false;
1504
1506 return true;
1507
1508 if (mayCauseSpilling(WavesAfter))
1509 return true;
1510
1511 return false;
1512}
1513
1516 return true;
1517
1518 if (mayCauseSpilling(WavesAfter))
1519 return true;
1520
1521 return false;
1522}
1523
1525 if (mayCauseSpilling(WavesAfter))
1526 return true;
1527
1528 return false;
1529}
1530
1532 unsigned WavesAfter) {
1533 return mayCauseSpilling(WavesAfter);
1534}
1535
1536bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) {
1537 if (WavesAfter <= MFI.getMinWavesPerEU() && isRegionWithExcessRP() &&
1539 LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
1540 return true;
1541 }
1542
1543 return false;
1544}
1545
1547 DAG.RegionsWithMinOcc[RegionIdx] =
1548 PressureBefore.getOccupancy(ST) == DAG.MinOccupancy;
1549 LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
1550 DAG.RescheduleRegions[RegionIdx] =
1551 S.hasNextStage() &&
1554 int SkippedDebugInstr = 0;
1555 for (MachineInstr *MI : Unsched) {
1556 if (MI->isDebugInstr()) {
1557 ++SkippedDebugInstr;
1558 continue;
1559 }
1560
1561 if (MI->getIterator() != DAG.RegionEnd) {
1562 DAG.BB->remove(MI);
1564 if (!MI->isDebugInstr())
1565 DAG.LIS->handleMove(*MI, true);
1566 }
1567
1568 // Reset read-undef flags and update them later.
1569 for (auto &Op : MI->all_defs())
1570 Op.setIsUndef(false);
1571 RegisterOperands RegOpers;
1572 RegOpers.collect(*MI, *DAG.TRI, DAG.MRI, DAG.ShouldTrackLaneMasks, false);
1573 if (!MI->isDebugInstr()) {
1575 // Adjust liveness and add missing dead+read-undef flags.
1577 RegOpers.adjustLaneLiveness(*DAG.LIS, DAG.MRI, SlotIdx, MI);
1578 } else {
1579 // Adjust for missing dead-def flags.
1580 RegOpers.detectDeadDefs(*MI, *DAG.LIS);
1581 }
1582 }
1583 DAG.RegionEnd = MI->getIterator();
1584 ++DAG.RegionEnd;
1585 LLVM_DEBUG(dbgs() << "Scheduling " << *MI);
1586 }
1587
1588 // After reverting the schedule, debug instrs will now be at the end of the
1589 // block and RegionEnd will point to the first debug instr. Increment RegionEnd
1590 // past the debug instrs to the actual end of the scheduling region.
1591 while (SkippedDebugInstr-- > 0)
1592 ++DAG.RegionEnd;
1593
1594 // If the Unsched.front() instruction is a debug instruction, this will
1595 // actually shrink the region since we moved all debug instructions to the end
1596 // of the block. Find the first instruction that is not a debug instruction.
1597 DAG.RegionBegin = Unsched.front()->getIterator();
1598 if (DAG.RegionBegin->isDebugInstr()) {
1599 for (MachineInstr *MI : Unsched) {
1600 if (MI->isDebugInstr())
1601 continue;
1602 DAG.RegionBegin = MI->getIterator();
1603 break;
1604 }
1605 }
1606
1607 // Then move the debug instructions back into their correct place and set
1608 // RegionBegin and RegionEnd if needed.
1610
1611 DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
1612}
1613
1614void PreRARematStage::collectRematerializableInstructions() {
1615 const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
1616 for (unsigned I = 0, E = DAG.MRI.getNumVirtRegs(); I != E; ++I) {
1618 if (!DAG.LIS->hasInterval(Reg))
1619 continue;
1620
1621 // TODO: Handle AGPR and SGPR rematerialization
1622 if (!SRI->isVGPRClass(DAG.MRI.getRegClass(Reg)) ||
1623 !DAG.MRI.hasOneDef(Reg) || !DAG.MRI.hasOneNonDBGUse(Reg))
1624 continue;
1625
1627 MachineInstr *Def = Op->getParent();
1628 if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(*Def))
1629 continue;
1630
1632 if (Def->getParent() == UseI->getParent())
1633 continue;
1634
1635 // We are only collecting defs that are defined in another block and are
1636 // live-through or used inside regions at MinOccupancy. This means that the
1637 // register must be in the live-in set for the region.
1638 bool AddedToRematList = false;
1639 for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
1640 auto It = DAG.LiveIns[I].find(Reg);
1641 if (It != DAG.LiveIns[I].end() && !It->second.none()) {
1642 if (DAG.RegionsWithMinOcc[I]) {
1643 RematerializableInsts[I][Def] = UseI;
1644 AddedToRematList = true;
1645 }
1646
1647 // Collect regions with rematerializable reg as live-in to avoid
1648 // searching later when updating RP.
1649 RematDefToLiveInRegions[Def].push_back(I);
1650 }
1651 }
1652 if (!AddedToRematList)
1653 RematDefToLiveInRegions.erase(Def);
1654 }
1655}
1656
1657bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
1658 const TargetInstrInfo *TII) {
1659 // Temporary copies of cached variables we will be modifying and replacing if
1660 // sinking succeeds.
1662 std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>, 32>
1663 NewRegions;
1666 BitVector NewRescheduleRegions;
1667 LiveIntervals *LIS = DAG.LIS;
1668
1669 NewRegions.resize(DAG.Regions.size());
1670 NewRescheduleRegions.resize(DAG.Regions.size());
1671
1672 // Collect only regions that have a rematerializable def as a live-in.
1673 SmallSet<unsigned, 16> ImpactedRegions;
1674 for (const auto &It : RematDefToLiveInRegions)
1675 ImpactedRegions.insert(It.second.begin(), It.second.end());
1676
1677 // Make copies of register pressure and live-ins cache that will be updated
1678 // as we rematerialize.
1679 for (auto Idx : ImpactedRegions) {
1680 NewPressure[Idx] = DAG.Pressure[Idx];
1681 NewLiveIns[Idx] = DAG.LiveIns[Idx];
1682 }
1683 NewRegions = DAG.Regions;
1684 NewRescheduleRegions.reset();
1685
1687 bool Improved = false;
1688 for (auto I : ImpactedRegions) {
1689 if (!DAG.RegionsWithMinOcc[I])
1690 continue;
1691
1692 Improved = false;
1693 int VGPRUsage = NewPressure[I].getVGPRNum(ST.hasGFX90AInsts());
1694 int SGPRUsage = NewPressure[I].getSGPRNum();
1695
1696 // TODO: Handle occupancy drop due to AGPR and SGPR.
1697 // Check if the occupancy drop is caused by VGPR usage rather than SGPR usage.
1698 if (ST.getOccupancyWithNumSGPRs(SGPRUsage) == DAG.MinOccupancy)
1699 break;
1700
1701 // The occupancy of this region could have been improved by a previous
1702 // iteration's sinking of defs.
1703 if (NewPressure[I].getOccupancy(ST) > DAG.MinOccupancy) {
1704 NewRescheduleRegions[I] = true;
1705 Improved = true;
1706 continue;
1707 }
1708
1709 // First check if we have enough trivially rematerializable instructions to
1710 // improve occupancy. Optimistically assume all instructions we are able to
1711 // sink decrease RP.
1712 int TotalSinkableRegs = 0;
1713 for (const auto &It : RematerializableInsts[I]) {
1714 MachineInstr *Def = It.first;
1715 Register DefReg = Def->getOperand(0).getReg();
1716 TotalSinkableRegs +=
1717 SIRegisterInfo::getNumCoveredRegs(NewLiveIns[I][DefReg]);
1718 }
1719 int VGPRsAfterSink = VGPRUsage - TotalSinkableRegs;
1720 unsigned OptimisticOccupancy = ST.getOccupancyWithNumVGPRs(VGPRsAfterSink);
1721 // If in the most optimistic scenario, we cannot improve occupancy, then do
1722 // not attempt to sink any instructions.
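    // For illustration only (hypothetical numbers): with VGPRUsage = 40 and
    // TotalSinkableRegs = 6, VGPRsAfterSink = 34; if the occupancy achievable
    // with 34 VGPRs does not exceed DAG.MinOccupancy, sinking cannot help and
    // we stop here.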
1723 if (OptimisticOccupancy <= DAG.MinOccupancy)
1724 break;
1725
1726 unsigned ImproveOccupancy = 0;
1728 for (auto &It : RematerializableInsts[I]) {
1729 MachineInstr *Def = It.first;
1730 MachineBasicBlock::iterator InsertPos =
1731 MachineBasicBlock::iterator(It.second);
1732 Register Reg = Def->getOperand(0).getReg();
1733 // Rematerialize MI to its use block. Since we are only rematerializing
1734 // instructions that do not have any virtual reg uses, we do not need to
1735 // call LiveRangeEdit::allUsesAvailableAt() and
1736 // LiveRangeEdit::canRematerializeAt().
1737 TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg,
1738 Def->getOperand(0).getSubReg(), *Def, *DAG.TRI);
1739 MachineInstr *NewMI = &*std::prev(InsertPos);
1740 LIS->InsertMachineInstrInMaps(*NewMI);
1741 LIS->removeInterval(Reg);
1743 InsertedMIToOldDef[NewMI] = Def;
1744
1745 // Update region boundaries in the scheduling region we sunk from, since we
1746 // may sink an instruction that was at the beginning or end of its region.
1747 DAG.updateRegionBoundaries(NewRegions, Def, /*NewMI =*/nullptr,
1748 /*Removing =*/true);
1749
1750 // Update region boundaries in the region we sunk to.
1751 DAG.updateRegionBoundaries(NewRegions, InsertPos, NewMI);
1752
1753 LaneBitmask PrevMask = NewLiveIns[I][Reg];
1754 // FIXME: Also update cached pressure for where the def was sunk from.
1755 // Update RP for all regions that have this reg as a live-in and remove
1756 // the reg from all regions as a live-in.
1757 for (auto Idx : RematDefToLiveInRegions[Def]) {
1758 NewLiveIns[Idx].erase(Reg);
1759 if (InsertPos->getParent() != DAG.Regions[Idx].first->getParent()) {
1760 // Def is live-through and not used in this block.
1761 NewPressure[Idx].inc(Reg, PrevMask, LaneBitmask::getNone(), DAG.MRI);
1762 } else {
1763 // Def is used and rematerialized into this block.
1764 GCNDownwardRPTracker RPT(*LIS);
1765 auto *NonDbgMI = &*skipDebugInstructionsForward(
1766 NewRegions[Idx].first, NewRegions[Idx].second);
1767 RPT.reset(*NonDbgMI, &NewLiveIns[Idx]);
1768 RPT.advance(NewRegions[Idx].second);
1769 NewPressure[Idx] = RPT.moveMaxPressure();
1770 }
1771 }
1772
1773 SinkedDefs.push_back(Def);
1774 ImproveOccupancy = NewPressure[I].getOccupancy(ST);
1775 if (ImproveOccupancy > DAG.MinOccupancy)
1776 break;
1777 }
1778
1779 // Remove the defs we just sank from every region's list of sinkable defs.
1780 for (auto &Def : SinkedDefs)
1781 for (auto TrackedIdx : RematDefToLiveInRegions[Def])
1782 RematerializableInsts[TrackedIdx].erase(Def);
1783
1784 if (ImproveOccupancy <= DAG.MinOccupancy)
1785 break;
1786
1787 NewRescheduleRegions[I] = true;
1788 Improved = true;
1789 }
1790
1791 if (!Improved) {
1792 // Occupancy was not improved for all regions that were at MinOccupancy.
1793 // Undo sinking and remove newly rematerialized instructions.
1794 for (auto &Entry : InsertedMIToOldDef) {
1795 MachineInstr *MI = Entry.first;
1796 MachineInstr *OldMI = Entry.second;
1797 Register Reg = MI->getOperand(0).getReg();
1798 LIS->RemoveMachineInstrFromMaps(*MI);
1799 MI->eraseFromParent();
1800 OldMI->clearRegisterDeads(Reg);
1801 LIS->removeInterval(Reg);
1802 LIS->createAndComputeVirtRegInterval(Reg);
1803 }
1804 return false;
1805 }
1806
1807 // Occupancy was improved for all regions.
1808 for (auto &Entry : InsertedMIToOldDef) {
1809 MachineInstr *MI = Entry.first;
1810 MachineInstr *OldMI = Entry.second;
1811
1812 // Remove OldMI from BBLiveInMap since we are sinking it from its MBB.
1813 DAG.BBLiveInMap.erase(OldMI);
1814
1815 // Remove OldMI and update LIS
1816 Register Reg = MI->getOperand(0).getReg();
1817 LIS->RemoveMachineInstrFromMaps(*OldMI);
1818 OldMI->eraseFromParent();
1819 LIS->removeInterval(Reg);
1820 LIS->createAndComputeVirtRegInterval(Reg);
1821 }
1822
1823 // Update the live-in, register pressure, and region caches.
1824 for (auto Idx : ImpactedRegions) {
1825 DAG.LiveIns[Idx] = NewLiveIns[Idx];
1826 DAG.Pressure[Idx] = NewPressure[Idx];
1827 DAG.MBBLiveIns.erase(DAG.Regions[Idx].first->getParent());
1828 }
1829 DAG.Regions = NewRegions;
1830 DAG.RescheduleRegions = NewRescheduleRegions;
1831
1832 if (GCNTrackers)
1833 DAG.RegionLiveOuts.buildLiveRegMap();
1834
1836 MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);
1837
1838 return true;
1839}
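// A minimal sketch (not part of this file) of the optimistic estimate the
// stage above relies on: assume every register covered by every sinkable def
// leaves the region, then ask the subtarget what occupancy the reduced VGPR
// count allows. The helper name and signature are hypothetical.
static unsigned
estimateOccupancyAfterSink(const GCNSubtarget &ST, int VGPRUsage,
                           ArrayRef<LaneBitmask> SinkableDefMasks) {
  int TotalSinkableRegs = 0;
  for (LaneBitmask LM : SinkableDefMasks)
    TotalSinkableRegs += SIRegisterInfo::getNumCoveredRegs(LM);
  // Clamp at zero so an overly generous estimate cannot go negative.
  int VGPRsAfterSink = std::max(0, VGPRUsage - TotalSinkableRegs);
  return ST.getOccupancyWithNumVGPRs(VGPRsAfterSink);
}
// If this estimate does not beat the current minimum occupancy, the sinking
// loop above bails out before touching any instructions.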
1840
1841// Copied from MachineLICM
1842 bool PreRARematStage::isTriviallyReMaterializable(const MachineInstr &MI) {
1843 if (!DAG.TII->isTriviallyReMaterializable(MI))
1844 return false;
1845
1846 for (const MachineOperand &MO : MI.all_uses())
1847 if (MO.getReg().isVirtual())
1848 return false;
1849
1850 return true;
1851}
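// A hedged sketch (not a verbatim excerpt) of how candidates for the predicate
// above are typically gathered: only virtual VGPRs with a single def and a
// single non-debug use are worth considering, and the defining instruction
// must pass the trivial-rematerialization check. The helper name is
// hypothetical; the predicate is passed in so the sketch stays independent of
// the stage class.
static void collectRematCandidates(
    const MachineRegisterInfo &MRI, const LiveIntervals &LIS,
    function_ref<bool(const MachineInstr &)> IsTriviallyRemat,
    SmallVectorImpl<MachineInstr *> &Candidates) {
  for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
    Register Reg = Register::index2VirtReg(I);
    if (!LIS.hasInterval(Reg))
      continue;
    // Restrict to VGPRs with exactly one def and one non-debug use.
    if (!SIRegisterInfo::isVGPRClass(MRI.getRegClass(Reg)) ||
        !MRI.hasOneDef(Reg) || !MRI.hasOneNonDBGUse(Reg))
      continue;
    if (MachineOperand *Op = MRI.getOneDef(Reg))
      if (IsTriviallyRemat(*Op->getParent()))
        Candidates.push_back(Op->getParent());
  }
}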
1852
1853 // When removing, we will have to check both the beginning and the end of the
1854 // region. When inserting, we only have to check whether we are inserting NewMI
1855 // in front of a scheduling region; the end does not need to be checked because
1856 // we only ever insert before an already existing MI.
1857 void GCNScheduleDAGMILive::updateRegionBoundaries(
1858 SmallVectorImpl<std::pair<MachineBasicBlock::iterator,
1859 MachineBasicBlock::iterator>> &RegionBoundaries,
1860 MachineBasicBlock::iterator MI, MachineInstr *NewMI, bool Removing) {
1861 unsigned I = 0, E = RegionBoundaries.size();
1862 // Search for the first region of the block where MI is located.
1863 while (I != E && MI->getParent() != RegionBoundaries[I].first->getParent())
1864 ++I;
1865
1866 for (; I != E; ++I) {
1867 if (MI->getParent() != RegionBoundaries[I].first->getParent())
1868 return;
1869
1870 if (Removing && MI == RegionBoundaries[I].first &&
1871 MI == RegionBoundaries[I].second) {
1872 // MI is in a region of size 1; after removing it, the region will have size
1873 // 0, so set RegionBegin and RegionEnd to the end-of-block iterator.
1874 RegionBoundaries[I] =
1875 std::pair(MI->getParent()->end(), MI->getParent()->end());
1876 return;
1877 }
1878 if (MI == RegionBoundaries[I].first) {
1879 if (Removing)
1880 RegionBoundaries[I] =
1881 std::pair(std::next(MI), RegionBoundaries[I].second);
1882 else
1883 // We inserted NewMI in front of the region; set the new RegionBegin to NewMI.
1884 RegionBoundaries[I] = std::pair(MachineBasicBlock::iterator(NewMI),
1885 RegionBoundaries[I].second);
1886 return;
1887 }
1888 if (Removing && MI == RegionBoundaries[I].second) {
1889 RegionBoundaries[I] = std::pair(RegionBoundaries[I].first, std::prev(MI));
1890 return;
1891 }
1892 }
1893}
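// A simplified, self-contained analogue (plain C++, no LLVM types) of the
// boundary bookkeeping above, under the assumption that each region is stored
// as a [first, last] iterator pair into a single block, as the comparisons
// against both boundaries suggest. Removing a region's only element collapses
// it to the end() sentinel; removing its first or last element shrinks it by
// one, mirroring the Removing paths above.
#include <iterator>
#include <list>
#include <utility>
#include <vector>

using Iter = std::list<int>::iterator;
using Region = std::pair<Iter, Iter>;

static void updateOnRemove(std::vector<Region> &Regions, Iter Erased,
                           std::list<int> &Block) {
  for (Region &R : Regions) {
    if (Erased == R.first && Erased == R.second) {
      R = {Block.end(), Block.end()}; // Region becomes empty.
      return;
    }
    if (Erased == R.first) {
      R.first = std::next(Erased); // Region now starts one element later.
      return;
    }
    if (Erased == R.second) {
      R.second = std::prev(Erased); // Region now ends one element earlier.
      return;
    }
  }
}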
1894
1895 static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG) {
1896 return any_of(*DAG, [](MachineBasicBlock::iterator MI) {
1897 unsigned Opc = MI->getOpcode();
1898 return Opc == AMDGPU::SCHED_GROUP_BARRIER || Opc == AMDGPU::IGLP_OPT;
1899 });
1900}
1901
1902 GCNPostScheduleDAGMILive::GCNPostScheduleDAGMILive(
1903 MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S,
1904 bool RemoveKillFlags)
1905 : ScheduleDAGMI(C, std::move(S), RemoveKillFlags) {}
1906
1907 void GCNPostScheduleDAGMILive::schedule() {
1908 HasIGLPInstrs = hasIGLPInstrs(this);
1909 if (HasIGLPInstrs) {
1910 SavedMutations.clear();
1911 SavedMutations.swap(Mutations);
1912 addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PostRA));
1913 }
1914
1915 ScheduleDAGMI::schedule();
1916 }
1917
1918 void GCNPostScheduleDAGMILive::finalizeSchedule() {
1919 if (HasIGLPInstrs)
1920 SavedMutations.swap(Mutations);
1921
1922 ScheduleDAGMI::finalizeSchedule();
1923 }
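// A hedged sketch of how this post-RA DAG is typically instantiated from a
// pass-config hook; the enclosing class name is hypothetical, while the
// strategy type and constructor arguments mirror the constructor defined
// above.
ScheduleDAGInstrs *
MyGCNPassConfig::createPostMachineScheduler(MachineSchedContext *C) const {
  return new GCNPostScheduleDAGMILive(
      C, std::make_unique<PostGenericScheduler>(C),
      /*RemoveKillFlags=*/true);
}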