LLVM 22.0.0git
GCNSchedStrategy.h
Go to the documentation of this file.
1//===-- GCNSchedStrategy.h - GCN Scheduler Strategy -*- C++ -*-------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
14#define LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
15
16#include "GCNRegPressure.h"
17#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/MapVector.h"
21
22namespace llvm {
23
25class SIRegisterInfo;
26class GCNSubtarget;
27class GCNSchedStage;
28
37
38#ifndef NDEBUG
39raw_ostream &operator<<(raw_ostream &OS, const GCNSchedStageID &StageID);
40#endif
41
42/// This is a minimal scheduler strategy. The main difference between this
43/// and the GenericScheduler is that GCNSchedStrategy uses different
44/// heuristics to determine excess/critical pressure sets.
46protected:
47 SUnit *pickNodeBidirectional(bool &IsTopNode, bool &PickedPending);
48
49 void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy,
50 const RegPressureTracker &RPTracker,
51 SchedCandidate &Cand, bool &IsPending,
52 bool IsBottomUp);
53
54 void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop,
55 const RegPressureTracker &RPTracker,
56 const SIRegisterInfo *SRI, unsigned SGPRPressure,
57 unsigned VGPRPressure, bool IsBottomUp);
58
59 /// Evaluates instructions in the pending queue using a subset of scheduling
60 /// heuristics.
61 ///
62 /// Instructions that cannot be issued due to hardware constraints are placed
63 /// in the pending queue rather than the available queue, making them normally
64 /// invisible to scheduling heuristics. However, in certain scenarios (such as
65 /// avoiding register spilling), it may be beneficial to consider scheduling
66 /// these not-yet-ready instructions.
68 SchedBoundary *Zone) const;
69
70 void printCandidateDecision(const SchedCandidate &Current,
71 const SchedCandidate &Preferred);
72
73 std::vector<unsigned> Pressure;
74
75 std::vector<unsigned> MaxPressure;
76
78
80
82
84
85 // Scheduling stages for this strategy.
87
88 // Pointer to the current SchedStageID.
90
91 // GCN RP Tracker for top-down scheduling
93
94 // GCN RP Tracker for botttom-up scheduling
96
97public:
98 // schedule() have seen register pressure over the critical limits and had to
99 // track register pressure for actual scheduling heuristics.
101
102 // Schedule known to have excess register pressure. Be more conservative in
103 // increasing ILP and preserving VGPRs.
104 bool KnownExcessRP = false;
105
106 // An error margin is necessary because of poor performance of the generic RP
107 // tracker and can be adjusted up for tuning heuristics to try and more
108 // aggressively reduce register pressure.
109 unsigned ErrorMargin = 3;
110
111 // Bias for SGPR limits under a high register pressure.
112 const unsigned HighRPSGPRBias = 7;
113
114 // Bias for VGPR limits under a high register pressure.
115 const unsigned HighRPVGPRBias = 7;
116
118
120
121 unsigned SGPRLimitBias = 0;
122
123 unsigned VGPRLimitBias = 0;
124
126
127 SUnit *pickNode(bool &IsTopNode) override;
128
129 void schedNode(SUnit *SU, bool IsTopNode) override;
130
131 void initialize(ScheduleDAGMI *DAG) override;
132
133 unsigned getTargetOccupancy() { return TargetOccupancy; }
134
135 void setTargetOccupancy(unsigned Occ) { TargetOccupancy = Occ; }
136
138
139 // Advances stage. Returns true if there are remaining stages.
140 bool advanceStage();
141
142 bool hasNextStage() const;
143
145
147
149};
150
151/// The goal of this scheduling strategy is to maximize kernel occupancy (i.e.
152/// maximum number of waves per simd).
154public:
156 bool IsLegacyScheduler = false);
157};
158
159/// The goal of this scheduling strategy is to maximize ILP for a single wave
160/// (i.e. latency hiding).
162protected:
163 bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
164 SchedBoundary *Zone) const override;
165
166public:
168};
169
170/// The goal of this scheduling strategy is to maximize memory clause for a
171/// single wave.
173protected:
174 bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
175 SchedBoundary *Zone) const override;
176
177public:
179};
180
182 unsigned ScheduleLength;
183 unsigned BubbleCycles;
184
185public:
187 ScheduleMetrics(unsigned L, unsigned BC)
188 : ScheduleLength(L), BubbleCycles(BC) {}
189 unsigned getLength() const { return ScheduleLength; }
190 unsigned getBubbles() const { return BubbleCycles; }
191 unsigned getMetric() const {
192 unsigned Metric = (BubbleCycles * ScaleFactor) / ScheduleLength;
193 // Metric is zero if the amount of bubbles is less than 1% which is too
194 // small. So, return 1.
195 return Metric ? Metric : 1;
196 }
197 static const unsigned ScaleFactor;
198};
199
201 dbgs() << "\n Schedule Metric (scaled by "
203 << " ) is: " << Sm.getMetric() << " [ " << Sm.getBubbles() << "/"
204 << Sm.getLength() << " ]\n";
205 return OS;
206}
207
208class GCNScheduleDAGMILive;
211 // The live in/out pressure as indexed by the first or last MI in the region
212 // before scheduling.
214 // The mapping of RegionIDx to key instruction
215 DenseMap<unsigned, MachineInstr *> IdxToInstruction;
216 // Whether we are calculating LiveOuts or LiveIns
217 bool IsLiveOut;
218
219public:
222 : DAG(GCNDAG), IsLiveOut(LiveOut) {}
223 // Build the Instr->LiveReg and RegionIdx->Instr maps
224 void buildLiveRegMap();
225
226 // Retrieve the LiveReg for a given RegionIdx
228 assert(IdxToInstruction.contains(RegionIdx));
229 MachineInstr *Key = IdxToInstruction[RegionIdx];
230 return RegionLiveRegMap[Key];
231 }
232};
233
234/// A region's boundaries i.e. a pair of instruction bundle iterators. The lower
235/// boundary is inclusive, the upper boundary is exclusive.
237 std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>;
238
240 friend class GCNSchedStage;
244 friend class PreRARematStage;
246 friend class RegionPressureMap;
247
248 const GCNSubtarget &ST;
249
251
252 // Occupancy target at the beginning of function scheduling cycle.
253 unsigned StartingOccupancy;
254
255 // Minimal real occupancy recorder for the function.
256 unsigned MinOccupancy;
257
258 // Vector of regions recorder for later rescheduling
260
261 // Record regions with high register pressure.
262 BitVector RegionsWithHighRP;
263
264 // Record regions with excess register pressure over the physical register
265 // limit. Register pressure in these regions usually will result in spilling.
266 BitVector RegionsWithExcessRP;
267
268 // Regions that have IGLP instructions (SCHED_GROUP_BARRIER or IGLP_OPT).
269 BitVector RegionsWithIGLPInstrs;
270
271 // Region live-in cache.
273
274 // Region pressure cache.
276
277 // Temporary basic block live-in cache.
279
280 // The map of the initial first region instruction to region live in registers
282
283 // Calculate the map of the initial first region instruction to region live in
284 // registers
286
287 // Calculate the map of the initial last region instruction to region live out
288 // registers
290 getRegionLiveOutMap() const;
291
292 // The live out registers per region. These are internally stored as a map of
293 // the initial last region instruction to region live out registers, but can
294 // be retreived with the regionIdx by calls to getLiveRegsForRegionIdx.
295 RegionPressureMap RegionLiveOuts;
296
297 // Return current region pressure.
298 GCNRegPressure getRealRegPressure(unsigned RegionIdx) const;
299
300 // Compute and cache live-ins and pressure for all regions in block.
301 void computeBlockPressure(unsigned RegionIdx, const MachineBasicBlock *MBB);
302
303 /// If necessary, updates a region's boundaries following insertion ( \p NewMI
304 /// != nullptr) or removal ( \p NewMI == nullptr) of a \p MI in the region.
305 /// For an MI removal, this must be called before the MI is actually erased
306 /// from its parent MBB.
307 void updateRegionBoundaries(RegionBoundaries &RegionBounds,
309 MachineInstr *NewMI);
310
311 void runSchedStages();
312
313 std::unique_ptr<GCNSchedStage> createSchedStage(GCNSchedStageID SchedStageID);
314
315public:
317 std::unique_ptr<MachineSchedStrategy> S);
318
319 void schedule() override;
320
321 void finalizeSchedule() override;
322};
323
324// GCNSchedStrategy applies multiple scheduling stages to a function.
326protected:
328
330
332
334
336
338
339 // The current block being scheduled.
341
342 // Current region index.
343 unsigned RegionIdx = 0;
344
345 // Record the original order of instructions before scheduling.
346 std::vector<MachineInstr *> Unsched;
347
348 // RP before scheduling the current region.
350
351 // RP after scheduling the current region.
353
354 std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;
355
357
358public:
359 // Initialize state for a scheduling stage. Returns false if the current stage
360 // should be skipped.
361 virtual bool initGCNSchedStage();
362
363 // Finalize state after finishing a scheduling pass on the function.
364 virtual void finalizeGCNSchedStage();
365
366 // Setup for scheduling a region. Returns false if the current region should
367 // be skipped.
368 virtual bool initGCNRegion();
369
370 // Track whether a new region is also a new MBB.
371 void setupNewBlock();
372
373 // Finalize state after scheudling a region.
374 void finalizeGCNRegion();
375
376 // Check result of scheduling.
377 void checkScheduling();
378
379 // computes the given schedule virtual execution time in clocks
380 ScheduleMetrics getScheduleMetrics(const std::vector<SUnit> &InputSchedule);
382 unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
383 DenseMap<unsigned, unsigned> &ReadyCycles,
384 const TargetSchedModel &SM);
385
386 // Returns true if scheduling should be reverted.
387 virtual bool shouldRevertScheduling(unsigned WavesAfter);
388
389 // Returns true if current region has known excess pressure.
390 bool isRegionWithExcessRP() const {
391 return DAG.RegionsWithExcessRP[RegionIdx];
392 }
393
394 // The region number this stage is currently working on
395 unsigned getRegionIdx() { return RegionIdx; }
396
397 // Returns true if the new schedule may result in more spilling.
398 bool mayCauseSpilling(unsigned WavesAfter);
399
400 // Attempt to revert scheduling for this region.
401 void revertScheduling();
402
404
405 virtual ~GCNSchedStage() = default;
406};
407
415
417private:
418 // Save the initial occupancy before starting this stage.
419 unsigned InitialOccupancy;
420
421public:
422 bool initGCNSchedStage() override;
423
424 void finalizeGCNSchedStage() override;
425
426 bool initGCNRegion() override;
427
428 bool shouldRevertScheduling(unsigned WavesAfter) override;
429
432};
433
434// Retry function scheduling if we found resulting occupancy and it is
435// lower than used for other scheduling passes. This will give more freedom
436// to schedule low register pressure blocks.
438public:
439 bool initGCNSchedStage() override;
440
441 bool initGCNRegion() override;
442
443 bool shouldRevertScheduling(unsigned WavesAfter) override;
444
447};
448
449/// Attempts to reduce function spilling or, if there is no spilling, to
450/// increase function occupancy by one with respect to ArchVGPR usage by sinking
451/// rematerializable instructions to their use. When the stage
452/// estimates reducing spilling or increasing occupancy is possible, as few
453/// instructions as possible are rematerialized to reduce potential negative
454/// effects on function latency.
456private:
457 /// Useful information about a rematerializable instruction.
458 struct RematInstruction {
459 /// Single use of the rematerializable instruction's defined register,
460 /// located in a different block.
462 /// Rematerialized version of \p DefMI, set in
463 /// PreRARematStage::rematerialize. Used for reverting rematerializations.
464 MachineInstr *RematMI;
465 /// Set of regions in which the rematerializable instruction's defined
466 /// register is a live-in.
467 SmallDenseSet<unsigned, 4> LiveInRegions;
468
469 RematInstruction(MachineInstr *UseMI) : UseMI(UseMI) {}
470 };
471
472 /// Maps all MIs to their parent region. MI terminators are considered to be
473 /// outside the region they delimitate, and as such are not stored in the map.
475 /// Parent MBB to each region, in region order.
477 /// Collects instructions to rematerialize.
479 /// Collects regions whose live-ins or register pressure will change due to
480 /// rematerializations.
482 /// In case we need to rollback rematerializations, save lane masks for all
483 /// rematerialized registers in all regions in which they are live-ins.
485 /// After successful stage initialization, indicates which regions should be
486 /// rescheduled.
487 BitVector RescheduleRegions;
488 /// The target occupancy the stage is trying to achieve. Empty when the
489 /// objective is spilling reduction.
490 std::optional<unsigned> TargetOcc;
491 /// Achieved occupancy *only* through rematerializations (pre-rescheduling).
492 /// Smaller than or equal to the target occupancy.
493 unsigned AchievedOcc;
494
495 /// Returns whether remat can reduce spilling or increase function occupancy
496 /// by 1 through rematerialization. If it can do one, collects instructions in
497 /// PreRARematStage::Rematerializations and sets the target occupancy in
498 /// PreRARematStage::TargetOccupancy.
499 bool canIncreaseOccupancyOrReduceSpill();
500
501 /// Whether the MI is rematerializable
502 bool isReMaterializable(const MachineInstr &MI);
503
504 /// Rematerializes all instructions in PreRARematStage::Rematerializations
505 /// and stores the achieved occupancy after remat in
506 /// PreRARematStage::AchievedOcc.
507 void rematerialize();
508
509 /// If remat alone did not increase occupancy to the target one, rollbacks all
510 /// rematerializations and resets live-ins/RP in all regions impacted by the
511 /// stage to their pre-stage values.
512 void finalizeGCNSchedStage() override;
513
514public:
515 bool initGCNSchedStage() override;
516
517 bool initGCNRegion() override;
518
519 bool shouldRevertScheduling(unsigned WavesAfter) override;
520
523};
524
532
541
543private:
544 std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;
545
546 bool HasIGLPInstrs = false;
547
548public:
549 void schedule() override;
550
551 void finalizeSchedule() override;
552
554 std::unique_ptr<MachineSchedStrategy> S,
555 bool RemoveKillFlags);
556};
557
558} // End namespace llvm
559
560#endif // LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
MachineInstrBuilder & UseMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
This file defines the DenseMap class.
This file defines the GCNRegPressure class, which tracks registry pressure by bookkeeping number of S...
IRTranslator LLVM IR MI
This file implements a map that provides insertion order iteration.
bool shouldRevertScheduling(unsigned WavesAfter) override
ClusteredLowOccStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
GCNMaxILPSchedStrategy(const MachineSchedContext *C)
bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override
Apply a set of heuristics to a new candidate.
bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override
GCNMaxMemoryClauseSchedStrategy tries best to clause memory instructions as much as possible.
GCNMaxMemoryClauseSchedStrategy(const MachineSchedContext *C)
GCNMaxOccupancySchedStrategy(const MachineSchedContext *C, bool IsLegacyScheduler=false)
void finalizeSchedule() override
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
void schedule() override
Orders nodes according to selected style.
GCNPostScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S, bool RemoveKillFlags)
DenseMap< unsigned, LaneBitmask > LiveRegSet
GCNSchedStrategy & S
GCNRegPressure PressureBefore
bool isRegionWithExcessRP() const
bool mayCauseSpilling(unsigned WavesAfter)
ScheduleMetrics getScheduleMetrics(const std::vector< SUnit > &InputSchedule)
GCNScheduleDAGMILive & DAG
const GCNSchedStageID StageID
std::vector< MachineInstr * > Unsched
GCNRegPressure PressureAfter
MachineFunction & MF
SIMachineFunctionInfo & MFI
unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle, DenseMap< unsigned, unsigned > &ReadyCycles, const TargetSchedModel &SM)
virtual ~GCNSchedStage()=default
virtual void finalizeGCNSchedStage()
virtual bool initGCNSchedStage()
virtual bool shouldRevertScheduling(unsigned WavesAfter)
std::vector< std::unique_ptr< ScheduleDAGMutation > > SavedMutations
GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
MachineBasicBlock * CurrentMBB
const GCNSubtarget & ST
This is a minimal scheduler strategy.
const unsigned HighRPSGPRBias
GCNDownwardRPTracker DownwardTracker
GCNSchedStrategy(const MachineSchedContext *C)
SmallVector< GCNSchedStageID, 4 > SchedStages
std::vector< unsigned > MaxPressure
SUnit * pickNodeBidirectional(bool &IsTopNode, bool &PickedPending)
GCNSchedStageID getCurrentStage()
bool tryPendingCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Evaluates instructions in the pending queue using a subset of scheduling heuristics.
SmallVectorImpl< GCNSchedStageID >::iterator CurrentStage
void schedNode(SUnit *SU, bool IsTopNode) override
Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an instruction and updated scheduled/rem...
GCNDownwardRPTracker * getDownwardTracker()
std::vector< unsigned > Pressure
void initialize(ScheduleDAGMI *DAG) override
Initialize the strategy after building the DAG for a new region.
GCNUpwardRPTracker UpwardTracker
void printCandidateDecision(const SchedCandidate &Current, const SchedCandidate &Preferred)
const unsigned HighRPVGPRBias
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Cand, bool &IsPending, bool IsBottomUp)
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, const SIRegisterInfo *SRI, unsigned SGPRPressure, unsigned VGPRPressure, bool IsBottomUp)
void setTargetOccupancy(unsigned Occ)
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule, or return NULL.
GCNUpwardRPTracker * getUpwardTracker()
GCNSchedStageID getNextStage() const
void finalizeSchedule() override
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
void schedule() override
Orders nodes according to selected style.
GCNScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S)
ScheduleDAGMILive * DAG
GenericScheduler(const MachineSchedContext *C)
bool shouldRevertScheduling(unsigned WavesAfter) override
ILPInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
MachineInstrBundleIterator< MachineInstr > iterator
Representation of each machine instruction.
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:36
bool shouldRevertScheduling(unsigned WavesAfter) override
MemoryClauseInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
bool shouldRevertScheduling(unsigned WavesAfter) override
OccInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
PreRARematStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
bool shouldRevertScheduling(unsigned WavesAfter) override
bool initGCNSchedStage() override
Track the current register pressure at some position in the instruction stream, and remember the high...
GCNRPTracker::LiveRegSet & getLiveRegsForRegionIdx(unsigned RegionIdx)
RegionPressureMap(GCNScheduleDAGMILive *GCNDAG, bool LiveOut)
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Scheduling unit. This is a node in the scheduling DAG.
Each Scheduling boundary is associated with ready queues.
bool RemoveKillFlags
True if the DAG builder should remove kill flags (in preparation for rescheduling).
ScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
ScheduleDAGMI(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S, bool RemoveKillFlags)
unsigned getBubbles() const
ScheduleMetrics(unsigned L, unsigned BC)
unsigned getLength() const
static const unsigned ScaleFactor
unsigned getMetric() const
Implements a dense probed hash-table based set with some number of buckets stored inline.
Definition DenseSet.h:291
typename SuperClass::iterator iterator
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Provide an instruction scheduling machine model to CodeGen passes.
UnclusteredHighRPStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
bool shouldRevertScheduling(unsigned WavesAfter) override
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1655
std::pair< MachineBasicBlock::iterator, MachineBasicBlock::iterator > RegionBoundaries
A region's boundaries i.e.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Policy for scheduling the next instruction in the candidate's zone.
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of p...
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...