GCNSchedStrategy.h
//===-- GCNSchedStrategy.h - GCN Scheduler Strategy -*- C++ -*-------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
#define LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H

#include "GCNRegPressure.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/CodeGen/MachineScheduler.h"

namespace llvm {

class SIMachineFunctionInfo;
class SIRegisterInfo;
class GCNSubtarget;
class GCNSchedStage;

enum class GCNSchedStageID : unsigned {
  OccInitialSchedule = 0,
  UnclusteredHighRPReschedule = 1,
  ClusteredLowOccupancyReschedule = 2,
  PreRARematerialize = 3,
  ILPInitialSchedule = 4,
  MemoryClauseInitialSchedule = 5
};

#ifndef NDEBUG
raw_ostream &operator<<(raw_ostream &OS, const GCNSchedStageID &StageID);
#endif

/// This is a minimal scheduler strategy. The main difference between this
/// and the GenericScheduler is that GCNSchedStrategy uses different
/// heuristics to determine excess/critical pressure sets.
class GCNSchedStrategy : public GenericScheduler {
protected:
  SUnit *pickNodeBidirectional(bool &IsTopNode);

  void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy,
                         const RegPressureTracker &RPTracker,
                         SchedCandidate &Cand, bool IsBottomUp);

  void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop,
                     const RegPressureTracker &RPTracker,
                     const SIRegisterInfo *SRI, unsigned SGPRPressure,
                     unsigned VGPRPressure, bool IsBottomUp);

  std::vector<unsigned> Pressure;

  std::vector<unsigned> MaxPressure;

  unsigned SGPRExcessLimit;

  unsigned VGPRExcessLimit;

  unsigned TargetOccupancy;

  MachineFunction *MF;

  // Scheduling stages for this strategy.
  SmallVector<GCNSchedStageID, 4> SchedStages;

  // Pointer to the current SchedStageID.
  SmallVectorImpl<GCNSchedStageID>::iterator CurrentStage = nullptr;

  // GCN RP Tracker for top-down scheduling.
  GCNDownwardRPTracker DownwardTracker;

  // GCN RP Tracker for bottom-up scheduling.
  GCNUpwardRPTracker UpwardTracker;

public:
  // schedule() has seen register pressure over the critical limits and had to
  // track register pressure for actual scheduling heuristics.
  bool HasHighPressure;

  // Schedule known to have excess register pressure. Be more conservative in
  // increasing ILP and preserving VGPRs.
  bool KnownExcessRP = false;

  // An error margin is necessary because of the poor performance of the
  // generic RP tracker; it can be adjusted up when tuning heuristics to reduce
  // register pressure more aggressively.
  unsigned ErrorMargin = 3;

  // Bias for SGPR limits under a high register pressure.
  const unsigned HighRPSGPRBias = 7;

  // Bias for VGPR limits under a high register pressure.
  const unsigned HighRPVGPRBias = 7;

  unsigned SGPRCriticalLimit;

  unsigned VGPRCriticalLimit;

  unsigned SGPRLimitBias = 0;

  unsigned VGPRLimitBias = 0;

  GCNSchedStrategy(const MachineSchedContext *C);

  SUnit *pickNode(bool &IsTopNode) override;

  void schedNode(SUnit *SU, bool IsTopNode) override;

  void initialize(ScheduleDAGMI *DAG) override;

  unsigned getTargetOccupancy() { return TargetOccupancy; }

  void setTargetOccupancy(unsigned Occ) { TargetOccupancy = Occ; }

  GCNSchedStageID getCurrentStage();

  // Advances the stage. Returns true if there are remaining stages.
  bool advanceStage();

  bool hasNextStage() const;

  GCNSchedStageID getNextStage() const;

  GCNDownwardRPTracker *getDownwardTracker() { return &DownwardTracker; }

  GCNUpwardRPTracker *getUpwardTracker() { return &UpwardTracker; }
};
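
// A sketch of how the stage list above is walked, using only the stage API
// declared in this class (advanceStage() / getCurrentStage()); S denotes a
// GCNSchedStrategy instance provided by the caller, and the per-stage work is
// elided:
//
// \code{.cpp}
//   while (S.advanceStage()) {               // false once all stages are used
//     GCNSchedStageID StageID = S.getCurrentStage();
//     // ... create and run the GCNSchedStage corresponding to StageID ...
//   }
// \endcode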

/// The goal of this scheduling strategy is to maximize kernel occupancy (i.e.
/// the maximum number of waves per SIMD).
class GCNMaxOccupancySchedStrategy final : public GCNSchedStrategy {
public:
  GCNMaxOccupancySchedStrategy(const MachineSchedContext *C,
                               bool IsLegacyScheduler = false);
};

/// The goal of this scheduling strategy is to maximize ILP for a single wave
/// (i.e. latency hiding).
class GCNMaxILPSchedStrategy final : public GCNSchedStrategy {
protected:
  bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
                    SchedBoundary *Zone) const override;

public:
  GCNMaxILPSchedStrategy(const MachineSchedContext *C);
};

/// The goal of this scheduling strategy is to maximize memory clauses for a
/// single wave.
class GCNMaxMemoryClauseSchedStrategy final : public GCNSchedStrategy {
protected:
  bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
                    SchedBoundary *Zone) const override;

public:
  GCNMaxMemoryClauseSchedStrategy(const MachineSchedContext *C);
};

class ScheduleMetrics {
  unsigned ScheduleLength;
  unsigned BubbleCycles;

public:
  ScheduleMetrics() {}
  ScheduleMetrics(unsigned L, unsigned BC)
      : ScheduleLength(L), BubbleCycles(BC) {}
  unsigned getLength() const { return ScheduleLength; }
  unsigned getBubbles() const { return BubbleCycles; }
  unsigned getMetric() const {
    unsigned Metric = (BubbleCycles * ScaleFactor) / ScheduleLength;
    // The metric is zero if bubbles account for less than 1% of the schedule,
    // which is too small to be meaningful, so return 1 in that case.
    return Metric ? Metric : 1;
  }
  static const unsigned ScaleFactor;
};

inline raw_ostream &operator<<(raw_ostream &OS, const ScheduleMetrics &Sm) {
  dbgs() << "\n Schedule Metric (scaled by "
         << ScheduleMetrics::ScaleFactor
         << " ) is: " << Sm.getMetric() << " [ " << Sm.getBubbles() << "/"
         << Sm.getLength() << " ]\n";
  return OS;
}
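
// A worked example of the bubble metric above (a sketch). ScaleFactor == 100
// is an assumption here; its value is defined out of line, but 100 is
// consistent with the 1% remark in getMetric().
//
// \code{.cpp}
//   ScheduleMetrics SM(/*L=*/1000, /*BC=*/25);
//   // With ScaleFactor == 100: (25 * 100) / 1000 == 2, i.e. stalls make up
//   // roughly 2% of the schedule's virtual length.
//   unsigned M = SM.getMetric();
//   // A schedule whose bubbles are below 1% of its length would compute 0;
//   // getMetric() clamps that case to 1.
// \endcode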

class GCNScheduleDAGMILive;
class RegionPressureMap {
  GCNScheduleDAGMILive *DAG;
  // The live in/out pressure as indexed by the first or last MI in the region
  // before scheduling.
  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> RegionLiveRegMap;
  // The mapping of RegionIdx to key instruction
  DenseMap<unsigned, MachineInstr *> IdxToInstruction;
  // Whether we are calculating LiveOuts or LiveIns
  bool IsLiveOut;

public:
  RegionPressureMap() {}
  RegionPressureMap(GCNScheduleDAGMILive *GCNDAG, bool LiveOut)
      : DAG(GCNDAG), IsLiveOut(LiveOut) {}
  // Build the Instr->LiveReg and RegionIdx->Instr maps
  void buildLiveRegMap();

  // Retrieve the LiveReg for a given RegionIdx
  GCNRPTracker::LiveRegSet &getLiveRegsForRegionIdx(unsigned RegionIdx) {
    assert(IdxToInstruction.find(RegionIdx) != IdxToInstruction.end());
    MachineInstr *Key = IdxToInstruction[RegionIdx];
    return RegionLiveRegMap[Key];
  }
};
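
// Illustrative usage sketch, using only the interface declared above; the DAG
// object and the RegionIdx value are assumed to be available from the caller:
//
// \code{.cpp}
//   RegionPressureMap LiveOuts(&DAG, /*LiveOut=*/true);
//   LiveOuts.buildLiveRegMap();            // Instr->LiveReg, RegionIdx->Instr
//   GCNRPTracker::LiveRegSet &RegionLiveOuts =
//       LiveOuts.getLiveRegsForRegionIdx(RegionIdx);
// \endcode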

class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
  friend class GCNSchedStage;
  friend class OccInitialScheduleStage;
  friend class UnclusteredHighRPStage;
  friend class ClusteredLowOccStage;
  friend class PreRARematStage;
  friend class ILPInitialScheduleStage;
  friend class RegionPressureMap;

  const GCNSubtarget &ST;

  SIMachineFunctionInfo &MFI;

  // Occupancy target at the beginning of the function scheduling cycle.
  unsigned StartingOccupancy;

  // Minimal real occupancy recorded for the function.
  unsigned MinOccupancy;

  // Vector of regions recorded for later rescheduling.
  SmallVector<std::pair<MachineBasicBlock::iterator,
                        MachineBasicBlock::iterator>, 32> Regions;

  // Records whether a region is not yet scheduled, its schedule has been
  // reverted, or we generally want to reschedule it.
  BitVector RescheduleRegions;

  // Record regions with high register pressure.
  BitVector RegionsWithHighRP;

  // Record regions with excess register pressure over the physical register
  // limit. Register pressure in these regions usually results in spilling.
  BitVector RegionsWithExcessRP;

  // Regions that have the same occupancy as the latest MinOccupancy.
  BitVector RegionsWithMinOcc;

  // Regions that have IGLP instructions (SCHED_GROUP_BARRIER or IGLP_OPT).
  BitVector RegionsWithIGLPInstrs;

  // Region live-in cache.
  SmallVector<GCNRPTracker::LiveRegSet, 32> LiveIns;

  // Region pressure cache.
  SmallVector<GCNRegPressure, 32> Pressure;

  // Temporary basic block live-in cache.
  DenseMap<const MachineBasicBlock *, GCNRPTracker::LiveRegSet> MBBLiveIns;

  // The map of the initial first region instruction to region live-in
  // registers.
  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> BBLiveInMap;

  // Calculate the map of the initial first region instruction to region
  // live-in registers.
  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> getRegionLiveInMap() const;

  // Calculate the map of the initial last region instruction to region
  // live-out registers.
  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
  getRegionLiveOutMap() const;

  // The live-out registers per region. These are internally stored as a map of
  // the initial last region instruction to region live-out registers, but can
  // be retrieved with the RegionIdx by calls to getLiveRegsForRegionIdx.
  RegionPressureMap RegionLiveOuts;

  // Return current region pressure.
  GCNRegPressure getRealRegPressure(unsigned RegionIdx) const;

  // Compute and cache live-ins and pressure for all regions in block.
  void computeBlockPressure(unsigned RegionIdx, const MachineBasicBlock *MBB);

  // Update region boundaries when removing MI or inserting NewMI before MI.
  void updateRegionBoundaries(
      SmallVectorImpl<std::pair<MachineBasicBlock::iterator,
                                MachineBasicBlock::iterator>> &RegionBoundaries,
      MachineBasicBlock::iterator MI, MachineInstr *NewMI,
      bool Removing = false);

  void runSchedStages();

  std::unique_ptr<GCNSchedStage> createSchedStage(GCNSchedStageID SchedStageID);

public:
  GCNScheduleDAGMILive(MachineSchedContext *C,
                       std::unique_ptr<MachineSchedStrategy> S);

  void schedule() override;

  void finalizeSchedule() override;
};
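
// A sketch of how this DAG is typically instantiated: the target's scheduler
// factory pairs it with one of the GCNSchedStrategy subclasses declared above.
// The factory function name below is illustrative only; the real hook lives in
// the AMDGPU target machine and may also add DAG mutations.
//
// \code{.cpp}
//   ScheduleDAGInstrs *createGCNMaxOccupancyScheduler(MachineSchedContext *C) {
//     return new GCNScheduleDAGMILive(
//         C, std::make_unique<GCNMaxOccupancySchedStrategy>(C));
//   }
// \endcode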

// GCNSchedStrategy applies multiple scheduling stages to a function.
class GCNSchedStage {
protected:
  GCNScheduleDAGMILive &DAG;

  GCNSchedStrategy &S;

  MachineFunction &MF;

  SIMachineFunctionInfo &MFI;

  const GCNSubtarget &ST;

  const GCNSchedStageID StageID;

  // The current block being scheduled.
  MachineBasicBlock *CurrentMBB = nullptr;

  // Current region index.
  unsigned RegionIdx = 0;

  // Record the original order of instructions before scheduling.
  std::vector<MachineInstr *> Unsched;

  // RP before scheduling the current region.
  GCNRegPressure PressureBefore;

  // RP after scheduling the current region.
  GCNRegPressure PressureAfter;

  std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;

  GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG);

public:
  // Initialize state for a scheduling stage. Returns false if the current
  // stage should be skipped.
  virtual bool initGCNSchedStage();

  // Finalize state after finishing a scheduling pass on the function.
  virtual void finalizeGCNSchedStage();

  // Setup for scheduling a region. Returns false if the current region should
  // be skipped.
  virtual bool initGCNRegion();

  // Track whether a new region is also a new MBB.
  void setupNewBlock();

  // Finalize state after scheduling a region.
  void finalizeGCNRegion();

  // Check the result of scheduling.
  void checkScheduling();

  // Computes the given schedule's virtual execution time in clock cycles.
  ScheduleMetrics getScheduleMetrics(const std::vector<SUnit> &InputSchedule);
  ScheduleMetrics getScheduleMetrics(const GCNScheduleDAGMILive &DAG);
  unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
                                  DenseMap<unsigned, unsigned> &ReadyCycles,
                                  const TargetSchedModel &SM);

  // Returns true if scheduling should be reverted.
  virtual bool shouldRevertScheduling(unsigned WavesAfter);

  // Returns true if the current region has known excess pressure.
  bool isRegionWithExcessRP() const {
    return DAG.RegionsWithExcessRP[RegionIdx];
  }

  // The region number this stage is currently working on.
  unsigned getRegionIdx() { return RegionIdx; }

  // Returns true if the new schedule may result in more spilling.
  bool mayCauseSpilling(unsigned WavesAfter);

  // Attempt to revert scheduling for this region.
  void revertScheduling();

  void advanceRegion() { RegionIdx++; }

  virtual ~GCNSchedStage() = default;
};
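
// A sketch of the stage lifecycle as seen through the virtual interface above.
// The loop bound (NumRegions) and the scheduling call are placeholders; the
// real driver is GCNScheduleDAGMILive::runSchedStages(), whose body is not
// part of this header.
//
// \code{.cpp}
//   if (Stage->initGCNSchedStage()) {
//     for (unsigned Region = 0; Region != NumRegions; ++Region) {
//       if (!Stage->initGCNRegion()) {
//         Stage->advanceRegion();
//         continue;
//       }
//       // ... schedule the region with the DAG scheduler ...
//       Stage->finalizeGCNRegion();  // checks RP and may revert the schedule
//     }
//     Stage->finalizeGCNSchedStage();
//   }
// \endcode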

class OccInitialScheduleStage : public GCNSchedStage {
public:
  bool shouldRevertScheduling(unsigned WavesAfter) override;

  OccInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

class UnclusteredHighRPStage : public GCNSchedStage {
private:
  // Save the initial occupancy before starting this stage.
  unsigned InitialOccupancy;

public:
  bool initGCNSchedStage() override;

  void finalizeGCNSchedStage() override;

  bool initGCNRegion() override;

  bool shouldRevertScheduling(unsigned WavesAfter) override;

  UnclusteredHighRPStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

// Retry function scheduling if the resulting occupancy is lower than the
// occupancy used for the other scheduling passes. This gives more freedom to
// schedule low register pressure blocks.
class ClusteredLowOccStage : public GCNSchedStage {
public:
  bool initGCNSchedStage() override;

  bool initGCNRegion() override;

  bool shouldRevertScheduling(unsigned WavesAfter) override;

  ClusteredLowOccStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

class PreRARematStage : public GCNSchedStage {
private:
  // Each region at MinOccupancy will have its own list of trivially
  // rematerializable instructions we can remat to reduce RP. The list maps an
  // instruction to the position we should remat before, usually the MI using
  // the rematerializable instruction.
  MapVector<unsigned, MapVector<MachineInstr *, MachineInstr *>>
      RematerializableInsts;

  // Map a trivially rematerializable def to a list of regions at MinOccupancy
  // that have the defined reg as a live-in.
  DenseMap<MachineInstr *, SmallVector<unsigned, 4>> RematDefToLiveInRegions;

  // Collect all trivially rematerializable VGPR instructions with a single def
  // and single use outside the defining block into RematerializableInsts.
  void collectRematerializableInstructions();

  bool isTriviallyReMaterializable(const MachineInstr &MI);

  // TODO: Should also attempt to reduce RP of SGPRs and AGPRs.
  // Attempt to reduce RP of VGPRs by sinking trivially rematerializable
  // instructions. Returns true if we were able to sink instruction(s).
  bool sinkTriviallyRematInsts(const GCNSubtarget &ST,
                               const TargetInstrInfo *TII);

public:
  bool initGCNSchedStage() override;

  bool initGCNRegion() override;

  bool shouldRevertScheduling(unsigned WavesAfter) override;

  PreRARematStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};
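
// Conceptual flow of this stage, expressed only in terms of the members
// declared above; the exact ordering and the occupancy checks performed by the
// real initGCNSchedStage() are simplified away here:
//
// \code{.cpp}
//   collectRematerializableInstructions();    // fills RematerializableInsts
//   if (RematerializableInsts.empty())
//     return false;                           // nothing to remat, skip stage
//   return sinkTriviallyRematInsts(ST, TII);  // true if any instr was sunk
// \endcode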

class ILPInitialScheduleStage : public GCNSchedStage {
public:
  bool shouldRevertScheduling(unsigned WavesAfter) override;

  ILPInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

class MemoryClauseInitialScheduleStage : public GCNSchedStage {
public:
  bool shouldRevertScheduling(unsigned WavesAfter) override;

  MemoryClauseInitialScheduleStage(GCNSchedStageID StageID,
                                   GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

class GCNPostScheduleDAGMILive final : public ScheduleDAGMI {
private:
  std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;

  bool HasIGLPInstrs = false;

public:
  void schedule() override;

  void finalizeSchedule() override;

  GCNPostScheduleDAGMILive(MachineSchedContext *C,
                           std::unique_ptr<MachineSchedStrategy> S,
                           bool RemoveKillFlags);
};

} // End namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H