LLVM 19.0.0git
GCNSchedStrategy.h
Go to the documentation of this file.
1//===-- GCNSchedStrategy.h - GCN Scheduler Strategy -*- C++ -*-------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
14#define LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
15
16#include "GCNRegPressure.h"
17#include "llvm/ADT/MapVector.h"
19
20namespace llvm {
21
22class SIMachineFunctionInfo;
23class SIRegisterInfo;
24class GCNSubtarget;
25class GCNSchedStage;
26
27enum class GCNSchedStageID : unsigned {
33};
34
35#ifndef NDEBUG
36raw_ostream &operator<<(raw_ostream &OS, const GCNSchedStageID &StageID);
37#endif
38
39/// This is a minimal scheduler strategy. The main difference between this
40/// and the GenericScheduler is that GCNSchedStrategy uses different
41/// heuristics to determine excess/critical pressure sets.
43protected:
44 SUnit *pickNodeBidirectional(bool &IsTopNode);
45
46 void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy,
47 const RegPressureTracker &RPTracker,
48 SchedCandidate &Cand);
49
50 void initCandidate(SchedCandidate &Cand, SUnit *SU,
51 bool AtTop, const RegPressureTracker &RPTracker,
52 const SIRegisterInfo *SRI,
53 unsigned SGPRPressure, unsigned VGPRPressure);
54
55 std::vector<unsigned> Pressure;
56
57 std::vector<unsigned> MaxPressure;
58
60
62
64
66
67 // Scheduling stages for this strategy.
69
70 // Pointer to the current SchedStageID.
72
73public:
74 // schedule() has seen register pressure over the critical limits and had to
75 // track register pressure for actual scheduling heuristics.
77
78 // Schedule known to have excess register pressure. Be more conservative in
79 // increasing ILP and preserving VGPRs.
80 bool KnownExcessRP = false;
81
82 // An error margin is necessary because of poor performance of the generic RP
83 // tracker and can be adjusted up for tuning heuristics to try and more
84 // aggressively reduce register pressure.
85 unsigned ErrorMargin = 3;
86
87 // Bias for SGPR limits under a high register pressure.
88 const unsigned HighRPSGPRBias = 7;
89
90 // Bias for VGPR limits under a high register pressure.
91 const unsigned HighRPVGPRBias = 7;
92
94
96
97 unsigned SGPRLimitBias = 0;
98
99 unsigned VGPRLimitBias = 0;
100
102
103 SUnit *pickNode(bool &IsTopNode) override;
104
105 void initialize(ScheduleDAGMI *DAG) override;
106
107 unsigned getTargetOccupancy() { return TargetOccupancy; }
108
109 void setTargetOccupancy(unsigned Occ) { TargetOccupancy = Occ; }
110
112
113 // Advances stage. Returns true if there are remaining stages.
114 bool advanceStage();
115
116 bool hasNextStage() const;
117
119};
120
121/// The goal of this scheduling strategy is to maximize kernel occupancy (i.e.
122/// maximum number of waves per simd).
124public:
126};
127
128/// The goal of this scheduling strategy is to maximize ILP for a single wave
129/// (i.e. latency hiding).
131protected:
132 bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
133 SchedBoundary *Zone) const override;
134
135public:
137};
138
140 unsigned ScheduleLength;
141 unsigned BubbleCycles;
142
143public:
145 ScheduleMetrics(unsigned L, unsigned BC)
146 : ScheduleLength(L), BubbleCycles(BC) {}
147 unsigned getLength() const { return ScheduleLength; }
148 unsigned getBubbles() const { return BubbleCycles; }
149 unsigned getMetric() const {
150 unsigned Metric = (BubbleCycles * ScaleFactor) / ScheduleLength;
151 // Metric is zero if the amount of bubbles is less than 1% which is too
152 // small. So, return 1.
153 return Metric ? Metric : 1;
154 }
155 static const unsigned ScaleFactor;
156};
157
159 dbgs() << "\n Schedule Metric (scaled by "
161 << " ) is: " << Sm.getMetric() << " [ " << Sm.getBubbles() << "/"
162 << Sm.getLength() << " ]\n";
163 return OS;
164}
165
167 friend class GCNSchedStage;
171 friend class PreRARematStage;
173
174 const GCNSubtarget &ST;
175
177
178 // Occupancy target at the beginning of function scheduling cycle.
179 unsigned StartingOccupancy;
180
181 // Minimal real occupancy recorded for the function.
182 unsigned MinOccupancy;
183
184 // Vector of regions recorded for later rescheduling.
186 MachineBasicBlock::iterator>, 32> Regions;
187
188 // Records if a region is not yet scheduled, or schedule has been reverted,
189 // or we generally desire to reschedule it.
190 BitVector RescheduleRegions;
191
192 // Record regions with high register pressure.
193 BitVector RegionsWithHighRP;
194
195 // Record regions with excess register pressure over the physical register
196 // limit. Register pressure in these regions usually will result in spilling.
197 BitVector RegionsWithExcessRP;
198
199 // Regions that have the same occupancy as the latest MinOccupancy.
200 BitVector RegionsWithMinOcc;
201
202 // Regions that have IGLP instructions (SCHED_GROUP_BARRIER or IGLP_OPT).
203 BitVector RegionsWithIGLPInstrs;
204
205 // Region live-in cache.
207
208 // Region pressure cache.
210
211 // Temporary basic block live-in cache.
213
215
217
218 // Return current region pressure.
219 GCNRegPressure getRealRegPressure(unsigned RegionIdx) const;
220
221 // Compute and cache live-ins and pressure for all regions in block.
222 void computeBlockPressure(unsigned RegionIdx, const MachineBasicBlock *MBB);
223
224 // Update region boundaries when removing MI or inserting NewMI before MI.
225 void updateRegionBoundaries(
227 MachineBasicBlock::iterator>> &RegionBoundaries,
229 bool Removing = false);
230
231 void runSchedStages();
232
233 std::unique_ptr<GCNSchedStage> createSchedStage(GCNSchedStageID SchedStageID);
234
235public:
237 std::unique_ptr<MachineSchedStrategy> S);
238
239 void schedule() override;
240
241 void finalizeSchedule() override;
242};
243
244// GCNSchedStrategy applies multiple scheduling stages to a function.
246protected:
248
250
252
254
256
258
259 // The current block being scheduled.
261
262 // Current region index.
263 unsigned RegionIdx = 0;
264
265 // Record the original order of instructions before scheduling.
266 std::vector<MachineInstr *> Unsched;
267
268 // RP before scheduling the current region.
270
271 // RP after scheduling the current region.
273
274 std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;
275
277
278public:
279 // Initialize state for a scheduling stage. Returns false if the current stage
280 // should be skipped.
281 virtual bool initGCNSchedStage();
282
283 // Finalize state after finishing a scheduling pass on the function.
284 virtual void finalizeGCNSchedStage();
285
286 // Setup for scheduling a region. Returns false if the current region should
287 // be skipped.
288 virtual bool initGCNRegion();
289
290 // Track whether a new region is also a new MBB.
291 void setupNewBlock();
292
293 // Finalize state after scheduling a region.
294 void finalizeGCNRegion();
295
296 // Check result of scheduling.
297 void checkScheduling();
298
299 // Computes the virtual execution time, in clocks, of the given schedule.
300 ScheduleMetrics getScheduleMetrics(const std::vector<SUnit> &InputSchedule);
302 unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
303 DenseMap<unsigned, unsigned> &ReadyCycles,
304 const TargetSchedModel &SM);
305
306 // Returns true if scheduling should be reverted.
307 virtual bool shouldRevertScheduling(unsigned WavesAfter);
308
309 // Returns true if current region has known excess pressure.
310 bool isRegionWithExcessRP() const {
311 return DAG.RegionsWithExcessRP[RegionIdx];
312 }
313
314 // Returns true if the new schedule may result in more spilling.
315 bool mayCauseSpilling(unsigned WavesAfter);
316
317 // Attempt to revert scheduling for this region.
318 void revertScheduling();
319
321
322 virtual ~GCNSchedStage() = default;
323};
324
326public:
327 bool shouldRevertScheduling(unsigned WavesAfter) override;
328
331};
332
334private:
335 // Save the initial occupancy before starting this stage.
336 unsigned InitialOccupancy;
337
338public:
339 bool initGCNSchedStage() override;
340
341 void finalizeGCNSchedStage() override;
342
343 bool initGCNRegion() override;
344
345 bool shouldRevertScheduling(unsigned WavesAfter) override;
346
349};
350
351 // Retry function scheduling if the resulting occupancy turned out to be
352 // lower than the one used for other scheduling passes. This will give more
353 // freedom to schedule low register pressure blocks.
355public:
356 bool initGCNSchedStage() override;
357
358 bool initGCNRegion() override;
359
360 bool shouldRevertScheduling(unsigned WavesAfter) override;
361
364};
365
367private:
368 // Each region at MinOccupancy will have its own list of trivially
369 // rematerializable instructions we can remat to reduce RP. The list maps an
370 // instruction to the position we should remat before, usually the MI using
371 // the rematerializable instruction.
373 RematerializableInsts;
374
375 // Map a trivially rematerializable def to a list of regions at MinOccupancy
376 // that have the defined reg as a live-in.
378
379 // Collect all trivially rematerializable VGPR instructions with a single def
380 // and single use outside the defining block into RematerializableInsts.
381 void collectRematerializableInstructions();
382
383 bool isTriviallyReMaterializable(const MachineInstr &MI);
384
385 // TODO: Should also attempt to reduce RP of SGPRs and AGPRs
386 // Attempt to reduce RP of VGPR by sinking trivially rematerializable
387 // instructions. Returns true if we were able to sink instruction(s).
388 bool sinkTriviallyRematInsts(const GCNSubtarget &ST,
389 const TargetInstrInfo *TII);
390
391public:
392 bool initGCNSchedStage() override;
393
394 bool initGCNRegion() override;
395
396 bool shouldRevertScheduling(unsigned WavesAfter) override;
397
400};
401
403public:
404 bool shouldRevertScheduling(unsigned WavesAfter) override;
405
408};
409
411private:
412 std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;
413
414 bool HasIGLPInstrs = false;
415
416public:
417 void schedule() override;
418
419 void finalizeSchedule() override;
420
422 std::unique_ptr<MachineSchedStrategy> S,
423 bool RemoveKillFlags);
424};
425
426} // End namespace llvm
427
428#endif // LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
MachineBasicBlock & MBB
This file defines the GCNRegPressure class, which tracks register pressure by bookkeeping number of S...
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
This file implements a map that provides insertion order iteration.
raw_pwrite_stream & OS
bool shouldRevertScheduling(unsigned WavesAfter) override
ClusteredLowOccStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
The goal of this scheduling strategy is to maximize ILP for a single wave (i.e.
bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override
Apply a set of heuristics to a new candidate.
The goal of this scheduling strategy is to maximize kernel occupancy (i.e.
void finalizeSchedule() override
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
virtual bool initGCNRegion()
GCNSchedStrategy & S
GCNRegPressure PressureBefore
bool isRegionWithExcessRP() const
bool mayCauseSpilling(unsigned WavesAfter)
ScheduleMetrics getScheduleMetrics(const std::vector< SUnit > &InputSchedule)
GCNScheduleDAGMILive & DAG
const GCNSchedStageID StageID
std::vector< MachineInstr * > Unsched
GCNRegPressure PressureAfter
MachineFunction & MF
SIMachineFunctionInfo & MFI
unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle, DenseMap< unsigned, unsigned > &ReadyCycles, const TargetSchedModel &SM)
virtual ~GCNSchedStage()=default
virtual void finalizeGCNSchedStage()
virtual bool initGCNSchedStage()
virtual bool shouldRevertScheduling(unsigned WavesAfter)
std::vector< std::unique_ptr< ScheduleDAGMutation > > SavedMutations
MachineBasicBlock * CurrentMBB
const GCNSubtarget & ST
This is a minimal scheduler strategy.
const unsigned HighRPSGPRBias
SmallVector< GCNSchedStageID, 4 > SchedStages
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Cand)
SUnit * pickNodeBidirectional(bool &IsTopNode)
std::vector< unsigned > MaxPressure
GCNSchedStageID getCurrentStage()
MachineFunction * MF
SmallVectorImpl< GCNSchedStageID >::iterator CurrentStage
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, const SIRegisterInfo *SRI, unsigned SGPRPressure, unsigned VGPRPressure)
std::vector< unsigned > Pressure
void initialize(ScheduleDAGMI *DAG) override
Initialize the strategy after building the DAG for a new region.
const unsigned HighRPVGPRBias
void setTargetOccupancy(unsigned Occ)
SUnit * pickNode(bool &IsTopNode) override
Pick the best node to balance the schedule. Implements MachineSchedStrategy.
GCNSchedStageID getNextStage() const
void finalizeSchedule() override
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
GenericScheduler shrinks the unscheduled zone using heuristics to balance the schedule.
ScheduleDAGMILive * DAG
bool shouldRevertScheduling(unsigned WavesAfter) override
ILPInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
MachineInstrBundleIterator< MachineInstr > iterator
Representation of each machine instruction.
Definition: MachineInstr.h:69
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
bool shouldRevertScheduling(unsigned WavesAfter) override
OccInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
PreRARematStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
bool shouldRevertScheduling(unsigned WavesAfter) override
bool initGCNRegion() override
bool initGCNSchedStage() override
Track the current register pressure at some position in the instruction stream, and remember the high...
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
Each Scheduling boundary is associated with ready queues.
bool RemoveKillFlags
True if the DAG builder should remove kill flags (in preparation for rescheduling).
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
unsigned getBubbles() const
ScheduleMetrics(unsigned L, unsigned BC)
unsigned getLength() const
static const unsigned ScaleFactor
unsigned getMetric() const
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
typename SuperClass::iterator iterator
Definition: SmallVector.h:590
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
TargetInstrInfo - Interface to description of machine instruction set.
Provide an instruction scheduling machine model to CodeGen passes.
UnclusteredHighRPStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
bool shouldRevertScheduling(unsigned WavesAfter) override
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:293
Policy for scheduling the next instruction in the candidate's zone.
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of p...
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...