LLVM 23.0.0git
SIMachineScheduler.h
Go to the documentation of this file.
1//===-- SIMachineScheduler.h - SI Scheduler Interface -----------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// SI Machine Scheduler interface
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
15#define LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
16
20#include <cstdint>
21#include <set>
22#include <vector>
23
24namespace llvm {
25
26class SIInstrInfo;
27class SIRegisterInfo;
28class SIScheduleDAGMI;
30
39
41 // The reason for this candidate.
43
44 // Set of reasons that apply to multiple candidates.
46
48
49 bool isRepeat(SIScheduleCandReason R) { return RepeatReasonSet & (1 << R); }
51};
52
57
59 SIScheduleDAGMI *DAG;
61
62 std::vector<SUnit*> SUnits;
63 std::map<unsigned, unsigned> NodeNum2Index;
64 std::vector<SUnit*> TopReadySUs;
65 std::vector<SUnit*> ScheduledSUnits;
66
67 /// The top of the unscheduled zone.
68 IntervalPressure TopPressure;
69 RegPressureTracker TopRPTracker;
70
71 // Pressure: number of said class of registers needed to
72 // store the live virtual and real registers.
73 // We do care only of SGPR32 and VGPR32 and do track only virtual registers.
74 // Pressure of additional registers required inside the block.
75 std::vector<unsigned> InternalAdditionalPressure;
76 // Pressure of input and output registers
77 std::vector<unsigned> LiveInPressure;
78 std::vector<unsigned> LiveOutPressure;
79 // Registers required by the block, and outputs.
80 // We do track only virtual registers.
81 // Note that some registers are not 32 bits,
82 // and thus the pressure is not equal
83 // to the number of live registers.
84 std::set<Register> LiveInRegs;
85 std::set<Register> LiveOutRegs;
86
87 bool Scheduled = false;
88 bool HighLatencyBlock = false;
89
90 std::vector<unsigned> HasLowLatencyNonWaitedParent;
91
92 // Unique ID, the index of the Block in the SIScheduleDAGMI Blocks table.
93 unsigned ID;
94
95 std::vector<SIScheduleBlock*> Preds; // All blocks predecessors.
96 // All blocks successors, and the kind of link
97 std::vector<std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind>> Succs;
98 unsigned NumHighLatencySuccessors = 0;
99
100public:
102 unsigned ID):
103 DAG(DAG), BC(BC), TopRPTracker(TopPressure), ID(ID) {}
104
105 ~SIScheduleBlock() = default;
106
107 unsigned getID() const { return ID; }
108
109 /// Functions for Block construction.
110 void addUnit(SUnit *SU);
111
112 // When all SUs have been added.
113 void finalizeUnits();
114
115 // Add block pred, which has instruction predecessor of SU.
116 void addPred(SIScheduleBlock *Pred);
118
119 const std::vector<SIScheduleBlock*>& getPreds() const { return Preds; }
121 getSuccs() const { return Succs; }
122
123 unsigned Height = 0; // Maximum topdown path length to block without outputs
124 unsigned Depth = 0; // Maximum bottomup path length to block without inputs
125
127 return NumHighLatencySuccessors;
128 }
129
130 bool isHighLatencyBlock() { return HighLatencyBlock; }
131
132 // This is approximative.
133 // Ideally should take into accounts some instructions (rcp, etc)
134 // are 4 times slower.
135 int getCost() { return SUnits.size(); }
136
137 // The block Predecessors and Successors must be all registered
138 // before fastSchedule().
139 // Fast schedule with no particular requirement.
140 void fastSchedule();
141
142 std::vector<SUnit*> getScheduledUnits() { return ScheduledSUnits; }
143
144 // Complete schedule that will try to minimize reg pressure and
145 // low latencies, and will fill liveins and liveouts.
146 // Needs all MIs to be grouped between BeginBlock and EndBlock.
147 // The MIs can be moved after the scheduling,
148 // it is just used to allow correct track of live registers.
151
152 bool isScheduled() { return Scheduled; }
153
154 // Needs the block to be scheduled inside
155 // TODO: find a way to compute it.
156 std::vector<unsigned> &getInternalAdditionalRegUsage() {
157 return InternalAdditionalPressure;
158 }
159
160 std::set<Register> &getInRegs() { return LiveInRegs; }
161 std::set<Register> &getOutRegs() { return LiveOutRegs; }
162
163 void printDebug(bool Full);
164
165private:
166 struct SISchedCandidate : SISchedulerCandidate {
167 // The best SUnit candidate.
168 SUnit *SU = nullptr;
169
170 unsigned SGPRUsage;
171 unsigned VGPRUsage;
172 bool IsLowLatency;
173 unsigned LowLatencyOffset;
174 bool HasLowLatencyNonWaitedParent;
175
176 SISchedCandidate() = default;
177
178 bool isValid() const { return SU; }
179
180 // Copy the status of another candidate without changing policy.
181 void setBest(SISchedCandidate &Best) {
182 assert(Best.Reason != NoCand && "uninitialized Sched candidate");
183 SU = Best.SU;
184 Reason = Best.Reason;
185 SGPRUsage = Best.SGPRUsage;
186 VGPRUsage = Best.VGPRUsage;
187 IsLowLatency = Best.IsLowLatency;
188 LowLatencyOffset = Best.LowLatencyOffset;
189 HasLowLatencyNonWaitedParent = Best.HasLowLatencyNonWaitedParent;
190 }
191 };
192
193 void undoSchedule();
194
195 void undoReleaseSucc(SUnit *SU, SDep *SuccEdge);
196 void releaseSucc(SUnit *SU, SDep *SuccEdge);
197 // InOrOutBlock: restrict to links pointing inside the block (true),
198 // or restrict to links pointing outside the block (false).
199 void releaseSuccessors(SUnit *SU, bool InOrOutBlock);
200
201 void nodeScheduled(SUnit *SU);
202 void tryCandidateTopDown(SISchedCandidate &Cand, SISchedCandidate &TryCand);
203 SUnit* pickNode();
204 void traceCandidate(const SISchedCandidate &Cand);
205 void initRegPressure(MachineBasicBlock::iterator BeginBlock,
207};
208
210 std::vector<SIScheduleBlock*> Blocks;
211 std::vector<int> TopDownIndex2Block;
212 std::vector<int> TopDownBlock2Index;
213};
214
220
222 SIScheduleDAGMI *DAG;
223 // unique_ptr handles freeing memory for us.
224 std::vector<std::unique_ptr<SIScheduleBlock>> BlockPtrs;
226 SIScheduleBlocks> Blocks;
227 std::vector<SIScheduleBlock*> CurrentBlocks;
228 std::vector<int> Node2CurrentBlock;
229
230 // Topological sort
231 // Maps topological index to the node number.
232 std::vector<int> TopDownIndex2Block;
233 std::vector<int> TopDownBlock2Index;
234 std::vector<int> BottomUpIndex2Block;
235
236 // 0 -> Color not given.
237 // 1 to SUnits.size() -> Reserved group (you should only add elements to them).
238 // Above -> Other groups.
239 int NextReservedID;
240 int NextNonReservedID;
241 std::vector<int> CurrentColoring;
242 std::vector<int> CurrentTopDownReservedDependencyColoring;
243 std::vector<int> CurrentBottomUpReservedDependencyColoring;
244
245public:
247
250
251 bool isSUInBlock(SUnit *SU, unsigned ID);
252
253private:
254 // Give a Reserved color to every high latency.
255 void colorHighLatenciesAlone();
256
257 // Create groups of high latencies with a Reserved color.
258 void colorHighLatenciesGroups();
259
260 // Compute coloring for topdown and bottom traversals with
261 // different colors depending on dependencies on Reserved colors.
262 void colorComputeReservedDependencies();
263
264 // Give color to all non-colored SUs according to Reserved groups dependencies.
265 void colorAccordingToReservedDependencies();
266
267 // Divides Blocks having no bottom up or top down dependencies on Reserved groups.
268 // The new colors are computed according to the dependencies on the other blocks
269 // formed with colorAccordingToReservedDependencies.
270 void colorEndsAccordingToDependencies();
271
272 // Cut groups into groups with SUs in consecutive order (except for Reserved groups).
273 void colorForceConsecutiveOrderInGroup();
274
275 // Merge Constant loads that have all their users into another group to the group.
276 // (TODO: else if all their users depend on the same group, put them there)
277 void colorMergeConstantLoadsNextGroup();
278
279 // Merge SUs that have all their users into another group to the group
280 void colorMergeIfPossibleNextGroup();
281
282 // Merge SUs that have all their users into another group to the group,
283 // but only for Reserved groups.
284 void colorMergeIfPossibleNextGroupOnlyForReserved();
285
286 // Merge SUs that have all their users into another group to the group,
287 // but only if the group is no more than a few SUs.
288 void colorMergeIfPossibleSmallGroupsToNextGroup();
289
290 // Divides Blocks with important size.
291 // Idea of implementation: attribute new colors depending on topdown and
292 // bottom up links to other blocks.
293 void cutHugeBlocks();
294
295 // Put in one group all instructions with no users in this scheduling region
296 // (we'd want these groups be at the end).
297 void regroupNoUserInstructions();
298
299 // Give Reserved color to export instructions
300 void colorExports();
301
302 void createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant);
303
304 void topologicalSort();
305
306 void scheduleInsideBlocks();
307
308 void fillStats();
309};
310
316
318 SIScheduleDAGMI *DAG;
320 std::vector<SIScheduleBlock*> Blocks;
321
322 std::vector<std::map<Register, unsigned>> LiveOutRegsNumUsages;
323 std::set<Register> LiveRegs;
324 // Num of schedulable unscheduled blocks reading the register.
325 std::map<Register, unsigned> LiveRegsConsumers;
326
327 std::vector<unsigned> LastPosHighLatencyParentScheduled;
328 int LastPosWaitedHighLatency;
329
330 std::vector<SIScheduleBlock*> BlocksScheduled;
331 unsigned NumBlockScheduled;
332 std::vector<SIScheduleBlock*> ReadyBlocks;
333
334 unsigned VregCurrentUsage;
335 unsigned SregCurrentUsage;
336
337 // Currently is only approximation.
338 unsigned maxVregUsage;
339 unsigned maxSregUsage;
340
341 std::vector<unsigned> BlockNumPredsLeft;
342 std::vector<unsigned> BlockNumSuccsLeft;
343
344public:
347 SIScheduleBlocks BlocksStruct);
349
350 std::vector<SIScheduleBlock*> getBlocks() { return BlocksScheduled; }
351
352 unsigned getVGPRUsage() { return maxVregUsage; }
353 unsigned getSGPRUsage() { return maxSregUsage; }
354
355private:
356 struct SIBlockSchedCandidate : SISchedulerCandidate {
357 // The best Block candidate.
358 SIScheduleBlock *Block = nullptr;
359
360 bool IsHighLatency;
361 int VGPRUsageDiff;
362 unsigned NumSuccessors;
363 unsigned NumHighLatencySuccessors;
364 unsigned LastPosHighLatParentScheduled;
365 unsigned Height;
366
367 SIBlockSchedCandidate() = default;
368
369 bool isValid() const { return Block; }
370
371 // Copy the status of another candidate without changing policy.
372 void setBest(SIBlockSchedCandidate &Best) {
373 assert(Best.Reason != NoCand && "uninitialized Sched candidate");
374 Block = Best.Block;
375 Reason = Best.Reason;
376 IsHighLatency = Best.IsHighLatency;
377 VGPRUsageDiff = Best.VGPRUsageDiff;
378 NumSuccessors = Best.NumSuccessors;
379 NumHighLatencySuccessors = Best.NumHighLatencySuccessors;
380 LastPosHighLatParentScheduled = Best.LastPosHighLatParentScheduled;
381 Height = Best.Height;
382 }
383 };
384
385 bool tryCandidateLatency(SIBlockSchedCandidate &Cand,
386 SIBlockSchedCandidate &TryCand);
387 bool tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
388 SIBlockSchedCandidate &TryCand);
389 SIScheduleBlock *pickBlock();
390
391 void addLiveRegs(std::set<VirtRegOrUnit> &Regs);
392 void decreaseLiveRegs(SIScheduleBlock *Block, std::set<Register> &Regs);
393 void releaseBlockSuccs(SIScheduleBlock *Parent);
394 void blockScheduled(SIScheduleBlock *Block);
395
396 // Check register pressure change
397 // by scheduling a block with these LiveIn and LiveOut.
398 std::vector<int> checkRegUsageImpact(std::set<Register> &InRegs,
399 std::set<Register> &OutRegs);
400
401 void schedule();
402};
403
405 std::vector<unsigned> SUs;
406 unsigned MaxSGPRUsage;
407 unsigned MaxVGPRUsage;
408};
409
411 SIScheduleDAGMI *DAG;
412 SIScheduleBlockCreator BlockCreator;
413
414public:
415 SIScheduler(SIScheduleDAGMI *DAG) : DAG(DAG), BlockCreator(DAG) {}
416
417 ~SIScheduler() = default;
418
421 SISchedulerBlockSchedulerVariant ScheduleVariant);
422};
423
424class SIScheduleDAGMI final : public ScheduleDAGMILive {
425 const SIInstrInfo *SITII;
426 const SIRegisterInfo *SITRI;
427
428 std::vector<SUnit> SUnitsLinksBackup;
429
430 // For moveLowLatencies. After all Scheduling variants are tested.
431 std::vector<unsigned> ScheduledSUnits;
432 std::vector<unsigned> ScheduledSUnitsInv;
433
434public:
436
438
439 // Entry point for the schedule.
440 void schedule() override;
441
442 // To init Block's RPTracker.
446
450 LiveIntervals *getLIS() { return LIS; }
452 const TargetRegisterInfo *getTRI() { return TRI; }
454 SUnit &getEntrySU() { return EntrySU; }
455 SUnit& getExitSU() { return ExitSU; }
456
457 void restoreSULinksLeft();
458
459 template<typename _Iterator> void fillVgprSgprCost(_Iterator First,
460 _Iterator End,
461 unsigned &VgprUsage,
462 unsigned &SgprUsage);
463
464 std::set<VirtRegOrUnit> getInRegs() {
465 std::set<VirtRegOrUnit> InRegs;
466 for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
467 InRegs.insert(RegMaskPair.VRegOrUnit);
468 }
469 return InRegs;
470 }
471
472 std::set<VirtRegOrUnit> getOutRegs() {
473 std::set<VirtRegOrUnit> OutRegs;
474 for (const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
475 OutRegs.insert(RegMaskPair.VRegOrUnit);
476 }
477 return OutRegs;
478 };
479
480private:
481 void topologicalSort();
482 // After scheduling is done, improve low latency placements.
483 void moveLowLatencies();
484
485public:
486 // Some stats for scheduling inside blocks.
487 std::vector<unsigned> IsLowLatencySU;
488 std::vector<unsigned> LowLatencyOffset;
489 std::vector<unsigned> IsHighLatencySU;
490 // Topological sort
491 // Maps topological index to the node number.
492 std::vector<int> TopDownIndex2SU;
493 std::vector<int> BottomUpIndex2SU;
494};
495
496} // end namespace llvm
497
498#endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
MachineInstrBundleIterator< MachineInstr > iterator
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Track the current register pressure at some position in the instruction stream, and remember the high...
bool isSUInBlock(SUnit *SU, unsigned ID)
SIScheduleBlockCreator(SIScheduleDAGMI *DAG)
SIScheduleBlocks getBlocks(SISchedulerBlockCreatorVariant BlockVariant)
SIScheduleBlockScheduler(SIScheduleDAGMI *DAG, SISchedulerBlockSchedulerVariant Variant, SIScheduleBlocks BlocksStruct)
std::vector< SIScheduleBlock * > getBlocks()
unsigned getNumHighLatencySuccessors() const
SIScheduleBlock(SIScheduleDAGMI *DAG, SIScheduleBlockCreator *BC, unsigned ID)
ArrayRef< std::pair< SIScheduleBlock *, SIScheduleBlockLinkKind > > getSuccs() const
~SIScheduleBlock()=default
std::vector< unsigned > & getInternalAdditionalRegUsage()
const std::vector< SIScheduleBlock * > & getPreds() const
void addPred(SIScheduleBlock *Pred)
std::set< Register > & getOutRegs()
std::vector< SUnit * > getScheduledUnits()
void addSucc(SIScheduleBlock *Succ, SIScheduleBlockLinkKind Kind)
void schedule(MachineBasicBlock::iterator BeginBlock, MachineBasicBlock::iterator EndBlock)
void addUnit(SUnit *SU)
Functions for Block construction.
std::set< Register > & getInRegs()
std::vector< int > BottomUpIndex2SU
std::vector< unsigned > IsHighLatencySU
std::vector< unsigned > LowLatencyOffset
std::vector< int > TopDownIndex2SU
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
ScheduleDAGTopologicalSort * GetTopo()
void fillVgprSgprCost(_Iterator First, _Iterator End, unsigned &VgprUsage, unsigned &SgprUsage)
MachineRegisterInfo * getMRI()
SIScheduleDAGMI(MachineSchedContext *C)
MachineBasicBlock::iterator getCurrentBottom()
std::vector< unsigned > IsLowLatencySU
MachineBasicBlock::iterator getCurrentTop()
MachineBasicBlock * getBB()
void initRPTracker(RegPressureTracker &RPTracker)
std::set< VirtRegOrUnit > getOutRegs()
~SIScheduleDAGMI() override
const TargetRegisterInfo * getTRI()
std::set< VirtRegOrUnit > getInRegs()
struct SIScheduleBlockResult scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant, SISchedulerBlockSchedulerVariant ScheduleVariant)
~SIScheduler()=default
SIScheduler(SIScheduleDAGMI *DAG)
Scheduling unit. This is a node in the scheduling DAG.
MachineBasicBlock * BB
The block in which to insert instructions.
ScheduleDAGTopologicalSort Topo
Topo - A topological ordering for SUnits which permits fast IsReachable and similar queries.
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
ScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S)
RegisterClassInfo * RegClassInfo
RegPressureTracker RPTracker
MachineBasicBlock::iterator CurrentBottom
The bottom of the unscheduled zone.
MachineBasicBlock::iterator CurrentTop
The top of the unscheduled zone.
This class can compute a topological ordering for SUnits and provides methods for dynamically updatin...
MachineRegisterInfo & MRI
Virtual/real register map.
const TargetRegisterInfo * TRI
Target processor register info.
SUnit EntrySU
Special node for the region entry.
MachineFunction & MF
Machine function.
SUnit ExitSU
Special node for the region exit.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
SISchedulerBlockSchedulerVariant
SISchedulerBlockCreatorVariant
@ LatenciesAlonePlusConsecutive
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:221
RegisterPressure computed within a region of instructions delimited by TopIdx and BottomIdx.
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
std::vector< unsigned > SUs
std::vector< int > TopDownIndex2Block
std::vector< SIScheduleBlock * > Blocks
std::vector< int > TopDownBlock2Index
SIScheduleCandReason Reason
bool isRepeat(SIScheduleCandReason R)
void setRepeat(SIScheduleCandReason R)