Line data Source code
1 : //===-- SIMachineScheduler.h - SI Scheduler Interface -----------*- C++ -*-===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : /// \file
11 : /// SI Machine Scheduler interface
12 : //
13 : //===----------------------------------------------------------------------===//
14 :
15 : #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
16 : #define LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
17 :
18 : #include "SIInstrInfo.h"
19 : #include "llvm/CodeGen/MachineBasicBlock.h"
20 : #include "llvm/CodeGen/MachineScheduler.h"
21 : #include "llvm/CodeGen/RegisterPressure.h"
22 : #include "llvm/CodeGen/ScheduleDAG.h"
23 : #include <cassert>
24 : #include <cstdint>
25 : #include <map>
26 : #include <memory>
27 : #include <set>
28 : #include <vector>
29 :
30 : namespace llvm {
31 :
32 : enum SIScheduleCandReason {
33 : NoCand,
34 : RegUsage,
35 : Latency,
36 : Successor,
37 : Depth,
38 : NodeOrder
39 : };
40 :
41 : struct SISchedulerCandidate {
42 : // The reason for this candidate.
43 : SIScheduleCandReason Reason = NoCand;
44 :
45 : // Set of reasons that apply to multiple candidates.
46 : uint32_t RepeatReasonSet = 0;
47 :
48 25 : SISchedulerCandidate() = default;
49 :
50 : bool isRepeat(SIScheduleCandReason R) { return RepeatReasonSet & (1 << R); }
51 15 : void setRepeat(SIScheduleCandReason R) { RepeatReasonSet |= (1 << R); }
52 : };
53 :
54 : class SIScheduleDAGMI;
55 : class SIScheduleBlockCreator;
56 :
57 : enum SIScheduleBlockLinkKind {
58 : NoData,
59 : Data
60 : };
61 :
62 : class SIScheduleBlock {
63 : SIScheduleDAGMI *DAG;
64 : SIScheduleBlockCreator *BC;
65 :
66 : std::vector<SUnit*> SUnits;
67 : std::map<unsigned, unsigned> NodeNum2Index;
68 : std::vector<SUnit*> TopReadySUs;
69 : std::vector<SUnit*> ScheduledSUnits;
70 :
71 : /// The top of the unscheduled zone.
72 : IntervalPressure TopPressure;
73 : RegPressureTracker TopRPTracker;
74 :
75 : // Pressure: number of said class of registers needed to
76 : // store the live virtual and real registers.
77 : // We do care only of SGPR32 and VGPR32 and do track only virtual registers.
78 : // Pressure of additional registers required inside the block.
79 : std::vector<unsigned> InternalAdditionnalPressure;
80 : // Pressure of input and output registers
81 : std::vector<unsigned> LiveInPressure;
82 : std::vector<unsigned> LiveOutPressure;
83 : // Registers required by the block, and outputs.
84 : // We do track only virtual registers.
85 : // Note that some registers are not 32 bits,
86 : // and thus the pressure is not equal
87 : // to the number of live registers.
88 : std::set<unsigned> LiveInRegs;
89 : std::set<unsigned> LiveOutRegs;
90 :
91 : bool Scheduled = false;
92 : bool HighLatencyBlock = false;
93 :
94 : std::vector<unsigned> HasLowLatencyNonWaitedParent;
95 :
96 : // Unique ID, the index of the Block in the SIScheduleDAGMI Blocks table.
97 : unsigned ID;
98 :
99 : std::vector<SIScheduleBlock*> Preds; // All blocks predecessors.
100 : // All blocks successors, and the kind of link
101 : std::vector<std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind>> Succs;
102 : unsigned NumHighLatencySuccessors = 0;
103 :
104 : public:
105 5 : SIScheduleBlock(SIScheduleDAGMI *DAG, SIScheduleBlockCreator *BC,
106 5 : unsigned ID):
107 20 : DAG(DAG), BC(BC), TopRPTracker(TopPressure), ID(ID) {}
108 :
109 10 : ~SIScheduleBlock() = default;
110 :
111 0 : unsigned getID() const { return ID; }
112 :
113 : /// Functions for Block construction.
114 : void addUnit(SUnit *SU);
115 :
116 : // When all SUs have been added.
117 : void finalizeUnits();
118 :
119 : // Add block pred, which has instruction predecessor of SU.
120 : void addPred(SIScheduleBlock *Pred);
121 : void addSucc(SIScheduleBlock *Succ, SIScheduleBlockLinkKind Kind);
122 :
123 : const std::vector<SIScheduleBlock*>& getPreds() const { return Preds; }
124 : ArrayRef<std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind>>
125 : getSuccs() const { return Succs; }
126 :
127 : unsigned Height; // Maximum topdown path length to block without outputs
128 : unsigned Depth; // Maximum bottomup path length to block without inputs
129 :
130 0 : unsigned getNumHighLatencySuccessors() const {
131 0 : return NumHighLatencySuccessors;
132 : }
133 :
134 0 : bool isHighLatencyBlock() { return HighLatencyBlock; }
135 :
136 : // This is approximative.
137 : // Ideally should take into accounts some instructions (rcp, etc)
138 : // are 4 times slower.
139 8 : int getCost() { return SUnits.size(); }
140 :
141 : // The block Predecessors and Successors must be all registered
142 : // before fastSchedule().
143 : // Fast schedule with no particular requirement.
144 : void fastSchedule();
145 :
146 15 : std::vector<SUnit*> getScheduledUnits() { return ScheduledSUnits; }
147 :
148 : // Complete schedule that will try to minimize reg pressure and
149 : // low latencies, and will fill liveins and liveouts.
150 : // Needs all MIs to be grouped between BeginBlock and EndBlock.
151 : // The MIs can be moved after the scheduling,
152 : // it is just used to allow correct track of live registers.
153 : void schedule(MachineBasicBlock::iterator BeginBlock,
154 : MachineBasicBlock::iterator EndBlock);
155 :
156 : bool isScheduled() { return Scheduled; }
157 :
158 : // Needs the block to be scheduled inside
159 : // TODO: find a way to compute it.
160 : std::vector<unsigned> &getInternalAdditionnalRegUsage() {
161 : return InternalAdditionnalPressure;
162 : }
163 :
164 5 : std::set<unsigned> &getInRegs() { return LiveInRegs; }
165 10 : std::set<unsigned> &getOutRegs() { return LiveOutRegs; }
166 :
167 : void printDebug(bool Full);
168 :
169 : private:
170 : struct SISchedCandidate : SISchedulerCandidate {
171 : // The best SUnit candidate.
172 : SUnit *SU = nullptr;
173 :
174 : unsigned SGPRUsage;
175 : unsigned VGPRUsage;
176 : bool IsLowLatency;
177 : unsigned LowLatencyOffset;
178 : bool HasLowLatencyNonWaitedParent;
179 :
180 16 : SISchedCandidate() = default;
181 :
182 0 : bool isValid() const { return SU; }
183 :
184 : // Copy the status of another candidate without changing policy.
185 : void setBest(SISchedCandidate &Best) {
186 : assert(Best.Reason != NoCand && "uninitialized Sched candidate");
187 19 : SU = Best.SU;
188 19 : Reason = Best.Reason;
189 19 : SGPRUsage = Best.SGPRUsage;
190 19 : VGPRUsage = Best.VGPRUsage;
191 19 : IsLowLatency = Best.IsLowLatency;
192 19 : LowLatencyOffset = Best.LowLatencyOffset;
193 19 : HasLowLatencyNonWaitedParent = Best.HasLowLatencyNonWaitedParent;
194 : }
195 : };
196 :
197 : void undoSchedule();
198 :
199 : void undoReleaseSucc(SUnit *SU, SDep *SuccEdge);
200 : void releaseSucc(SUnit *SU, SDep *SuccEdge);
201 : // InOrOutBlock: restrict to links pointing inside the block (true),
202 : // or restrict to links pointing outside the block (false).
203 : void releaseSuccessors(SUnit *SU, bool InOrOutBlock);
204 :
205 : void nodeScheduled(SUnit *SU);
206 : void tryCandidateTopDown(SISchedCandidate &Cand, SISchedCandidate &TryCand);
207 : void tryCandidateBottomUp(SISchedCandidate &Cand, SISchedCandidate &TryCand);
208 : SUnit* pickNode();
209 : void traceCandidate(const SISchedCandidate &Cand);
210 : void initRegPressure(MachineBasicBlock::iterator BeginBlock,
211 : MachineBasicBlock::iterator EndBlock);
212 : };
213 :
214 : struct SIScheduleBlocks {
215 : std::vector<SIScheduleBlock*> Blocks;
216 : std::vector<int> TopDownIndex2Block;
217 : std::vector<int> TopDownBlock2Index;
218 : };
219 :
220 : enum SISchedulerBlockCreatorVariant {
221 : LatenciesAlone,
222 : LatenciesGrouped,
223 : LatenciesAlonePlusConsecutive
224 : };
225 :
226 4 : class SIScheduleBlockCreator {
227 : SIScheduleDAGMI *DAG;
228 : // unique_ptr handles freeing memory for us.
229 : std::vector<std::unique_ptr<SIScheduleBlock>> BlockPtrs;
230 : std::map<SISchedulerBlockCreatorVariant,
231 : SIScheduleBlocks> Blocks;
232 : std::vector<SIScheduleBlock*> CurrentBlocks;
233 : std::vector<int> Node2CurrentBlock;
234 :
235 : // Topological sort
236 : // Maps topological index to the node number.
237 : std::vector<int> TopDownIndex2Block;
238 : std::vector<int> TopDownBlock2Index;
239 : std::vector<int> BottomUpIndex2Block;
240 :
241 : // 0 -> Color not given.
242 : // 1 to SUnits.size() -> Reserved group (you should only add elements to them).
243 : // Above -> Other groups.
244 : int NextReservedID;
245 : int NextNonReservedID;
246 : std::vector<int> CurrentColoring;
247 : std::vector<int> CurrentTopDownReservedDependencyColoring;
248 : std::vector<int> CurrentBottomUpReservedDependencyColoring;
249 :
250 : public:
251 : SIScheduleBlockCreator(SIScheduleDAGMI *DAG);
252 : ~SIScheduleBlockCreator();
253 :
254 : SIScheduleBlocks
255 : getBlocks(SISchedulerBlockCreatorVariant BlockVariant);
256 :
257 : bool isSUInBlock(SUnit *SU, unsigned ID);
258 :
259 : private:
260 : // Give a Reserved color to every high latency.
261 : void colorHighLatenciesAlone();
262 :
263 : // Create groups of high latencies with a Reserved color.
264 : void colorHighLatenciesGroups();
265 :
266 : // Compute coloring for topdown and bottom traversals with
267 : // different colors depending on dependencies on Reserved colors.
268 : void colorComputeReservedDependencies();
269 :
270 : // Give color to all non-colored SUs according to Reserved groups dependencies.
271 : void colorAccordingToReservedDependencies();
272 :
273 : // Divides Blocks having no bottom up or top down dependencies on Reserved groups.
274 : // The new colors are computed according to the dependencies on the other blocks
275 : // formed with colorAccordingToReservedDependencies.
276 : void colorEndsAccordingToDependencies();
277 :
278 : // Cut groups into groups with SUs in consecutive order (except for Reserved groups).
279 : void colorForceConsecutiveOrderInGroup();
280 :
281 : // Merge Constant loads that have all their users into another group to the group.
282 : // (TODO: else if all their users depend on the same group, put them there)
283 : void colorMergeConstantLoadsNextGroup();
284 :
285 : // Merge SUs that have all their users into another group to the group
286 : void colorMergeIfPossibleNextGroup();
287 :
288 : // Merge SUs that have all their users into another group to the group,
289 : // but only for Reserved groups.
290 : void colorMergeIfPossibleNextGroupOnlyForReserved();
291 :
292 : // Merge SUs that have all their users into another group to the group,
293 : // but only if the group is no more than a few SUs.
294 : void colorMergeIfPossibleSmallGroupsToNextGroup();
295 :
296 : // Divides Blocks with important size.
297 : // Idea of implementation: attribute new colors depending on topdown and
298 : // bottom up links to other blocks.
299 : void cutHugeBlocks();
300 :
301 : // Put in one group all instructions with no users in this scheduling region
302 : // (we'd want these groups be at the end).
303 : void regroupNoUserInstructions();
304 :
305 : // Give Reserved color to export instructions
306 : void colorExports();
307 :
308 : void createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant);
309 :
310 : void topologicalSort();
311 :
312 : void scheduleInsideBlocks();
313 :
314 : void fillStats();
315 : };
316 :
317 : enum SISchedulerBlockSchedulerVariant {
318 : BlockLatencyRegUsage,
319 : BlockRegUsageLatency,
320 : BlockRegUsage
321 : };
322 :
323 : class SIScheduleBlockScheduler {
324 : SIScheduleDAGMI *DAG;
325 : SISchedulerBlockSchedulerVariant Variant;
326 : std::vector<SIScheduleBlock*> Blocks;
327 :
328 : std::vector<std::map<unsigned, unsigned>> LiveOutRegsNumUsages;
329 : std::set<unsigned> LiveRegs;
330 : // Num of schedulable unscheduled blocks reading the register.
331 : std::map<unsigned, unsigned> LiveRegsConsumers;
332 :
333 : std::vector<unsigned> LastPosHighLatencyParentScheduled;
334 : int LastPosWaitedHighLatency;
335 :
336 : std::vector<SIScheduleBlock*> BlocksScheduled;
337 : unsigned NumBlockScheduled;
338 : std::vector<SIScheduleBlock*> ReadyBlocks;
339 :
340 : unsigned VregCurrentUsage;
341 : unsigned SregCurrentUsage;
342 :
343 : // Currently is only approximation.
344 : unsigned maxVregUsage;
345 : unsigned maxSregUsage;
346 :
347 : std::vector<unsigned> BlockNumPredsLeft;
348 : std::vector<unsigned> BlockNumSuccsLeft;
349 :
350 : public:
351 : SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
352 : SISchedulerBlockSchedulerVariant Variant,
353 : SIScheduleBlocks BlocksStruct);
354 4 : ~SIScheduleBlockScheduler() = default;
355 :
356 2 : std::vector<SIScheduleBlock*> getBlocks() { return BlocksScheduled; }
357 :
358 0 : unsigned getVGPRUsage() { return maxVregUsage; }
359 0 : unsigned getSGPRUsage() { return maxSregUsage; }
360 :
361 : private:
362 : struct SIBlockSchedCandidate : SISchedulerCandidate {
363 : // The best Block candidate.
364 : SIScheduleBlock *Block = nullptr;
365 :
366 : bool IsHighLatency;
367 : int VGPRUsageDiff;
368 : unsigned NumSuccessors;
369 : unsigned NumHighLatencySuccessors;
370 : unsigned LastPosHighLatParentScheduled;
371 : unsigned Height;
372 :
373 12 : SIBlockSchedCandidate() = default;
374 :
375 0 : bool isValid() const { return Block; }
376 :
377 : // Copy the status of another candidate without changing policy.
378 : void setBest(SIBlockSchedCandidate &Best) {
379 : assert(Best.Reason != NoCand && "uninitialized Sched candidate");
380 5 : Block = Best.Block;
381 5 : Reason = Best.Reason;
382 5 : IsHighLatency = Best.IsHighLatency;
383 5 : VGPRUsageDiff = Best.VGPRUsageDiff;
384 5 : NumSuccessors = Best.NumSuccessors;
385 5 : NumHighLatencySuccessors = Best.NumHighLatencySuccessors;
386 5 : LastPosHighLatParentScheduled = Best.LastPosHighLatParentScheduled;
387 5 : Height = Best.Height;
388 : }
389 : };
390 :
391 : bool tryCandidateLatency(SIBlockSchedCandidate &Cand,
392 : SIBlockSchedCandidate &TryCand);
393 : bool tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
394 : SIBlockSchedCandidate &TryCand);
395 : SIScheduleBlock *pickBlock();
396 :
397 : void addLiveRegs(std::set<unsigned> &Regs);
398 : void decreaseLiveRegs(SIScheduleBlock *Block, std::set<unsigned> &Regs);
399 : void releaseBlockSuccs(SIScheduleBlock *Parent);
400 : void blockScheduled(SIScheduleBlock *Block);
401 :
402 : // Check register pressure change
403 : // by scheduling a block with these LiveIn and LiveOut.
404 : std::vector<int> checkRegUsageImpact(std::set<unsigned> &InRegs,
405 : std::set<unsigned> &OutRegs);
406 :
407 : void schedule();
408 : };
409 :
410 6 : struct SIScheduleBlockResult {
411 : std::vector<unsigned> SUs;
412 : unsigned MaxSGPRUsage;
413 : unsigned MaxVGPRUsage;
414 : };
415 :
416 : class SIScheduler {
417 : SIScheduleDAGMI *DAG;
418 : SIScheduleBlockCreator BlockCreator;
419 :
420 : public:
421 2 : SIScheduler(SIScheduleDAGMI *DAG) : DAG(DAG), BlockCreator(DAG) {}
422 :
423 2 : ~SIScheduler() = default;
424 :
425 : struct SIScheduleBlockResult
426 : scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
427 : SISchedulerBlockSchedulerVariant ScheduleVariant);
428 : };
429 :
430 3 : class SIScheduleDAGMI final : public ScheduleDAGMILive {
431 : const SIInstrInfo *SITII;
432 : const SIRegisterInfo *SITRI;
433 :
434 : std::vector<SUnit> SUnitsLinksBackup;
435 :
436 : // For moveLowLatencies. After all Scheduling variants are tested.
437 : std::vector<unsigned> ScheduledSUnits;
438 : std::vector<unsigned> ScheduledSUnitsInv;
439 :
440 : unsigned VGPRSetID;
441 : unsigned SGPRSetID;
442 :
443 : public:
444 : SIScheduleDAGMI(MachineSchedContext *C);
445 :
446 : ~SIScheduleDAGMI() override;
447 :
448 : // Entry point for the schedule.
449 : void schedule() override;
450 :
451 : // To init Block's RPTracker.
452 15 : void initRPTracker(RegPressureTracker &RPTracker) {
453 15 : RPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin, false, false);
454 15 : }
455 :
456 0 : MachineBasicBlock *getBB() { return BB; }
457 0 : MachineBasicBlock::iterator getCurrentTop() { return CurrentTop; }
458 0 : MachineBasicBlock::iterator getCurrentBottom() { return CurrentBottom; }
459 0 : LiveIntervals *getLIS() { return LIS; }
460 0 : MachineRegisterInfo *getMRI() { return &MRI; }
461 0 : const TargetRegisterInfo *getTRI() { return TRI; }
462 0 : ScheduleDAGTopologicalSort *GetTopo() { return &Topo; }
463 : SUnit& getEntrySU() { return EntrySU; }
464 : SUnit& getExitSU() { return ExitSU; }
465 :
466 : void restoreSULinksLeft();
467 :
468 : template<typename _Iterator> void fillVgprSgprCost(_Iterator First,
469 : _Iterator End,
470 : unsigned &VgprUsage,
471 : unsigned &SgprUsage);
472 :
473 2 : std::set<unsigned> getInRegs() {
474 : std::set<unsigned> InRegs;
475 13 : for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
476 11 : InRegs.insert(RegMaskPair.RegUnit);
477 : }
478 2 : return InRegs;
479 : }
480 :
481 2 : std::set<unsigned> getOutRegs() {
482 : std::set<unsigned> OutRegs;
483 5 : for (const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
484 3 : OutRegs.insert(RegMaskPair.RegUnit);
485 : }
486 2 : return OutRegs;
487 : };
488 :
489 0 : unsigned getVGPRSetID() const { return VGPRSetID; }
490 0 : unsigned getSGPRSetID() const { return SGPRSetID; }
491 :
492 : private:
493 : void topologicalSort();
494 : // After scheduling is done, improve low latency placements.
495 : void moveLowLatencies();
496 :
497 : public:
498 : // Some stats for scheduling inside blocks.
499 : std::vector<unsigned> IsLowLatencySU;
500 : std::vector<unsigned> LowLatencyOffset;
501 : std::vector<unsigned> IsHighLatencySU;
502 : // Topological sort
503 : // Maps topological index to the node number.
504 : std::vector<int> TopDownIndex2SU;
505 : std::vector<int> BottomUpIndex2SU;
506 : };
507 :
508 : } // end namespace llvm
509 :
510 : #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
|