LLVM  10.0.0svn
SIMachineScheduler.h
Go to the documentation of this file.
1 //===-- SIMachineScheduler.h - SI Scheduler Interface -----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// SI Machine Scheduler interface
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
15 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
16 
17 #include "SIInstrInfo.h"
22 #include <cassert>
23 #include <cstdint>
24 #include <map>
25 #include <memory>
26 #include <set>
27 #include <vector>
28 
29 namespace llvm {
30 
38 };
39 
41  // The reason for this candidate.
43 
44  // Set of reasons that apply to multiple candidates.
46 
47  SISchedulerCandidate() = default;
48 
49  bool isRepeat(SIScheduleCandReason R) { return RepeatReasonSet & (1 << R); }
50  void setRepeat(SIScheduleCandReason R) { RepeatReasonSet |= (1 << R); }
51 };
52 
53 class SIScheduleDAGMI;
55 
59 };
60 
62  SIScheduleDAGMI *DAG;
64 
65  std::vector<SUnit*> SUnits;
66  std::map<unsigned, unsigned> NodeNum2Index;
67  std::vector<SUnit*> TopReadySUs;
68  std::vector<SUnit*> ScheduledSUnits;
69 
70  /// The top of the unscheduled zone.
71  IntervalPressure TopPressure;
72  RegPressureTracker TopRPTracker;
73 
74  // Pressure: number of said class of registers needed to
75  // store the live virtual and real registers.
76  // We do care only of SGPR32 and VGPR32 and do track only virtual registers.
77  // Pressure of additional registers required inside the block.
78  std::vector<unsigned> InternalAdditionnalPressure;
79  // Pressure of input and output registers
80  std::vector<unsigned> LiveInPressure;
81  std::vector<unsigned> LiveOutPressure;
82  // Registers required by the block, and outputs.
83  // We do track only virtual registers.
84  // Note that some registers are not 32 bits,
85  // and thus the pressure is not equal
86  // to the number of live registers.
87  std::set<unsigned> LiveInRegs;
88  std::set<unsigned> LiveOutRegs;
89 
90  bool Scheduled = false;
91  bool HighLatencyBlock = false;
92 
93  std::vector<unsigned> HasLowLatencyNonWaitedParent;
94 
95  // Unique ID, the index of the Block in the SIScheduleDAGMI Blocks table.
96  unsigned ID;
97 
98  std::vector<SIScheduleBlock*> Preds; // All blocks predecessors.
99  // All blocks successors, and the kind of link
100  std::vector<std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind>> Succs;
101  unsigned NumHighLatencySuccessors = 0;
102 
103 public:
105  unsigned ID):
106  DAG(DAG), BC(BC), TopRPTracker(TopPressure), ID(ID) {}
107 
108  ~SIScheduleBlock() = default;
109 
110  unsigned getID() const { return ID; }
111 
112  /// Functions for Block construction.
113  void addUnit(SUnit *SU);
114 
115  // When all SUs have been added.
116  void finalizeUnits();
117 
118  // Add block pred, which has instruction predecessor of SU.
119  void addPred(SIScheduleBlock *Pred);
120  void addSucc(SIScheduleBlock *Succ, SIScheduleBlockLinkKind Kind);
121 
122  const std::vector<SIScheduleBlock*>& getPreds() const { return Preds; }
124  getSuccs() const { return Succs; }
125 
126  unsigned Height; // Maximum topdown path length to block without outputs
127  unsigned Depth; // Maximum bottomup path length to block without inputs
128 
129  unsigned getNumHighLatencySuccessors() const {
130  return NumHighLatencySuccessors;
131  }
132 
133  bool isHighLatencyBlock() { return HighLatencyBlock; }
134 
135  // This is approximative.
136  // Ideally should take into accounts some instructions (rcp, etc)
137  // are 4 times slower.
138  int getCost() { return SUnits.size(); }
139 
140  // The block Predecessors and Successors must be all registered
141  // before fastSchedule().
142  // Fast schedule with no particular requirement.
143  void fastSchedule();
144 
145  std::vector<SUnit*> getScheduledUnits() { return ScheduledSUnits; }
146 
147  // Complete schedule that will try to minimize reg pressure and
148  // low latencies, and will fill liveins and liveouts.
149  // Needs all MIs to be grouped between BeginBlock and EndBlock.
150  // The MIs can be moved after the scheduling,
151  // it is just used to allow correct track of live registers.
152  void schedule(MachineBasicBlock::iterator BeginBlock,
153  MachineBasicBlock::iterator EndBlock);
154 
155  bool isScheduled() { return Scheduled; }
156 
157  // Needs the block to be scheduled inside
158  // TODO: find a way to compute it.
159  std::vector<unsigned> &getInternalAdditionnalRegUsage() {
160  return InternalAdditionnalPressure;
161  }
162 
163  std::set<unsigned> &getInRegs() { return LiveInRegs; }
164  std::set<unsigned> &getOutRegs() { return LiveOutRegs; }
165 
166  void printDebug(bool Full);
167 
168 private:
169  struct SISchedCandidate : SISchedulerCandidate {
170  // The best SUnit candidate.
171  SUnit *SU = nullptr;
172 
173  unsigned SGPRUsage;
174  unsigned VGPRUsage;
175  bool IsLowLatency;
176  unsigned LowLatencyOffset;
177  bool HasLowLatencyNonWaitedParent;
178 
179  SISchedCandidate() = default;
180 
181  bool isValid() const { return SU; }
182 
183  // Copy the status of another candidate without changing policy.
184  void setBest(SISchedCandidate &Best) {
185  assert(Best.Reason != NoCand && "uninitialized Sched candidate");
186  SU = Best.SU;
187  Reason = Best.Reason;
188  SGPRUsage = Best.SGPRUsage;
189  VGPRUsage = Best.VGPRUsage;
190  IsLowLatency = Best.IsLowLatency;
191  LowLatencyOffset = Best.LowLatencyOffset;
192  HasLowLatencyNonWaitedParent = Best.HasLowLatencyNonWaitedParent;
193  }
194  };
195 
196  void undoSchedule();
197 
198  void undoReleaseSucc(SUnit *SU, SDep *SuccEdge);
199  void releaseSucc(SUnit *SU, SDep *SuccEdge);
200  // InOrOutBlock: restrict to links pointing inside the block (true),
201  // or restrict to links pointing outside the block (false).
202  void releaseSuccessors(SUnit *SU, bool InOrOutBlock);
203 
204  void nodeScheduled(SUnit *SU);
205  void tryCandidateTopDown(SISchedCandidate &Cand, SISchedCandidate &TryCand);
206  void tryCandidateBottomUp(SISchedCandidate &Cand, SISchedCandidate &TryCand);
207  SUnit* pickNode();
208  void traceCandidate(const SISchedCandidate &Cand);
209  void initRegPressure(MachineBasicBlock::iterator BeginBlock,
210  MachineBasicBlock::iterator EndBlock);
211 };
212 
214  std::vector<SIScheduleBlock*> Blocks;
215  std::vector<int> TopDownIndex2Block;
216  std::vector<int> TopDownBlock2Index;
217 };
218 
223 };
224 
226  SIScheduleDAGMI *DAG;
227  // unique_ptr handles freeing memory for us.
228  std::vector<std::unique_ptr<SIScheduleBlock>> BlockPtrs;
230  SIScheduleBlocks> Blocks;
231  std::vector<SIScheduleBlock*> CurrentBlocks;
232  std::vector<int> Node2CurrentBlock;
233 
234  // Topological sort
235  // Maps topological index to the node number.
236  std::vector<int> TopDownIndex2Block;
237  std::vector<int> TopDownBlock2Index;
238  std::vector<int> BottomUpIndex2Block;
239 
240  // 0 -> Color not given.
241  // 1 to SUnits.size() -> Reserved group (you should only add elements to them).
242  // Above -> Other groups.
243  int NextReservedID;
244  int NextNonReservedID;
245  std::vector<int> CurrentColoring;
246  std::vector<int> CurrentTopDownReservedDependencyColoring;
247  std::vector<int> CurrentBottomUpReservedDependencyColoring;
248 
249 public:
252 
254  getBlocks(SISchedulerBlockCreatorVariant BlockVariant);
255 
256  bool isSUInBlock(SUnit *SU, unsigned ID);
257 
258 private:
259  // Give a Reserved color to every high latency.
260  void colorHighLatenciesAlone();
261 
262  // Create groups of high latencies with a Reserved color.
263  void colorHighLatenciesGroups();
264 
265  // Compute coloring for topdown and bottom traversals with
266  // different colors depending on dependencies on Reserved colors.
267  void colorComputeReservedDependencies();
268 
269  // Give color to all non-colored SUs according to Reserved groups dependencies.
270  void colorAccordingToReservedDependencies();
271 
272  // Divides Blocks having no bottom up or top down dependencies on Reserved groups.
273  // The new colors are computed according to the dependencies on the other blocks
274  // formed with colorAccordingToReservedDependencies.
275  void colorEndsAccordingToDependencies();
276 
277  // Cut groups into groups with SUs in consecutive order (except for Reserved groups).
278  void colorForceConsecutiveOrderInGroup();
279 
280  // Merge Constant loads that have all their users into another group to the group.
281  // (TODO: else if all their users depend on the same group, put them there)
282  void colorMergeConstantLoadsNextGroup();
283 
284  // Merge SUs that have all their users into another group to the group
285  void colorMergeIfPossibleNextGroup();
286 
287  // Merge SUs that have all their users into another group to the group,
288  // but only for Reserved groups.
289  void colorMergeIfPossibleNextGroupOnlyForReserved();
290 
291  // Merge SUs that have all their users into another group to the group,
292  // but only if the group is no more than a few SUs.
293  void colorMergeIfPossibleSmallGroupsToNextGroup();
294 
295  // Divides Blocks with important size.
296  // Idea of implementation: attribute new colors depending on topdown and
297  // bottom up links to other blocks.
298  void cutHugeBlocks();
299 
300  // Put in one group all instructions with no users in this scheduling region
301  // (we'd want these groups be at the end).
302  void regroupNoUserInstructions();
303 
304  // Give Reserved color to export instructions
305  void colorExports();
306 
307  void createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant);
308 
309  void topologicalSort();
310 
311  void scheduleInsideBlocks();
312 
313  void fillStats();
314 };
315 
320 };
321 
323  SIScheduleDAGMI *DAG;
325  std::vector<SIScheduleBlock*> Blocks;
326 
327  std::vector<std::map<unsigned, unsigned>> LiveOutRegsNumUsages;
328  std::set<unsigned> LiveRegs;
329  // Num of schedulable unscheduled blocks reading the register.
330  std::map<unsigned, unsigned> LiveRegsConsumers;
331 
332  std::vector<unsigned> LastPosHighLatencyParentScheduled;
333  int LastPosWaitedHighLatency;
334 
335  std::vector<SIScheduleBlock*> BlocksScheduled;
336  unsigned NumBlockScheduled;
337  std::vector<SIScheduleBlock*> ReadyBlocks;
338 
339  unsigned VregCurrentUsage;
340  unsigned SregCurrentUsage;
341 
342  // Currently is only approximation.
343  unsigned maxVregUsage;
344  unsigned maxSregUsage;
345 
346  std::vector<unsigned> BlockNumPredsLeft;
347  std::vector<unsigned> BlockNumSuccsLeft;
348 
349 public:
352  SIScheduleBlocks BlocksStruct);
353  ~SIScheduleBlockScheduler() = default;
354 
355  std::vector<SIScheduleBlock*> getBlocks() { return BlocksScheduled; }
356 
357  unsigned getVGPRUsage() { return maxVregUsage; }
358  unsigned getSGPRUsage() { return maxSregUsage; }
359 
360 private:
361  struct SIBlockSchedCandidate : SISchedulerCandidate {
362  // The best Block candidate.
363  SIScheduleBlock *Block = nullptr;
364 
365  bool IsHighLatency;
366  int VGPRUsageDiff;
367  unsigned NumSuccessors;
368  unsigned NumHighLatencySuccessors;
369  unsigned LastPosHighLatParentScheduled;
370  unsigned Height;
371 
372  SIBlockSchedCandidate() = default;
373 
374  bool isValid() const { return Block; }
375 
376  // Copy the status of another candidate without changing policy.
377  void setBest(SIBlockSchedCandidate &Best) {
378  assert(Best.Reason != NoCand && "uninitialized Sched candidate");
379  Block = Best.Block;
380  Reason = Best.Reason;
381  IsHighLatency = Best.IsHighLatency;
382  VGPRUsageDiff = Best.VGPRUsageDiff;
383  NumSuccessors = Best.NumSuccessors;
384  NumHighLatencySuccessors = Best.NumHighLatencySuccessors;
385  LastPosHighLatParentScheduled = Best.LastPosHighLatParentScheduled;
386  Height = Best.Height;
387  }
388  };
389 
390  bool tryCandidateLatency(SIBlockSchedCandidate &Cand,
391  SIBlockSchedCandidate &TryCand);
392  bool tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
393  SIBlockSchedCandidate &TryCand);
394  SIScheduleBlock *pickBlock();
395 
396  void addLiveRegs(std::set<unsigned> &Regs);
397  void decreaseLiveRegs(SIScheduleBlock *Block, std::set<unsigned> &Regs);
398  void releaseBlockSuccs(SIScheduleBlock *Parent);
399  void blockScheduled(SIScheduleBlock *Block);
400 
401  // Check register pressure change
402  // by scheduling a block with these LiveIn and LiveOut.
403  std::vector<int> checkRegUsageImpact(std::set<unsigned> &InRegs,
404  std::set<unsigned> &OutRegs);
405 
406  void schedule();
407 };
408 
410  std::vector<unsigned> SUs;
411  unsigned MaxSGPRUsage;
412  unsigned MaxVGPRUsage;
413 };
414 
415 class SIScheduler {
416  SIScheduleDAGMI *DAG;
417  SIScheduleBlockCreator BlockCreator;
418 
419 public:
420  SIScheduler(SIScheduleDAGMI *DAG) : DAG(DAG), BlockCreator(DAG) {}
421 
422  ~SIScheduler() = default;
423 
424  struct SIScheduleBlockResult
425  scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
426  SISchedulerBlockSchedulerVariant ScheduleVariant);
427 };
428 
429 class SIScheduleDAGMI final : public ScheduleDAGMILive {
430  const SIInstrInfo *SITII;
431  const SIRegisterInfo *SITRI;
432 
433  std::vector<SUnit> SUnitsLinksBackup;
434 
435  // For moveLowLatencies. After all Scheduling variants are tested.
436  std::vector<unsigned> ScheduledSUnits;
437  std::vector<unsigned> ScheduledSUnitsInv;
438 
439  unsigned VGPRSetID;
440  unsigned SGPRSetID;
441 
442 public:
444 
445  ~SIScheduleDAGMI() override;
446 
447  // Entry point for the schedule.
448  void schedule() override;
449 
450  // To init Block's RPTracker.
452  RPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin, false, false);
453  }
454 
455  MachineBasicBlock *getBB() { return BB; }
457  MachineBasicBlock::iterator getCurrentBottom() { return CurrentBottom; }
458  LiveIntervals *getLIS() { return LIS; }
460  const TargetRegisterInfo *getTRI() { return TRI; }
461  ScheduleDAGTopologicalSort *GetTopo() { return &Topo; }
462  SUnit& getEntrySU() { return EntrySU; }
463  SUnit& getExitSU() { return ExitSU; }
464 
465  void restoreSULinksLeft();
466 
467  template<typename _Iterator> void fillVgprSgprCost(_Iterator First,
468  _Iterator End,
469  unsigned &VgprUsage,
470  unsigned &SgprUsage);
471 
472  std::set<unsigned> getInRegs() {
473  std::set<unsigned> InRegs;
474  for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
475  InRegs.insert(RegMaskPair.RegUnit);
476  }
477  return InRegs;
478  }
479 
480  std::set<unsigned> getOutRegs() {
481  std::set<unsigned> OutRegs;
482  for (const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
483  OutRegs.insert(RegMaskPair.RegUnit);
484  }
485  return OutRegs;
486  };
487 
488  unsigned getVGPRSetID() const { return VGPRSetID; }
489  unsigned getSGPRSetID() const { return SGPRSetID; }
490 
491 private:
492  void topologicalSort();
493  // After scheduling is done, improve low latency placements.
494  void moveLowLatencies();
495 
496 public:
497  // Some stats for scheduling inside blocks.
498  std::vector<unsigned> IsLowLatencySU;
499  std::vector<unsigned> LowLatencyOffset;
500  std::vector<unsigned> IsHighLatencySU;
501  // Topological sort
502  // Maps topological index to the node number.
503  std::vector<int> TopDownIndex2SU;
504  std::vector<int> BottomUpIndex2SU;
505 };
506 
507 } // end namespace llvm
508 
509 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
uint64_t CallInst * C
SIScheduleBlockLinkKind
std::vector< SIScheduleBlock * > Blocks
This class represents lattice values for constants.
Definition: AllocatorList.h:23
SIScheduleCandReason Reason
std::vector< unsigned > & getInternalAdditionnalRegUsage()
std::vector< unsigned > IsLowLatencySU
std::vector< SIScheduleBlock * > getBlocks()
unsigned getSGPRSetID() const
unsigned const TargetRegisterInfo * TRI
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
std::vector< unsigned > LowLatencyOffset
ArrayRef< std::pair< SIScheduleBlock *, SIScheduleBlockLinkKind > > getSuccs() const
std::set< unsigned > getOutRegs()
std::set< unsigned > getInRegs()
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
bool isRepeat(SIScheduleCandReason R)
std::vector< int > TopDownIndex2Block
Scheduling dependency.
Definition: ScheduleDAG.h:49
unsigned const MachineRegisterInfo * MRI
RegisterPressure computed within a region of instructions delimited by TopIdx and BottomIdx...
std::vector< int > TopDownIndex2SU
std::vector< SUnit * > getScheduledUnits()
MachineRegisterInfo * getMRI()
MachineBasicBlock * getBB()
Track the current register pressure at some position in the instruction stream, and remember the high...
LiveIntervals * getLIS()
const TargetRegisterInfo * getTRI()
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
MachineBasicBlock::iterator getCurrentBottom()
std::set< unsigned > & getOutRegs()
SIScheduleBlock(SIScheduleDAGMI *DAG, SIScheduleBlockCreator *BC, unsigned ID)
std::vector< unsigned > IsHighLatencySU
std::vector< unsigned > SUs
SISchedulerBlockCreatorVariant
MachineBasicBlock::iterator getCurrentTop()
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
ScheduleDAGTopologicalSort * GetTopo()
Interface definition for SIInstrInfo.
std::vector< int > BottomUpIndex2SU
SIScheduler(SIScheduleDAGMI *DAG)
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
void init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos, bool TrackLaneMasks, bool TrackUntiedDefs)
Setup the RegPressureTracker.
void initRPTracker(RegPressureTracker &RPTracker)
std::set< unsigned > & getInRegs()
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
unsigned getNumHighLatencySuccessors() const
std::vector< int > TopDownBlock2Index
const std::vector< SIScheduleBlock * > & getPreds() const
void setRepeat(SIScheduleCandReason R)
This class can compute a topological ordering for SUnits and provides methods for dynamically updatin...
Definition: ScheduleDAG.h:689
unsigned getVGPRSetID() const
SISchedulerBlockSchedulerVariant
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242