LLVM  4.0.0
SIMachineScheduler.h
Go to the documentation of this file.
1 //===-- SIMachineScheduler.h - SI Scheduler Interface -----------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief SI Machine Scheduler interface
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
16 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
17 
18 #include "SIInstrInfo.h"
23 #include <cassert>
24 #include <cstdint>
25 #include <map>
26 #include <memory>
27 #include <set>
28 #include <vector>
29 
30 namespace llvm {
31 
39 };
40 
42  // The reason for this candidate.
44 
45  // Set of reasons that apply to multiple candidates.
47 
49  : Reason(NoCand), RepeatReasonSet(0) {}
50 
51  bool isRepeat(SIScheduleCandReason R) { return RepeatReasonSet & (1 << R); }
53 };
54 
55 class SIScheduleDAGMI;
56 class SIScheduleBlockCreator;
57 
59  SIScheduleDAGMI *DAG;
61 
62  std::vector<SUnit*> SUnits;
63  std::map<unsigned, unsigned> NodeNum2Index;
64  std::vector<SUnit*> TopReadySUs;
65  std::vector<SUnit*> ScheduledSUnits;
66 
67  /// The top of the unscheduled zone.
68  IntervalPressure TopPressure;
69  RegPressureTracker TopRPTracker;
70 
71  // Pressure: number of said class of registers needed to
72  // store the live virtual and real registers.
73  // We only care about SGPR32 and VGPR32, and only track virtual registers.
74  // Pressure of additional registers required inside the block.
75  std::vector<unsigned> InternalAdditionnalPressure;
76  // Pressure of input and output registers
77  std::vector<unsigned> LiveInPressure;
78  std::vector<unsigned> LiveOutPressure;
79  // Registers required by the block, and outputs.
80  // We only track virtual registers.
81  // Note that some registers are not 32 bits,
82  // and thus the pressure is not equal
83  // to the number of live registers.
84  std::set<unsigned> LiveInRegs;
85  std::set<unsigned> LiveOutRegs;
86 
87  bool Scheduled;
88  bool HighLatencyBlock;
89 
90  std::vector<unsigned> HasLowLatencyNonWaitedParent;
91 
92  // Unique ID, the index of the Block in the SIScheduleDAGMI Blocks table.
93  unsigned ID;
94 
95  std::vector<SIScheduleBlock*> Preds; // All blocks predecessors.
96  std::vector<SIScheduleBlock*> Succs; // All blocks successors.
97  unsigned NumHighLatencySuccessors;
98 
99 public:
101  unsigned ID):
102  DAG(DAG), BC(BC), TopRPTracker(TopPressure), Scheduled(false),
103  HighLatencyBlock(false), ID(ID), NumHighLatencySuccessors(0) {}
104 
105  ~SIScheduleBlock() = default;
106 
107  unsigned getID() const { return ID; }
108 
109  /// Functions for Block construction.
110  void addUnit(SUnit *SU);
111 
112  // When all SUs have been added.
113  void finalizeUnits();
114 
115  // Add block pred, which has instruction predecessor of SU.
116  void addPred(SIScheduleBlock *Pred);
117  void addSucc(SIScheduleBlock *Succ);
118 
119  const std::vector<SIScheduleBlock*>& getPreds() const { return Preds; }
120  const std::vector<SIScheduleBlock*>& getSuccs() const { return Succs; }
121 
122  unsigned Height; // Maximum topdown path length to block without outputs
123  unsigned Depth; // Maximum bottomup path length to block without inputs
124 
125  unsigned getNumHighLatencySuccessors() const {
126  return NumHighLatencySuccessors;
127  }
128 
129  bool isHighLatencyBlock() { return HighLatencyBlock; }
130 
131  // This is approximate.
132  // Ideally it should take into account that some instructions (rcp, etc.)
133  // are 4 times slower.
134  int getCost() { return SUnits.size(); } // Cost is the SUnits count, converted to int.
135 
136  // The block Predecessors and Successors must be all registered
137  // before fastSchedule().
138  // Fast schedule with no particular requirement.
139  void fastSchedule();
140 
141  std::vector<SUnit*> getScheduledUnits() { return ScheduledSUnits; }
142 
143  // Complete schedule that will try to minimize reg pressure and
144  // low latencies, and will fill liveins and liveouts.
145  // Needs all MIs to be grouped between BeginBlock and EndBlock.
146  // The MIs can be moved after the scheduling;
147  // it is only needed to allow correct tracking of live registers.
148  void schedule(MachineBasicBlock::iterator BeginBlock,
149  MachineBasicBlock::iterator EndBlock);
150 
151  bool isScheduled() { return Scheduled; }
152 
153  // Needs the block to be scheduled inside
154  // TODO: find a way to compute it.
155  std::vector<unsigned> &getInternalAdditionnalRegUsage() {
156  return InternalAdditionnalPressure;
157  }
158 
159  std::set<unsigned> &getInRegs() { return LiveInRegs; }
160  std::set<unsigned> &getOutRegs() { return LiveOutRegs; }
161 
162  void printDebug(bool Full);
163 
164 private:
165  struct SISchedCandidate : SISchedulerCandidate { // Candidate record wrapping one SUnit.
166  // The best SUnit candidate; nullptr by default.
167  SUnit *SU = nullptr;
168 
169  unsigned SGPRUsage; // No in-class initializer (same for the fields below).
170  unsigned VGPRUsage;
171  bool IsLowLatency;
172  unsigned LowLatencyOffset;
173  bool HasLowLatencyNonWaitedParent;
174 
175  SISchedCandidate() = default;
176 
177  bool isValid() const { return SU; } // True when SU is non-null.
178 
179  // Copy the status of another candidate without changing policy.
180  void setBest(SISchedCandidate &Best) { // Copies SU, Reason and every field declared above.
181  assert(Best.Reason != NoCand && "uninitialized Sched candidate"); // Best must already carry a reason.
182  SU = Best.SU;
183  Reason = Best.Reason;
184  SGPRUsage = Best.SGPRUsage;
185  VGPRUsage = Best.VGPRUsage;
186  IsLowLatency = Best.IsLowLatency;
187  LowLatencyOffset = Best.LowLatencyOffset;
188  HasLowLatencyNonWaitedParent = Best.HasLowLatencyNonWaitedParent;
189  }
190  };
191 
192  void undoSchedule();
193 
194  void undoReleaseSucc(SUnit *SU, SDep *SuccEdge);
195  void releaseSucc(SUnit *SU, SDep *SuccEdge);
196  // InOrOutBlock: restrict to links pointing inside the block (true),
197  // or restrict to links pointing outside the block (false).
198  void releaseSuccessors(SUnit *SU, bool InOrOutBlock);
199 
200  void nodeScheduled(SUnit *SU);
201  void tryCandidateTopDown(SISchedCandidate &Cand, SISchedCandidate &TryCand);
202  void tryCandidateBottomUp(SISchedCandidate &Cand, SISchedCandidate &TryCand);
203  SUnit* pickNode();
204  void traceCandidate(const SISchedCandidate &Cand);
205  void initRegPressure(MachineBasicBlock::iterator BeginBlock,
206  MachineBasicBlock::iterator EndBlock);
207 };
208 
210  std::vector<SIScheduleBlock*> Blocks;
211  std::vector<int> TopDownIndex2Block;
212  std::vector<int> TopDownBlock2Index;
213 };
214 
219 };
220 
222  SIScheduleDAGMI *DAG;
223  // unique_ptr handles freeing memory for us.
224  std::vector<std::unique_ptr<SIScheduleBlock>> BlockPtrs;
226  SIScheduleBlocks> Blocks;
227  std::vector<SIScheduleBlock*> CurrentBlocks;
228  std::vector<int> Node2CurrentBlock;
229 
230  // Topological sort
231  // Maps topological index to the node number.
232  std::vector<int> TopDownIndex2Block;
233  std::vector<int> TopDownBlock2Index;
234  std::vector<int> BottomUpIndex2Block;
235 
236  // 0 -> Color not given.
237  // 1 to SUnits.size() -> Reserved group (you should only add elements to them).
238  // Above -> Other groups.
239  int NextReservedID;
240  int NextNonReservedID;
241  std::vector<int> CurrentColoring;
242  std::vector<int> CurrentTopDownReservedDependencyColoring;
243  std::vector<int> CurrentBottomUpReservedDependencyColoring;
244 
245 public:
248 
251 
252  bool isSUInBlock(SUnit *SU, unsigned ID);
253 
254 private:
255  // Give a Reserved color to every high latency.
256  void colorHighLatenciesAlone();
257 
258  // Create groups of high latencies with a Reserved color.
259  void colorHighLatenciesGroups();
260 
261  // Compute coloring for topdown and bottom traversals with
262  // different colors depending on dependencies on Reserved colors.
263  void colorComputeReservedDependencies();
264 
265  // Give color to all non-colored SUs according to Reserved groups dependencies.
266  void colorAccordingToReservedDependencies();
267 
268  // Divides Blocks having no bottom up or top down dependencies on Reserved groups.
269  // The new colors are computed according to the dependencies on the other blocks
270  // formed with colorAccordingToReservedDependencies.
271  void colorEndsAccordingToDependencies();
272 
273  // Cut groups into groups with SUs in consecutive order (except for Reserved groups).
274  void colorForceConsecutiveOrderInGroup();
275 
276  // Merge constant loads whose users are all in another group into that group.
277  // (TODO: else if all their users depend on the same group, put them there)
278  void colorMergeConstantLoadsNextGroup();
279 
280  // Merge SUs whose users are all in another group into that group.
281  void colorMergeIfPossibleNextGroup();
282 
283  // Merge SUs whose users are all in another group into that group,
284  // but only for Reserved groups.
285  void colorMergeIfPossibleNextGroupOnlyForReserved();
286 
287  // Merge SUs whose users are all in another group into that group,
288  // but only if the group is no more than a few SUs.
289  void colorMergeIfPossibleSmallGroupsToNextGroup();
290 
291  // Divides Blocks that are very large.
292  // Idea of implementation: attribute new colors depending on topdown and
293  // bottom up links to other blocks.
294  void cutHugeBlocks();
295 
296  // Put in one group all instructions with no users in this scheduling region
297  // (we'd want these groups to be at the end).
298  void regroupNoUserInstructions();
299 
300  void createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant);
301 
302  void topologicalSort();
303 
304  void scheduleInsideBlocks();
305 
306  void fillStats();
307 };
308 
313 };
314 
316  SIScheduleDAGMI *DAG;
318  std::vector<SIScheduleBlock*> Blocks;
319 
320  std::vector<std::map<unsigned, unsigned>> LiveOutRegsNumUsages;
321  std::set<unsigned> LiveRegs;
322  // Num of schedulable unscheduled blocks reading the register.
323  std::map<unsigned, unsigned> LiveRegsConsumers;
324 
325  std::vector<unsigned> LastPosHighLatencyParentScheduled;
326  int LastPosWaitedHighLatency;
327 
328  std::vector<SIScheduleBlock*> BlocksScheduled;
329  unsigned NumBlockScheduled;
330  std::vector<SIScheduleBlock*> ReadyBlocks;
331 
332  unsigned VregCurrentUsage;
333  unsigned SregCurrentUsage;
334 
335  // Currently this is only an approximation.
336  unsigned maxVregUsage;
337  unsigned maxSregUsage;
338 
339  std::vector<unsigned> BlockNumPredsLeft;
340  std::vector<unsigned> BlockNumSuccsLeft;
341 
342 public:
345  SIScheduleBlocks BlocksStruct);
346  ~SIScheduleBlockScheduler() = default;
347 
348  std::vector<SIScheduleBlock*> getBlocks() { return BlocksScheduled; }
349 
350  unsigned getVGPRUsage() { return maxVregUsage; }
351  unsigned getSGPRUsage() { return maxSregUsage; }
352 
353 private:
354  struct SIBlockSchedCandidate : SISchedulerCandidate { // Candidate record wrapping one SIScheduleBlock.
355  // The best Block candidate; nullptr by default.
356  SIScheduleBlock *Block = nullptr;
357 
358  bool IsHighLatency; // No in-class initializer (same for the fields below).
359  int VGPRUsageDiff;
360  unsigned NumSuccessors;
361  unsigned NumHighLatencySuccessors;
362  unsigned LastPosHighLatParentScheduled;
363  unsigned Height;
364 
365  SIBlockSchedCandidate() = default;
366 
367  bool isValid() const { return Block; } // True when Block is non-null.
368 
369  // Copy the status of another candidate without changing policy.
370  void setBest(SIBlockSchedCandidate &Best) { // Copies Block, Reason and every field declared above.
371  assert(Best.Reason != NoCand && "uninitialized Sched candidate"); // Best must already carry a reason.
372  Block = Best.Block;
373  Reason = Best.Reason;
374  IsHighLatency = Best.IsHighLatency;
375  VGPRUsageDiff = Best.VGPRUsageDiff;
376  NumSuccessors = Best.NumSuccessors;
377  NumHighLatencySuccessors = Best.NumHighLatencySuccessors;
378  LastPosHighLatParentScheduled = Best.LastPosHighLatParentScheduled;
379  Height = Best.Height;
380  }
381  };
382 
383  bool tryCandidateLatency(SIBlockSchedCandidate &Cand,
384  SIBlockSchedCandidate &TryCand);
385  bool tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
386  SIBlockSchedCandidate &TryCand);
387  SIScheduleBlock *pickBlock();
388 
389  void addLiveRegs(std::set<unsigned> &Regs);
390  void decreaseLiveRegs(SIScheduleBlock *Block, std::set<unsigned> &Regs);
391  void releaseBlockSuccs(SIScheduleBlock *Parent);
392  void blockScheduled(SIScheduleBlock *Block);
393 
394  // Check register pressure change
395  // by scheduling a block with these LiveIn and LiveOut.
396  std::vector<int> checkRegUsageImpact(std::set<unsigned> &InRegs,
397  std::set<unsigned> &OutRegs);
398 
399  void schedule();
400 };
401 
403  std::vector<unsigned> SUs;
404  unsigned MaxSGPRUsage;
405  unsigned MaxVGPRUsage;
406 };
407 
408 class SIScheduler {
409  SIScheduleDAGMI *DAG;
410  SIScheduleBlockCreator BlockCreator;
411 
412 public:
413  SIScheduler(SIScheduleDAGMI *DAG) : DAG(DAG), BlockCreator(DAG) {}
414 
415  ~SIScheduler() = default;
416 
417  struct SIScheduleBlockResult
419  SISchedulerBlockSchedulerVariant ScheduleVariant);
420 };
421 
422 class SIScheduleDAGMI final : public ScheduleDAGMILive {
423  const SIInstrInfo *SITII;
424  const SIRegisterInfo *SITRI;
425 
426  std::vector<SUnit> SUnitsLinksBackup;
427 
428  // For moveLowLatencies. After all Scheduling variants are tested.
429  std::vector<unsigned> ScheduledSUnits;
430  std::vector<unsigned> ScheduledSUnitsInv;
431 
432  unsigned VGPRSetID;
433  unsigned SGPRSetID;
434 
435 public:
437 
438  ~SIScheduleDAGMI() override;
439 
440  // Entry point for the schedule.
441  void schedule() override;
442 
443  // To init Block's RPTracker.
445  RPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin, false, false);
446  }
447 
448  MachineBasicBlock *getBB() { return BB; }
451  LiveIntervals *getLIS() { return LIS; }
453  const TargetRegisterInfo *getTRI() { return TRI; }
454  SUnit& getEntrySU() { return EntrySU; }
455  SUnit& getExitSU() { return ExitSU; }
456 
457  void restoreSULinksLeft();
458 
459  template<typename _Iterator> void fillVgprSgprCost(_Iterator First,
460  _Iterator End,
461  unsigned &VgprUsage,
462  unsigned &SgprUsage);
463 
464  std::set<unsigned> getInRegs() { // Builds and returns a new set on every call.
465  std::set<unsigned> InRegs;
466  for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
467  InRegs.insert(RegMaskPair.RegUnit); // Only the RegUnit of each entry is kept.
468  }
469  return InRegs;
470  }
471 
472  unsigned getVGPRSetID() const { return VGPRSetID; }
473  unsigned getSGPRSetID() const { return SGPRSetID; }
474 
475 private:
476  void topologicalSort();
477  // After scheduling is done, improve low latency placements.
478  void moveLowLatencies();
479 
480 public:
481  // Some stats for scheduling inside blocks.
482  std::vector<unsigned> IsLowLatencySU;
483  std::vector<unsigned> LowLatencyOffset;
484  std::vector<unsigned> IsHighLatencySU;
485  // Topological sort
486  // Maps topological index to the node number.
487  std::vector<int> TopDownIndex2SU;
488  std::vector<int> BottomUpIndex2SU;
489 };
490 
491 } // end namespace llvm
492 
493 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
std::vector< SIScheduleBlock * > Blocks
SIScheduleDAGMI(MachineSchedContext *C)
unsigned getNumHighLatencySuccessors() const
MachineBasicBlock::iterator CurrentTop
The top of the unscheduled zone.
SIScheduleCandReason Reason
std::vector< unsigned > & getInternalAdditionnalRegUsage()
std::vector< unsigned > IsLowLatencySU
void addUnit(SUnit *SU)
Functions for Block construction.
std::vector< SIScheduleBlock * > getBlocks()
MachineInstrBundleIterator< MachineInstr > iterator
RegisterClassInfo * RegClassInfo
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
MachineFunction & MF
Definition: ScheduleDAG.h:581
std::vector< unsigned > LowLatencyOffset
~SIScheduleBlock()=default
std::set< unsigned > getInRegs()
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
const RegList & Regs
Function Alias Analysis false
SmallVector< RegisterMaskPair, 8 > LiveInRegs
List of live in virtual registers or physical register units.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
bool isRepeat(SIScheduleCandReason R)
unsigned getSGPRSetID() const
std::vector< int > TopDownIndex2Block
struct SIScheduleBlockResult scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant, SISchedulerBlockSchedulerVariant ScheduleVariant)
~SIScheduleDAGMI() override
RegisterPressure computed within a region of instructions delimited by TopIdx and BottomIdx...
bool isSUInBlock(SUnit *SU, unsigned ID)
std::vector< int > TopDownIndex2SU
std::vector< SUnit * > getScheduledUnits()
SIScheduleBlockScheduler(SIScheduleDAGMI *DAG, SISchedulerBlockSchedulerVariant Variant, SIScheduleBlocks BlocksStruct)
~SIScheduler()=default
MachineRegisterInfo * getMRI()
MachineBasicBlock * getBB()
static const unsigned End
Track the current register pressure at some position in the instruction stream, and remember the high...
LiveIntervals * getLIS()
const TargetRegisterInfo * getTRI()
SIScheduleBlocks getBlocks(SISchedulerBlockCreatorVariant BlockVariant)
unsigned getID() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
MachineBasicBlock::iterator getCurrentBottom()
void schedule(MachineBasicBlock::iterator BeginBlock, MachineBasicBlock::iterator EndBlock)
std::set< unsigned > & getOutRegs()
SIScheduleBlock(SIScheduleDAGMI *DAG, SIScheduleBlockCreator *BC, unsigned ID)
void addSucc(SIScheduleBlock *Succ)
std::vector< unsigned > IsHighLatencySU
std::vector< unsigned > SUs
SISchedulerBlockCreatorVariant
INITIALIZE_PASS(HexagonGenMux,"hexagon-mux","Hexagon generate mux instructions", false, false) void HexagonGenMux I isValid()
SIScheduleBlockCreator(SIScheduleDAGMI *DAG)
static GCRegistry::Add< ShadowStackGC > C("shadow-stack","Very portable GC for uncooperative code generators")
MachineBasicBlock::iterator getCurrentTop()
void fillVgprSgprCost(_Iterator First, _Iterator End, unsigned &VgprUsage, unsigned &SgprUsage)
const std::vector< SIScheduleBlock * > & getSuccs() const
unsigned getVGPRSetID() const
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
LiveIntervals * LIS
Interface definition for SIInstrInfo.
void addPred(SIScheduleBlock *Pred)
const TargetRegisterInfo * TRI
Definition: ScheduleDAG.h:580
std::vector< int > BottomUpIndex2SU
SIScheduler(SIScheduleDAGMI *DAG)
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
void init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos, bool TrackLaneMasks, bool TrackUntiedDefs)
Setup the RegPressureTracker.
void initRPTracker(RegPressureTracker &RPTracker)
MachineBasicBlock::iterator CurrentBottom
The bottom of the unscheduled zone.
std::set< unsigned > & getInRegs()
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
std::vector< int > TopDownBlock2Index
void setRepeat(SIScheduleCandReason R)
const std::vector< SIScheduleBlock * > & getPreds() const
MachineBasicBlock * BB
State specific to the current scheduling region.
RegisterPressure & getPressure()
Get the resulting register pressure over the traversed region.
MachineRegisterInfo & MRI
Definition: ScheduleDAG.h:582
RegPressureTracker RPTracker
SISchedulerBlockSchedulerVariant
SUnit - Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:244