LLVM  6.0.0svn
SIMachineScheduler.h
Go to the documentation of this file.
1 //===-- SIMachineScheduler.h - SI Scheduler Interface -----------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief SI Machine Scheduler interface
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
16 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
17 
18 #include "SIInstrInfo.h"
23 #include <cassert>
24 #include <cstdint>
25 #include <map>
26 #include <memory>
27 #include <set>
28 #include <vector>
29 
30 namespace llvm {
31 
39 };
40 
42  // The reason for this candidate.
44 
45  // Set of reasons that apply to multiple candidates.
47 
48  SISchedulerCandidate() = default;
49 
50  bool isRepeat(SIScheduleCandReason R) { return RepeatReasonSet & (1 << R); }
51  void setRepeat(SIScheduleCandReason R) { RepeatReasonSet |= (1 << R); }
52 };
53 
54 class SIScheduleDAGMI;
56 
60 };
61 
63  SIScheduleDAGMI *DAG;
65 
66  std::vector<SUnit*> SUnits;
67  std::map<unsigned, unsigned> NodeNum2Index;
68  std::vector<SUnit*> TopReadySUs;
69  std::vector<SUnit*> ScheduledSUnits;
70 
71  /// The top of the unscheduled zone.
72  IntervalPressure TopPressure;
73  RegPressureTracker TopRPTracker;
74 
75  // Pressure: number of said class of registers needed to
76  // store the live virtual and real registers.
77  // We do care only of SGPR32 and VGPR32 and do track only virtual registers.
78  // Pressure of additional registers required inside the block.
79  std::vector<unsigned> InternalAdditionnalPressure;
80  // Pressure of input and output registers
81  std::vector<unsigned> LiveInPressure;
82  std::vector<unsigned> LiveOutPressure;
83  // Registers required by the block, and outputs.
84  // We do track only virtual registers.
85  // Note that some registers are not 32 bits,
86  // and thus the pressure is not equal
87  // to the number of live registers.
88  std::set<unsigned> LiveInRegs;
89  std::set<unsigned> LiveOutRegs;
90 
91  bool Scheduled = false;
92  bool HighLatencyBlock = false;
93 
94  std::vector<unsigned> HasLowLatencyNonWaitedParent;
95 
96  // Unique ID, the index of the Block in the SIScheduleDAGMI Blocks table.
97  unsigned ID;
98 
99  std::vector<SIScheduleBlock*> Preds; // All blocks predecessors.
100  // All blocks successors, and the kind of link
101  std::vector<std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind>> Succs;
102  unsigned NumHighLatencySuccessors = 0;
103 
104 public:
106  unsigned ID):
107  DAG(DAG), BC(BC), TopRPTracker(TopPressure), ID(ID) {}
108 
109  ~SIScheduleBlock() = default;
110 
111  unsigned getID() const { return ID; }
112 
113  /// Functions for Block construction.
114  void addUnit(SUnit *SU);
115 
116  // When all SUs have been added.
117  void finalizeUnits();
118 
119  // Add block pred, which has instruction predecessor of SU.
120  void addPred(SIScheduleBlock *Pred);
121  void addSucc(SIScheduleBlock *Succ, SIScheduleBlockLinkKind Kind);
122 
123  const std::vector<SIScheduleBlock*>& getPreds() const { return Preds; }
125  getSuccs() const { return Succs; }
126 
127  unsigned Height; // Maximum topdown path length to block without outputs
128  unsigned Depth; // Maximum bottomup path length to block without inputs
129 
130  unsigned getNumHighLatencySuccessors() const {
131  return NumHighLatencySuccessors;
132  }
133 
134  bool isHighLatencyBlock() { return HighLatencyBlock; }
135 
136  // This is approximative.
137  // Ideally should take into accounts some instructions (rcp, etc)
138  // are 4 times slower.
139  int getCost() { return SUnits.size(); }
140 
141  // The block Predecessors and Successors must be all registered
142  // before fastSchedule().
143  // Fast schedule with no particular requirement.
144  void fastSchedule();
145 
146  std::vector<SUnit*> getScheduledUnits() { return ScheduledSUnits; }
147 
148  // Complete schedule that will try to minimize reg pressure and
149  // low latencies, and will fill liveins and liveouts.
150  // Needs all MIs to be grouped between BeginBlock and EndBlock.
151  // The MIs can be moved after the scheduling,
152  // it is just used to allow correct track of live registers.
153  void schedule(MachineBasicBlock::iterator BeginBlock,
154  MachineBasicBlock::iterator EndBlock);
155 
156  bool isScheduled() { return Scheduled; }
157 
158  // Needs the block to be scheduled inside
159  // TODO: find a way to compute it.
160  std::vector<unsigned> &getInternalAdditionnalRegUsage() {
161  return InternalAdditionnalPressure;
162  }
163 
164  std::set<unsigned> &getInRegs() { return LiveInRegs; }
165  std::set<unsigned> &getOutRegs() { return LiveOutRegs; }
166 
167  void printDebug(bool Full);
168 
169 private:
170  struct SISchedCandidate : SISchedulerCandidate {
171  // The best SUnit candidate.
172  SUnit *SU = nullptr;
173 
174  unsigned SGPRUsage;
175  unsigned VGPRUsage;
176  bool IsLowLatency;
177  unsigned LowLatencyOffset;
178  bool HasLowLatencyNonWaitedParent;
179 
180  SISchedCandidate() = default;
181 
182  bool isValid() const { return SU; }
183 
184  // Copy the status of another candidate without changing policy.
185  void setBest(SISchedCandidate &Best) {
186  assert(Best.Reason != NoCand && "uninitialized Sched candidate");
187  SU = Best.SU;
188  Reason = Best.Reason;
189  SGPRUsage = Best.SGPRUsage;
190  VGPRUsage = Best.VGPRUsage;
191  IsLowLatency = Best.IsLowLatency;
192  LowLatencyOffset = Best.LowLatencyOffset;
193  HasLowLatencyNonWaitedParent = Best.HasLowLatencyNonWaitedParent;
194  }
195  };
196 
197  void undoSchedule();
198 
199  void undoReleaseSucc(SUnit *SU, SDep *SuccEdge);
200  void releaseSucc(SUnit *SU, SDep *SuccEdge);
201  // InOrOutBlock: restrict to links pointing inside the block (true),
202  // or restrict to links pointing outside the block (false).
203  void releaseSuccessors(SUnit *SU, bool InOrOutBlock);
204 
205  void nodeScheduled(SUnit *SU);
206  void tryCandidateTopDown(SISchedCandidate &Cand, SISchedCandidate &TryCand);
207  void tryCandidateBottomUp(SISchedCandidate &Cand, SISchedCandidate &TryCand);
208  SUnit* pickNode();
209  void traceCandidate(const SISchedCandidate &Cand);
210  void initRegPressure(MachineBasicBlock::iterator BeginBlock,
211  MachineBasicBlock::iterator EndBlock);
212 };
213 
215  std::vector<SIScheduleBlock*> Blocks;
216  std::vector<int> TopDownIndex2Block;
217  std::vector<int> TopDownBlock2Index;
218 };
219 
224 };
225 
227  SIScheduleDAGMI *DAG;
228  // unique_ptr handles freeing memory for us.
229  std::vector<std::unique_ptr<SIScheduleBlock>> BlockPtrs;
231  SIScheduleBlocks> Blocks;
232  std::vector<SIScheduleBlock*> CurrentBlocks;
233  std::vector<int> Node2CurrentBlock;
234 
235  // Topological sort
236  // Maps topological index to the node number.
237  std::vector<int> TopDownIndex2Block;
238  std::vector<int> TopDownBlock2Index;
239  std::vector<int> BottomUpIndex2Block;
240 
241  // 0 -> Color not given.
242  // 1 to SUnits.size() -> Reserved group (you should only add elements to them).
243  // Above -> Other groups.
244  int NextReservedID;
245  int NextNonReservedID;
246  std::vector<int> CurrentColoring;
247  std::vector<int> CurrentTopDownReservedDependencyColoring;
248  std::vector<int> CurrentBottomUpReservedDependencyColoring;
249 
250 public:
253 
255  getBlocks(SISchedulerBlockCreatorVariant BlockVariant);
256 
257  bool isSUInBlock(SUnit *SU, unsigned ID);
258 
259 private:
260  // Give a Reserved color to every high latency.
261  void colorHighLatenciesAlone();
262 
263  // Create groups of high latencies with a Reserved color.
264  void colorHighLatenciesGroups();
265 
266  // Compute coloring for topdown and bottom traversals with
267  // different colors depending on dependencies on Reserved colors.
268  void colorComputeReservedDependencies();
269 
270  // Give color to all non-colored SUs according to Reserved groups dependencies.
271  void colorAccordingToReservedDependencies();
272 
273  // Divides Blocks having no bottom up or top down dependencies on Reserved groups.
274  // The new colors are computed according to the dependencies on the other blocks
275  // formed with colorAccordingToReservedDependencies.
276  void colorEndsAccordingToDependencies();
277 
278  // Cut groups into groups with SUs in consecutive order (except for Reserved groups).
279  void colorForceConsecutiveOrderInGroup();
280 
281  // Merge Constant loads that have all their users into another group to the group.
282  // (TODO: else if all their users depend on the same group, put them there)
283  void colorMergeConstantLoadsNextGroup();
284 
285  // Merge SUs that have all their users into another group to the group
286  void colorMergeIfPossibleNextGroup();
287 
288  // Merge SUs that have all their users into another group to the group,
289  // but only for Reserved groups.
290  void colorMergeIfPossibleNextGroupOnlyForReserved();
291 
292  // Merge SUs that have all their users into another group to the group,
293  // but only if the group is no more than a few SUs.
294  void colorMergeIfPossibleSmallGroupsToNextGroup();
295 
296  // Divides Blocks with important size.
297  // Idea of implementation: attribute new colors depending on topdown and
298  // bottom up links to other blocks.
299  void cutHugeBlocks();
300 
301  // Put in one group all instructions with no users in this scheduling region
302  // (we'd want these groups be at the end).
303  void regroupNoUserInstructions();
304 
305  // Give Reserved color to export instructions
306  void colorExports();
307 
308  void createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant);
309 
310  void topologicalSort();
311 
312  void scheduleInsideBlocks();
313 
314  void fillStats();
315 };
316 
321 };
322 
324  SIScheduleDAGMI *DAG;
326  std::vector<SIScheduleBlock*> Blocks;
327 
328  std::vector<std::map<unsigned, unsigned>> LiveOutRegsNumUsages;
329  std::set<unsigned> LiveRegs;
330  // Num of schedulable unscheduled blocks reading the register.
331  std::map<unsigned, unsigned> LiveRegsConsumers;
332 
333  std::vector<unsigned> LastPosHighLatencyParentScheduled;
334  int LastPosWaitedHighLatency;
335 
336  std::vector<SIScheduleBlock*> BlocksScheduled;
337  unsigned NumBlockScheduled;
338  std::vector<SIScheduleBlock*> ReadyBlocks;
339 
340  unsigned VregCurrentUsage;
341  unsigned SregCurrentUsage;
342 
343  // Currently is only approximation.
344  unsigned maxVregUsage;
345  unsigned maxSregUsage;
346 
347  std::vector<unsigned> BlockNumPredsLeft;
348  std::vector<unsigned> BlockNumSuccsLeft;
349 
350 public:
353  SIScheduleBlocks BlocksStruct);
354  ~SIScheduleBlockScheduler() = default;
355 
356  std::vector<SIScheduleBlock*> getBlocks() { return BlocksScheduled; }
357 
358  unsigned getVGPRUsage() { return maxVregUsage; }
359  unsigned getSGPRUsage() { return maxSregUsage; }
360 
361 private:
362  struct SIBlockSchedCandidate : SISchedulerCandidate {
363  // The best Block candidate.
364  SIScheduleBlock *Block = nullptr;
365 
366  bool IsHighLatency;
367  int VGPRUsageDiff;
368  unsigned NumSuccessors;
369  unsigned NumHighLatencySuccessors;
370  unsigned LastPosHighLatParentScheduled;
371  unsigned Height;
372 
373  SIBlockSchedCandidate() = default;
374 
375  bool isValid() const { return Block; }
376 
377  // Copy the status of another candidate without changing policy.
378  void setBest(SIBlockSchedCandidate &Best) {
379  assert(Best.Reason != NoCand && "uninitialized Sched candidate");
380  Block = Best.Block;
381  Reason = Best.Reason;
382  IsHighLatency = Best.IsHighLatency;
383  VGPRUsageDiff = Best.VGPRUsageDiff;
384  NumSuccessors = Best.NumSuccessors;
385  NumHighLatencySuccessors = Best.NumHighLatencySuccessors;
386  LastPosHighLatParentScheduled = Best.LastPosHighLatParentScheduled;
387  Height = Best.Height;
388  }
389  };
390 
391  bool tryCandidateLatency(SIBlockSchedCandidate &Cand,
392  SIBlockSchedCandidate &TryCand);
393  bool tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
394  SIBlockSchedCandidate &TryCand);
395  SIScheduleBlock *pickBlock();
396 
397  void addLiveRegs(std::set<unsigned> &Regs);
398  void decreaseLiveRegs(SIScheduleBlock *Block, std::set<unsigned> &Regs);
399  void releaseBlockSuccs(SIScheduleBlock *Parent);
400  void blockScheduled(SIScheduleBlock *Block);
401 
402  // Check register pressure change
403  // by scheduling a block with these LiveIn and LiveOut.
404  std::vector<int> checkRegUsageImpact(std::set<unsigned> &InRegs,
405  std::set<unsigned> &OutRegs);
406 
407  void schedule();
408 };
409 
411  std::vector<unsigned> SUs;
412  unsigned MaxSGPRUsage;
413  unsigned MaxVGPRUsage;
414 };
415 
416 class SIScheduler {
417  SIScheduleDAGMI *DAG;
418  SIScheduleBlockCreator BlockCreator;
419 
420 public:
421  SIScheduler(SIScheduleDAGMI *DAG) : DAG(DAG), BlockCreator(DAG) {}
422 
423  ~SIScheduler() = default;
424 
425  struct SIScheduleBlockResult
426  scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
427  SISchedulerBlockSchedulerVariant ScheduleVariant);
428 };
429 
430 class SIScheduleDAGMI final : public ScheduleDAGMILive {
431  const SIInstrInfo *SITII;
432  const SIRegisterInfo *SITRI;
433 
434  std::vector<SUnit> SUnitsLinksBackup;
435 
436  // For moveLowLatencies. After all Scheduling variants are tested.
437  std::vector<unsigned> ScheduledSUnits;
438  std::vector<unsigned> ScheduledSUnitsInv;
439 
440  unsigned VGPRSetID;
441  unsigned SGPRSetID;
442 
443 public:
445 
446  ~SIScheduleDAGMI() override;
447 
448  // Entry point for the schedule.
449  void schedule() override;
450 
451  // To init Block's RPTracker.
453  RPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin, false, false);
454  }
455 
456  MachineBasicBlock *getBB() { return BB; }
458  MachineBasicBlock::iterator getCurrentBottom() { return CurrentBottom; }
459  LiveIntervals *getLIS() { return LIS; }
461  const TargetRegisterInfo *getTRI() { return TRI; }
462  ScheduleDAGTopologicalSort *GetTopo() { return &Topo; }
463  SUnit& getEntrySU() { return EntrySU; }
464  SUnit& getExitSU() { return ExitSU; }
465 
466  void restoreSULinksLeft();
467 
468  template<typename _Iterator> void fillVgprSgprCost(_Iterator First,
469  _Iterator End,
470  unsigned &VgprUsage,
471  unsigned &SgprUsage);
472 
473  std::set<unsigned> getInRegs() {
474  std::set<unsigned> InRegs;
475  for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
476  InRegs.insert(RegMaskPair.RegUnit);
477  }
478  return InRegs;
479  }
480 
481  std::set<unsigned> getOutRegs() {
482  std::set<unsigned> OutRegs;
483  for (const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
484  OutRegs.insert(RegMaskPair.RegUnit);
485  }
486  return OutRegs;
487  };
488 
489  unsigned getVGPRSetID() const { return VGPRSetID; }
490  unsigned getSGPRSetID() const { return SGPRSetID; }
491 
492 private:
493  void topologicalSort();
494  // After scheduling is done, improve low latency placements.
495  void moveLowLatencies();
496 
497 public:
498  // Some stats for scheduling inside blocks.
499  std::vector<unsigned> IsLowLatencySU;
500  std::vector<unsigned> LowLatencyOffset;
501  std::vector<unsigned> IsHighLatencySU;
502  // Topological sort
503  // Maps topological index to the node number.
504  std::vector<int> TopDownIndex2SU;
505  std::vector<int> BottomUpIndex2SU;
506 };
507 
508 } // end namespace llvm
509 
510 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
uint64_t CallInst * C
SIScheduleBlockLinkKind
std::vector< SIScheduleBlock * > Blocks
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
SIScheduleCandReason Reason
std::vector< unsigned > & getInternalAdditionnalRegUsage()
std::vector< unsigned > IsLowLatencySU
std::vector< SIScheduleBlock * > getBlocks()
unsigned getSGPRSetID() const
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
std::vector< unsigned > LowLatencyOffset
ArrayRef< std::pair< SIScheduleBlock *, SIScheduleBlockLinkKind > > getSuccs() const
std::set< unsigned > getOutRegs()
std::set< unsigned > getInRegs()
const RegList & Regs
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
bool isRepeat(SIScheduleCandReason R)
std::vector< int > TopDownIndex2Block
Scheduling dependency.
Definition: ScheduleDAG.h:50
unsigned const MachineRegisterInfo * MRI
RegisterPressure computed within a region of instructions delimited by TopIdx and BottomIdx...
std::vector< int > TopDownIndex2SU
std::vector< SUnit * > getScheduledUnits()
MachineRegisterInfo * getMRI()
MachineBasicBlock * getBB()
static const unsigned End
Track the current register pressure at some position in the instruction stream, and remember the high...
LiveIntervals * getLIS()
const TargetRegisterInfo * getTRI()
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
MachineBasicBlock::iterator getCurrentBottom()
std::set< unsigned > & getOutRegs()
SIScheduleBlock(SIScheduleDAGMI *DAG, SIScheduleBlockCreator *BC, unsigned ID)
std::vector< unsigned > IsHighLatencySU
std::vector< unsigned > SUs
SISchedulerBlockCreatorVariant
MachineBasicBlock::iterator getCurrentTop()
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
ScheduleDAGTopologicalSort * GetTopo()
Interface definition for SIInstrInfo.
std::vector< int > BottomUpIndex2SU
SIScheduler(SIScheduleDAGMI *DAG)
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
void init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos, bool TrackLaneMasks, bool TrackUntiedDefs)
Setup the RegPressureTracker.
void initRPTracker(RegPressureTracker &RPTracker)
const unsigned Kind
std::set< unsigned > & getInRegs()
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
unsigned getNumHighLatencySuccessors() const
std::vector< int > TopDownBlock2Index
const std::vector< SIScheduleBlock * > & getPreds() const
void setRepeat(SIScheduleCandReason R)
This class can compute a topological ordering for SUnits and provides methods for dynamically updatin...
Definition: ScheduleDAG.h:694
unsigned getVGPRSetID() const
SISchedulerBlockSchedulerVariant
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:247