LLVM  4.0.0
GCNSchedStrategy.cpp
Go to the documentation of this file.
1 //===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// This contains a MachineSchedStrategy implementation for maximizing wave
12 /// occupancy on GCN hardware.
13 //===----------------------------------------------------------------------===//
14 
15 #include "GCNSchedStrategy.h"
16 #include "AMDGPUSubtarget.h"
17 #include "SIInstrInfo.h"
18 #include "SIMachineFunctionInfo.h"
19 #include "SIRegisterInfo.h"
21 
22 #define DEBUG_TYPE "misched"
23 
24 using namespace llvm;
25 
27  const MachineSchedContext *C) :
28  GenericScheduler(C) { }
29 
30 static unsigned getMaxWaves(unsigned SGPRs, unsigned VGPRs,
31  const MachineFunction &MF) {
32 
33  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
35  unsigned MinRegOccupancy = std::min(ST.getOccupancyWithNumSGPRs(SGPRs),
36  ST.getOccupancyWithNumVGPRs(VGPRs));
37  return std::min(MinRegOccupancy,
38  ST.getOccupancyWithLocalMemSize(MFI->getLDSSize()));
39 }
40 
41 void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
42  bool AtTop, const RegPressureTracker &RPTracker,
43  const SIRegisterInfo *SRI,
44  int SGPRPressure,
45  int VGPRPressure,
46  int SGPRExcessLimit,
47  int VGPRExcessLimit,
48  int SGPRCriticalLimit,
49  int VGPRCriticalLimit) {
50 
51  Cand.SU = SU;
52  Cand.AtTop = AtTop;
53 
54  // getDownwardPressure() and getUpwardPressure() make temporary changes to
55  // the the tracker, so we need to pass those function a non-const copy.
56  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
57 
58  std::vector<unsigned> Pressure;
59  std::vector<unsigned> MaxPressure;
60 
61  if (AtTop)
62  TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
63  else {
64  // FIXME: I think for bottom up scheduling, the register pressure is cached
65  // and can be retrieved by DAG->getPressureDif(SU).
66  TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
67  }
68 
69  int NewSGPRPressure = Pressure[SRI->getSGPRPressureSet()];
70  int NewVGPRPressure = Pressure[SRI->getVGPRPressureSet()];
71 
72  // If two instructions increase the pressure of different register sets
73  // by the same amount, the generic scheduler will prefer to schedule the
74  // instruction that increases the set with the least amount of registers,
75  // which in our case would be SGPRs. This is rarely what we want, so
76  // when we report excess/critical register pressure, we do it either
77  // only for VGPRs or only for SGPRs.
78 
79  // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
80  const int MaxVGPRPressureInc = 16;
81  bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
82  bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;
83 
84 
85  // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
86  // to increase the likelihood we don't go over the limits. We should improve
87  // the analysis to look through dependencies to find the path with the least
88  // register pressure.
89  // FIXME: This is also necessary, because some passes that run after
90  // scheduling and before regalloc increase register pressure.
91  const int ErrorMargin = 3;
92  VGPRExcessLimit -= ErrorMargin;
93  SGPRExcessLimit -= ErrorMargin;
94 
95  // We only need to update the RPDelata for instructions that increase
96  // register pressure. Instructions that decrease or keep reg pressure
97  // the same will be marked as RegExcess in tryCandidate() when they
98  // are compared with instructions that increase the register pressure.
99  if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
100  Cand.RPDelta.Excess = PressureChange(SRI->getVGPRPressureSet());
101  Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
102  }
103 
104  if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
105  Cand.RPDelta.Excess = PressureChange(SRI->getSGPRPressureSet());
106  Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure = SGPRExcessLimit);
107  }
108 
109  // Register pressure is considered 'CRITICAL' if it is approaching a value
110  // that would reduce the wave occupancy for the execution unit. When
111  // register pressure is 'CRITICAL', increading SGPR and VGPR pressure both
112  // has the same cost, so we don't need to prefer one over the other.
113 
114  VGPRCriticalLimit -= ErrorMargin;
115  SGPRCriticalLimit -= ErrorMargin;
116 
117  int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
118  int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;
119 
120  if (SGPRDelta >= 0 || VGPRDelta >= 0) {
121  if (SGPRDelta > VGPRDelta) {
122  Cand.RPDelta.CriticalMax = PressureChange(SRI->getSGPRPressureSet());
123  Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
124  } else {
125  Cand.RPDelta.CriticalMax = PressureChange(SRI->getVGPRPressureSet());
126  Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
127  }
128  }
129 }
130 
131 // This function is mostly cut and pasted from
132 // GenericScheduler::pickNodeFromQueue()
133 void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
134  const CandPolicy &ZonePolicy,
135  const RegPressureTracker &RPTracker,
136  SchedCandidate &Cand) {
138  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
139  ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
140  unsigned SGPRPressure = Pressure[SRI->getSGPRPressureSet()];
141  unsigned VGPRPressure = Pressure[SRI->getVGPRPressureSet()];
142  unsigned SGPRExcessLimit =
143  Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
144  unsigned VGPRExcessLimit =
145  Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);
146  unsigned MaxWaves = getMaxWaves(SGPRPressure, VGPRPressure, DAG->MF);
147  unsigned SGPRCriticalLimit = SRI->getMaxNumSGPRs(ST, MaxWaves, true);
148  unsigned VGPRCriticalLimit = SRI->getMaxNumVGPRs(MaxWaves);
149 
150  ReadyQueue &Q = Zone.Available;
151  for (SUnit *SU : Q) {
152 
153  SchedCandidate TryCand(ZonePolicy);
154  initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI,
155  SGPRPressure, VGPRPressure,
156  SGPRExcessLimit, VGPRExcessLimit,
157  SGPRCriticalLimit, VGPRCriticalLimit);
158  // Pass SchedBoundary only when comparing nodes from the same boundary.
159  SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
160  GenericScheduler::tryCandidate(Cand, TryCand, ZoneArg);
161  if (TryCand.Reason != NoCand) {
162  // Initialize resource delta if needed in case future heuristics query it.
163  if (TryCand.ResDelta == SchedResourceDelta())
164  TryCand.initResourceDelta(Zone.DAG, SchedModel);
165  Cand.setBest(TryCand);
166  }
167  }
168 }
169 
171  switch (Reason) {
172  default:
173  return Reason;
176  return -Reason;
177  }
178 }
179 
180 // This function is mostly cut and pasted from
181 // GenericScheduler::pickNodeBidirectional()
182 SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
183  // Schedule as far as possible in the direction of no choice. This is most
184  // efficient, but also provides the best heuristics for CriticalPSets.
185  if (SUnit *SU = Bot.pickOnlyChoice()) {
186  IsTopNode = false;
187  return SU;
188  }
189  if (SUnit *SU = Top.pickOnlyChoice()) {
190  IsTopNode = true;
191  return SU;
192  }
193  // Set the bottom-up policy based on the state of the current bottom zone and
194  // the instructions outside the zone, including the top zone.
195  CandPolicy BotPolicy;
196  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
197  // Set the top-down policy based on the state of the current top zone and
198  // the instructions outside the zone, including the bottom zone.
199  CandPolicy TopPolicy;
200  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
201 
202  // See if BotCand is still valid (because we previously scheduled from Top).
203  DEBUG(dbgs() << "Picking from Bot:\n");
204  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
205  BotCand.Policy != BotPolicy) {
206  BotCand.reset(CandPolicy());
207  pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
208  assert(BotCand.Reason != NoCand && "failed to find the first candidate");
209  } else {
211  }
212 
213  // Check if the top Q has a better candidate.
214  DEBUG(dbgs() << "Picking from Top:\n");
215  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
216  TopCand.Policy != TopPolicy) {
217  TopCand.reset(CandPolicy());
218  pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
219  assert(TopCand.Reason != NoCand && "failed to find the first candidate");
220  } else {
222  }
223 
224  // Pick best from BotCand and TopCand.
225  DEBUG(
226  dbgs() << "Top Cand: ";
228  dbgs() << "Bot Cand: ";
230  );
231  SchedCandidate Cand;
232  if (TopCand.Reason == BotCand.Reason) {
233  Cand = BotCand;
236  GenericScheduler::tryCandidate(Cand, TopCand, nullptr);
237  if (TopCand.Reason != NoCand) {
238  Cand.setBest(TopCand);
239  } else {
240  TopCand.Reason = TopReason;
241  }
242  } else {
244  Cand = TopCand;
245  } else if (BotCand.Reason == RegExcess && BotCand.RPDelta.Excess.getUnitInc() <= 0) {
246  Cand = BotCand;
247  } else if (TopCand.Reason == RegCritical && TopCand.RPDelta.CriticalMax.getUnitInc() <= 0) {
248  Cand = TopCand;
249  } else if (BotCand.Reason == RegCritical && BotCand.RPDelta.CriticalMax.getUnitInc() <= 0) {
250  Cand = BotCand;
251  } else {
254  if (TopRank > BotRank) {
255  Cand = TopCand;
256  } else {
257  Cand = BotCand;
258  }
259  }
260  }
261  DEBUG(
262  dbgs() << "Picking: ";
263  traceCandidate(Cand);
264  );
265 
266  IsTopNode = Cand.AtTop;
267  return Cand.SU;
268 }
269 
270 // This function is mostly cut and pasted from
271 // GenericScheduler::pickNode()
273  if (DAG->top() == DAG->bottom()) {
275  Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
276  return nullptr;
277  }
278  SUnit *SU;
279  do {
281  SU = Top.pickOnlyChoice();
282  if (!SU) {
283  CandPolicy NoPolicy;
284  TopCand.reset(NoPolicy);
285  pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
286  assert(TopCand.Reason != NoCand && "failed to find a candidate");
287  SU = TopCand.SU;
288  }
289  IsTopNode = true;
290  } else if (RegionPolicy.OnlyBottomUp) {
291  SU = Bot.pickOnlyChoice();
292  if (!SU) {
293  CandPolicy NoPolicy;
294  BotCand.reset(NoPolicy);
295  pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
296  assert(BotCand.Reason != NoCand && "failed to find a candidate");
297  SU = BotCand.SU;
298  }
299  IsTopNode = false;
300  } else {
301  SU = pickNodeBidirectional(IsTopNode);
302  }
303  } while (SU->isScheduled);
304 
305  if (SU->isTopReady())
306  Top.removeReady(SU);
307  if (SU->isBottomReady())
308  Bot.removeReady(SU);
309 
310  DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
311  return SU;
312 }
Interface definition for SIRegisterInfo.
SUnit * pickNode(bool &IsTopNode) override
Pick the best node to balance the schedule. Implements MachineSchedStrategy.
AMDGPU specific subclass of TargetSubtarget.
Each Scheduling boundary is associated with ready queues.
const MachineSchedContext * Context
MachineInstr * getInstr() const
getInstr - Return the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:389
void setUnitInc(int Inc)
void traceCandidate(const SchedCandidate &Cand)
static int getBidirectionalReasonRank(GenericSchedulerBase::CandReason Reason)
void reset(const CandPolicy &NewPolicy)
unsigned getNumAllocatableRegs(const TargetRegisterClass *RC) const
getNumAllocatableRegs - Returns the number of actually allocatable registers in RC in the current fun...
ScheduleDAGMI * DAG
MachineBasicBlock::iterator top() const
const RegPressureTracker & getTopRPTracker() const
const RegPressureTracker & getBotRPTracker() const
MachineFunction & MF
Definition: ScheduleDAG.h:581
bool isScheduled
Definition: ScheduleDAG.h:286
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const
Return the maximum number of waves per SIMD for kernels using SGPRs SGPRs.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineSchedPolicy RegionPolicy
Maximum length of the test input libFuzzer tries to guess a good value based on the corpus and reports it always prefer smaller inputs during the corpus shuffle When libFuzzer itself reports a bug this exit code will be used If indicates the maximal total time in seconds to run the fuzzer minimizes the provided crash input Use with etc Experimental Use value profile to guide fuzzing Number of simultaneous worker processes to run the jobs If min(jobs, NumberOfCpuCores()/2)\" is used.") FUZZER_FLAG_INT(reload
unsigned getVGPRPressureSet() const
RegisterClassInfo * RegClassInfo
CandReason
Represent the type of SchedCandidate found within a single queue.
Helpers for implementing custom MachineSchedStrategy classes.
void removeReady(SUnit *SU)
Remove SU from the ready set for this boundary.
static unsigned getMaxWaves(unsigned SGPRs, unsigned VGPRs, const MachineFunction &MF)
bool empty() const
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const
Return the maximum number of waves per SIMD for kernels using VGPRs VGPRs.
void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone)
Apply a set of heursitics to a new candidate.
Track the current register pressure at some position in the instruction stream, and remember the high...
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Policy for scheduling the next instruction in the candidate's zone.
const TargetSchedModel * SchedModel
const TargetRegisterInfo * TRI
ScheduleDAGMILive * DAG
void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone)
Set the CandPolicy given a scheduling zone given the current resources and latencies inside and outsi...
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes) const
Inverse of getMaxLocalMemWithWaveCount.
GenericScheduler shrinks the unscheduled zone using heuristics to balance the schedule.
static GCRegistry::Add< ShadowStackGC > C("shadow-stack","Very portable GC for uncooperative code generators")
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
bool isTopReady() const
Definition: ScheduleDAG.h:467
PressureChange CriticalMax
MachineBasicBlock::iterator bottom() const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Interface definition for SIInstrInfo.
SUnit * pickOnlyChoice()
Call this before applying any other heuristics to the Available queue.
SchedCandidate BotCand
Candidate last picked from Bot boundary.
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
Capture a change in pressure for a single pressure set.
void getUpwardPressure(const MachineInstr *MI, std::vector< unsigned > &PressureResult, std::vector< unsigned > &MaxPressureResult)
Get the pressure of each PSet after traversing this instruction bottom-up.
unsigned getSGPRPressureSet() const
unsigned NodeNum
Definition: ScheduleDAG.h:266
const std::vector< unsigned > & getRegSetPressureAtPos() const
Get the register set pressure at the current position, which may be less than the pressure across the...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
GCNMaxOccupancySchedStrategy(const MachineSchedContext *C)
SchedCandidate TopCand
Candidate last picked from Top boundary.
bool isBottomReady() const
Definition: ScheduleDAG.h:470
#define DEBUG(X)
Definition: Debug.h:100
void getDownwardPressure(const MachineInstr *MI, std::vector< unsigned > &PressureResult, std::vector< unsigned > &MaxPressureResult)
Get the pressure of each PSet after traversing this instruction top-down.
SUnit - Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:244