//===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This contains a MachineSchedStrategy implementation for maximizing wave
/// occupancy on GCN hardware.
//===----------------------------------------------------------------------===//

#include "GCNSchedStrategy.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"

#define DEBUG_TYPE "machine-scheduler"

using namespace llvm;

GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
    const MachineSchedContext *C) :
    GenericScheduler(C), TargetOccupancy(0), MF(nullptr) { }

void GCNMaxOccupancySchedStrategy::initialize(ScheduleDAGMI *DAG) {
  GenericScheduler::initialize(DAG);

  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);

  MF = &DAG->MF;

  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();

  // FIXME: This is also necessary because some passes that run after
  // scheduling and before regalloc increase register pressure.
  const int ErrorMargin = 3;

  SGPRExcessLimit = Context->RegClassInfo
    ->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass) - ErrorMargin;
  VGPRExcessLimit = Context->RegClassInfo
    ->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass) - ErrorMargin;
  if (TargetOccupancy) {
    SGPRCriticalLimit = ST.getMaxNumSGPRs(TargetOccupancy, true);
    VGPRCriticalLimit = ST.getMaxNumVGPRs(TargetOccupancy);
  } else {
    SGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF,
        AMDGPU::RegisterPressureSets::SReg_32);
    VGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF,
        AMDGPU::RegisterPressureSets::VGPR_32);
  }

  SGPRCriticalLimit -= ErrorMargin;
  VGPRCriticalLimit -= ErrorMargin;
}
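
// For a sense of scale (illustrative numbers, assuming a gfx9-class subtarget
// with a 256-entry VGPR file per SIMD; not taken from this file): at
// TargetOccupancy = 8, getMaxNumVGPRs(8) yields 256 / 8 = 32, so
// VGPRCriticalLimit becomes 32 - ErrorMargin = 29. A region whose VGPR_32
// pressure exceeds that is treated as critical because it risks dropping
// below the 8-wave target.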

void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
                                     bool AtTop, const RegPressureTracker &RPTracker,
                                     const SIRegisterInfo *SRI,
                                     unsigned SGPRPressure,
                                     unsigned VGPRPressure) {

  Cand.SU = SU;
  Cand.AtTop = AtTop;

  // getDownwardPressure() and getUpwardPressure() make temporary changes to
  // the tracker, so we need to pass those functions a non-const reference.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);

  Pressure.clear();
  MaxPressure.clear();

  if (AtTop)
    TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
  else {
    // FIXME: I think for bottom up scheduling, the register pressure is
    // cached and can be retrieved by DAG->getPressureDiff(SU).
    TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
  }

  unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
  unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];

  // If two instructions increase the pressure of different register sets
  // by the same amount, the generic scheduler will prefer to schedule the
  // instruction that increases the set with the fewest registers, which in
  // our case would be SGPRs. This is rarely what we want, so when we report
  // excess/critical register pressure, we do it either only for VGPRs or
  // only for SGPRs.

  // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
  const unsigned MaxVGPRPressureInc = 16;
  bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
  bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;

  // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
  // to increase the likelihood we don't go over the limits. We should improve
  // the analysis to look through dependencies to find the path with the least
  // register pressure.

  // We only need to update the RPDelta for instructions that increase register
  // pressure. Instructions that decrease or keep reg pressure the same will be
  // marked as RegExcess in tryCandidate() when they are compared with
  // instructions that increase the register pressure.
  if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
    Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
    Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
  }

  if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
    Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
    Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
  }

  // Register pressure is considered 'CRITICAL' if it is approaching a value
  // that would reduce the wave occupancy for the execution unit. When
  // register pressure is 'CRITICAL', increasing SGPR and VGPR pressure has
  // the same cost, so we don't need to prefer one over the other.

  int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
  int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;

  if (SGPRDelta >= 0 || VGPRDelta >= 0) {
    if (SGPRDelta > VGPRDelta) {
      Cand.RPDelta.CriticalMax =
        PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
      Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
    } else {
      Cand.RPDelta.CriticalMax =
        PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
      Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
    }
  }
}
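
// Note on how these deltas are consumed: GenericScheduler::tryCandidate()
// (defined elsewhere, not in this file) compares RPDelta.Excess before
// RPDelta.CriticalMax, so a candidate that pushes the tracked set past its
// excess limit loses to one that merely approaches the critical limit.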

// This function is mostly cut and pasted from
// GenericScheduler::pickNodeFromQueue()
void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
                                         const CandPolicy &ZonePolicy,
                                         const RegPressureTracker &RPTracker,
                                         SchedCandidate &Cand) {
  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
  ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
  unsigned SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
  unsigned VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
  ReadyQueue &Q = Zone.Available;
  for (SUnit *SU : Q) {

    SchedCandidate TryCand(ZonePolicy);
    initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI,
                  SGPRPressure, VGPRPressure);
    // Pass SchedBoundary only when comparing nodes from the same boundary.
    SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
    GenericScheduler::tryCandidate(Cand, TryCand, ZoneArg);
    if (TryCand.Reason != NoCand) {
      // Initialize resource delta if needed in case future heuristics query it.
      if (TryCand.ResDelta == SchedResourceDelta())
        TryCand.initResourceDelta(Zone.DAG, SchedModel);
      Cand.setBest(TryCand);
      LLVM_DEBUG(traceCandidate(Cand));
    }
  }
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNodeBidirectional()
SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    IsTopNode = true;
    return SU;
  }
  // Set the bottom-up policy based on the state of the current bottom zone and
  // the instructions outside the zone, including the top zone.
  CandPolicy BotPolicy;
  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
  // Set the top-down policy based on the state of the current top zone and
  // the instructions outside the zone, including the bottom zone.
  CandPolicy TopPolicy;
  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);

  // See if BotCand is still valid (because we previously scheduled from Top).
  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
      BotCand.Policy != BotPolicy) {
    BotCand.reset(CandPolicy());
    pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
    assert(BotCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    LLVM_DEBUG(traceCandidate(BotCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
      assert(TCand.SU == BotCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Check if the top Q has a better candidate.
  LLVM_DEBUG(dbgs() << "Picking from Top:\n");
  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
      TopCand.Policy != TopPolicy) {
    TopCand.reset(CandPolicy());
    pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
    assert(TopCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    LLVM_DEBUG(traceCandidate(TopCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
      assert(TCand.SU == TopCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Pick best from BotCand and TopCand.
  LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand);
             dbgs() << "Bot Cand: "; traceCandidate(BotCand););
  SchedCandidate Cand = BotCand;
  TopCand.Reason = NoCand;
  GenericScheduler::tryCandidate(Cand, TopCand, nullptr);
  if (TopCand.Reason != NoCand) {
    Cand.setBest(TopCand);
  }
  LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand););

  IsTopNode = Cand.AtTop;
  return Cand.SU;
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNode()
SUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return nullptr;
  }
  SUnit *SU;
  do {
    if (RegionPolicy.OnlyTopDown) {
      SU = Top.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        TopCand.reset(NoPolicy);
        pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
        assert(TopCand.Reason != NoCand && "failed to find a candidate");
        SU = TopCand.SU;
      }
      IsTopNode = true;
    } else if (RegionPolicy.OnlyBottomUp) {
      SU = Bot.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        BotCand.reset(NoPolicy);
        pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
        assert(BotCand.Reason != NoCand && "failed to find a candidate");
        SU = BotCand.SU;
      }
      IsTopNode = false;
    } else {
      SU = pickNodeBidirectional(IsTopNode);
    }
  } while (SU->isScheduled);

  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
                    << *SU->getInstr());
  return SU;
}

GCNScheduleDAGMILive::GCNScheduleDAGMILive(MachineSchedContext *C,
    std::unique_ptr<MachineSchedStrategy> S) :
    ScheduleDAGMILive(C, std::move(S)),
    ST(MF.getSubtarget<GCNSubtarget>()),
    MFI(*MF.getInfo<SIMachineFunctionInfo>()),
    StartingOccupancy(MFI.getOccupancy()),
    MinOccupancy(StartingOccupancy), Stage(Collect), RegionIdx(0) {

  LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
}
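
// Scheduling proceeds in stages (enumerated in GCNSchedStrategy.h): Collect
// only records regions, InitialSchedule performs the first real pass, and the
// UnclusteredReschedule and ClusteredLowOccupancyReschedule passes driven by
// finalizeSchedule() below rerun regions only when occupancy was lost.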

void GCNScheduleDAGMILive::schedule() {
  if (Stage == Collect) {
    // Just record regions at the first pass.
    Regions.push_back(std::make_pair(RegionBegin, RegionEnd));
    return;
  }

  std::vector<MachineInstr*> Unsched;
  Unsched.reserve(NumRegionInstrs);
  for (auto &I : *this) {
    Unsched.push_back(&I);
  }

  GCNRegPressure PressureBefore;
  if (LIS) {
    PressureBefore = Pressure[RegionIdx];

    LLVM_DEBUG(dbgs() << "Pressure before scheduling:\nRegion live-ins:";
               GCNRPTracker::printLiveRegs(dbgs(), LiveIns[RegionIdx], MRI);
               dbgs() << "Region live-in pressure: ";
               llvm::getRegPressure(MRI, LiveIns[RegionIdx]).print(dbgs());
               dbgs() << "Region register pressure: ";
               PressureBefore.print(dbgs()));
  }

  ScheduleDAGMILive::schedule();
  Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd);
  RescheduleRegions[RegionIdx] = false;

  if (!LIS)
    return;

  // Check the results of scheduling.
  GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
  auto PressureAfter = getRealRegPressure();

  LLVM_DEBUG(dbgs() << "Pressure after scheduling: ";
             PressureAfter.print(dbgs()));

  if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
      PressureAfter.getVGPRNum() <= S.VGPRCriticalLimit) {
    Pressure[RegionIdx] = PressureAfter;
    LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
    return;
  }
  unsigned Occ = MFI.getOccupancy();
  unsigned WavesAfter = std::min(Occ, PressureAfter.getOccupancy(ST));
  unsigned WavesBefore = std::min(Occ, PressureBefore.getOccupancy(ST));
  LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
                    << ", after " << WavesAfter << ".\n");

  // We could not keep the current target occupancy because of the just
  // scheduled region. Record the new occupancy for the next scheduling cycle.
  unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);
  // Allow memory bound functions to drop to 4 waves if not limited by an
  // attribute.
  if (WavesAfter < WavesBefore && WavesAfter < MinOccupancy &&
      WavesAfter >= MFI.getMinAllowedOccupancy()) {
    LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
                      << MFI.getMinAllowedOccupancy() << " waves\n");
    NewOccupancy = WavesAfter;
  }
  if (NewOccupancy < MinOccupancy) {
    MinOccupancy = NewOccupancy;
    MFI.limitOccupancy(MinOccupancy);
    LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
                      << MinOccupancy << ".\n");
  }

  unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
  unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
  if (PressureAfter.getVGPRNum() > MaxVGPRs ||
      PressureAfter.getSGPRNum() > MaxSGPRs)
    RescheduleRegions[RegionIdx] = true;

  if (WavesAfter >= MinOccupancy) {
    if (Stage == UnclusteredReschedule &&
        !PressureAfter.less(ST, PressureBefore)) {
      LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n");
    } else if (WavesAfter > MFI.getMinWavesPerEU() ||
               PressureAfter.less(ST, PressureBefore) ||
               !RescheduleRegions[RegionIdx]) {
      Pressure[RegionIdx] = PressureAfter;
      return;
    } else {
      LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
    }
  }

  LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
  RescheduleRegions[RegionIdx] = true;
  RegionEnd = RegionBegin;
  for (MachineInstr *MI : Unsched) {
    if (MI->isDebugInstr())
      continue;

    if (MI->getIterator() != RegionEnd) {
      BB->remove(MI);
      BB->insert(RegionEnd, MI);
      if (!MI->isDebugInstr())
        LIS->handleMove(*MI, true);
    }
    // Reset read-undef flags and update them later.
    for (auto &Op : MI->operands())
      if (Op.isReg() && Op.isDef())
        Op.setIsUndef(false);
    RegisterOperands RegOpers;
    RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
    if (!MI->isDebugInstr()) {
      if (ShouldTrackLaneMasks) {
        // Adjust liveness and add missing dead+read-undef flags.
        SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
        RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
      } else {
        // Adjust for missing dead-def flags.
        RegOpers.detectDeadDefs(*MI, *LIS);
      }
    }
    RegionEnd = MI->getIterator();
    ++RegionEnd;
    LLVM_DEBUG(dbgs() << "Scheduling " << *MI);
  }
  RegionBegin = Unsched.front()->getIterator();
  Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd);

  placeDebugValues();
}

GCNRegPressure GCNScheduleDAGMILive::getRealRegPressure() const {
  GCNDownwardRPTracker RPTracker(*LIS);
  RPTracker.advance(begin(), end(), &LiveIns[RegionIdx]);
  return RPTracker.moveMaxPressure();
}

void GCNScheduleDAGMILive::computeBlockPressure(const MachineBasicBlock *MBB) {
  GCNDownwardRPTracker RPTracker(*LIS);

  // If the block has a single successor, that successor's live-ins are the
  // live-outs of the current block. We can reuse the calculated live set if
  // the successor will be sent to scheduling after the current block.
  const MachineBasicBlock *OnlySucc = nullptr;
  if (MBB->succ_size() == 1 && !(*MBB->succ_begin())->empty()) {
    SlotIndexes *Ind = LIS->getSlotIndexes();
    if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(*MBB->succ_begin()))
      OnlySucc = *MBB->succ_begin();
  }

  // Scheduler sends regions from the end of the block upwards.
  size_t CurRegion = RegionIdx;
  for (size_t E = Regions.size(); CurRegion != E; ++CurRegion)
    if (Regions[CurRegion].first->getParent() != MBB)
      break;
  --CurRegion;

  auto I = MBB->begin();
  auto LiveInIt = MBBLiveIns.find(MBB);
  if (LiveInIt != MBBLiveIns.end()) {
    auto LiveIn = std::move(LiveInIt->second);
    RPTracker.reset(*MBB->begin(), &LiveIn);
    MBBLiveIns.erase(LiveInIt);
  } else {
    auto &Rgn = Regions[CurRegion];
    I = Rgn.first;
    auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
    auto LRS = BBLiveInMap.lookup(NonDbgMI);
    assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
    RPTracker.reset(*I, &LRS);
  }

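  // The loop below walks the block top-down while regions were recorded in
  // reverse program order: each time the tracker reaches a region boundary it
  // captures that region's live-ins and maximum pressure, counting CurRegion
  // down until the region currently being scheduled (RegionIdx) is covered.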
  for ( ; ; ) {
    I = RPTracker.getNext();

    if (Regions[CurRegion].first == I) {
      LiveIns[CurRegion] = RPTracker.getLiveRegs();
      RPTracker.clearMaxPressure();
    }

    if (Regions[CurRegion].second == I) {
      Pressure[CurRegion] = RPTracker.moveMaxPressure();
      if (CurRegion-- == RegionIdx)
        break;
    }
    RPTracker.advanceToNext();
    RPTracker.advanceBeforeNext();
  }

  if (OnlySucc) {
    if (I != MBB->end()) {
      RPTracker.advanceToNext();
      RPTracker.advance(MBB->end());
    }
    RPTracker.reset(*OnlySucc->begin(), &RPTracker.getLiveRegs());
    RPTracker.advanceBeforeNext();
    MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();
  }
}

DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
GCNScheduleDAGMILive::getBBLiveInMap() const {
  assert(!Regions.empty());
  std::vector<MachineInstr *> BBStarters;
  BBStarters.reserve(Regions.size());
  auto I = Regions.rbegin(), E = Regions.rend();
  auto *BB = I->first->getParent();
  do {
    auto *MI = &*skipDebugInstructionsForward(I->first, I->second);
    BBStarters.push_back(MI);
    do {
      ++I;
    } while (I != E && I->first->getParent() == BB);
  } while (I != E);
  return getLiveRegMap(BBStarters, false /*After*/, *LIS);
}

void GCNScheduleDAGMILive::finalizeSchedule() {
  GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
  LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");

  LiveIns.resize(Regions.size());
  Pressure.resize(Regions.size());
  RescheduleRegions.resize(Regions.size());
  RescheduleRegions.set();

  if (!Regions.empty())
    BBLiveInMap = getBBLiveInMap();

  std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;

  do {
    Stage++;
    RegionIdx = 0;
    MachineBasicBlock *MBB = nullptr;

    if (Stage > InitialSchedule) {
      if (!LIS)
        break;

      // Retry function scheduling if the resulting occupancy is lower than
      // the one used for first-pass scheduling. This gives more freedom to
      // schedule low register pressure blocks.
      // Code is partially copied from MachineSchedulerBase::scheduleRegions().

      if (Stage == UnclusteredReschedule) {
        if (RescheduleRegions.none())
          continue;
        LLVM_DEBUG(dbgs() <<
                   "Retrying function scheduling without clustering.\n");
      }

      if (Stage == ClusteredLowOccupancyReschedule) {
        if (StartingOccupancy <= MinOccupancy)
          break;

        LLVM_DEBUG(
            dbgs()
            << "Retrying function scheduling with lowest recorded occupancy "
            << MinOccupancy << ".\n");

        S.setTargetOccupancy(MinOccupancy);
      }
    }

    if (Stage == UnclusteredReschedule)
      SavedMutations.swap(Mutations);

    for (auto Region : Regions) {
      if (Stage == UnclusteredReschedule && !RescheduleRegions[RegionIdx])
        continue;

      RegionBegin = Region.first;
      RegionEnd = Region.second;

      if (RegionBegin->getParent() != MBB) {
        if (MBB) finishBlock();
        MBB = RegionBegin->getParent();
        startBlock(MBB);
        if (Stage == InitialSchedule)
          computeBlockPressure(MBB);
      }

      unsigned NumRegionInstrs = std::distance(begin(), end());
      enterRegion(MBB, begin(), end(), NumRegionInstrs);

      // Skip empty scheduling regions (0 or 1 schedulable instructions).
      if (begin() == end() || begin() == std::prev(end())) {
        exitRegion();
        continue;
      }

      LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
      LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*MBB) << " "
                        << MBB->getName() << "\n  From: " << *begin()
                        << "    To: ";
                 if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
                 else dbgs() << "End";
                 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');

      schedule();

      exitRegion();
      ++RegionIdx;
    }
    finishBlock();

    if (Stage == UnclusteredReschedule)
      SavedMutations.swap(Mutations);
  } while (Stage != LastStage);
}
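
// Usage sketch (illustrative, not part of this file): a target wires the DAG
// and strategy together when creating its MachineScheduler instance, roughly
// as the AMDGPU target machine does. The clustering mutations shown are the
// generic hooks declared in llvm/CodeGen/MachineScheduler.h.
//
//   static ScheduleDAGInstrs *
//   createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
//     ScheduleDAGMILive *DAG = new GCNScheduleDAGMILive(
//         C, std::make_unique<GCNMaxOccupancySchedStrategy>(C));
//     DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
//     DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
//     return DAG;
//   }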