Line data Source code
1 : //===- GCNIterativeScheduler.cpp ------------------------------------------===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 :
10 : #include "GCNIterativeScheduler.h"
11 : #include "AMDGPUSubtarget.h"
12 : #include "GCNRegPressure.h"
13 : #include "GCNSchedStrategy.h"
14 : #include "SIMachineFunctionInfo.h"
15 : #include "llvm/ADT/ArrayRef.h"
16 : #include "llvm/ADT/STLExtras.h"
17 : #include "llvm/ADT/SmallVector.h"
18 : #include "llvm/CodeGen/LiveIntervals.h"
19 : #include "llvm/CodeGen/MachineBasicBlock.h"
20 : #include "llvm/CodeGen/MachineFunction.h"
21 : #include "llvm/CodeGen/RegisterPressure.h"
22 : #include "llvm/CodeGen/ScheduleDAG.h"
23 : #include "llvm/Config/llvm-config.h"
24 : #include "llvm/Support/Compiler.h"
25 : #include "llvm/Support/Debug.h"
26 : #include "llvm/Support/raw_ostream.h"
27 : #include <algorithm>
28 : #include <cassert>
29 : #include <iterator>
30 : #include <limits>
31 : #include <memory>
32 : #include <type_traits>
33 : #include <vector>
34 :
35 : using namespace llvm;
36 :
37 : #define DEBUG_TYPE "machine-scheduler"
38 :
39 : namespace llvm {
40 :
41 : std::vector<const SUnit *> makeMinRegSchedule(ArrayRef<const SUnit *> TopRoots,
42 : const ScheduleDAG &DAG);
43 :
44 : std::vector<const SUnit*> makeGCNILPScheduler(ArrayRef<const SUnit*> BotRoots,
45 : const ScheduleDAG &DAG);
46 : }
47 :
48 : // shim accessors for different order containers
49 : static inline MachineInstr *getMachineInstr(MachineInstr *MI) {
50 : return MI;
51 : }
52 : static inline MachineInstr *getMachineInstr(const SUnit *SU) {
53 3868 : return SU->getInstr();
54 : }
55 : static inline MachineInstr *getMachineInstr(const SUnit &SU) {
56 228 : return SU.getInstr();
57 : }
58 :
59 : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
60 : LLVM_DUMP_METHOD
61 : static void printRegion(raw_ostream &OS,
62 : MachineBasicBlock::iterator Begin,
63 : MachineBasicBlock::iterator End,
64 : const LiveIntervals *LIS,
65 : unsigned MaxInstNum =
66 : std::numeric_limits<unsigned>::max()) {
67 : auto BB = Begin->getParent();
68 : OS << BB->getParent()->getName() << ":" << printMBBReference(*BB) << ' '
69 : << BB->getName() << ":\n";
70 : auto I = Begin;
71 : MaxInstNum = std::max(MaxInstNum, 1u);
72 : for (; I != End && MaxInstNum; ++I, --MaxInstNum) {
73 : if (!I->isDebugInstr() && LIS)
74 : OS << LIS->getInstructionIndex(*I);
75 : OS << '\t' << *I;
76 : }
77 : if (I != End) {
78 : OS << "\t...\n";
79 : I = std::prev(End);
80 : if (!I->isDebugInstr() && LIS)
81 : OS << LIS->getInstructionIndex(*I);
82 : OS << '\t' << *I;
83 : }
84 : if (End != BB->end()) { // print boundary inst if present
85 : OS << "----\n";
86 : if (LIS) OS << LIS->getInstructionIndex(*End) << '\t';
87 : OS << *End;
88 : }
89 : }
90 :
91 : LLVM_DUMP_METHOD
92 : static void printLivenessInfo(raw_ostream &OS,
93 : MachineBasicBlock::iterator Begin,
94 : MachineBasicBlock::iterator End,
95 : const LiveIntervals *LIS) {
96 : const auto BB = Begin->getParent();
97 : const auto &MRI = BB->getParent()->getRegInfo();
98 :
99 : const auto LiveIns = getLiveRegsBefore(*Begin, *LIS);
100 : OS << "LIn RP: ";
101 : getRegPressure(MRI, LiveIns).print(OS);
102 :
103 : const auto BottomMI = End == BB->end() ? std::prev(End) : End;
104 : const auto LiveOuts = getLiveRegsAfter(*BottomMI, *LIS);
105 : OS << "LOt RP: ";
106 : getRegPressure(MRI, LiveOuts).print(OS);
107 : }
108 :
109 : LLVM_DUMP_METHOD
110 : void GCNIterativeScheduler::printRegions(raw_ostream &OS) const {
111 : const auto &ST = MF.getSubtarget<GCNSubtarget>();
112 : for (const auto R : Regions) {
113 : OS << "Region to schedule ";
114 : printRegion(OS, R->Begin, R->End, LIS, 1);
115 : printLivenessInfo(OS, R->Begin, R->End, LIS);
116 : OS << "Max RP: ";
117 : R->MaxPressure.print(OS, &ST);
118 : }
119 : }
120 :
121 : LLVM_DUMP_METHOD
122 : void GCNIterativeScheduler::printSchedResult(raw_ostream &OS,
123 : const Region *R,
124 : const GCNRegPressure &RP) const {
125 : OS << "\nAfter scheduling ";
126 : printRegion(OS, R->Begin, R->End, LIS);
127 : printSchedRP(OS, R->MaxPressure, RP);
128 : OS << '\n';
129 : }
130 :
131 : LLVM_DUMP_METHOD
132 : void GCNIterativeScheduler::printSchedRP(raw_ostream &OS,
133 : const GCNRegPressure &Before,
134 : const GCNRegPressure &After) const {
135 : const auto &ST = MF.getSubtarget<GCNSubtarget>();
136 : OS << "RP before: ";
137 : Before.print(OS, &ST);
138 : OS << "RP after: ";
139 : After.print(OS, &ST);
140 : }
141 : #endif
142 :
143 : // DAG builder helper
144 : class GCNIterativeScheduler::BuildDAG {
145 : GCNIterativeScheduler &Sch;
146 : SmallVector<SUnit *, 8> TopRoots;
147 :
148 : SmallVector<SUnit*, 8> BotRoots;
149 : public:
150 8 : BuildDAG(const Region &R, GCNIterativeScheduler &_Sch)
151 8 : : Sch(_Sch) {
152 8 : auto BB = R.Begin->getParent();
153 8 : Sch.BaseClass::startBlock(BB);
154 8 : Sch.BaseClass::enterRegion(BB, R.Begin, R.End, R.NumRegionInstrs);
155 :
156 8 : Sch.buildSchedGraph(Sch.AA, nullptr, nullptr, nullptr,
157 : /*TrackLaneMask*/true);
158 8 : Sch.Topo.InitDAGTopologicalSorting();
159 8 : Sch.findRootsAndBiasEdges(TopRoots, BotRoots);
160 8 : }
161 :
162 16 : ~BuildDAG() {
163 8 : Sch.BaseClass::exitRegion();
164 8 : Sch.BaseClass::finishBlock();
165 8 : }
166 :
167 : ArrayRef<const SUnit *> getTopRoots() const {
168 : return TopRoots;
169 : }
170 : ArrayRef<SUnit*> getBottomRoots() const {
171 : return BotRoots;
172 : }
173 : };
174 :
175 : class GCNIterativeScheduler::OverrideLegacyStrategy {
176 : GCNIterativeScheduler &Sch;
177 : Region &Rgn;
178 : std::unique_ptr<MachineSchedStrategy> SaveSchedImpl;
179 : GCNRegPressure SaveMaxRP;
180 :
181 : public:
182 4 : OverrideLegacyStrategy(Region &R,
183 : MachineSchedStrategy &OverrideStrategy,
184 : GCNIterativeScheduler &_Sch)
185 4 : : Sch(_Sch)
186 : , Rgn(R)
187 : , SaveSchedImpl(std::move(_Sch.SchedImpl))
188 8 : , SaveMaxRP(R.MaxPressure) {
189 : Sch.SchedImpl.reset(&OverrideStrategy);
190 4 : auto BB = R.Begin->getParent();
191 4 : Sch.BaseClass::startBlock(BB);
192 4 : Sch.BaseClass::enterRegion(BB, R.Begin, R.End, R.NumRegionInstrs);
193 4 : }
194 :
195 8 : ~OverrideLegacyStrategy() {
196 4 : Sch.BaseClass::exitRegion();
197 4 : Sch.BaseClass::finishBlock();
198 4 : Sch.SchedImpl.release();
199 : Sch.SchedImpl = std::move(SaveSchedImpl);
200 4 : }
201 :
202 0 : void schedule() {
203 : assert(Sch.RegionBegin == Rgn.Begin && Sch.RegionEnd == Rgn.End);
204 : LLVM_DEBUG(dbgs() << "\nScheduling ";
205 : printRegion(dbgs(), Rgn.Begin, Rgn.End, Sch.LIS, 2));
206 0 : Sch.BaseClass::schedule();
207 :
208 : // Unfortunatelly placeDebugValues incorrectly modifies RegionEnd, restore
209 0 : Sch.RegionEnd = Rgn.End;
210 : //assert(Rgn.End == Sch.RegionEnd);
211 0 : Rgn.Begin = Sch.RegionBegin;
212 : Rgn.MaxPressure.clear();
213 0 : }
214 :
215 : void restoreOrder() {
216 : assert(Sch.RegionBegin == Rgn.Begin && Sch.RegionEnd == Rgn.End);
217 : // DAG SUnits are stored using original region's order
218 : // so just use SUnits as the restoring schedule
219 1 : Sch.scheduleRegion(Rgn, Sch.SUnits, SaveMaxRP);
220 : }
221 : };
222 :
223 : namespace {
224 :
225 : // just a stub to make base class happy
226 8 : class SchedStrategyStub : public MachineSchedStrategy {
227 : public:
228 16 : bool shouldTrackPressure() const override { return false; }
229 16 : bool shouldTrackLaneMasks() const override { return false; }
230 0 : void initialize(ScheduleDAGMI *DAG) override {}
231 0 : SUnit *pickNode(bool &IsTopNode) override { return nullptr; }
232 0 : void schedNode(SUnit *SU, bool IsTopNode) override {}
233 0 : void releaseTopNode(SUnit *SU) override {}
234 0 : void releaseBottomNode(SUnit *SU) override {}
235 : };
236 :
237 : } // end anonymous namespace
238 :
239 8 : GCNIterativeScheduler::GCNIterativeScheduler(MachineSchedContext *C,
240 8 : StrategyKind S)
241 : : BaseClass(C, llvm::make_unique<SchedStrategyStub>())
242 : , Context(C)
243 : , Strategy(S)
244 24 : , UPTracker(*LIS) {
245 8 : }
246 :
247 : // returns max pressure for a region
248 : GCNRegPressure
249 12 : GCNIterativeScheduler::getRegionPressure(MachineBasicBlock::iterator Begin,
250 : MachineBasicBlock::iterator End)
251 : const {
252 : // For the purpose of pressure tracking bottom inst of the region should
253 : // be also processed. End is either BB end, BB terminator inst or sched
254 : // boundary inst.
255 12 : auto const BBEnd = Begin->getParent()->end();
256 12 : auto const BottomMI = End == BBEnd ? std::prev(End) : End;
257 :
258 : // scheduleRegions walks bottom to top, so its likely we just get next
259 : // instruction to track
260 12 : auto AfterBottomMI = std::next(BottomMI);
261 12 : if (AfterBottomMI == BBEnd ||
262 0 : &*AfterBottomMI != UPTracker.getLastTrackedMI()) {
263 12 : UPTracker.reset(*BottomMI);
264 : } else {
265 : assert(UPTracker.isValid());
266 : }
267 :
268 3648 : for (auto I = BottomMI; I != Begin; --I)
269 3636 : UPTracker.recede(*I);
270 :
271 12 : UPTracker.recede(*Begin);
272 :
273 : assert(UPTracker.isValid() ||
274 : (dbgs() << "Tracked region ",
275 : printRegion(dbgs(), Begin, End, LIS), false));
276 12 : return UPTracker.moveMaxPressure();
277 : }
278 :
279 : // returns max pressure for a tentative schedule
280 : template <typename Range> GCNRegPressure
281 8 : GCNIterativeScheduler::getSchedulePressure(const Region &R,
282 : Range &&Schedule) const {
283 8 : auto const BBEnd = R.Begin->getParent()->end();
284 8 : GCNUpwardRPTracker RPTracker(*LIS);
285 8 : if (R.End != BBEnd) {
286 : // R.End points to the boundary instruction but the
287 : // schedule doesn't include it
288 8 : RPTracker.reset(*R.End);
289 8 : RPTracker.recede(*R.End);
290 : } else {
291 : // R.End doesn't point to the boundary instruction
292 0 : RPTracker.reset(*std::prev(BBEnd));
293 : }
294 2432 : for (auto I = Schedule.end(), B = Schedule.begin(); I != B;) {
295 2424 : RPTracker.recede(*getMachineInstr(*--I));
296 : }
297 8 : return RPTracker.moveMaxPressure();
298 : }
299 :
300 8 : void GCNIterativeScheduler::enterRegion(MachineBasicBlock *BB, // overriden
301 : MachineBasicBlock::iterator Begin,
302 : MachineBasicBlock::iterator End,
303 : unsigned NumRegionInstrs) {
304 8 : BaseClass::enterRegion(BB, Begin, End, NumRegionInstrs);
305 8 : if (NumRegionInstrs > 2) {
306 8 : Regions.push_back(
307 16 : new (Alloc.Allocate())
308 : Region { Begin, End, NumRegionInstrs,
309 8 : getRegionPressure(Begin, End), nullptr });
310 : }
311 8 : }
312 :
313 8 : void GCNIterativeScheduler::schedule() { // overriden
314 : // do nothing
315 : LLVM_DEBUG(printLivenessInfo(dbgs(), RegionBegin, RegionEnd, LIS);
316 : if (!Regions.empty() && Regions.back()->Begin == RegionBegin) {
317 : dbgs() << "Max RP: ";
318 : Regions.back()->MaxPressure.print(
319 : dbgs(), &MF.getSubtarget<GCNSubtarget>());
320 : } dbgs()
321 : << '\n';);
322 8 : }
323 :
324 8 : void GCNIterativeScheduler::finalizeSchedule() { // overriden
325 8 : if (Regions.empty())
326 : return;
327 8 : switch (Strategy) {
328 0 : case SCHEDULE_MINREGONLY: scheduleMinReg(); break;
329 3 : case SCHEDULE_MINREGFORCED: scheduleMinReg(true); break;
330 3 : case SCHEDULE_LEGACYMAXOCCUPANCY: scheduleLegacyMaxOccupancy(); break;
331 2 : case SCHEDULE_ILP: scheduleILP(false); break;
332 : }
333 : }
334 :
335 : // Detach schedule from SUnits and interleave it with debug values.
336 : // Returned schedule becomes independent of DAG state.
337 : std::vector<MachineInstr*>
338 2 : GCNIterativeScheduler::detachSchedule(ScheduleRef Schedule) const {
339 : std::vector<MachineInstr*> Res;
340 2 : Res.reserve(Schedule.size() * 2);
341 :
342 2 : if (FirstDbgValue)
343 0 : Res.push_back(FirstDbgValue);
344 :
345 2 : const auto DbgB = DbgValues.begin(), DbgE = DbgValues.end();
346 760 : for (auto SU : Schedule) {
347 758 : Res.push_back(SU->getInstr());
348 : const auto &D = std::find_if(DbgB, DbgE, [SU](decltype(*DbgB) &P) {
349 0 : return P.second == SU->getInstr();
350 : });
351 758 : if (D != DbgE)
352 0 : Res.push_back(D->first);
353 : }
354 2 : return Res;
355 : }
356 :
357 2 : void GCNIterativeScheduler::setBestSchedule(Region &R,
358 : ScheduleRef Schedule,
359 : const GCNRegPressure &MaxRP) {
360 2 : R.BestSchedule.reset(
361 2 : new TentativeSchedule{ detachSchedule(Schedule), MaxRP });
362 2 : }
363 :
364 1 : void GCNIterativeScheduler::scheduleBest(Region &R) {
365 : assert(R.BestSchedule.get() && "No schedule specified");
366 1 : scheduleRegion(R, R.BestSchedule->Schedule, R.BestSchedule->MaxPressure);
367 : R.BestSchedule.reset();
368 1 : }
369 :
370 : // minimal required region scheduler, works for ranges of SUnits*,
371 : // SUnits or MachineIntrs*
372 : template <typename Range>
373 7 : void GCNIterativeScheduler::scheduleRegion(Region &R, Range &&Schedule,
374 : const GCNRegPressure &MaxRP) {
375 : assert(RegionBegin == R.Begin && RegionEnd == R.End);
376 : assert(LIS != nullptr);
377 : #ifndef NDEBUG
378 : const auto SchedMaxRP = getSchedulePressure(R, Schedule);
379 : #endif
380 7 : auto BB = R.Begin->getParent();
381 : auto Top = R.Begin;
382 2095 : for (const auto &I : Schedule) {
383 1861 : auto MI = getMachineInstr(I);
384 2088 : if (MI != &*Top) {
385 : BB->remove(MI);
386 : BB->insert(Top, MI);
387 : if (!MI->isDebugInstr())
388 1628 : LIS->handleMove(*MI, true);
389 : }
390 : if (!MI->isDebugInstr()) {
391 : // Reset read - undef flags and update them later.
392 12920 : for (auto &Op : MI->operands())
393 10832 : if (Op.isReg() && Op.isDef())
394 : Op.setIsUndef(false);
395 :
396 2088 : RegisterOperands RegOpers;
397 2088 : RegOpers.collect(*MI, *TRI, MRI, /*ShouldTrackLaneMasks*/true,
398 : /*IgnoreDead*/false);
399 : // Adjust liveness and add missing dead+read-undef flags.
400 2088 : auto SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
401 2088 : RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
402 : }
403 2088 : Top = std::next(MI->getIterator());
404 : }
405 12 : RegionBegin = getMachineInstr(Schedule.front());
406 :
407 : // Schedule consisting of MachineInstr* is considered 'detached'
408 : // and already interleaved with debug values
409 : if (!std::is_same<decltype(*Schedule.begin()), MachineInstr*>::value) {
410 7 : placeDebugValues();
411 : // Unfortunatelly placeDebugValues incorrectly modifies RegionEnd, restore
412 : //assert(R.End == RegionEnd);
413 7 : RegionEnd = R.End;
414 : }
415 :
416 7 : R.Begin = RegionBegin;
417 7 : R.MaxPressure = MaxRP;
418 :
419 : #ifndef NDEBUG
420 : const auto RegionMaxRP = getRegionPressure(R);
421 : const auto &ST = MF.getSubtarget<GCNSubtarget>();
422 : #endif
423 : assert((SchedMaxRP == RegionMaxRP && (MaxRP.empty() || SchedMaxRP == MaxRP))
424 : || (dbgs() << "Max RP mismatch!!!\n"
425 : "RP for schedule (calculated): ",
426 : SchedMaxRP.print(dbgs(), &ST),
427 : dbgs() << "RP for schedule (reported): ",
428 : MaxRP.print(dbgs(), &ST),
429 : dbgs() << "RP after scheduling: ",
430 : RegionMaxRP.print(dbgs(), &ST),
431 : false));
432 7 : }
433 5 :
434 : // Sort recorded regions by pressure - highest at the front
435 : void GCNIterativeScheduler::sortRegionsByPressure(unsigned TargetOcc) {
436 : const auto &ST = MF.getSubtarget<GCNSubtarget>();
437 : llvm::sort(Regions, [&ST, TargetOcc](const Region *R1, const Region *R2) {
438 : return R2->MaxPressure.less(ST, R1->MaxPressure, TargetOcc);
439 : });
440 5 : }
441 :
442 1444 : ///////////////////////////////////////////////////////////////////////////////
443 1439 : // Legacy MaxOccupancy Strategy
444 1439 :
445 : // Tries to increase occupancy applying minreg scheduler for a sequence of
446 : // most demanding regions. Obtained schedules are saved as BestSchedule for a
447 : // region.
448 1167 : // TargetOcc is the best achievable occupancy for a kernel.
449 : // Returns better occupancy on success or current occupancy on fail.
450 : // BestSchedules aren't deleted on fail.
451 : unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
452 9048 : // TODO: assert Regions are sorted descending by pressure
453 7609 : const auto &ST = MF.getSubtarget<GCNSubtarget>();
454 : const auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);
455 : LLVM_DEBUG(dbgs() << "Trying to improve occupancy, target = " << TargetOcc
456 1439 : << ", current = " << Occ << '\n');
457 1439 :
458 : auto NewOcc = TargetOcc;
459 : for (auto R : Regions) {
460 1439 : if (R->MaxPressure.getOccupancy(ST) >= NewOcc)
461 1439 : break;
462 :
463 1439 : LLVM_DEBUG(printRegion(dbgs(), R->Begin, R->End, LIS, 3);
464 : printLivenessInfo(dbgs(), R->Begin, R->End, LIS));
465 10 :
466 : BuildDAG DAG(*R, *this);
467 : const auto MinSchedule = makeMinRegSchedule(DAG.getTopRoots(), *this);
468 : const auto MaxRP = getSchedulePressure(*R, MinSchedule);
469 : LLVM_DEBUG(dbgs() << "Occupancy improvement attempt:\n";
470 5 : printSchedRP(dbgs(), R->MaxPressure, MaxRP));
471 :
472 : NewOcc = std::min(NewOcc, MaxRP.getOccupancy(ST));
473 5 : if (NewOcc <= Occ)
474 : break;
475 :
476 5 : setBestSchedule(*R, MinSchedule, MaxRP);
477 5 : }
478 : LLVM_DEBUG(dbgs() << "New occupancy = " << NewOcc
479 : << ", prev occupancy = " << Occ << '\n');
480 : if (NewOcc > Occ) {
481 : SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
482 : MFI->increaseOccupancy(MF, NewOcc);
483 : }
484 :
485 : return std::max(NewOcc, Occ);
486 : }
487 :
488 : void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
489 : bool TryMaximizeOccupancy) {
490 : const auto &ST = MF.getSubtarget<GCNSubtarget>();
491 : SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
492 5 : auto TgtOcc = MFI->getMinAllowedOccupancy();
493 1 :
494 : sortRegionsByPressure(TgtOcc);
495 : auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);
496 :
497 : if (TryMaximizeOccupancy && Occ < TgtOcc)
498 : Occ = tryMaximizeOccupancy(TgtOcc);
499 :
500 1 : // This is really weird but for some magic scheduling regions twice
501 : // gives performance improvement
502 423 : const int NumPasses = Occ < TgtOcc ? 2 : 1;
503 422 :
504 422 : TgtOcc = std::min(Occ, TgtOcc);
505 : LLVM_DEBUG(dbgs() << "Scheduling using default scheduler, "
506 : "target occupancy = "
507 : << TgtOcc << '\n');
508 335 : GCNMaxOccupancySchedStrategy LStrgy(Context);
509 : unsigned FinalOccupancy = std::min(Occ, MFI->getOccupancy());
510 :
511 : for (int I = 0; I < NumPasses; ++I) {
512 2195 : // running first pass with TargetOccupancy = 0 mimics previous scheduling
513 1773 : // approach and is a performance magic
514 : LStrgy.setTargetOccupancy(I == 0 ? 0 : TgtOcc);
515 : for (auto R : Regions) {
516 422 : OverrideLegacyStrategy Ovr(*R, LStrgy, *this);
517 422 :
518 : Ovr.schedule();
519 : const auto RP = getRegionPressure(*R);
520 422 : LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP));
521 422 :
522 : if (RP.getOccupancy(ST) < TgtOcc) {
523 422 : LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc);
524 : if (R->BestSchedule.get() &&
525 1 : R->BestSchedule->MaxPressure.getOccupancy(ST) >= TgtOcc) {
526 : LLVM_DEBUG(dbgs() << ", scheduling minimal register\n");
527 : scheduleBest(*R);
528 : } else {
529 : LLVM_DEBUG(dbgs() << ", restoring\n");
530 1 : Ovr.restoreOrder();
531 : assert(R->MaxPressure.getOccupancy(ST) >= TgtOcc);
532 : }
533 1 : }
534 : FinalOccupancy = std::min(FinalOccupancy, RP.getOccupancy(ST));
535 : }
536 1 : }
537 1 : MFI->limitOccupancy(FinalOccupancy);
538 : }
539 :
540 : ///////////////////////////////////////////////////////////////////////////////
541 : // Minimal Register Strategy
542 :
543 : void GCNIterativeScheduler::scheduleMinReg(bool force) {
544 : const auto &ST = MF.getSubtarget<GCNSubtarget>();
545 : const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
546 : const auto TgtOcc = MFI->getOccupancy();
547 : sortRegionsByPressure(TgtOcc);
548 :
549 : auto MaxPressure = Regions.front()->MaxPressure;
550 : for (auto R : Regions) {
551 : if (!force && R->MaxPressure.less(ST, MaxPressure, TgtOcc))
552 1 : break;
553 1 :
554 : BuildDAG DAG(*R, *this);
555 : const auto MinSchedule = makeMinRegSchedule(DAG.getTopRoots(), *this);
556 :
557 : const auto RP = getSchedulePressure(*R, MinSchedule);
558 : LLVM_DEBUG(if (R->MaxPressure.less(ST, RP, TgtOcc)) {
559 : dbgs() << "\nWarning: Pressure becomes worse after minreg!";
560 1 : printSchedRP(dbgs(), R->MaxPressure, RP);
561 : });
562 228 :
563 : if (!force && MaxPressure.less(ST, RP, TgtOcc))
564 227 : break;
565 :
566 : scheduleRegion(*R, MinSchedule, RP);
567 : LLVM_DEBUG(printSchedResult(dbgs(), R, RP));
568 126 :
569 : MaxPressure = RP;
570 : }
571 : }
572 1677 :
573 1450 : ///////////////////////////////////////////////////////////////////////////////
574 : // ILP scheduler port
575 :
576 227 : void GCNIterativeScheduler::scheduleILP(
577 227 : bool TryMaximizeOccupancy) {
578 : const auto &ST = MF.getSubtarget<GCNSubtarget>();
579 : SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
580 227 : auto TgtOcc = MFI->getMinAllowedOccupancy();
581 227 :
582 : sortRegionsByPressure(TgtOcc);
583 227 : auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);
584 :
585 1 : if (TryMaximizeOccupancy && Occ < TgtOcc)
586 : Occ = tryMaximizeOccupancy(TgtOcc);
587 :
588 : TgtOcc = std::min(Occ, TgtOcc);
589 : LLVM_DEBUG(dbgs() << "Scheduling using default scheduler, "
590 1 : "target occupancy = "
591 : << TgtOcc << '\n');
592 :
593 1 : unsigned FinalOccupancy = std::min(Occ, MFI->getOccupancy());
594 : for (auto R : Regions) {
595 : BuildDAG DAG(*R, *this);
596 1 : const auto ILPSchedule = makeGCNILPScheduler(DAG.getBottomRoots(), *this);
597 1 :
598 : const auto RP = getSchedulePressure(*R, ILPSchedule);
599 : LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP));
600 :
601 : if (RP.getOccupancy(ST) < TgtOcc) {
602 : LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc);
603 : if (R->BestSchedule.get() &&
604 : R->BestSchedule->MaxPressure.getOccupancy(ST) >= TgtOcc) {
605 : LLVM_DEBUG(dbgs() << ", scheduling minimal register\n");
606 : scheduleBest(*R);
607 : }
608 : } else {
609 : scheduleRegion(*R, ILPSchedule, RP);
610 : LLVM_DEBUG(printSchedResult(dbgs(), R, RP));
611 : FinalOccupancy = std::min(FinalOccupancy, RP.getOccupancy(ST));
612 1 : }
613 : }
614 : MFI->limitOccupancy(FinalOccupancy);
615 8 : }
|