LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - GCNIterativeScheduler.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 173 186 93.0 %
Date: 2017-09-14 15:23:50 Functions: 24 30 80.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- GCNIterativeScheduler.cpp ------------------------------------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : 
      10             : #include "GCNIterativeScheduler.h"
      11             : #include "AMDGPUSubtarget.h"
      12             : #include "GCNRegPressure.h"
      13             : #include "GCNSchedStrategy.h"
      14             : #include "llvm/ADT/ArrayRef.h"
      15             : #include "llvm/ADT/STLExtras.h"
      16             : #include "llvm/ADT/SmallVector.h"
      17             : #include "llvm/CodeGen/LiveIntervalAnalysis.h"
      18             : #include "llvm/CodeGen/MachineBasicBlock.h"
      19             : #include "llvm/CodeGen/MachineFunction.h"
      20             : #include "llvm/CodeGen/RegisterPressure.h"
      21             : #include "llvm/CodeGen/ScheduleDAG.h"
      22             : #include "llvm/Support/Compiler.h"
      23             : #include "llvm/Support/Debug.h"
      24             : #include "llvm/Support/raw_ostream.h"
      25             : #include <algorithm>
      26             : #include <cassert>
      27             : #include <iterator>
      28             : #include <limits>
      29             : #include <memory>
      30             : #include <type_traits>
      31             : #include <vector>
      32             : 
      33             : using namespace llvm;
      34             : 
      35             : #define DEBUG_TYPE "machine-scheduler"
      36             : 
      37             : namespace llvm {
      38             : // Declaration only; the definition is not part of this file.
      39             : std::vector<const SUnit *> makeMinRegSchedule(ArrayRef<const SUnit *> TopRoots,
      40             :                                               const ScheduleDAG &DAG);
      41             : 
      42             : } // end namespace llvm
      43             : 
      44             : // Shim accessors: let range-generic code get a MachineInstr* from MachineInstr*, SUnit* or SUnit elements.
      45             : static inline MachineInstr *getMachineInstr(MachineInstr *MI) {
      46             :   return MI; // already a MachineInstr*, returned unchanged
      47             : }
      48             : static inline MachineInstr *getMachineInstr(const SUnit *SU) {
      49        3090 :   return SU->getInstr();
      50             : }
      51             : static inline MachineInstr *getMachineInstr(const SUnit &SU) {
      52             :   return SU.getInstr();
      53             : }
      54             : 
      55             : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
      56             : LLVM_DUMP_METHOD // Debug helper: prints a block header and the instructions of [Begin, End) to OS.
      57             : static void printRegion(raw_ostream &OS,
      58             :                         MachineBasicBlock::iterator Begin,
      59             :                         MachineBasicBlock::iterator End,
      60             :                         const LiveIntervals *LIS, // may be null; instruction indexes are printed only when set
      61             :                         unsigned MaxInstNum = // cap on instructions printed from the top of the region
      62             :                           std::numeric_limits<unsigned>::max()) {
      63             :   auto BB = Begin->getParent();
      64             :   OS << BB->getParent()->getName() << ":BB#" << BB->getNumber()
      65             :      << ' ' << BB->getName() << ":\n";
      66             :   auto I = Begin;
      67             :   MaxInstNum = std::max(MaxInstNum, 1u); // a cap of 0 is treated as 1
      68             :   for (; I != End && MaxInstNum; ++I, --MaxInstNum) {
      69             :     if (!I->isDebugValue() && LIS)
      70             :       OS << LIS->getInstructionIndex(*I);
      71             :     OS << '\t' << *I;
      72             :   }
      73             :   if (I != End) { // cap reached before End: elide the middle, then print the region's last instruction
      74             :     OS << "\t...\n";
      75             :     I = std::prev(End);
      76             :     if (!I->isDebugValue() && LIS)
      77             :       OS << LIS->getInstructionIndex(*I);
      78             :     OS << '\t' << *I;
      79             :   }
      80             :   if (End != BB->end()) { // print boundary inst if present
      81             :     OS << "----\n";
      82             :     if (LIS) OS << LIS->getInstructionIndex(*End) << '\t';
      83             :     OS << *End;
      84             :   }
      85             : }
      86             : 
      87             : LLVM_DUMP_METHOD // Debug helper: prints register pressure of the live sets before Begin and after the bottom instruction.
      88             : static void printLivenessInfo(raw_ostream &OS,
      89             :                               MachineBasicBlock::iterator Begin,
      90             :                               MachineBasicBlock::iterator End,
      91             :                               const LiveIntervals *LIS) { // dereferenced unconditionally below, so it must not be null
      92             :   const auto BB = Begin->getParent();
      93             :   const auto &MRI = BB->getParent()->getRegInfo();
      94             : 
      95             :   const auto LiveIns = getLiveRegsBefore(*Begin, *LIS);
      96             :   OS << "LIn RP: ";
      97             :   getRegPressure(MRI, LiveIns).print(OS);
      98             : 
      99             :   const auto BottomMI = End == BB->end() ? std::prev(End) : End; // block's last instruction when End is the block end, else End itself
     100             :   const auto LiveOuts = getLiveRegsAfter(*BottomMI, *LIS);
     101             :   OS << "LOt RP: ";
     102             :   getRegPressure(MRI, LiveOuts).print(OS);
     103             : }
     104             : 
     105             : LLVM_DUMP_METHOD // Dumps every recorded region: abbreviated instruction listing, liveness info and stored max pressure.
     106             : void GCNIterativeScheduler::printRegions(raw_ostream &OS) const {
     107             :   const auto &ST = MF.getSubtarget<SISubtarget>();
     108             :   for (const auto R : Regions) {
     109             :     OS << "Region to schedule ";
     110             :     printRegion(OS, R->Begin, R->End, LIS, 1); // cap of 1: first instruction, then "..." and the last one if more remain
     111             :     printLivenessInfo(OS, R->Begin, R->End, LIS);
     112             :     OS << "Max RP: ";
     113             :     R->MaxPressure.print(OS, &ST);
     114             :   }
     115             : }
     116             : 
     117             : LLVM_DUMP_METHOD // Dumps region R in full, followed by its stored max pressure (before) and RP (after).
     118             : void GCNIterativeScheduler::printSchedResult(raw_ostream &OS,
     119             :                                              const Region *R,
     120             :                                              const GCNRegPressure &RP) const {
     121             :   OS << "\nAfter scheduling ";
     122             :   printRegion(OS, R->Begin, R->End, LIS); // default cap: no truncation
     123             :   printSchedRP(OS, R->MaxPressure, RP);
     124             :   OS << '\n';
     125             : }
     126             : 
     127             : LLVM_DUMP_METHOD // Prints two register pressure values labelled "RP before" and "RP after".
     128             : void GCNIterativeScheduler::printSchedRP(raw_ostream &OS,
     129             :                                          const GCNRegPressure &Before,
     130             :                                          const GCNRegPressure &After) const {
     131             :   const auto &ST = MF.getSubtarget<SISubtarget>();
     132             :   OS << "RP before: ";
     133             :   Before.print(OS, &ST);
     134             :   OS << "RP after:  ";
     135             :   After.print(OS, &ST);
     136             : }
     137             : #endif
     138             : 
     139             : // DAG builder helper: the constructor enters region R and builds its scheduling graph on Sch; the destructor exits the region and block.
     140             : class GCNIterativeScheduler::BuildDAG {
     141             :   GCNIterativeScheduler &Sch;
     142             :   SmallVector<SUnit *, 8> TopRoots; // filled by findRootsAndBiasEdges() in the constructor
     143             : 
     144             : public:
     145           6 :   BuildDAG(const Region &R, GCNIterativeScheduler &_Sch)
     146          12 :     : Sch(_Sch) {
     147          12 :     auto BB = R.Begin->getParent();
     148           6 :     Sch.BaseClass::startBlock(BB); // qualified calls: use the base-class versions, not this class's overrides
     149           6 :     Sch.BaseClass::enterRegion(BB, R.Begin, R.End, R.NumRegionInstrs);
     150             : 
     151           6 :     Sch.buildSchedGraph(Sch.AA, nullptr, nullptr, nullptr,
     152             :                         /*TrackLaneMask*/true);
     153           6 :     Sch.Topo.InitDAGTopologicalSorting();
     154             : 
     155          12 :     SmallVector<SUnit *, 8> BotRoots; // collected but not kept; only TopRoots is exposed
     156           6 :     Sch.findRootsAndBiasEdges(TopRoots, BotRoots);
     157           6 :   }
     158             : 
     159          18 :   ~BuildDAG() {
     160           6 :     Sch.BaseClass::exitRegion();
     161           6 :     Sch.BaseClass::finishBlock();
     162           6 :   }
     163             : 
     164             :   ArrayRef<const SUnit *> getTopRoots() const { // view into TopRoots; valid only while this object is alive
     165          12 :     return TopRoots;
     166             :   }
     167             : };
     168             : // Scoped helper: installs OverrideStrategy as Sch.SchedImpl and enters region R; the destructor exits and restores the saved strategy.
     169             : class GCNIterativeScheduler::OverrideLegacyStrategy {
     170             :   GCNIterativeScheduler &Sch;
     171             :   Region &Rgn;
     172             :   std::unique_ptr<MachineSchedStrategy> SaveSchedImpl; // strategy that was installed before the override
     173             :   GCNRegPressure SaveMaxRP; // R.MaxPressure as it was on construction; used by restoreOrder()
     174             : 
     175             : public:
     176           5 :   OverrideLegacyStrategy(Region &R,
     177             :                          MachineSchedStrategy &OverrideStrategy,
     178             :                          GCNIterativeScheduler &_Sch)
     179           5 :     : Sch(_Sch)
     180             :     , Rgn(R)
     181           5 :     , SaveSchedImpl(std::move(_Sch.SchedImpl))
     182          10 :     , SaveMaxRP(R.MaxPressure) {
     183          10 :     Sch.SchedImpl.reset(&OverrideStrategy); // borrowed through a reference; must be release()d, never deleted
     184          10 :     auto BB = R.Begin->getParent();
     185           5 :     Sch.BaseClass::startBlock(BB);
     186           5 :     Sch.BaseClass::enterRegion(BB, R.Begin, R.End, R.NumRegionInstrs);
     187           5 :   }
     188             : 
     189          15 :   ~OverrideLegacyStrategy() {
     190           5 :     Sch.BaseClass::exitRegion();
     191           5 :     Sch.BaseClass::finishBlock();
     192          10 :     Sch.SchedImpl.release(); // give up the borrowed strategy without deleting it
     193          10 :     Sch.SchedImpl = std::move(SaveSchedImpl);
     194           5 :   }
     195             : 
     196           5 :   void schedule() { // runs the base-class scheduler on the region, then syncs Rgn with the scheduler state
     197             :     assert(Sch.RegionBegin == Rgn.Begin && Sch.RegionEnd == Rgn.End);
     198             :     DEBUG(dbgs() << "\nScheduling ";
     199             :       printRegion(dbgs(), Rgn.Begin, Rgn.End, Sch.LIS, 2));
     200           5 :     Sch.BaseClass::schedule();
     201             : 
     202             :     // Unfortunately placeDebugValues incorrectly modifies RegionEnd, restore it
     203           5 :     Sch.RegionEnd = Rgn.End;
     204             :     //assert(Rgn.End == Sch.RegionEnd);
     205           5 :     Rgn.Begin = Sch.RegionBegin;
     206          10 :     Rgn.MaxPressure.clear(); // NOTE(review): presumably cleared because it no longer describes the new order - confirm
     207           5 :   }
     208             : 
     209             :   void restoreOrder() {
     210             :     assert(Sch.RegionBegin == Rgn.Begin && Sch.RegionEnd == Rgn.End);
     211             :     // DAG SUnits are stored using original region's order
     212             :     // so just use SUnits as the restoring schedule
     213           1 :     Sch.scheduleRegion(Rgn, Sch.SUnits, SaveMaxRP);
     214             :   }
     215             : };
     216             : 
     217             : namespace {
     218             : 
     219             : // Stub strategy passed to the base class constructor: tracks nothing, picks no node, ignores all callbacks.
     220          12 : class SchedStrategyStub : public MachineSchedStrategy {
     221             : public:
     222          12 :   bool shouldTrackPressure() const override { return false; }
     223          12 :   bool shouldTrackLaneMasks() const override { return false; }
     224           0 :   void initialize(ScheduleDAGMI *DAG) override {}
     225           0 :   SUnit *pickNode(bool &IsTopNode) override { return nullptr; }
     226           0 :   void schedNode(SUnit *SU, bool IsTopNode) override {}
     227           0 :   void releaseTopNode(SUnit *SU) override {}
     228           0 :   void releaseBottomNode(SUnit *SU) override {}
     229             : };
     230             : 
     231             : } // end anonymous namespace
     232             : 
     233           6 : GCNIterativeScheduler::GCNIterativeScheduler(MachineSchedContext *C,
     234           6 :                                              StrategyKind S)
     235          12 :   : BaseClass(C, llvm::make_unique<SchedStrategyStub>()) // base class gets the stub; the strategy S is dispatched in finalizeSchedule()
     236             :   , Context(C)
     237             :   , Strategy(S)
     238          42 :   , UPTracker(*LIS) { // NOTE(review): LIS is not set in this list; presumably a base-class member initialized by BaseClass(...) - confirm
     239           6 : }
     240             : 
     241             : // Returns max pressure for a region, computed by walking UPTracker upward from the bottom instruction to Begin.
     242             : GCNRegPressure
     243          11 : GCNIterativeScheduler::getRegionPressure(MachineBasicBlock::iterator Begin,
     244             :                                          MachineBasicBlock::iterator End)
     245             :   const {
     246             :   // For the purpose of pressure tracking bottom inst of the region should
     247             :   // be also processed. End is either BB end, BB terminator inst or sched
     248             :   // boundary inst.
     249          22 :   auto const BBEnd = Begin->getParent()->end();
     250          11 :   auto const BottomMI = End == BBEnd ? std::prev(End) : End;
     251             : 
     252             :   // scheduleRegions walks bottom to top, so it's likely we just get the next
     253             :   // instruction to track
     254          11 :   auto AfterBottomMI = std::next(BottomMI);
     255          11 :   if (AfterBottomMI == BBEnd ||
     256           0 :       &*AfterBottomMI != UPTracker.getLastTrackedMI()) {
     257          22 :     UPTracker.reset(*BottomMI); // tracker is not positioned right below BottomMI: restart it there
     258             :   } else {
     259             :     assert(UPTracker.isValid()); // otherwise keep tracking from where the tracker already is
     260             :   }
     261             : 
     262        3802 :   for (auto I = BottomMI; I != Begin; --I)
     263        7560 :     UPTracker.recede(*I);
     264             : 
     265          22 :   UPTracker.recede(*Begin); // the loop above stops before Begin, so process it here
     266             : 
     267             :   assert(UPTracker.isValid() ||
     268             :          (dbgs() << "Tracked region ",
     269             :           printRegion(dbgs(), Begin, End, LIS), false));
     270          22 :   return UPTracker.moveMaxPressure();
     271             : }
     272             : 
     273             : // Returns max pressure for a tentative schedule, walking it last-to-first with a fresh tracker; the block is not modified here.
     274             : template <typename Range> GCNRegPressure
     275           6 : GCNIterativeScheduler::getSchedulePressure(const Region &R,
     276             :                                            Range &&Schedule) const {
     277          18 :   auto const BBEnd = R.Begin->getParent()->end();
     278          18 :   GCNUpwardRPTracker RPTracker(*LIS); // local tracker; the member UPTracker is not touched
     279          12 :   if (R.End != BBEnd) {
     280             :     // R.End points to the boundary instruction but the
     281             :     // schedule doesn't include it
     282          12 :     RPTracker.reset(*R.End);
     283          12 :     RPTracker.recede(*R.End);
     284             :   } else {
     285             :     // R.End doesn't point to the boundary instruction
     286           0 :     RPTracker.reset(*std::prev(BBEnd)); // start from the block's last instruction instead
     287             :   }
     288        4140 :   for (auto I = Schedule.end(), B = Schedule.begin(); I != B;) {
     289        4116 :     RPTracker.recede(*getMachineInstr(*--I)); // reverse iteration; getMachineInstr adapts the element type
     290             :   }
     291          12 :   return RPTracker.moveMaxPressure();
     292             : }
     293             : 
     294           6 : void GCNIterativeScheduler::enterRegion(MachineBasicBlock *BB, // overridden
     295             :                                         MachineBasicBlock::iterator Begin,
     296             :                                         MachineBasicBlock::iterator End,
     297             :                                         unsigned NumRegionInstrs) {
     298           6 :   BaseClass::enterRegion(BB, Begin, End, NumRegionInstrs);
     299           6 :   if (NumRegionInstrs > 2) { // regions of at most two instructions are not recorded
     300          12 :     Regions.push_back(
     301          24 :       new (Alloc.Allocate()) // placement-new into storage obtained from Alloc
     302             :       Region { Begin, End, NumRegionInstrs,
     303           6 :                getRegionPressure(Begin, End), nullptr }); // max pressure is computed up front; last field starts as nullptr
     304             :   }
     305           6 : }
     306             : 
     307           6 : void GCNIterativeScheduler::schedule() { // overridden
     308             :   // Does no scheduling here; only dumps debug info for the current region.
     309             :   DEBUG(
     310             :     printLivenessInfo(dbgs(), RegionBegin, RegionEnd, LIS);
     311             :     if (!Regions.empty() && Regions.back()->Begin == RegionBegin) { // only when the current region was recorded last
     312             :       dbgs() << "Max RP: ";
     313             :       Regions.back()->MaxPressure.print(dbgs(), &MF.getSubtarget<SISubtarget>());
     314             :     }
     315             :     dbgs() << '\n';
     316             :   );
     317           6 : }
     318             : 
     319           6 : void GCNIterativeScheduler::finalizeSchedule() { // overridden
     320          12 :   if (Regions.empty()) // nothing was recorded by enterRegion()
     321             :     return;
     322           6 :   switch (Strategy) { // dispatch on the strategy kind given to the constructor
     323           0 :   case SCHEDULE_MINREGONLY: scheduleMinReg(); break;
     324           3 :   case SCHEDULE_MINREGFORCED: scheduleMinReg(true); break;
     325           3 :   case SCHEDULE_LEGACYMAXOCCUPANCY: scheduleLegacyMaxOccupancy(); break;
     326             :   }
     327             : }
     328             : 
     329             : // Detach schedule from SUnits and interleave it with debug values.
     330             : // Returned schedule becomes independent of DAG state.
     331             : std::vector<MachineInstr*>
     332           2 : GCNIterativeScheduler::detachSchedule(ScheduleRef Schedule) const {
     333           2 :   std::vector<MachineInstr*> Res;
     334           2 :   Res.reserve(Schedule.size() * 2); // room for one debug value per scheduled instruction
     335             : 
     336           2 :   if (FirstDbgValue)
     337           0 :     Res.push_back(FirstDbgValue); // goes in front of all scheduled instructions
     338             : 
     339           6 :   const auto DbgB = DbgValues.begin(), DbgE = DbgValues.end();
     340         791 :   for (auto SU : Schedule) {
     341        1574 :     Res.push_back(SU->getInstr());
     342             :     const auto &D = std::find_if(DbgB, DbgE, [SU](decltype(*DbgB) &P) { // linear scan of DbgValues for every scheduled unit
     343           0 :       return P.second == SU->getInstr(); // match the entry whose second member is this instruction
     344         787 :     });
     345         787 :     if (D != DbgE)
     346           0 :       Res.push_back(D->first); // first match only: its first member is emitted right after the instruction
     347             :   }
     348           2 :   return Res;
     349             : }
     350             : 
     351           2 : void GCNIterativeScheduler::setBestSchedule(Region &R, // stores a detached copy of Schedule and MaxRP as R.BestSchedule
     352             :                                             ScheduleRef Schedule,
     353             :                                             const GCNRegPressure &MaxRP) {
     354           6 :   R.BestSchedule.reset( // replaces any previously stored best schedule
     355           2 :     new TentativeSchedule{ detachSchedule(Schedule), MaxRP });
     356           2 : }
     357             : 
     358           2 : void GCNIterativeScheduler::scheduleBest(Region &R) { // applies R.BestSchedule to the region, then discards it
     359             :   assert(R.BestSchedule.get() && "No schedule specified");
     360           6 :   scheduleRegion(R, R.BestSchedule->Schedule, R.BestSchedule->MaxPressure);
     361           4 :   R.BestSchedule.reset(); // the stored schedule is single-use
     362           2 : }
     363             : 
     364             : // Minimal required region scheduler: reorders the block to match Schedule. Works for
     365             : // ranges of SUnit*, SUnit or MachineInstr*.
     366             : template <typename Range>
     367           6 : void GCNIterativeScheduler::scheduleRegion(Region &R, Range &&Schedule,
     368             :                                            const GCNRegPressure &MaxRP) {
     369             :   assert(RegionBegin == R.Begin && RegionEnd == R.End);
     370             :   assert(LIS != nullptr);
     371             : #ifndef NDEBUG
     372             :   const auto SchedMaxRP = getSchedulePressure(R, Schedule); // used only by the consistency assert at the end
     373             : #endif
     374          12 :   auto BB = R.Begin->getParent();
     375           6 :   auto Top = R.Begin; // insertion point: the next schedule element is placed here
     376        2082 :   for (const auto &I : Schedule) {
     377        3087 :     auto MI = getMachineInstr(I);
     378        2058 :     if (MI != &*Top) { // not already in place: move it in front of Top
     379        1535 :       BB->remove(MI);
     380        1535 :       BB->insert(Top, MI);
     381        1535 :       if (!MI->isDebugValue())
     382        1535 :         LIS->handleMove(*MI, true);
     383             :     }
     384        2058 :     if (!MI->isDebugValue()) {
     385             :       // Reset read - undef flags and update them later.
     386       11652 :       for (auto &Op : MI->operands())
     387       16416 :         if (Op.isReg() && Op.isDef())
     388             :           Op.setIsUndef(false);
     389             : 
     390        4116 :       RegisterOperands RegOpers;
     391        2058 :       RegOpers.collect(*MI, *TRI, MRI, /*ShouldTrackLaneMasks*/true,
     392             :                                        /*IgnoreDead*/false);
     393             :       // Adjust liveness and add missing dead+read-undef flags.
     394        6174 :       auto SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
     395        2058 :       RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
     396             :     }
     397        6174 :     Top = std::next(MI->getIterator());
     398             :   }
     399          15 :   RegionBegin = getMachineInstr(Schedule.front()); // Schedule must be non-empty
     400             : 
     401             :   // Schedule consisting of MachineInstr* is considered 'detached'
     402             :   // and already interleaved with debug values
     403             :   if (!std::is_same<decltype(*Schedule.begin()), MachineInstr*>::value) { // NOTE(review): decltype(*Schedule.begin()) is a reference type, so this is_same is never true and the branch always runs, detached schedules included; strip the reference (e.g. std::decay) if they are meant to skip it
     404           6 :     placeDebugValues();
     405             :     // Unfortunately placeDebugValues incorrectly modifies RegionEnd, restore it
     406             :     //assert(R.End == RegionEnd);
     407           6 :     RegionEnd = R.End;
     408             :   }
     409             : 
     410           6 :   R.Begin = RegionBegin;
     411           6 :   R.MaxPressure = MaxRP; // the caller-reported pressure is stored as is; it is cross-checked only in asserts builds
     412             : 
     413             : #ifndef NDEBUG
     414             :   const auto RegionMaxRP = getRegionPressure(R);
     415             :   const auto &ST = MF.getSubtarget<SISubtarget>();
     416             : #endif
     417             :   assert((SchedMaxRP == RegionMaxRP && (MaxRP.empty() || SchedMaxRP == MaxRP))
     418             :   || (dbgs() << "Max RP mismatch!!!\n"
     419             :                 "RP for schedule (calculated): ",
     420             :       SchedMaxRP.print(dbgs(), &ST),
     421             :       dbgs() << "RP for schedule (reported): ",
     422             :       MaxRP.print(dbgs(), &ST),
     423             :       dbgs() << "RP after scheduling: ",
     424             :       RegionMaxRP.print(dbgs(), &ST),
     425             :       false));
     426           6 : }
     427             : 
     428             : // Sort recorded regions by pressure - highest at the front
     429           6 : void GCNIterativeScheduler::sortRegionsByPressure(unsigned TargetOcc) {
     430           6 :   const auto &ST = MF.getSubtarget<SISubtarget>();
     431          18 :   std::sort(Regions.begin(), Regions.end(),
     432             :     [&ST, TargetOcc](const Region *R1, const Region *R2) {
     433           0 :     return R2->MaxPressure.less(ST, R1->MaxPressure, TargetOcc); // arguments swapped (R2 vs R1) to get descending order
     434           0 :   });
     435           6 : }
     436             : 
     437             : ///////////////////////////////////////////////////////////////////////////////
     438             : // Legacy MaxOccupancy Strategy
     439             : 
     440             : // Tries to increase occupancy applying minreg scheduler for a sequence of
     441             : // most demanding regions. Obtained schedules are saved as BestSchedule for a
     442             : // region.
     443             : // TargetOcc is the best achievable occupancy for a kernel.
     444             : // Returns better occupancy on success or current occupancy on fail.
     445             : // BestSchedules aren't deleted on fail.
     446           3 : unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
     447             :   // TODO: assert Regions are sorted descending by pressure
     448           3 :   const auto &ST = MF.getSubtarget<SISubtarget>();
     449           6 :   const auto Occ = Regions.front()->MaxPressure.getOccupancy(ST); // current occupancy, taken from the first region
     450             :   DEBUG(dbgs() << "Trying to to improve occupancy, target = " << TargetOcc
     451             :                << ", current = " << Occ << '\n');
     452             : 
     453           3 :   auto NewOcc = TargetOcc;
     454          14 :   for (auto R : Regions) {
     455           3 :     if (R->MaxPressure.getOccupancy(ST) >= NewOcc) // this region already meets the occupancy reached so far: stop
     456             :       break;
     457             : 
     458             :     DEBUG(printRegion(dbgs(), R->Begin, R->End, LIS, 3);
     459             :           printLivenessInfo(dbgs(), R->Begin, R->End, LIS));
     460             : 
     461           5 :     BuildDAG DAG(*R, *this); // scoped: exits the region again at the end of each iteration
     462           8 :     const auto MinSchedule = makeMinRegSchedule(DAG.getTopRoots(), *this);
     463           3 :     const auto MaxRP = getSchedulePressure(*R, MinSchedule); // pressure of the tentative order; the block is not reordered here
     464             :     DEBUG(dbgs() << "Occupancy improvement attempt:\n";
     465             :           printSchedRP(dbgs(), R->MaxPressure, MaxRP));
     466             : 
     467           6 :     NewOcc = std::min(NewOcc, MaxRP.getOccupancy(ST));
     468           3 :     if (NewOcc <= Occ) // no gain over the current occupancy: give up
     469             :       break;
     470             : 
     471           2 :     setBestSchedule(*R, MinSchedule, MaxRP);
     472             :   }
     473             :   DEBUG(dbgs() << "New occupancy = " << NewOcc
     474             :                << ", prev occupancy = " << Occ << '\n');
     475           3 :   return std::max(NewOcc, Occ);
     476             : }
     477             : 
     478           3 : void GCNIterativeScheduler::scheduleLegacyMaxOccupancy( // runs GCNMaxOccupancySchedStrategy over all recorded regions
     479             :   bool TryMaximizeOccupancy) { // the default value is declared outside this file
     480           3 :   const auto &ST = MF.getSubtarget<SISubtarget>();
     481           3 :   auto TgtOcc = ST.getOccupancyWithLocalMemSize(MF);
     482             : 
     483           3 :   sortRegionsByPressure(TgtOcc);
     484           6 :   auto Occ = Regions.front()->MaxPressure.getOccupancy(ST); // after sorting, the first region has the highest pressure
     485             : 
     486           3 :   if (TryMaximizeOccupancy && Occ < TgtOcc)
     487           3 :     Occ = tryMaximizeOccupancy(TgtOcc);
     488             : 
     489             :   // This is really weird but for some magic scheduling regions twice
     490             :   // gives performance improvement
     491           3 :   const int NumPasses = Occ < TgtOcc ? 2 : 1;
     492             : 
     493           3 :   TgtOcc = std::min(Occ, TgtOcc); // from here on, aim for the lower of the two values
     494             :   DEBUG(dbgs() << "Scheduling using default scheduler, "
     495             :                   "target occupancy = " << TgtOcc << '\n');
     496           6 :   GCNMaxOccupancySchedStrategy LStrgy(Context); // local object; lent to the scheduler through OverrideLegacyStrategy below
     497             : 
     498           8 :   for (int I = 0; I < NumPasses; ++I) {
     499             :     // running first pass with TargetOccupancy = 0 mimics previous scheduling
     500             :     // approach and is a performance magic
     501          10 :     LStrgy.setTargetOccupancy(I == 0 ? 0 : TgtOcc);
     502          25 :     for (auto R : Regions) {
     503          10 :       OverrideLegacyStrategy Ovr(*R, LStrgy, *this); // scoped: restores the previous strategy at the end of the iteration
     504             : 
     505           5 :       Ovr.schedule();
     506          10 :       const auto RP = getRegionPressure(*R);
     507             :       DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP));
     508             : 
     509           5 :       if (RP.getOccupancy(ST) < TgtOcc) { // result misses the target: fall back
     510             :         DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc);
     511           8 :         if (R->BestSchedule.get() &&
     512           4 :             R->BestSchedule->MaxPressure.getOccupancy(ST) >= TgtOcc) { // a saved schedule that meets the target exists
     513             :           DEBUG(dbgs() << ", scheduling minimal register\n");
     514           2 :           scheduleBest(*R);
     515             :         } else {
     516             :           DEBUG(dbgs() << ", restoring\n");
     517             :           Ovr.restoreOrder(); // go back to the order the region had before Ovr.schedule()
     518             :           assert(R->MaxPressure.getOccupancy(ST) >= TgtOcc);
     519             :         }
     520             :       }
     521             :     }
     522             :   }
     523           3 : }
     524             : 
     525             : ///////////////////////////////////////////////////////////////////////////////
     526             : // Minimal Register Strategy
     527             : // Applies the minreg schedule to regions in descending pressure order; unless force is set, it stops at the first region that would not help.
     528           3 : void GCNIterativeScheduler::scheduleMinReg(bool force) {
     529           3 :   const auto &ST = MF.getSubtarget<SISubtarget>();
     530           3 :   const auto TgtOcc = ST.getOccupancyWithLocalMemSize(MF);
     531           3 :   sortRegionsByPressure(TgtOcc);
     532             : 
     533           6 :   auto MaxPressure = Regions.front()->MaxPressure; // running bound; replaced by each applied schedule's pressure
     534          15 :   for (auto R : Regions) {
     535           3 :     if (!force && R->MaxPressure.less(ST, MaxPressure, TgtOcc)) // region is already below the running bound: stop
     536             :       break;
     537             : 
     538           6 :     BuildDAG DAG(*R, *this); // scoped: exits the region again at the end of each iteration
     539           9 :     const auto MinSchedule = makeMinRegSchedule(DAG.getTopRoots(), *this);
     540             : 
     541           3 :     const auto RP = getSchedulePressure(*R, MinSchedule);
     542             :     DEBUG(if (R->MaxPressure.less(ST, RP, TgtOcc)) {
     543             :       dbgs() << "\nWarning: Pressure becomes worse after minreg!";
     544             :       printSchedRP(dbgs(), R->MaxPressure, RP);
     545             :     });
     546             : 
     547           3 :     if (!force && MaxPressure.less(ST, RP, TgtOcc)) // new schedule would exceed the running bound: stop without applying it
     548             :       break;
     549             : 
     550           3 :     scheduleRegion(*R, MinSchedule, RP);
     551             :     DEBUG(printSchedResult(dbgs(), R, RP));
     552             : 
     553           3 :     MaxPressure = RP;
     554             :   }
     555           3 : }

Generated by: LCOV version 1.13