LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - GCNILPSched.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 100 108 92.6 %
Date: 2018-07-13 00:08:38 Functions: 11 11 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===---------------------------- GCNILPSched.cpp - -----------------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
      11             : //
      12             : //===----------------------------------------------------------------------===//
      13             : 
      14             : #include "llvm/CodeGen/ScheduleDAG.h"
      15             : 
      16             : using namespace llvm;
      17             : 
      18             : #define DEBUG_TYPE "machine-scheduler"
      19             : 
      20             : namespace {
      21             : 
      22          10 : class GCNILPScheduler {
      23             :   struct Candidate : ilist_node<Candidate> {
      24             :     SUnit *SU;
      25             : 
      26             :     Candidate(SUnit *SU_)
      27         906 :       : SU(SU_) {}
      28             :   };
      29             : 
      30             :   SpecificBumpPtrAllocator<Candidate> Alloc;
      31             :   typedef simple_ilist<Candidate> Queue;
      32             :   Queue PendingQueue;
      33             :   Queue AvailQueue;
      34             :   unsigned CurQueueId = 0;
      35             : 
      36             :   std::vector<unsigned> SUNumbers;
      37             : 
      38             :   /// CurCycle - The current scheduler state corresponds to this cycle.
      39             :   unsigned CurCycle = 0;
      40             : 
      41             :   unsigned getNodePriority(const SUnit *SU) const;
      42             : 
      43             :   const SUnit *pickBest(const SUnit *left, const SUnit *right);
      44             :   Candidate* pickCandidate();
      45             : 
      46             :   void releasePending();
      47             :   void advanceToCycle(unsigned NextCycle);
      48             :   void releasePredecessors(const SUnit* SU);
      49             : 
      50             : public:
      51             :   std::vector<const SUnit*> schedule(ArrayRef<const SUnit*> TopRoots,
      52             :                                      const ScheduleDAG &DAG);
      53             : };
      54             : } // namespace
      55             : 
      56             : /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
      57             : /// Smaller number is the higher priority.
      58             : static unsigned
      59        1412 : CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
      60        1412 :   unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
      61        1412 :   if (SethiUllmanNumber != 0)
      62             :     return SethiUllmanNumber;
      63             : 
      64             :   unsigned Extra = 0;
      65        9470 :   for (const SDep &Pred : SU->Preds) {
      66        4508 :     if (Pred.isCtrl()) continue;  // ignore chain preds
      67             :     SUnit *PredSU = Pred.getSUnit();
      68         958 :     unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
      69         958 :     if (PredSethiUllman > SethiUllmanNumber) {
      70         476 :       SethiUllmanNumber = PredSethiUllman;
      71             :       Extra = 0;
      72             :     }
      73         482 :     else if (PredSethiUllman == SethiUllmanNumber)
      74         446 :       ++Extra;
      75             :   }
      76             : 
      77         454 :   SethiUllmanNumber += Extra;
      78             : 
      79         454 :   if (SethiUllmanNumber == 0)
      80           4 :     SethiUllmanNumber = 1;
      81             : 
      82         454 :   return SethiUllmanNumber;
      83             : }
      84             : 
      85             : // Lower priority means schedule further down. For bottom-up scheduling, lower
      86             : // priority SUs are scheduled before higher priority SUs.
      87             : unsigned GCNILPScheduler::getNodePriority(const SUnit *SU) const {
      88             :   assert(SU->NodeNum < SUNumbers.size());
      89       19424 :   if (SU->NumSuccs == 0 && SU->NumPreds != 0)
      90             :     // If SU does not have a register use, i.e. it doesn't produce a value
      91             :     // that would be consumed (e.g. store), then it terminates a chain of
      92             :     // computation.  Give it a large SethiUllman number so it will be
      93             :     // scheduled right before its predecessors that it doesn't lengthen
      94             :     // their live ranges.
      95             :     return 0xffff;
      96             : 
      97       19318 :   if (SU->NumPreds == 0 && SU->NumSuccs != 0)
      98             :     // If SU does not have a register def, schedule it close to its uses
      99             :     // because it does not lengthen any live ranges.
     100             :     return 0;
     101             : 
     102       38628 :   return SUNumbers[SU->NodeNum];
     103             : }
     104             : 
     105             : /// closestSucc - Returns the scheduled cycle of the successor which is
     106             : /// closest to the current cycle.
     107       17624 : static unsigned closestSucc(const SUnit *SU) {
     108             :   unsigned MaxHeight = 0;
     109      599560 :   for (const SDep &Succ : SU->Succs) {
     110      290968 :     if (Succ.isCtrl()) continue;  // ignore chain succs
     111             :     unsigned Height = Succ.getSUnit()->getHeight();
     112             :     // If there are bunch of CopyToRegs stacked up, they should be considered
     113             :     // to be at the same position.
     114       22988 :     if (Height > MaxHeight)
     115             :       MaxHeight = Height;
     116             :   }
     117       17624 :   return MaxHeight;
     118             : }
     119             : 
     120             : /// calcMaxScratches - Returns an cost estimate of the worse case requirement
     121             : /// for scratch registers, i.e. number of data dependencies.
     122             : static unsigned calcMaxScratches(const SUnit *SU) {
     123             :   unsigned Scratches = 0;
     124       91028 :   for (const SDep &Pred : SU->Preds) {
     125       36708 :     if (Pred.isCtrl()) continue;  // ignore chain preds
     126       35224 :     Scratches++;
     127             :   }
     128             :   return Scratches;
     129             : }
     130             : 
     131             : // Return -1 if left has higher priority, 1 if right has higher priority.
     132             : // Return 0 if latency-based priority is equivalent.
     133        8806 : static int BUCompareLatency(const SUnit *left, const SUnit *right) {
     134             :   // Scheduling an instruction that uses a VReg whose postincrement has not yet
     135             :   // been scheduled will induce a copy. Model this as an extra cycle of latency.
     136        8806 :   int LHeight = (int)left->getHeight();
     137        8806 :   int RHeight = (int)right->getHeight();
     138             : 
     139             :   // If either node is scheduling for latency, sort them by height/depth
     140             :   // and latency.
     141             : 
     142             :   // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer
     143             :   // is enabled, grouping instructions by cycle, then its height is already
     144             :   // covered so only its depth matters. We also reach this point if both stall
     145             :   // but have the same height.
     146        8806 :   if (LHeight != RHeight)
     147           0 :     return LHeight > RHeight ? 1 : -1;
     148             : 
     149        8806 :   int LDepth = left->getDepth();
     150        8806 :   int RDepth = right->getDepth();
     151        8806 :   if (LDepth != RDepth) {
     152             :     LLVM_DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
     153             :                       << ") depth " << LDepth << " vs SU (" << right->NodeNum
     154             :                       << ") depth " << RDepth << "\n");
     155           0 :     return LDepth < RDepth ? 1 : -1;
     156             :   }
     157        8806 :   if (left->Latency != right->Latency)
     158           0 :     return left->Latency > right->Latency ? 1 : -1;
     159             : 
     160             :   return 0;
     161             : }
     162             : 
     163        9788 : const SUnit *GCNILPScheduler::pickBest(const SUnit *left, const SUnit *right)
     164             : {
     165             :   // TODO: add register pressure lowering checks
     166             : 
     167             :   bool const DisableSchedCriticalPath = false;
     168             :   int MaxReorderWindow = 6;
     169             :   if (!DisableSchedCriticalPath) {
     170       19576 :     int spread = (int)left->getDepth() - (int)right->getDepth();
     171        9788 :     if (std::abs(spread) > MaxReorderWindow) {
     172             :       LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
     173             :                         << left->getDepth() << " != SU(" << right->NodeNum
     174             :                         << "): " << right->getDepth() << "\n");
     175          76 :       return left->getDepth() < right->getDepth() ? right : left;
     176             :     }
     177             :   }
     178             : 
     179             :   bool const DisableSchedHeight = false;
     180        9712 :   if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
     181        1676 :     int spread = (int)left->getHeight() - (int)right->getHeight();
     182         838 :     if (std::abs(spread) > MaxReorderWindow)
     183           0 :       return left->getHeight() > right->getHeight() ? right : left;
     184             :   }
     185             : 
     186             :   // Prioritize by Sethi-Ulmann number and push CopyToReg nodes down.
     187        9712 :   unsigned LPriority = getNodePriority(left);
     188        9712 :   unsigned RPriority = getNodePriority(right);
     189             : 
     190        9712 :   if (LPriority != RPriority)
     191         900 :     return LPriority > RPriority ? right : left;
     192             : 
     193             :   // Try schedule def + use closer when Sethi-Ullman numbers are the same.
     194             :   // e.g.
     195             :   // t1 = op t2, c1
     196             :   // t3 = op t4, c2
     197             :   //
     198             :   // and the following instructions are both ready.
     199             :   // t2 = op c3
     200             :   // t4 = op c4
     201             :   //
     202             :   // Then schedule t2 = op first.
     203             :   // i.e.
     204             :   // t4 = op c4
     205             :   // t2 = op c3
     206             :   // t1 = op t2, c1
     207             :   // t3 = op t4, c2
     208             :   //
     209             :   // This creates more short live intervals.
     210        8812 :   unsigned LDist = closestSucc(left);
     211        8812 :   unsigned RDist = closestSucc(right);
     212        8812 :   if (LDist != RDist)
     213           6 :     return LDist < RDist ? right : left;
     214             : 
     215             :   // How many registers becomes live when the node is scheduled.
     216             :   unsigned LScratch = calcMaxScratches(left);
     217             :   unsigned RScratch = calcMaxScratches(right);
     218        8806 :   if (LScratch != RScratch)
     219           0 :     return LScratch > RScratch ? right : left;
     220             : 
     221             :   bool const DisableSchedCycles = false;
     222             :   if (!DisableSchedCycles) {
     223        8806 :     int result = BUCompareLatency(left, right);
     224        8806 :     if (result != 0)
     225           0 :       return result > 0 ? right : left;
     226             :     return left;
     227             :   }
     228             :   else {
     229             :     if (left->getHeight() != right->getHeight())
     230             :       return (left->getHeight() > right->getHeight()) ? right : left;
     231             : 
     232             :     if (left->getDepth() != right->getDepth())
     233             :       return (left->getDepth() < right->getDepth()) ? right : left;
     234             :   }
     235             : 
     236             :   assert(left->NodeQueueId && right->NodeQueueId &&
     237             :         "NodeQueueId cannot be zero");
     238             :   return (left->NodeQueueId > right->NodeQueueId) ? right : left;
     239             : }
     240             : 
     241         454 : GCNILPScheduler::Candidate* GCNILPScheduler::pickCandidate() {
     242         454 :   if (AvailQueue.empty())
     243             :     return nullptr;
     244             :   auto Best = AvailQueue.begin();
     245       10242 :   for (auto I = std::next(AvailQueue.begin()), E = AvailQueue.end(); I != E; ++I) {
     246        9788 :     auto NewBestSU = pickBest(Best->SU, I->SU);
     247        9788 :     if (NewBestSU != Best->SU) {
     248             :       assert(NewBestSU == I->SU);
     249             :       Best = I;
     250             :     }
     251             :   }
     252             :   return &*Best;
     253             : }
     254             : 
     255          66 : void GCNILPScheduler::releasePending() {
     256             :   // Check to see if any of the pending instructions are ready to issue.  If
     257             :   // so, add them to the available queue.
     258         518 :   for(auto I = PendingQueue.begin(), E = PendingQueue.end(); I != E;) {
     259             :     auto &C = *I++;
     260         904 :     if (C.SU->getHeight() <= CurCycle) {
     261             :       PendingQueue.remove(C);
     262             :       AvailQueue.push_back(C);
     263         452 :       C.SU->NodeQueueId = CurQueueId++;
     264             :     }
     265             :   }
     266          66 : }
     267             : 
     268             : /// Move the scheduler state forward by the specified number of Cycles.
     269             : void GCNILPScheduler::advanceToCycle(unsigned NextCycle) {
     270         520 :   if (NextCycle <= CurCycle)
     271             :     return;
     272          66 :   CurCycle = NextCycle;
     273          66 :   releasePending();
     274             : }
     275             : 
     276         456 : void GCNILPScheduler::releasePredecessors(const SUnit* SU) {
     277        9472 :   for (const auto &PredEdge : SU->Preds) {
     278             :     auto PredSU = PredEdge.getSUnit();
     279           0 :     if (PredEdge.isWeak())
     280           0 :       continue;
     281             :     assert(PredSU->isBoundaryNode() || PredSU->NumSuccsLeft > 0);
     282             : 
     283        4508 :     PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge.getLatency());
     284             : 
     285        4508 :     if (!PredSU->isBoundaryNode() && --PredSU->NumSuccsLeft == 0)
     286             :       PendingQueue.push_front(*new (Alloc.Allocate()) Candidate(PredSU));
     287             :   }
     288         456 : }
     289             : 
     290             : std::vector<const SUnit*>
     291           2 : GCNILPScheduler::schedule(ArrayRef<const SUnit*> BotRoots,
     292             :                           const ScheduleDAG &DAG) {
     293             :   auto &SUnits = const_cast<ScheduleDAG&>(DAG).SUnits;
     294             : 
     295           2 :   std::vector<SUnit> SUSavedCopy;
     296           4 :   SUSavedCopy.resize(SUnits.size());
     297             : 
     298             :   // we cannot save only those fields we touch: some of them are private
     299             :   // so save units verbatim: this assumes SUnit should have value semantics
     300         456 :   for (const SUnit &SU : SUnits)
     301         908 :     SUSavedCopy[SU.NodeNum] = SU;
     302             : 
     303           4 :   SUNumbers.assign(SUnits.size(), 0);
     304         456 :   for (const SUnit &SU : SUnits)
     305         454 :     CalcNodeSethiUllmanNumber(&SU, SUNumbers);
     306             : 
     307           6 :   for (auto SU : BotRoots) {
     308             :     AvailQueue.push_back(
     309             :       *new (Alloc.Allocate()) Candidate(const_cast<SUnit*>(SU)));
     310             :   }
     311           2 :   releasePredecessors(&DAG.ExitSU);
     312             : 
     313             :   std::vector<const SUnit*> Schedule;
     314           4 :   Schedule.reserve(SUnits.size());
     315             :   while (true) {
     316         524 :     if (AvailQueue.empty() && !PendingQueue.empty()) {
     317             :       auto EarliestSU = std::min_element(
     318             :         PendingQueue.begin(), PendingQueue.end(),
     319         386 :         [=](const Candidate& C1, const Candidate& C2) {
     320         772 :         return C1.SU->getHeight() < C2.SU->getHeight();
     321         452 :       })->SU;
     322         198 :       advanceToCycle(std::max(CurCycle + 1, EarliestSU->getHeight()));
     323             :     }
     324         456 :     if (AvailQueue.empty())
     325             :       break;
     326             : 
     327             :     LLVM_DEBUG(dbgs() << "\n=== Picking candidate\n"
     328             :                          "Ready queue:";
     329             :                for (auto &C
     330             :                     : AvailQueue) dbgs()
     331             :                << ' ' << C.SU->NodeNum;
     332             :                dbgs() << '\n';);
     333             : 
     334         454 :     auto C = pickCandidate();
     335             :     assert(C);
     336             :     AvailQueue.remove(*C);
     337         454 :     auto SU = C->SU;
     338             :     LLVM_DEBUG(dbgs() << "Selected "; SU->dump(&DAG));
     339             : 
     340             :     advanceToCycle(SU->getHeight());
     341             : 
     342         454 :     releasePredecessors(SU);
     343         908 :     Schedule.push_back(SU);
     344         454 :     SU->isScheduled = true;
     345         454 :   }
     346             :   assert(SUnits.size() == Schedule.size());
     347             : 
     348             :   std::reverse(Schedule.begin(), Schedule.end());
     349             : 
     350             :   // restore units
     351         456 :   for (auto &SU : SUnits)
     352         908 :     SU = SUSavedCopy[SU.NodeNum];
     353             : 
     354           2 :   return Schedule;
     355             : }
     356             : 
     357             : namespace llvm {
     358           2 : std::vector<const SUnit*> makeGCNILPScheduler(ArrayRef<const SUnit*> BotRoots,
     359             :                                               const ScheduleDAG &DAG) {
     360           2 :   GCNILPScheduler S;
     361           4 :   return S.schedule(BotRoots, DAG);
     362             : }
     363             : }

Generated by: LCOV version 1.13