//===---------------------------- GCNILPSched.cpp - -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/ScheduleDAG.h"

using namespace llvm;

#define DEBUG_TYPE "machine-scheduler"

namespace {

class GCNILPScheduler {
  struct Candidate : ilist_node<Candidate> {
    SUnit *SU;

    Candidate(SUnit *SU_)
      : SU(SU_) {}
  };

  SpecificBumpPtrAllocator<Candidate> Alloc;
  typedef simple_ilist<Candidate> Queue;
  Queue PendingQueue;
  Queue AvailQueue;
  unsigned CurQueueId = 0;

  std::vector<unsigned> SUNumbers;

  /// CurCycle - The current scheduler state corresponds to this cycle.
  unsigned CurCycle = 0;

  unsigned getNodePriority(const SUnit *SU) const;

  const SUnit *pickBest(const SUnit *left, const SUnit *right);
  Candidate* pickCandidate();

  void releasePending();
  void advanceToCycle(unsigned NextCycle);
  void releasePredecessors(const SUnit* SU);

public:
  std::vector<const SUnit*> schedule(ArrayRef<const SUnit*> TopRoots,
                                     const ScheduleDAG &DAG);
};
} // namespace

/// CalcNodeSethiUllmanNumber - Compute the Sethi-Ullman number.
/// A smaller number means higher priority.
static unsigned
CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
  if (SethiUllmanNumber != 0)
    return SethiUllmanNumber;

  unsigned Extra = 0;
  for (const SDep &Pred : SU->Preds) {
    if (Pred.isCtrl()) continue; // ignore chain preds
    SUnit *PredSU = Pred.getSUnit();
    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
    if (PredSethiUllman > SethiUllmanNumber) {
      SethiUllmanNumber = PredSethiUllman;
      Extra = 0;
    }
    else if (PredSethiUllman == SethiUllmanNumber)
      ++Extra;
  }

  SethiUllmanNumber += Extra;

  if (SethiUllmanNumber == 0)
    SethiUllmanNumber = 1;

  return SethiUllmanNumber;
}

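// Worked example (added for illustration; not in the original source): a leaf
// SUnit with no data predecessors falls through the loop with 0 and is bumped
// to 1.  A node whose two data predecessors both evaluate to 1 keeps the
// maximum (1) and counts one Extra for the tie, giving 1 + 1 = 2, so nodes
// that need several equally "deep" operands alive at once get larger numbers.
// pickBest() below prefers the candidate with the smaller number.
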
// Lower priority means schedule further down. For bottom-up scheduling, lower
// priority SUs are scheduled before higher priority SUs.
unsigned GCNILPScheduler::getNodePriority(const SUnit *SU) const {
  assert(SU->NodeNum < SUNumbers.size());
  if (SU->NumSuccs == 0 && SU->NumPreds != 0)
    // If SU does not have a register use, i.e. it doesn't produce a value
    // that would be consumed (e.g. store), then it terminates a chain of
    // computation. Give it a large SethiUllman number so it will be
    // scheduled right before its predecessors, so that it doesn't lengthen
    // their live ranges.
    return 0xffff;

  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
    // If SU does not have a register def, schedule it close to its uses
    // because it does not lengthen any live ranges.
    return 0;

  return SUNumbers[SU->NodeNum];
}

/// closestSucc - Returns the scheduled cycle of the successor which is
/// closest to the current cycle.
static unsigned closestSucc(const SUnit *SU) {
  unsigned MaxHeight = 0;
  for (const SDep &Succ : SU->Succs) {
    if (Succ.isCtrl()) continue; // ignore chain succs
    unsigned Height = Succ.getSUnit()->getHeight();
    // If there are a bunch of CopyToRegs stacked up, they should be considered
    // to be at the same position.
    if (Height > MaxHeight)
      MaxHeight = Height;
  }
  return MaxHeight;
}

/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
/// for scratch registers, i.e. the number of data dependencies.
static unsigned calcMaxScratches(const SUnit *SU) {
  unsigned Scratches = 0;
  for (const SDep &Pred : SU->Preds) {
    if (Pred.isCtrl()) continue; // ignore chain preds
    Scratches++;
  }
  return Scratches;
}

// Return -1 if left has higher priority, 1 if right has higher priority.
// Return 0 if latency-based priority is equivalent.
static int BUCompareLatency(const SUnit *left, const SUnit *right) {
  // Scheduling an instruction that uses a VReg whose postincrement has not yet
  // been scheduled will induce a copy. Model this as an extra cycle of latency.
  int LHeight = (int)left->getHeight();
  int RHeight = (int)right->getHeight();

  // If either node is scheduling for latency, sort them by height/depth
  // and latency.

  // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer
  // is enabled, grouping instructions by cycle, then its height is already
  // covered so only its depth matters. We also reach this point if both stall
  // but have the same height.
  if (LHeight != RHeight)
    return LHeight > RHeight ? 1 : -1;

  int LDepth = left->getDepth();
  int RDepth = right->getDepth();
  if (LDepth != RDepth) {
    LLVM_DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
                      << ") depth " << LDepth << " vs SU (" << right->NodeNum
                      << ") depth " << RDepth << "\n");
    return LDepth < RDepth ? 1 : -1;
  }
  if (left->Latency != right->Latency)
    return left->Latency > right->Latency ? 1 : -1;

  return 0;
}

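// Note added for clarity (not in the original source): with a left height of 5
// and a right height of 3, BUCompareLatency returns 1, and pickBest() below
// treats a positive result as "pick right"; equal heights fall through to
// depth and then to the Latency field as successive tie-breakers.
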
const SUnit *GCNILPScheduler::pickBest(const SUnit *left, const SUnit *right)
{
  // TODO: add register pressure lowering checks

  bool const DisableSchedCriticalPath = false;
  int MaxReorderWindow = 6;
  if (!DisableSchedCriticalPath) {
    int spread = (int)left->getDepth() - (int)right->getDepth();
    if (std::abs(spread) > MaxReorderWindow) {
      LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
                        << left->getDepth() << " != SU(" << right->NodeNum
                        << "): " << right->getDepth() << "\n");
      return left->getDepth() < right->getDepth() ? right : left;
    }
  }

  bool const DisableSchedHeight = false;
  if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
    int spread = (int)left->getHeight() - (int)right->getHeight();
    if (std::abs(spread) > MaxReorderWindow)
      return left->getHeight() > right->getHeight() ? right : left;
  }

  // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
  unsigned LPriority = getNodePriority(left);
  unsigned RPriority = getNodePriority(right);

  if (LPriority != RPriority)
    return LPriority > RPriority ? right : left;

  // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
  // e.g.
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // and the following instructions are both ready.
  // t2 = op c3
  // t4 = op c4
  //
  // Then schedule t2 = op first.
  // i.e.
  // t4 = op c4
  // t2 = op c3
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // This creates more short live intervals.
  unsigned LDist = closestSucc(left);
  unsigned RDist = closestSucc(right);
  if (LDist != RDist)
    return LDist < RDist ? right : left;

  // How many registers become live when the node is scheduled.
  unsigned LScratch = calcMaxScratches(left);
  unsigned RScratch = calcMaxScratches(right);
  if (LScratch != RScratch)
    return LScratch > RScratch ? right : left;

  bool const DisableSchedCycles = false;
  if (!DisableSchedCycles) {
    int result = BUCompareLatency(left, right);
    if (result != 0)
      return result > 0 ? right : left;
    return left;
  }
  else {
    if (left->getHeight() != right->getHeight())
      return (left->getHeight() > right->getHeight()) ? right : left;

    if (left->getDepth() != right->getDepth())
      return (left->getDepth() < right->getDepth()) ? right : left;
  }

  assert(left->NodeQueueId && right->NodeQueueId &&
         "NodeQueueId cannot be zero");
  return (left->NodeQueueId > right->NodeQueueId) ? right : left;
}

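// Summary added for clarity (not in the original source): pickBest applies its
// heuristics in order and stops at the first one that separates the two
// candidates: depth (only when the gap exceeds MaxReorderWindow), height (same
// window), Sethi-Ullman priority, distance to the closest successor, number of
// data predecessors (scratches), then latency via BUCompareLatency.  With the
// hard-coded DisableSchedCycles = false, a full latency tie returns left, so
// the trailing NodeQueueId comparison is only reachable if that flag is
// flipped; NodeQueueId would then make the choice a deterministic FIFO.
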
GCNILPScheduler::Candidate* GCNILPScheduler::pickCandidate() {
  if (AvailQueue.empty())
    return nullptr;
  auto Best = AvailQueue.begin();
  for (auto I = std::next(AvailQueue.begin()), E = AvailQueue.end(); I != E; ++I) {
    auto NewBestSU = pickBest(Best->SU, I->SU);
    if (NewBestSU != Best->SU) {
      assert(NewBestSU == I->SU);
      Best = I;
    }
  }
  return &*Best;
}

void GCNILPScheduler::releasePending() {
  // Check to see if any of the pending instructions are ready to issue. If
  // so, add them to the available queue.
  for (auto I = PendingQueue.begin(), E = PendingQueue.end(); I != E;) {
    auto &C = *I++;
    if (C.SU->getHeight() <= CurCycle) {
      PendingQueue.remove(C);
      AvailQueue.push_back(C);
      C.SU->NodeQueueId = CurQueueId++;
    }
  }
}

/// Move the scheduler state forward to the specified cycle.
void GCNILPScheduler::advanceToCycle(unsigned NextCycle) {
  if (NextCycle <= CurCycle)
    return;
  CurCycle = NextCycle;
  releasePending();
}

void GCNILPScheduler::releasePredecessors(const SUnit* SU) {
  for (const auto &PredEdge : SU->Preds) {
    auto PredSU = PredEdge.getSUnit();
    if (PredEdge.isWeak())
      continue;
    assert(PredSU->isBoundaryNode() || PredSU->NumSuccsLeft > 0);

    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge.getLatency());

    if (!PredSU->isBoundaryNode() && --PredSU->NumSuccsLeft == 0)
      PendingQueue.push_front(*new (Alloc.Allocate()) Candidate(PredSU));
  }
}

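// Clarifying note (added, not in the original source): this is the bottom-up
// counterpart of releasing successors.  Raising the predecessor's height to
// SU->getHeight() + edge latency keeps its Candidate in PendingQueue until
// advanceToCycle() reaches that height and releasePending() moves it to
// AvailQueue; a predecessor only becomes a pending candidate once its
// NumSuccsLeft count drops to zero.
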
std::vector<const SUnit*>
GCNILPScheduler::schedule(ArrayRef<const SUnit*> BotRoots,
                          const ScheduleDAG &DAG) {
  auto &SUnits = const_cast<ScheduleDAG&>(DAG).SUnits;

  std::vector<SUnit> SUSavedCopy;
  SUSavedCopy.resize(SUnits.size());

  // We cannot save only the fields we touch: some of them are private,
  // so save the units verbatim. This assumes SUnit has value semantics.
  for (const SUnit &SU : SUnits)
    SUSavedCopy[SU.NodeNum] = SU;

  SUNumbers.assign(SUnits.size(), 0);
  for (const SUnit &SU : SUnits)
    CalcNodeSethiUllmanNumber(&SU, SUNumbers);

  for (const auto *SU : BotRoots) {
    AvailQueue.push_back(
      *new (Alloc.Allocate()) Candidate(const_cast<SUnit*>(SU)));
  }
  releasePredecessors(&DAG.ExitSU);

  std::vector<const SUnit*> Schedule;
  Schedule.reserve(SUnits.size());
  while (true) {
    if (AvailQueue.empty() && !PendingQueue.empty()) {
      auto EarliestSU =
          llvm::min_element(PendingQueue, [=](const Candidate &C1,
                                              const Candidate &C2) {
            return C1.SU->getHeight() < C2.SU->getHeight();
          })->SU;
      advanceToCycle(std::max(CurCycle + 1, EarliestSU->getHeight()));
    }
    if (AvailQueue.empty())
      break;

    LLVM_DEBUG(dbgs() << "\n=== Picking candidate\n"
                         "Ready queue:";
               for (auto &C : AvailQueue)
                 dbgs() << ' ' << C.SU->NodeNum;
               dbgs() << '\n';);

    auto C = pickCandidate();
    assert(C);
    AvailQueue.remove(*C);
    auto SU = C->SU;
    LLVM_DEBUG(dbgs() << "Selected "; DAG.dumpNode(*SU));

    advanceToCycle(SU->getHeight());

    releasePredecessors(SU);
    Schedule.push_back(SU);
    SU->isScheduled = true;
  }
  assert(SUnits.size() == Schedule.size());

  std::reverse(Schedule.begin(), Schedule.end());

  // restore units
  for (auto &SU : SUnits)
    SU = SUSavedCopy[SU.NodeNum];

  return Schedule;
}

namespace llvm {
std::vector<const SUnit*> makeGCNILPScheduler(ArrayRef<const SUnit*> BotRoots,
                                              const ScheduleDAG &DAG) {
  GCNILPScheduler S;
  return S.schedule(BotRoots, DAG);
}
}
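
// Usage sketch (hypothetical, added for illustration): given a fully built
// ScheduleDAG, a caller can collect the bottom roots (SUnits with no
// successors) and ask for the ILP order.  `DAG' stands for any concrete
// ScheduleDAG instance and is an assumption of this sketch, not a reference
// to a specific caller in the backend.
//
//   SmallVector<const SUnit *, 8> BotRoots;
//   for (const SUnit &SU : DAG.SUnits)
//     if (SU.Succs.empty())
//       BotRoots.push_back(&SU);
//   std::vector<const SUnit *> Order = makeGCNILPScheduler(BotRoots, DAG);
//   // Order is in top-down program order; schedule() reverses its bottom-up
//   // picks before returning.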