docs/doxygen/MachineScheduler_8cpp_source.html

//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

// MachineScheduler schedules machine instructions after phi elimination. It

// preserves LiveIntervals so it can be invoked before register allocation.

//

//===----------------------------------------------------------------------===//


#include "llvm/CodeGen/MachineScheduler.h"

#include "llvm/ADT/ArrayRef.h"

#include "llvm/ADT/BitVector.h"

#include "llvm/ADT/DenseMap.h"

#include "llvm/ADT/PriorityQueue.h"

#include "llvm/ADT/STLExtras.h"

#include "llvm/ADT/SmallVector.h"

#include "llvm/ADT/Statistic.h"

#include "llvm/ADT/iterator_range.h"

#include "llvm/Analysis/AliasAnalysis.h"

#include "llvm/CodeGen/LiveInterval.h"

#include "llvm/CodeGen/LiveIntervals.h"

#include "llvm/CodeGen/MachineBasicBlock.h"

#include "llvm/CodeGen/MachineDominators.h"

#include "llvm/CodeGen/MachineFunction.h"

#include "llvm/CodeGen/MachineFunctionPass.h"

#include "llvm/CodeGen/MachineInstr.h"

#include "llvm/CodeGen/MachineLoopInfo.h"

#include "llvm/CodeGen/MachineOperand.h"

#include "llvm/CodeGen/MachinePassRegistry.h"

#include "llvm/CodeGen/MachineRegisterInfo.h"

#include "llvm/CodeGen/RegisterClassInfo.h"

#include "llvm/CodeGen/RegisterPressure.h"

#include "llvm/CodeGen/ScheduleDAG.h"

#include "llvm/CodeGen/ScheduleDAGInstrs.h"

#include "llvm/CodeGen/ScheduleDAGMutation.h"

#include "llvm/CodeGen/ScheduleDFS.h"

#include "llvm/CodeGen/ScheduleHazardRecognizer.h"

#include "llvm/CodeGen/SlotIndexes.h"

#include "llvm/CodeGen/TargetFrameLowering.h"

#include "llvm/CodeGen/TargetInstrInfo.h"

#include "llvm/CodeGen/TargetLowering.h"

#include "llvm/CodeGen/TargetPassConfig.h"

#include "llvm/CodeGen/TargetRegisterInfo.h"

#include "llvm/CodeGen/TargetSchedule.h"

#include "llvm/CodeGen/TargetSubtargetInfo.h"

#include "llvm/CodeGenTypes/MachineValueType.h"

#include "llvm/Config/llvm-config.h"

#include "llvm/InitializePasses.h"

#include "llvm/MC/LaneBitmask.h"

#include "llvm/Pass.h"

#include "llvm/Support/CommandLine.h"

#include "llvm/Support/Compiler.h"

#include "llvm/Support/Debug.h"

#include "llvm/Support/ErrorHandling.h"

#include "llvm/Support/GraphWriter.h"

#include "llvm/Support/raw_ostream.h"

#include <algorithm>

#include <cassert>

#include <cstdint>

#include <iterator>

#include <limits>

#include <memory>

#include <string>

#include <tuple>

#include <utility>

#include <vector>


using namespace llvm;


#define DEBUG_TYPE "machine-scheduler"


STATISTIC(NumClustered, "Number of load/store pairs clustered");


namespace llvm {


cl::opt<MISched::Direction> PreRADirection(

    "misched-prera-direction", cl::Hidden,

    cl::desc("Pre reg-alloc list scheduling direction"),

    cl::init(MISched::Unspecified),

    cl::values(

        clEnumValN(MISched::TopDown, "topdown",

                   "Force top-down pre reg-alloc list scheduling"),

        clEnumValN(MISched::BottomUp, "bottomup",

                   "Force bottom-up pre reg-alloc list scheduling"),

        clEnumValN(MISched::Bidirectional, "bidirectional",

                   "Force bidirectional pre reg-alloc list scheduling")));


cl::opt<MISched::Direction> PostRADirection(

    "misched-postra-direction", cl::Hidden,

    cl::desc("Post reg-alloc list scheduling direction"),

    cl::init(MISched::Unspecified),

    cl::values(

        clEnumValN(MISched::TopDown, "topdown",

                   "Force top-down post reg-alloc list scheduling"),

        clEnumValN(MISched::BottomUp, "bottomup",

                   "Force bottom-up post reg-alloc list scheduling"),

        clEnumValN(MISched::Bidirectional, "bidirectional",

                   "Force bidirectional post reg-alloc list scheduling")));


cl::opt<bool>

DumpCriticalPathLength("misched-dcpl", cl::Hidden,

                       cl::desc("Print critical path length to stdout"));


cl::opt<bool> VerifyScheduling(

    "verify-misched", cl::Hidden,

    cl::desc("Verify machine instrs before and after machine scheduling"));


#ifndef NDEBUG

cl::opt<bool> ViewMISchedDAGs(

    "view-misched-dags", cl::Hidden,

    cl::desc("Pop up a window to show MISched dags after they are processed"));

cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,

                        cl::desc("Print schedule DAGs"));

cl::opt<bool> MISchedDumpReservedCycles(

    "misched-dump-reserved-cycles", cl::Hidden, cl::init(false),

    cl::desc("Dump resource usage at schedule boundary."));

cl::opt<bool> MischedDetailResourceBooking(

    "misched-detail-resource-booking", cl::Hidden, cl::init(false),

    cl::desc("Show details of invoking getNextResoufceCycle."));

#else

const bool ViewMISchedDAGs = false;

const bool PrintDAGs = false;

const bool MischedDetailResourceBooking = false;

#ifdef LLVM_ENABLE_DUMP

const bool MISchedDumpReservedCycles = false;

#endif // LLVM_ENABLE_DUMP

#endif // NDEBUG


} // end namespace llvm


#ifndef NDEBUG

/// In some situations a few uninteresting nodes depend on nearly all other

/// nodes in the graph, provide a cutoff to hide them.

static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,

  cl::desc("Hide nodes with more predecessor/successor than cutoff"));


static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,

  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));


static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,

  cl::desc("Only schedule this function"));

static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,

                                        cl::desc("Only schedule this MBB#"));

#endif // NDEBUG


/// Avoid quadratic complexity in unusually large basic blocks by limiting the

/// size of the ready lists.

static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,

  cl::desc("Limit ready list to N instructions"), cl::init(256));


static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,

  cl::desc("Enable register pressure scheduling."), cl::init(true));


static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,

  cl::desc("Enable cyclic critical path analysis."), cl::init(true));


static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,

                                        cl::desc("Enable memop clustering."),

                                        cl::init(true));

static cl::opt<bool>

    ForceFastCluster("force-fast-cluster", cl::Hidden,

                     cl::desc("Switch to fast cluster algorithm with the lost "

                              "of some fusion opportunities"),

                     cl::init(false));

static cl::opt<unsigned>

    FastClusterThreshold("fast-cluster-threshold", cl::Hidden,

                         cl::desc("The threshold for fast cluster"),

                         cl::init(1000));


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

static cl::opt<bool> MISchedDumpScheduleTrace(

    "misched-dump-schedule-trace", cl::Hidden, cl::init(false),

    cl::desc("Dump resource usage at schedule boundary."));

static cl::opt<unsigned>

    HeaderColWidth("misched-dump-schedule-trace-col-header-width", cl::Hidden,

                   cl::desc("Set width of the columns with "

                            "the resources and schedule units"),

                   cl::init(19));

static cl::opt<unsigned>

    ColWidth("misched-dump-schedule-trace-col-width", cl::Hidden,

             cl::desc("Set width of the columns showing resource booking."),

             cl::init(5));

static cl::opt<bool> MISchedSortResourcesInTrace(

    "misched-sort-resources-in-trace", cl::Hidden, cl::init(true),

    cl::desc("Sort the resources printed in the dump trace"));

#endif


static cl::opt<unsigned>

    MIResourceCutOff("misched-resource-cutoff", cl::Hidden,

                     cl::desc("Number of intervals to track"), cl::init(10));


// DAG subtrees must have at least this many nodes.

static const unsigned MinSubtreeSize = 8;


// Pin the vtables to this file.

void MachineSchedStrategy::anchor() {}


void ScheduleDAGMutation::anchor() {}


//===----------------------------------------------------------------------===//

// Machine Instruction Scheduling Pass and Registry

//===----------------------------------------------------------------------===//


MachineSchedContext::MachineSchedContext() {

  RegClassInfo = new RegisterClassInfo();

}


MachineSchedContext::~MachineSchedContext() {

  delete RegClassInfo;

}


namespace {


/// Base class for a machine scheduler class that can run at any point.

class MachineSchedulerBase : public MachineSchedContext,

                             public MachineFunctionPass {

public:

  MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}


  void print(raw_ostream &O, const Module* = nullptr) const override;


protected:

  void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);

};


/// MachineScheduler runs after coalescing and before register allocation.

class MachineScheduler : public MachineSchedulerBase {

public:

  MachineScheduler();


  void getAnalysisUsage(AnalysisUsage &AU) const override;


  bool runOnMachineFunction(MachineFunction&) override;


  static char ID; // Class identification, replacement for typeinfo


protected:

  ScheduleDAGInstrs *createMachineScheduler();

};


/// PostMachineScheduler runs after shortly before code emission.

class PostMachineScheduler : public MachineSchedulerBase {

public:

  PostMachineScheduler();


  void getAnalysisUsage(AnalysisUsage &AU) const override;


  bool runOnMachineFunction(MachineFunction&) override;


  static char ID; // Class identification, replacement for typeinfo


protected:

  ScheduleDAGInstrs *createPostMachineScheduler();

};


} // end anonymous namespace


char MachineScheduler::ID = 0;


char &llvm::MachineSchedulerID = MachineScheduler::ID;


INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,

                      "Machine Instruction Scheduler", false, false)

INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)

INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)

INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)

INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass)

INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)

INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,

                    "Machine Instruction Scheduler", false, false)


MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {

  initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());

}


void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {

  AU.setPreservesCFG();

  AU.addRequired<MachineDominatorTreeWrapperPass>();

  AU.addRequired<MachineLoopInfoWrapperPass>();

  AU.addRequired<AAResultsWrapperPass>();

  AU.addRequired<TargetPassConfig>();

  AU.addRequired<SlotIndexesWrapperPass>();

  AU.addPreserved<SlotIndexesWrapperPass>();

  AU.addRequired<LiveIntervalsWrapperPass>();

  AU.addPreserved<LiveIntervalsWrapperPass>();

  MachineFunctionPass::getAnalysisUsage(AU);

}


char PostMachineScheduler::ID = 0;


char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;


INITIALIZE_PASS_BEGIN(PostMachineScheduler, "postmisched",

                      "PostRA Machine Instruction Scheduler", false, false)

INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)

INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)

INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)

INITIALIZE_PASS_END(PostMachineScheduler, "postmisched",

                    "PostRA Machine Instruction Scheduler", false, false)


PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {

  initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());

}


void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {

  AU.setPreservesCFG();

  AU.addRequired<MachineDominatorTreeWrapperPass>();

  AU.addRequired<MachineLoopInfoWrapperPass>();

  AU.addRequired<AAResultsWrapperPass>();

  AU.addRequired<TargetPassConfig>();

  MachineFunctionPass::getAnalysisUsage(AU);

}


MachinePassRegistry<MachineSchedRegistry::ScheduleDAGCtor>

    MachineSchedRegistry::Registry;


/// A dummy default scheduler factory indicates whether the scheduler

/// is overridden on the command line.

static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {

  return nullptr;

}


/// MachineSchedOpt allows command line selection of the scheduler.

static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,

               RegisterPassParser<MachineSchedRegistry>>

MachineSchedOpt("misched",

                cl::init(&useDefaultMachineSched), cl::Hidden,

                cl::desc("Machine instruction scheduler to use"));


static MachineSchedRegistry

DefaultSchedRegistry("default", "Use the target's default scheduler choice.",

                     useDefaultMachineSched);


static cl::opt<bool> EnableMachineSched(

    "enable-misched",

    cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),

    cl::Hidden);


static cl::opt<bool> EnablePostRAMachineSched(

    "enable-post-misched",

    cl::desc("Enable the post-ra machine instruction scheduling pass."),

    cl::init(true), cl::Hidden);


/// Decrement this iterator until reaching the top or a non-debug instr.

static MachineBasicBlock::const_iterator

priorNonDebug(MachineBasicBlock::const_iterator I,

              MachineBasicBlock::const_iterator Beg) {

  assert(I != Beg && "reached the top of the region, cannot decrement");

  while (--I != Beg) {

    if (!I->isDebugOrPseudoInstr())

      break;

  }

  return I;

}


/// Non-const version.

static MachineBasicBlock::iterator

priorNonDebug(MachineBasicBlock::iterator I,

              MachineBasicBlock::const_iterator Beg) {

  return priorNonDebug(MachineBasicBlock::const_iterator(I), Beg)

      .getNonConstIterator();

}


/// If this iterator is a debug value, increment until reaching the End or a

/// non-debug instruction.

static MachineBasicBlock::const_iterator

nextIfDebug(MachineBasicBlock::const_iterator I,

            MachineBasicBlock::const_iterator End) {

  for(; I != End; ++I) {

    if (!I->isDebugOrPseudoInstr())

      break;

  }

  return I;

}


/// Non-const version.

static MachineBasicBlock::iterator

nextIfDebug(MachineBasicBlock::iterator I,

            MachineBasicBlock::const_iterator End) {

  return nextIfDebug(MachineBasicBlock::const_iterator(I), End)

      .getNonConstIterator();

}


/// Instantiate a ScheduleDAGInstrs that will be owned by the caller.

ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {

  // Select the scheduler, or set the default.

  MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;

  if (Ctor != useDefaultMachineSched)

    return Ctor(this);


  // Get the default scheduler set by the target for this function.

  ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);

  if (Scheduler)

    return Scheduler;


  // Default to GenericScheduler.

  return createGenericSchedLive(this);

}


/// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by

/// the caller. We don't have a command line option to override the postRA

/// scheduler. The Target must configure it.

ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {

  // Get the postRA scheduler set by the target for this function.

  ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);

  if (Scheduler)

    return Scheduler;


  // Default to GenericScheduler.

  return createGenericSchedPostRA(this);

}


/// Top-level MachineScheduler pass driver.

///

/// Visit blocks in function order. Divide each block into scheduling regions

/// and visit them bottom-up. Visiting regions bottom-up is not required, but is

/// consistent with the DAG builder, which traverses the interior of the

/// scheduling regions bottom-up.

///

/// This design avoids exposing scheduling boundaries to the DAG builder,

/// simplifying the DAG builder's support for "special" target instructions.

/// At the same time the design allows target schedulers to operate across

/// scheduling boundaries, for example to bundle the boundary instructions

/// without reordering them. This creates complexity, because the target

/// scheduler must update the RegionBegin and RegionEnd positions cached by

/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler

/// design would be to split blocks at scheduling boundaries, but LLVM has a

/// general bias against block splitting purely for implementation simplicity.

bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {

  if (skipFunction(mf.getFunction()))

    return false;


  if (EnableMachineSched.getNumOccurrences()) {

    if (!EnableMachineSched)

      return false;

  } else if (!mf.getSubtarget().enableMachineScheduler())

    return false;


  LLVM_DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));


  // Initialize the context of the pass.

  MF = &mf;

  MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();

  MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();

  PassConfig = &getAnalysis<TargetPassConfig>();

  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();


  LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();


  if (VerifyScheduling) {

    LLVM_DEBUG(LIS->dump());

    MF->verify(this, "Before machine scheduling.", &errs());

  }

  RegClassInfo->runOnMachineFunction(*MF);


  // Instantiate the selected scheduler for this target, function, and

  // optimization level.

  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());

  scheduleRegions(*Scheduler, false);


  LLVM_DEBUG(LIS->dump());

  if (VerifyScheduling)

    MF->verify(this, "After machine scheduling.", &errs());

  return true;

}


bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {

  if (skipFunction(mf.getFunction()))

    return false;


  if (EnablePostRAMachineSched.getNumOccurrences()) {

    if (!EnablePostRAMachineSched)

      return false;

  } else if (!mf.getSubtarget().enablePostRAMachineScheduler()) {

    LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");

    return false;

  }

  LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));


  // Initialize the context of the pass.

  MF = &mf;

  MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();

  PassConfig = &getAnalysis<TargetPassConfig>();

  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();


  if (VerifyScheduling)

    MF->verify(this, "Before post machine scheduling.", &errs());


  // Instantiate the selected scheduler for this target, function, and

  // optimization level.

  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());

  scheduleRegions(*Scheduler, true);


  if (VerifyScheduling)

    MF->verify(this, "After post machine scheduling.", &errs());

  return true;

}


/// Return true of the given instruction should not be included in a scheduling

/// region.

///

/// MachineScheduler does not currently support scheduling across calls. To

/// handle calls, the DAG builder needs to be modified to create register

/// anti/output dependencies on the registers clobbered by the call's regmask

/// operand. In PreRA scheduling, the stack pointer adjustment already prevents

/// scheduling across calls. In PostRA scheduling, we need the isCall to enforce

/// the boundary, but there would be no benefit to postRA scheduling across

/// calls this late anyway.

static bool isSchedBoundary(MachineBasicBlock::iterator MI,

                            MachineBasicBlock *MBB,

                            MachineFunction *MF,

                            const TargetInstrInfo *TII) {

  return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF) ||

         MI->isFakeUse();

}


/// A region of an MBB for scheduling.

namespace {

struct SchedRegion {

  /// RegionBegin is the first instruction in the scheduling region, and

  /// RegionEnd is either MBB->end() or the scheduling boundary after the

  /// last instruction in the scheduling region. These iterators cannot refer

  /// to instructions outside of the identified scheduling region because

  /// those may be reordered before scheduling this region.

  MachineBasicBlock::iterator RegionBegin;

  MachineBasicBlock::iterator RegionEnd;

  unsigned NumRegionInstrs;


  SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E,

              unsigned N) :

    RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}

};

} // end anonymous namespace


using MBBRegionsVector = SmallVector<SchedRegion, 16>;


static void

getSchedRegions(MachineBasicBlock *MBB,

                MBBRegionsVector &Regions,

                bool RegionsTopDown) {

  MachineFunction *MF = MBB->getParent();

  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();


  MachineBasicBlock::iterator I = nullptr;

  for(MachineBasicBlock::iterator RegionEnd = MBB->end();

      RegionEnd != MBB->begin(); RegionEnd = I) {


    // Avoid decrementing RegionEnd for blocks with no terminator.

    if (RegionEnd != MBB->end() ||

        isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {

      --RegionEnd;

    }


    // The next region starts above the previous region. Look backward in the

    // instruction stream until we find the nearest boundary.

    unsigned NumRegionInstrs = 0;

    I = RegionEnd;

    for (;I != MBB->begin(); --I) {

      MachineInstr &MI = *std::prev(I);

      if (isSchedBoundary(&MI, &*MBB, MF, TII))

        break;

      if (!MI.isDebugOrPseudoInstr()) {

        // MBB::size() uses instr_iterator to count. Here we need a bundle to

        // count as a single instruction.

        ++NumRegionInstrs;

      }

    }


    // It's possible we found a scheduling region that only has debug

    // instructions. Don't bother scheduling these.

    if (NumRegionInstrs != 0)

      Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));

  }


  if (RegionsTopDown)

    std::reverse(Regions.begin(), Regions.end());

}


/// Main driver for both MachineScheduler and PostMachineScheduler.

void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,

                                           bool FixKillFlags) {

  // Visit all machine basic blocks.

  //

  // TODO: Visit blocks in global postorder or postorder within the bottom-up

  // loop tree. Then we can optionally compute global RegPressure.

  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();

       MBB != MBBEnd; ++MBB) {


    Scheduler.startBlock(&*MBB);


#ifndef NDEBUG

    if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())

      continue;

    if (SchedOnlyBlock.getNumOccurrences()

        && (int)SchedOnlyBlock != MBB->getNumber())

      continue;

#endif


    // Break the block into scheduling regions [I, RegionEnd). RegionEnd

    // points to the scheduling boundary at the bottom of the region. The DAG

    // does not include RegionEnd, but the region does (i.e. the next

    // RegionEnd is above the previous RegionBegin). If the current block has

    // no terminator then RegionEnd == MBB->end() for the bottom region.

    //

    // All the regions of MBB are first found and stored in MBBRegions, which

    // will be processed (MBB) top-down if initialized with true.

    //

    // The Scheduler may insert instructions during either schedule() or

    // exitRegion(), even for empty regions. So the local iterators 'I' and

    // 'RegionEnd' are invalid across these calls. Instructions must not be

    // added to other regions than the current one without updating MBBRegions.


    MBBRegionsVector MBBRegions;

    getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());

    for (const SchedRegion &R : MBBRegions) {

      MachineBasicBlock::iterator I = R.RegionBegin;

      MachineBasicBlock::iterator RegionEnd = R.RegionEnd;

      unsigned NumRegionInstrs = R.NumRegionInstrs;


      // Notify the scheduler of the region, even if we may skip scheduling

      // it. Perhaps it still needs to be bundled.

      Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);


      // Skip empty scheduling regions (0 or 1 schedulable instructions).

      if (I == RegionEnd || I == std::prev(RegionEnd)) {

        // Close the current region. Bundle the terminator if needed.

        // This invalidates 'RegionEnd' and 'I'.

        Scheduler.exitRegion();

        continue;

      }

      LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");

      LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)

                        << " " << MBB->getName() << "\n  From: " << *I

                        << "    To: ";

                 if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;

                 else dbgs() << "End\n";

                 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');

      if (DumpCriticalPathLength) {

        errs() << MF->getName();

        errs() << ":%bb. " << MBB->getNumber();

        errs() << " " << MBB->getName() << " \n";

      }


      // Schedule a region: possibly reorder instructions.

      // This invalidates the original region iterators.

      Scheduler.schedule();


      // Close the current region.

      Scheduler.exitRegion();

    }

    Scheduler.finishBlock();

    // FIXME: Ideally, no further passes should rely on kill flags. However,

    // thumb2 size reduction is currently an exception, so the PostMIScheduler

    // needs to do this.

    if (FixKillFlags)

      Scheduler.fixupKills(*MBB);

  }

  Scheduler.finalizeSchedule();

}


void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {

  // unimplemented

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

LLVM_DUMP_METHOD void ReadyQueue::dump() const {

  dbgs() << "Queue " << Name << ": ";

  for (const SUnit *SU : Queue)

    dbgs() << SU->NodeNum << " ";

  dbgs() << "\n";

}

#endif


//===----------------------------------------------------------------------===//

// ScheduleDAGMI - Basic machine instruction scheduling. This is

// independent of PreRA/PostRA scheduling and involves no extra book-keeping for

// virtual registers.

// ===----------------------------------------------------------------------===/


// Provide a vtable anchor.

ScheduleDAGMI::~ScheduleDAGMI() = default;


/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When

/// NumPredsLeft reaches zero, release the successor node.

///

/// FIXME: Adjust SuccSU height based on MinLatency.

void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {

  SUnit *SuccSU = SuccEdge->getSUnit();


  if (SuccEdge->isWeak()) {

    --SuccSU->WeakPredsLeft;

    if (SuccEdge->isCluster())

      NextClusterSucc = SuccSU;

    return;

  }

#ifndef NDEBUG

  if (SuccSU->NumPredsLeft == 0) {

    dbgs() << "*** Scheduling failed! ***\n";

    dumpNode(*SuccSU);

    dbgs() << " has been released too many times!\n";

    llvm_unreachable(nullptr);

  }

#endif

  // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,

  // CurrCycle may have advanced since then.

  if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())

    SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();


  --SuccSU->NumPredsLeft;

  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)

    SchedImpl->releaseTopNode(SuccSU);

}


/// releaseSuccessors - Call releaseSucc on each of SU's successors.

void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {

  for (SDep &Succ : SU->Succs)

    releaseSucc(SU, &Succ);

}


/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When

/// NumSuccsLeft reaches zero, release the predecessor node.

///

/// FIXME: Adjust PredSU height based on MinLatency.

void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {

  SUnit *PredSU = PredEdge->getSUnit();


  if (PredEdge->isWeak()) {

    --PredSU->WeakSuccsLeft;

    if (PredEdge->isCluster())

      NextClusterPred = PredSU;

    return;

  }

#ifndef NDEBUG

  if (PredSU->NumSuccsLeft == 0) {

    dbgs() << "*** Scheduling failed! ***\n";

    dumpNode(*PredSU);

    dbgs() << " has been released too many times!\n";

    llvm_unreachable(nullptr);

  }

#endif

  // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,

  // CurrCycle may have advanced since then.

  if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())

    PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();


  --PredSU->NumSuccsLeft;

  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)

    SchedImpl->releaseBottomNode(PredSU);

}


/// releasePredecessors - Call releasePred on each of SU's predecessors.

void ScheduleDAGMI::releasePredecessors(SUnit *SU) {

  for (SDep &Pred : SU->Preds)

    releasePred(SU, &Pred);

}


void ScheduleDAGMI::startBlock(MachineBasicBlock *bb) {

  ScheduleDAGInstrs::startBlock(bb);

  SchedImpl->enterMBB(bb);

}


void ScheduleDAGMI::finishBlock() {

  SchedImpl->leaveMBB();

  ScheduleDAGInstrs::finishBlock();

}


/// enterRegion - Called back from PostMachineScheduler::runOnMachineFunction

/// after crossing a scheduling boundary. [begin, end) includes all instructions

/// in the region, including the boundary itself and single-instruction regions

/// that don't get scheduled.

void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,

                                     MachineBasicBlock::iterator begin,

                                     MachineBasicBlock::iterator end,

                                     unsigned regioninstrs)

{

  ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);


  SchedImpl->initPolicy(begin, end, regioninstrs);


  // Set dump direction after initializing sched policy.

  ScheduleDAGMI::DumpDirection D;

  if (SchedImpl->getPolicy().OnlyTopDown)

    D = ScheduleDAGMI::DumpDirection::TopDown;

  else if (SchedImpl->getPolicy().OnlyBottomUp)

    D = ScheduleDAGMI::DumpDirection::BottomUp;

  else

    D = ScheduleDAGMI::DumpDirection::Bidirectional;

  setDumpDirection(D);

}


/// This is normally called from the main scheduler loop but may also be invoked

/// by the scheduling strategy to perform additional code motion.

void ScheduleDAGMI::moveInstruction(

  MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {

  // Advance RegionBegin if the first instruction moves down.

  if (&*RegionBegin == MI)

    ++RegionBegin;


  // Update the instruction stream.

  BB->splice(InsertPos, BB, MI);


  // Update LiveIntervals

  if (LIS)

    LIS->handleMove(*MI, /*UpdateFlags=*/true);


  // Recede RegionBegin if an instruction moves above the first.

  if (RegionBegin == InsertPos)

    RegionBegin = MI;

}


bool ScheduleDAGMI::checkSchedLimit() {

#if LLVM_ENABLE_ABI_BREAKING_CHECKS && !defined(NDEBUG)

  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {

    CurrentTop = CurrentBottom;

    return false;

  }

  ++NumInstrsScheduled;

#endif

  return true;

}


/// Per-region scheduling driver, called back from

/// PostMachineScheduler::runOnMachineFunction. This is a simplified driver

/// that does not consider liveness or register pressure. It is useful for

/// PostRA scheduling and potentially other custom schedulers.

void ScheduleDAGMI::schedule() {

  LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");

  LLVM_DEBUG(SchedImpl->dumpPolicy());


  // Build the DAG.

  buildSchedGraph(AA);


  postProcessDAG();


  SmallVector<SUnit*, 8> TopRoots, BotRoots;

  findRootsAndBiasEdges(TopRoots, BotRoots);


  LLVM_DEBUG(dump());

  if (PrintDAGs) dump();

  if (ViewMISchedDAGs) viewGraph();


  // Initialize the strategy before modifying the DAG.

  // This may initialize a DFSResult to be used for queue priority.

  SchedImpl->initialize(this);


  // Initialize ready queues now that the DAG and priority data are finalized.

  initQueues(TopRoots, BotRoots);


  bool IsTopNode = false;

  while (true) {

    LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");

    SUnit *SU = SchedImpl->pickNode(IsTopNode);

    if (!SU) break;


    assert(!SU->isScheduled && "Node already scheduled");

    if (!checkSchedLimit())

      break;


    MachineInstr *MI = SU->getInstr();

    if (IsTopNode) {

      assert(SU->isTopReady() && "node still has unscheduled dependencies");

      if (&*CurrentTop == MI)

        CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);

      else

        moveInstruction(MI, CurrentTop);

    } else {

      assert(SU->isBottomReady() && "node still has unscheduled dependencies");

      MachineBasicBlock::iterator priorII =

        priorNonDebug(CurrentBottom, CurrentTop);

      if (&*priorII == MI)

        CurrentBottom = priorII;

      else {

        if (&*CurrentTop == MI)

          CurrentTop = nextIfDebug(++CurrentTop, priorII);

        moveInstruction(MI, CurrentBottom);

        CurrentBottom = MI;

      }

    }

    // Notify the scheduling strategy before updating the DAG.

    // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues

    // runs, it can then use the accurate ReadyCycle time to determine whether

    // newly released nodes can move to the readyQ.

    SchedImpl->schedNode(SU, IsTopNode);


    updateQueues(SU, IsTopNode);

  }

  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");


  placeDebugValues();


  LLVM_DEBUG({

    dbgs() << "*** Final schedule for "

           << printMBBReference(*begin()->getParent()) << " ***\n";

    dumpSchedule();

    dbgs() << '\n';

  });

}


/// Apply each ScheduleDAGMutation step in order.

void ScheduleDAGMI::postProcessDAG() {

  for (auto &m : Mutations)

    m->apply(this);

}


void ScheduleDAGMI::

findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,

                      SmallVectorImpl<SUnit*> &BotRoots) {

  for (SUnit &SU : SUnits) {

    assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");


    // Order predecessors so DFSResult follows the critical path.

    SU.biasCriticalPath();


    // A SUnit is ready to top schedule if it has no predecessors.

    if (!SU.NumPredsLeft)

      TopRoots.push_back(&SU);

    // A SUnit is ready to bottom schedule if it has no successors.

    if (!SU.NumSuccsLeft)

      BotRoots.push_back(&SU);

  }

  ExitSU.biasCriticalPath();

}


/// Identify DAG roots and setup scheduler queues.

void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,

                               ArrayRef<SUnit*> BotRoots) {

  NextClusterSucc = nullptr;

  NextClusterPred = nullptr;


  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.

  //

  // Nodes with unreleased weak edges can still be roots.

  // Release top roots in forward order.

  for (SUnit *SU : TopRoots)

    SchedImpl->releaseTopNode(SU);


  // Release bottom roots in reverse order so the higher priority nodes appear

  // first. This is more natural and slightly more efficient.

  for (SmallVectorImpl<SUnit*>::const_reverse_iterator

         I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {

    SchedImpl->releaseBottomNode(*I);

  }


  releaseSuccessors(&EntrySU);

  releasePredecessors(&ExitSU);


  SchedImpl->registerRoots();


  // Advance past initial DebugValues.

  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);

  CurrentBottom = RegionEnd;

}


/// Update scheduler queues after scheduling an instruction.

void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {

  // Release dependent instructions for scheduling.

  if (IsTopNode)

    releaseSuccessors(SU);

  else

    releasePredecessors(SU);


  SU->isScheduled = true;

}


/// Reinsert any remaining debug_values, just like the PostRA scheduler.

void ScheduleDAGMI::placeDebugValues() {

  // If first instruction was a DBG_VALUE then put it back.

  if (FirstDbgValue) {

    BB->splice(RegionBegin, BB, FirstDbgValue);

    RegionBegin = FirstDbgValue;

  }


  for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator

         DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {

    std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);

    MachineInstr *DbgValue = P.first;

    MachineBasicBlock::iterator OrigPrevMI = P.second;

    if (&*RegionBegin == DbgValue)

      ++RegionBegin;

    BB->splice(std::next(OrigPrevMI), BB, DbgValue);

    if (RegionEnd != BB->end() && OrigPrevMI == &*RegionEnd)

      RegionEnd = DbgValue;

  }

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

static const char *scheduleTableLegend = "  i: issue\n  x: resource booked";


LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceTopDown() const {

  // Bail off when there is no schedule model to query.

  if (!SchedModel.hasInstrSchedModel())

    return;


  //  Nothing to show if there is no or just one instruction.

  if (BB->size() < 2)

    return;


  dbgs() << " * Schedule table (TopDown):\n";

  dbgs() << scheduleTableLegend << "\n";

  const unsigned FirstCycle = getSUnit(&*(std::begin(*this)))->TopReadyCycle;

  unsigned LastCycle = getSUnit(&*(std::prev(std::end(*this))))->TopReadyCycle;

  for (MachineInstr &MI : *this) {

    SUnit *SU = getSUnit(&MI);

    if (!SU)

      continue;

    const MCSchedClassDesc *SC = getSchedClass(SU);

    for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),

                                       PE = SchedModel.getWriteProcResEnd(SC);

         PI != PE; ++PI) {

      if (SU->TopReadyCycle + PI->ReleaseAtCycle - 1 > LastCycle)

        LastCycle = SU->TopReadyCycle + PI->ReleaseAtCycle - 1;

    }

  }

  // Print the header with the cycles

  dbgs() << llvm::left_justify("Cycle", HeaderColWidth);

  for (unsigned C = FirstCycle; C <= LastCycle; ++C)

    dbgs() << llvm::left_justify("| " + std::to_string(C), ColWidth);

  dbgs() << "|\n";


  for (MachineInstr &MI : *this) {

    SUnit *SU = getSUnit(&MI);

    if (!SU) {

      dbgs() << "Missing SUnit\n";

      continue;

    }

    std::string NodeName("SU(");

    NodeName += std::to_string(SU->NodeNum) + ")";

    dbgs() << llvm::left_justify(NodeName, HeaderColWidth);

    unsigned C = FirstCycle;

    for (; C <= LastCycle; ++C) {

      if (C == SU->TopReadyCycle)

        dbgs() << llvm::left_justify("| i", ColWidth);

      else

        dbgs() << llvm::left_justify("|", ColWidth);

    }

    dbgs() << "|\n";

    const MCSchedClassDesc *SC = getSchedClass(SU);


    SmallVector<MCWriteProcResEntry, 4> ResourcesIt(

        make_range(SchedModel.getWriteProcResBegin(SC),

                   SchedModel.getWriteProcResEnd(SC)));


    if (MISchedSortResourcesInTrace)

      llvm::stable_sort(ResourcesIt,

                        [](const MCWriteProcResEntry &LHS,

                           const MCWriteProcResEntry &RHS) -> bool {

                          return LHS.AcquireAtCycle < RHS.AcquireAtCycle ||

                                 (LHS.AcquireAtCycle == RHS.AcquireAtCycle &&

                                  LHS.ReleaseAtCycle < RHS.ReleaseAtCycle);

                        });

    for (const MCWriteProcResEntry &PI : ResourcesIt) {

      C = FirstCycle;

      const std::string ResName =

          SchedModel.getResourceName(PI.ProcResourceIdx);

      dbgs() << llvm::right_justify(ResName + " ", HeaderColWidth);

      for (; C < SU->TopReadyCycle + PI.AcquireAtCycle; ++C) {

        dbgs() << llvm::left_justify("|", ColWidth);

      }

      for (unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E;

           ++I, ++C)

        dbgs() << llvm::left_justify("| x", ColWidth);

      while (C++ <= LastCycle)

        dbgs() << llvm::left_justify("|", ColWidth);

      // Place end char

      dbgs() << "| \n";

    }

  }

}


LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceBottomUp() const {

  // Bail off when there is no schedule model to query.

  if (!SchedModel.hasInstrSchedModel())

    return;


  //  Nothing to show if there is no or just one instruction.

  if (BB->size() < 2)

    return;


  dbgs() << " * Schedule table (BottomUp):\n";

  dbgs() << scheduleTableLegend << "\n";


  const int FirstCycle = getSUnit(&*(std::begin(*this)))->BotReadyCycle;

  int LastCycle = getSUnit(&*(std::prev(std::end(*this))))->BotReadyCycle;

  for (MachineInstr &MI : *this) {

    SUnit *SU = getSUnit(&MI);

    if (!SU)

      continue;

    const MCSchedClassDesc *SC = getSchedClass(SU);

    for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),

                                       PE = SchedModel.getWriteProcResEnd(SC);

         PI != PE; ++PI) {

      if ((int)SU->BotReadyCycle - PI->ReleaseAtCycle + 1 < LastCycle)

        LastCycle = (int)SU->BotReadyCycle - PI->ReleaseAtCycle + 1;

    }

  }

  // Print the header with the cycles

  dbgs() << llvm::left_justify("Cycle", HeaderColWidth);

  for (int C = FirstCycle; C >= LastCycle; --C)

    dbgs() << llvm::left_justify("| " + std::to_string(C), ColWidth);

  dbgs() << "|\n";


  for (MachineInstr &MI : *this) {

    SUnit *SU = getSUnit(&MI);

    if (!SU) {

      dbgs() << "Missing SUnit\n";

      continue;

    }

    std::string NodeName("SU(");

    NodeName += std::to_string(SU->NodeNum) + ")";

    dbgs() << llvm::left_justify(NodeName, HeaderColWidth);

    int C = FirstCycle;

    for (; C >= LastCycle; --C) {

      if (C == (int)SU->BotReadyCycle)

        dbgs() << llvm::left_justify("| i", ColWidth);

      else

        dbgs() << llvm::left_justify("|", ColWidth);

    }

    dbgs() << "|\n";

    const MCSchedClassDesc *SC = getSchedClass(SU);

    SmallVector<MCWriteProcResEntry, 4> ResourcesIt(

        make_range(SchedModel.getWriteProcResBegin(SC),

                   SchedModel.getWriteProcResEnd(SC)));


    if (MISchedSortResourcesInTrace)

      llvm::stable_sort(ResourcesIt,

                        [](const MCWriteProcResEntry &LHS,

                           const MCWriteProcResEntry &RHS) -> bool {

                          return LHS.AcquireAtCycle < RHS.AcquireAtCycle ||

                                 (LHS.AcquireAtCycle == RHS.AcquireAtCycle &&

                                  LHS.ReleaseAtCycle < RHS.ReleaseAtCycle);

                        });

    for (const MCWriteProcResEntry &PI : ResourcesIt) {

      C = FirstCycle;

      const std::string ResName =

          SchedModel.getResourceName(PI.ProcResourceIdx);

      dbgs() << llvm::right_justify(ResName + " ", HeaderColWidth);

      for (; C > ((int)SU->BotReadyCycle - (int)PI.AcquireAtCycle); --C) {

        dbgs() << llvm::left_justify("|", ColWidth);

      }

      for (unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E;

           ++I, --C)

        dbgs() << llvm::left_justify("| x", ColWidth);

      while (C-- >= LastCycle)

        dbgs() << llvm::left_justify("|", ColWidth);

      // Place end char

      dbgs() << "| \n";

    }

  }

}

#endif


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {

  if (MISchedDumpScheduleTrace) {

    if (DumpDir == DumpDirection::TopDown)

      dumpScheduleTraceTopDown();

    else if (DumpDir == DumpDirection::BottomUp)

      dumpScheduleTraceBottomUp();

    else if (DumpDir == DumpDirection::Bidirectional) {

      dbgs() << "* Schedule table (Bidirectional): not implemented\n";

    } else {

      dbgs() << "* Schedule table: DumpDirection not set.\n";

    }

  }


  for (MachineInstr &MI : *this) {

    if (SUnit *SU = getSUnit(&MI))

      dumpNode(*SU);

    else

      dbgs() << "Missing SUnit\n";

  }

}

#endif


//===----------------------------------------------------------------------===//

// ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals

// preservation.

//===----------------------------------------------------------------------===//


ScheduleDAGMILive::~ScheduleDAGMILive() {

  delete DFSResult;

}


void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {

  const MachineInstr &MI = *SU.getInstr();

  for (const MachineOperand &MO : MI.operands()) {

    if (!MO.isReg())

      continue;

    if (!MO.readsReg())

      continue;

    if (TrackLaneMasks && !MO.isUse())

      continue;


    Register Reg = MO.getReg();

    if (!Reg.isVirtual())

      continue;


    // Ignore re-defs.

    if (TrackLaneMasks) {

      bool FoundDef = false;

      for (const MachineOperand &MO2 : MI.all_defs()) {

        if (MO2.getReg() == Reg && !MO2.isDead()) {

          FoundDef = true;

          break;

        }

      }

      if (FoundDef)

        continue;

    }


    // Record this local VReg use.

    VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);

    for (; UI != VRegUses.end(); ++UI) {

      if (UI->SU == &SU)

        break;

    }

    if (UI == VRegUses.end())

      VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU));

  }

}


/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after

/// crossing a scheduling boundary. [begin, end) includes all instructions in

/// the region, including the boundary itself and single-instruction regions

/// that don't get scheduled.

void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,

                                MachineBasicBlock::iterator begin,

                                MachineBasicBlock::iterator end,

                                unsigned regioninstrs)

{

  // ScheduleDAGMI initializes SchedImpl's per-region policy.

  ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);


  // For convenience remember the end of the liveness region.

  LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);


  SUPressureDiffs.clear();


  ShouldTrackPressure = SchedImpl->shouldTrackPressure();

  ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();


  assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&

         "ShouldTrackLaneMasks requires ShouldTrackPressure");

}


// Setup the register pressure trackers for the top scheduled and bottom

// scheduled regions.

void ScheduleDAGMILive::initRegPressure() {

  VRegUses.clear();

  VRegUses.setUniverse(MRI.getNumVirtRegs());

  for (SUnit &SU : SUnits)

    collectVRegUses(SU);


  TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,

                    ShouldTrackLaneMasks, false);

  BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,

                    ShouldTrackLaneMasks, false);


  // Close the RPTracker to finalize live ins.

  RPTracker.closeRegion();


  LLVM_DEBUG(RPTracker.dump());


  // Initialize the live ins and live outs.

  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);

  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);


  // Close one end of the tracker so we can call

  // getMaxUpward/DownwardPressureDelta before advancing across any

  // instructions. This converts currently live regs into live ins/outs.

  TopRPTracker.closeTop();

  BotRPTracker.closeBottom();


  BotRPTracker.initLiveThru(RPTracker);

  if (!BotRPTracker.getLiveThru().empty()) {

    TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());

    LLVM_DEBUG(dbgs() << "Live Thru: ";

               dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));

  };


  // For each live out vreg reduce the pressure change associated with other

  // uses of the same vreg below the live-out reaching def.

  updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);


  // Account for liveness generated by the region boundary.

  if (LiveRegionEnd != RegionEnd) {

    SmallVector<VRegMaskOrUnit, 8> LiveUses;

    BotRPTracker.recede(&LiveUses);

    updatePressureDiffs(LiveUses);

  }


  LLVM_DEBUG(dbgs() << "Top Pressure:\n";

             dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);

             dbgs() << "Bottom Pressure:\n";

             dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI););


  assert((BotRPTracker.getPos() == RegionEnd ||

          (RegionEnd->isDebugInstr() &&

           BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) &&

         "Can't find the region bottom");


  // Cache the list of excess pressure sets in this region. This will also track

  // the max pressure in the scheduled code for these sets.

  RegionCriticalPSets.clear();

  const std::vector<unsigned> &RegionPressure =

    RPTracker.getPressure().MaxSetPressure;

  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {

    unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);

    if (RegionPressure[i] > Limit) {

      LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit

                        << " Actual " << RegionPressure[i] << "\n");

      RegionCriticalPSets.push_back(PressureChange(i));

    }

  }

  LLVM_DEBUG(dbgs() << "Excess PSets: ";

             for (const PressureChange &RCPS

                  : RegionCriticalPSets) dbgs()

             << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";

             dbgs() << "\n");

}


void ScheduleDAGMILive::

updateScheduledPressure(const SUnit *SU,

                        const std::vector<unsigned> &NewMaxPressure) {

  const PressureDiff &PDiff = getPressureDiff(SU);

  unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();

  for (const PressureChange &PC : PDiff) {

    if (!PC.isValid())

      break;

    unsigned ID = PC.getPSet();

    while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)

      ++CritIdx;

    if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {

      if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()

          && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())

        RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);

    }

    unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);

    if (NewMaxPressure[ID] >= Limit - 2) {

      LLVM_DEBUG(dbgs() << "  " << TRI->getRegPressureSetName(ID) << ": "

                        << NewMaxPressure[ID]

                        << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")

                        << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]

                        << " livethru)\n");

    }

  }

}


/// Update the PressureDiff array for liveness after scheduling this

/// instruction.

void ScheduleDAGMILive::updatePressureDiffs(ArrayRef<VRegMaskOrUnit> LiveUses) {

  for (const VRegMaskOrUnit &P : LiveUses) {

    Register Reg = P.RegUnit;

    /// FIXME: Currently assuming single-use physregs.

    if (!Reg.isVirtual())

      continue;


    if (ShouldTrackLaneMasks) {

      // If the register has just become live then other uses won't change

      // this fact anymore => decrement pressure.

      // If the register has just become dead then other uses make it come

      // back to life => increment pressure.

      bool Decrement = P.LaneMask.any();


      for (const VReg2SUnit &V2SU

           : make_range(VRegUses.find(Reg), VRegUses.end())) {

        SUnit &SU = *V2SU.SU;

        if (SU.isScheduled || &SU == &ExitSU)

          continue;


        PressureDiff &PDiff = getPressureDiff(&SU);

        PDiff.addPressureChange(Reg, Decrement, &MRI);

        LLVM_DEBUG(dbgs() << "  UpdateRegP: SU(" << SU.NodeNum << ") "

                          << printReg(Reg, TRI) << ':'

                          << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();

                   dbgs() << "              to "; PDiff.dump(*TRI););

      }

    } else {

      assert(P.LaneMask.any());

      LLVM_DEBUG(dbgs() << "  LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");

      // This may be called before CurrentBottom has been initialized. However,

      // BotRPTracker must have a valid position. We want the value live into the

      // instruction or live out of the block, so ask for the previous

      // instruction's live-out.

      const LiveInterval &LI = LIS->getInterval(Reg);

      VNInfo *VNI;

      MachineBasicBlock::const_iterator I =

        nextIfDebug(BotRPTracker.getPos(), BB->end());

      if (I == BB->end())

        VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));

      else {

        LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));

        VNI = LRQ.valueIn();

      }

      // RegisterPressureTracker guarantees that readsReg is true for LiveUses.

      assert(VNI && "No live value at use.");

      for (const VReg2SUnit &V2SU

           : make_range(VRegUses.find(Reg), VRegUses.end())) {

        SUnit *SU = V2SU.SU;

        // If this use comes before the reaching def, it cannot be a last use,

        // so decrease its pressure change.

        if (!SU->isScheduled && SU != &ExitSU) {

          LiveQueryResult LRQ =

              LI.Query(LIS->getInstructionIndex(*SU->getInstr()));

          if (LRQ.valueIn() == VNI) {

            PressureDiff &PDiff = getPressureDiff(SU);

            PDiff.addPressureChange(Reg, true, &MRI);

            LLVM_DEBUG(dbgs() << "  UpdateRegP: SU(" << SU->NodeNum << ") "

                              << *SU->getInstr();

                       dbgs() << "              to "; PDiff.dump(*TRI););

          }

        }

      }

    }

  }

}


void ScheduleDAGMILive::dump() const {

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

  if (EntrySU.getInstr() != nullptr)

    dumpNodeAll(EntrySU);

  for (const SUnit &SU : SUnits) {

    dumpNodeAll(SU);

    if (ShouldTrackPressure) {

      dbgs() << "  Pressure Diff      : ";

      getPressureDiff(&SU).dump(*TRI);

    }

    dbgs() << "  Single Issue       : ";

    if (SchedModel.mustBeginGroup(SU.getInstr()) &&

        SchedModel.mustEndGroup(SU.getInstr()))

      dbgs() << "true;";

    else

      dbgs() << "false;";

    dbgs() << '\n';

  }

  if (ExitSU.getInstr() != nullptr)

    dumpNodeAll(ExitSU);

#endif

}


/// schedule - Called back from MachineScheduler::runOnMachineFunction

/// after setting up the current scheduling region. [RegionBegin, RegionEnd)

/// only includes instructions that have DAG nodes, not scheduling boundaries.

///

/// This is a skeletal driver, with all the functionality pushed into helpers,

/// so that it can be easily extended by experimental schedulers. Generally,

/// implementing MachineSchedStrategy should be sufficient to implement a new

/// scheduling algorithm. However, if a scheduler further subclasses

/// ScheduleDAGMILive then it will want to override this virtual method in order

/// to update any specialized state.

void ScheduleDAGMILive::schedule() {

  LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");

  LLVM_DEBUG(SchedImpl->dumpPolicy());

  buildDAGWithRegPressure();


  postProcessDAG();


  SmallVector<SUnit*, 8> TopRoots, BotRoots;

  findRootsAndBiasEdges(TopRoots, BotRoots);


  // Initialize the strategy before modifying the DAG.

  // This may initialize a DFSResult to be used for queue priority.

  SchedImpl->initialize(this);


  LLVM_DEBUG(dump());

  if (PrintDAGs) dump();

  if (ViewMISchedDAGs) viewGraph();


  // Initialize ready queues now that the DAG and priority data are finalized.

  initQueues(TopRoots, BotRoots);


  bool IsTopNode = false;

  while (true) {

    LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");

    SUnit *SU = SchedImpl->pickNode(IsTopNode);

    if (!SU) break;


    assert(!SU->isScheduled && "Node already scheduled");

    if (!checkSchedLimit())

      break;


    scheduleMI(SU, IsTopNode);


    if (DFSResult) {

      unsigned SubtreeID = DFSResult->getSubtreeID(SU);

      if (!ScheduledTrees.test(SubtreeID)) {

        ScheduledTrees.set(SubtreeID);

        DFSResult->scheduleTree(SubtreeID);

        SchedImpl->scheduleTree(SubtreeID);

      }

    }


    // Notify the scheduling strategy after updating the DAG.

    SchedImpl->schedNode(SU, IsTopNode);


    updateQueues(SU, IsTopNode);

  }

  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");


  placeDebugValues();


  LLVM_DEBUG({

    dbgs() << "*** Final schedule for "

           << printMBBReference(*begin()->getParent()) << " ***\n";

    dumpSchedule();

    dbgs() << '\n';

  });

}


/// Build the DAG and setup three register pressure trackers.

void ScheduleDAGMILive::buildDAGWithRegPressure() {

  if (!ShouldTrackPressure) {

    RPTracker.reset();

    RegionCriticalPSets.clear();

    buildSchedGraph(AA);

    return;

  }


  // Initialize the register pressure tracker used by buildSchedGraph.

  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,

                 ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);


  // Account for liveness generate by the region boundary.

  if (LiveRegionEnd != RegionEnd)

    RPTracker.recede();


  // Build the DAG, and compute current register pressure.

  buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);


  // Initialize top/bottom trackers after computing region pressure.

  initRegPressure();

}


void ScheduleDAGMILive::computeDFSResult() {

  if (!DFSResult)

    DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);

  DFSResult->clear();

  ScheduledTrees.clear();

  DFSResult->resize(SUnits.size());

  DFSResult->compute(SUnits);

  ScheduledTrees.resize(DFSResult->getNumSubtrees());

}


/// Compute the max cyclic critical path through the DAG. The scheduling DAG

/// only provides the critical path for single block loops. To handle loops that

/// span blocks, we could use the vreg path latencies provided by

/// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently

/// available for use in the scheduler.

///

/// The cyclic path estimation identifies a def-use pair that crosses the back

/// edge and considers the depth and height of the nodes. For example, consider

/// the following instruction sequence where each instruction has unit latency

/// and defines an eponymous virtual register:

///

/// a->b(a,c)->c(b)->d(c)->exit

///

/// The cyclic critical path is a two cycles: b->c->b

/// The acyclic critical path is four cycles: a->b->c->d->exit

/// LiveOutHeight = height(c) = len(c->d->exit) = 2

/// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3

/// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4

/// LiveInDepth = depth(b) = len(a->b) = 1

///

/// LiveOutDepth - LiveInDepth = 3 - 1 = 2

/// LiveInHeight - LiveOutHeight = 4 - 2 = 2

/// CyclicCriticalPath = min(2, 2) = 2

///

/// This could be relevant to PostRA scheduling, but is currently implemented

/// assuming LiveIntervals.

unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {

  // This only applies to single block loop.

  if (!BB->isSuccessor(BB))

    return 0;


  unsigned MaxCyclicLatency = 0;

  // Visit each live out vreg def to find def/use pairs that cross iterations.

  for (const VRegMaskOrUnit &P : RPTracker.getPressure().LiveOutRegs) {

    Register Reg = P.RegUnit;

    if (!Reg.isVirtual())

      continue;

    const LiveInterval &LI = LIS->getInterval(Reg);

    const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));

    if (!DefVNI)

      continue;


    MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);

    const SUnit *DefSU = getSUnit(DefMI);

    if (!DefSU)

      continue;


    unsigned LiveOutHeight = DefSU->getHeight();

    unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;

    // Visit all local users of the vreg def.

    for (const VReg2SUnit &V2SU

         : make_range(VRegUses.find(Reg), VRegUses.end())) {

      SUnit *SU = V2SU.SU;

      if (SU == &ExitSU)

        continue;


      // Only consider uses of the phi.

      LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));

      if (!LRQ.valueIn()->isPHIDef())

        continue;


      // Assume that a path spanning two iterations is a cycle, which could

      // overestimate in strange cases. This allows cyclic latency to be

      // estimated as the minimum slack of the vreg's depth or height.

      unsigned CyclicLatency = 0;

      if (LiveOutDepth > SU->getDepth())

        CyclicLatency = LiveOutDepth - SU->getDepth();


      unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;

      if (LiveInHeight > LiveOutHeight) {

        if (LiveInHeight - LiveOutHeight < CyclicLatency)

          CyclicLatency = LiveInHeight - LiveOutHeight;

      } else

        CyclicLatency = 0;


      LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("

                        << SU->NodeNum << ") = " << CyclicLatency << "c\n");

      if (CyclicLatency > MaxCyclicLatency)

        MaxCyclicLatency = CyclicLatency;

    }

  }

  LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");

  return MaxCyclicLatency;

}


/// Release ExitSU predecessors and setup scheduler queues. Re-position

/// the Top RP tracker in case the region beginning has changed.

void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots,

                                   ArrayRef<SUnit*> BotRoots) {

  ScheduleDAGMI::initQueues(TopRoots, BotRoots);

  if (ShouldTrackPressure) {

    assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");

    TopRPTracker.setPos(CurrentTop);

  }

}


/// Move an instruction and update register pressure.

void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {

  // Move the instruction to its new location in the instruction stream.

  MachineInstr *MI = SU->getInstr();


  if (IsTopNode) {

    assert(SU->isTopReady() && "node still has unscheduled dependencies");

    if (&*CurrentTop == MI)

      CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);

    else {

      moveInstruction(MI, CurrentTop);

      TopRPTracker.setPos(MI);

    }


    if (ShouldTrackPressure) {

      // Update top scheduled pressure.

      RegisterOperands RegOpers;

      RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks,

                       /*IgnoreDead=*/false);

      if (ShouldTrackLaneMasks) {

        // Adjust liveness and add missing dead+read-undef flags.

        SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();

        RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);

      } else {

        // Adjust for missing dead-def flags.

        RegOpers.detectDeadDefs(*MI, *LIS);

      }


      TopRPTracker.advance(RegOpers);

      assert(TopRPTracker.getPos() == CurrentTop && "out of sync");

      LLVM_DEBUG(dbgs() << "Top Pressure:\n"; dumpRegSetPressure(

                     TopRPTracker.getRegSetPressureAtPos(), TRI););


      updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);

    }

  } else {

    assert(SU->isBottomReady() && "node still has unscheduled dependencies");

    MachineBasicBlock::iterator priorII =

      priorNonDebug(CurrentBottom, CurrentTop);

    if (&*priorII == MI)

      CurrentBottom = priorII;

    else {

      if (&*CurrentTop == MI) {

        CurrentTop = nextIfDebug(++CurrentTop, priorII);

        TopRPTracker.setPos(CurrentTop);

      }

      moveInstruction(MI, CurrentBottom);

      CurrentBottom = MI;

      BotRPTracker.setPos(CurrentBottom);

    }

    if (ShouldTrackPressure) {

      RegisterOperands RegOpers;

      RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks,

                       /*IgnoreDead=*/false);

      if (ShouldTrackLaneMasks) {

        // Adjust liveness and add missing dead+read-undef flags.

        SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();

        RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);

      } else {

        // Adjust for missing dead-def flags.

        RegOpers.detectDeadDefs(*MI, *LIS);

      }


      if (BotRPTracker.getPos() != CurrentBottom)

        BotRPTracker.recedeSkipDebugValues();

      SmallVector<VRegMaskOrUnit, 8> LiveUses;

      BotRPTracker.recede(RegOpers, &LiveUses);

      assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");

      LLVM_DEBUG(dbgs() << "Bottom Pressure:\n"; dumpRegSetPressure(

                     BotRPTracker.getRegSetPressureAtPos(), TRI););


      updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);

      updatePressureDiffs(LiveUses);

    }

  }

}


//===----------------------------------------------------------------------===//

// BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.

//===----------------------------------------------------------------------===//


namespace {


/// Post-process the DAG to create cluster edges between neighboring

/// loads or between neighboring stores.

class BaseMemOpClusterMutation : public ScheduleDAGMutation {

  struct MemOpInfo {

    SUnit *SU;

    SmallVector<const MachineOperand *, 4> BaseOps;

    int64_t Offset;

    LocationSize Width;

    bool OffsetIsScalable;


    MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps,

              int64_t Offset, bool OffsetIsScalable, LocationSize Width)

        : SU(SU), BaseOps(BaseOps), Offset(Offset), Width(Width),

          OffsetIsScalable(OffsetIsScalable) {}


    static bool Compare(const MachineOperand *const &A,

                        const MachineOperand *const &B) {

      if (A->getType() != B->getType())

        return A->getType() < B->getType();

      if (A->isReg())

        return A->getReg() < B->getReg();

      if (A->isFI()) {

        const MachineFunction &MF = *A->getParent()->getParent()->getParent();

        const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();

        bool StackGrowsDown = TFI.getStackGrowthDirection() ==

                              TargetFrameLowering::StackGrowsDown;

        return StackGrowsDown ? A->getIndex() > B->getIndex()

                              : A->getIndex() < B->getIndex();

      }


      llvm_unreachable("MemOpClusterMutation only supports register or frame "

                       "index bases.");

    }


    bool operator<(const MemOpInfo &RHS) const {

      // FIXME: Don't compare everything twice. Maybe use C++20 three way

      // comparison instead when it's available.

      if (std::lexicographical_compare(BaseOps.begin(), BaseOps.end(),

                                       RHS.BaseOps.begin(), RHS.BaseOps.end(),

                                       Compare))

        return true;

      if (std::lexicographical_compare(RHS.BaseOps.begin(), RHS.BaseOps.end(),

                                       BaseOps.begin(), BaseOps.end(), Compare))

        return false;

      if (Offset != RHS.Offset)

        return Offset < RHS.Offset;

      return SU->NodeNum < RHS.SU->NodeNum;

    }

  };


  const TargetInstrInfo *TII;

  const TargetRegisterInfo *TRI;

  bool IsLoad;

  bool ReorderWhileClustering;


public:

  BaseMemOpClusterMutation(const TargetInstrInfo *tii,

                           const TargetRegisterInfo *tri, bool IsLoad,

                           bool ReorderWhileClustering)

      : TII(tii), TRI(tri), IsLoad(IsLoad),

        ReorderWhileClustering(ReorderWhileClustering) {}


  void apply(ScheduleDAGInstrs *DAGInstrs) override;


protected:

  void clusterNeighboringMemOps(ArrayRef<MemOpInfo> MemOps, bool FastCluster,

                                ScheduleDAGInstrs *DAG);

  void collectMemOpRecords(std::vector<SUnit> &SUnits,

                           SmallVectorImpl<MemOpInfo> &MemOpRecords);

  bool groupMemOps(ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,

                   DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups);

};


class StoreClusterMutation : public BaseMemOpClusterMutation {

public:

  StoreClusterMutation(const TargetInstrInfo *tii,

                       const TargetRegisterInfo *tri,

                       bool ReorderWhileClustering)

      : BaseMemOpClusterMutation(tii, tri, false, ReorderWhileClustering) {}

};


class LoadClusterMutation : public BaseMemOpClusterMutation {

public:

  LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri,

                      bool ReorderWhileClustering)

      : BaseMemOpClusterMutation(tii, tri, true, ReorderWhileClustering) {}

};


} // end anonymous namespace


namespace llvm {


std::unique_ptr<ScheduleDAGMutation>

createLoadClusterDAGMutation(const TargetInstrInfo *TII,

                             const TargetRegisterInfo *TRI,

                             bool ReorderWhileClustering) {

  return EnableMemOpCluster ? std::make_unique<LoadClusterMutation>(

                                  TII, TRI, ReorderWhileClustering)

                            : nullptr;

}


std::unique_ptr<ScheduleDAGMutation>

createStoreClusterDAGMutation(const TargetInstrInfo *TII,

                              const TargetRegisterInfo *TRI,

                              bool ReorderWhileClustering) {

  return EnableMemOpCluster ? std::make_unique<StoreClusterMutation>(

                                  TII, TRI, ReorderWhileClustering)

                            : nullptr;

}


} // end namespace llvm


// Sorting all the loads/stores first, then for each load/store, checking the

// following load/store one by one, until reach the first non-dependent one and

// call target hook to see if they can cluster.

// If FastCluster is enabled, we assume that, all the loads/stores have been

// preprocessed and now, they didn't have dependencies on each other.

void BaseMemOpClusterMutation::clusterNeighboringMemOps(

    ArrayRef<MemOpInfo> MemOpRecords, bool FastCluster,

    ScheduleDAGInstrs *DAG) {

  // Keep track of the current cluster length and bytes for each SUnit.

  DenseMap<unsigned, std::pair<unsigned, unsigned>> SUnit2ClusterInfo;


  // At this point, `MemOpRecords` array must hold atleast two mem ops. Try to

  // cluster mem ops collected within `MemOpRecords` array.

  for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {

    // Decision to cluster mem ops is taken based on target dependent logic

    auto MemOpa = MemOpRecords[Idx];


    // Seek for the next load/store to do the cluster.

    unsigned NextIdx = Idx + 1;

    for (; NextIdx < End; ++NextIdx)

      // Skip if MemOpb has been clustered already or has dependency with

      // MemOpa.

      if (!SUnit2ClusterInfo.count(MemOpRecords[NextIdx].SU->NodeNum) &&

          (FastCluster ||

           (!DAG->IsReachable(MemOpRecords[NextIdx].SU, MemOpa.SU) &&

            !DAG->IsReachable(MemOpa.SU, MemOpRecords[NextIdx].SU))))

        break;

    if (NextIdx == End)

      continue;


    auto MemOpb = MemOpRecords[NextIdx];

    unsigned ClusterLength = 2;

    unsigned CurrentClusterBytes = MemOpa.Width.getValue().getKnownMinValue() +

                                   MemOpb.Width.getValue().getKnownMinValue();

    if (SUnit2ClusterInfo.count(MemOpa.SU->NodeNum)) {

      ClusterLength = SUnit2ClusterInfo[MemOpa.SU->NodeNum].first + 1;

      CurrentClusterBytes = SUnit2ClusterInfo[MemOpa.SU->NodeNum].second +

                            MemOpb.Width.getValue().getKnownMinValue();

    }


    if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpa.Offset,

                                  MemOpa.OffsetIsScalable, MemOpb.BaseOps,

                                  MemOpb.Offset, MemOpb.OffsetIsScalable,

                                  ClusterLength, CurrentClusterBytes))

      continue;


    SUnit *SUa = MemOpa.SU;

    SUnit *SUb = MemOpb.SU;

    if (!ReorderWhileClustering && SUa->NodeNum > SUb->NodeNum)

      std::swap(SUa, SUb);


    // FIXME: Is this check really required?

    if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster)))

      continue;


    LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("

                      << SUb->NodeNum << ")\n");

    ++NumClustered;


    if (IsLoad) {

      // Copy successor edges from SUa to SUb. Interleaving computation

      // dependent on SUa can prevent load combining due to register reuse.

      // Predecessor edges do not need to be copied from SUb to SUa since

      // nearby loads should have effectively the same inputs.

      for (const SDep &Succ : SUa->Succs) {

        if (Succ.getSUnit() == SUb)

          continue;

        LLVM_DEBUG(dbgs() << "  Copy Succ SU(" << Succ.getSUnit()->NodeNum

                          << ")\n");

        DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));

      }

    } else {

      // Copy predecessor edges from SUb to SUa to avoid the SUnits that

      // SUb dependent on scheduled in-between SUb and SUa. Successor edges

      // do not need to be copied from SUa to SUb since no one will depend

      // on stores.

      // Notice that, we don't need to care about the memory dependency as

      // we won't try to cluster them if they have any memory dependency.

      for (const SDep &Pred : SUb->Preds) {

        if (Pred.getSUnit() == SUa)

          continue;

        LLVM_DEBUG(dbgs() << "  Copy Pred SU(" << Pred.getSUnit()->NodeNum

                          << ")\n");

        DAG->addEdge(SUa, SDep(Pred.getSUnit(), SDep::Artificial));

      }

    }


    SUnit2ClusterInfo[MemOpb.SU->NodeNum] = {ClusterLength,

                                             CurrentClusterBytes};


    LLVM_DEBUG(dbgs() << "  Curr cluster length: " << ClusterLength

                      << ", Curr cluster bytes: " << CurrentClusterBytes

                      << "\n");

  }

}


void BaseMemOpClusterMutation::collectMemOpRecords(

    std::vector<SUnit> &SUnits, SmallVectorImpl<MemOpInfo> &MemOpRecords) {

  for (auto &SU : SUnits) {

    if ((IsLoad && !SU.getInstr()->mayLoad()) ||

        (!IsLoad && !SU.getInstr()->mayStore()))

      continue;


    const MachineInstr &MI = *SU.getInstr();

    SmallVector<const MachineOperand *, 4> BaseOps;

    int64_t Offset;

    bool OffsetIsScalable;

    LocationSize Width = 0;

    if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset,

                                           OffsetIsScalable, Width, TRI)) {

      if (!Width.hasValue())

        continue;


      MemOpRecords.push_back(

          MemOpInfo(&SU, BaseOps, Offset, OffsetIsScalable, Width));


      LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "

                        << Offset << ", OffsetIsScalable: " << OffsetIsScalable

                        << ", Width: " << Width << "\n");

    }

#ifndef NDEBUG

    for (const auto *Op : BaseOps)

      assert(Op);

#endif

  }

}


bool BaseMemOpClusterMutation::groupMemOps(

    ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,

    DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups) {

  bool FastCluster =

      ForceFastCluster ||

      MemOps.size() * DAG->SUnits.size() / 1000 > FastClusterThreshold;


  for (const auto &MemOp : MemOps) {

    unsigned ChainPredID = DAG->SUnits.size();

    if (FastCluster) {

      for (const SDep &Pred : MemOp.SU->Preds) {

        // We only want to cluster the mem ops that have the same ctrl(non-data)

        // pred so that they didn't have ctrl dependency for each other. But for

        // store instrs, we can still cluster them if the pred is load instr.

        if ((Pred.isCtrl() &&

             (IsLoad ||

              (Pred.getSUnit() && Pred.getSUnit()->getInstr()->mayStore()))) &&

            !Pred.isArtificial()) {

          ChainPredID = Pred.getSUnit()->NodeNum;

          break;

        }

      }

    } else

      ChainPredID = 0;


    Groups[ChainPredID].push_back(MemOp);

  }

  return FastCluster;

}


/// Callback from DAG postProcessing to create cluster edges for loads/stores.

void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {

  // Collect all the clusterable loads/stores

  SmallVector<MemOpInfo, 32> MemOpRecords;

  collectMemOpRecords(DAG->SUnits, MemOpRecords);


  if (MemOpRecords.size() < 2)

    return;


  // Put the loads/stores without dependency into the same group with some

  // heuristic if the DAG is too complex to avoid compiling time blow up.

  // Notice that, some fusion pair could be lost with this.

  DenseMap<unsigned, SmallVector<MemOpInfo, 32>> Groups;

  bool FastCluster = groupMemOps(MemOpRecords, DAG, Groups);


  for (auto &Group : Groups) {

    // Sorting the loads/stores, so that, we can stop the cluster as early as

    // possible.

    llvm::sort(Group.second);


    // Trying to cluster all the neighboring loads/stores.

    clusterNeighboringMemOps(Group.second, FastCluster, DAG);

  }

}


//===----------------------------------------------------------------------===//

// CopyConstrain - DAG post-processing to encourage copy elimination.

//===----------------------------------------------------------------------===//


namespace {


/// Post-process the DAG to create weak edges from all uses of a copy to

/// the one use that defines the copy's source vreg, most likely an induction

/// variable increment.

class CopyConstrain : public ScheduleDAGMutation {

  // Transient state.

  SlotIndex RegionBeginIdx;


  // RegionEndIdx is the slot index of the last non-debug instruction in the

  // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.

  SlotIndex RegionEndIdx;


public:

  CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}


  void apply(ScheduleDAGInstrs *DAGInstrs) override;


protected:

  void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);

};


} // end anonymous namespace


namespace llvm {


std::unique_ptr<ScheduleDAGMutation>

createCopyConstrainDAGMutation(const TargetInstrInfo *TII,

                               const TargetRegisterInfo *TRI) {

  return std::make_unique<CopyConstrain>(TII, TRI);

}


} // end namespace llvm


/// constrainLocalCopy handles two possibilities:

/// 1) Local src:

/// I0:     = dst

/// I1: src = ...

/// I2:     = dst

/// I3: dst = src (copy)

/// (create pred->succ edges I0->I1, I2->I1)

///

/// 2) Local copy:

/// I0: dst = src (copy)

/// I1:     = dst

/// I2: src = ...

/// I3:     = dst

/// (create pred->succ edges I1->I2, I3->I2)

///

/// Although the MachineScheduler is currently constrained to single blocks,

/// this algorithm should handle extended blocks. An EBB is a set of

/// contiguously numbered blocks such that the previous block in the EBB is

/// always the single predecessor.

void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {

  LiveIntervals *LIS = DAG->getLIS();

  MachineInstr *Copy = CopySU->getInstr();


  // Check for pure vreg copies.

  const MachineOperand &SrcOp = Copy->getOperand(1);

  Register SrcReg = SrcOp.getReg();

  if (!SrcReg.isVirtual() || !SrcOp.readsReg())

    return;


  const MachineOperand &DstOp = Copy->getOperand(0);

  Register DstReg = DstOp.getReg();

  if (!DstReg.isVirtual() || DstOp.isDead())

    return;


  // Check if either the dest or source is local. If it's live across a back

  // edge, it's not local. Note that if both vregs are live across the back

  // edge, we cannot successfully contrain the copy without cyclic scheduling.

  // If both the copy's source and dest are local live intervals, then we

  // should treat the dest as the global for the purpose of adding

  // constraints. This adds edges from source's other uses to the copy.

  unsigned LocalReg = SrcReg;

  unsigned GlobalReg = DstReg;

  LiveInterval *LocalLI = &LIS->getInterval(LocalReg);

  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {

    LocalReg = DstReg;

    GlobalReg = SrcReg;

    LocalLI = &LIS->getInterval(LocalReg);

    if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))

      return;

  }

  LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);


  // Find the global segment after the start of the local LI.

  LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());

  // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a

  // local live range. We could create edges from other global uses to the local

  // start, but the coalescer should have already eliminated these cases, so

  // don't bother dealing with it.

  if (GlobalSegment == GlobalLI->end())

    return;


  // If GlobalSegment is killed at the LocalLI->start, the call to find()

  // returned the next global segment. But if GlobalSegment overlaps with

  // LocalLI->start, then advance to the next segment. If a hole in GlobalLI

  // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.

  if (GlobalSegment->contains(LocalLI->beginIndex()))

    ++GlobalSegment;


  if (GlobalSegment == GlobalLI->end())

    return;


  // Check if GlobalLI contains a hole in the vicinity of LocalLI.

  if (GlobalSegment != GlobalLI->begin()) {

    // Two address defs have no hole.

    if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,

                               GlobalSegment->start)) {

      return;

    }

    // If the prior global segment may be defined by the same two-address

    // instruction that also defines LocalLI, then can't make a hole here.

    if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,

                               LocalLI->beginIndex())) {

      return;

    }

    // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise

    // it would be a disconnected component in the live range.

    assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&

           "Disconnected LRG within the scheduling region.");

  }

  MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);

  if (!GlobalDef)

    return;


  SUnit *GlobalSU = DAG->getSUnit(GlobalDef);

  if (!GlobalSU)

    return;


  // GlobalDef is the bottom of the GlobalLI hole. Open the hole by

  // constraining the uses of the last local def to precede GlobalDef.

  SmallVector<SUnit*,8> LocalUses;

  const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());

  MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);

  SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);

  for (const SDep &Succ : LastLocalSU->Succs) {

    if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)

      continue;

    if (Succ.getSUnit() == GlobalSU)

      continue;

    if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))

      return;

    LocalUses.push_back(Succ.getSUnit());

  }

  // Open the top of the GlobalLI hole by constraining any earlier global uses

  // to precede the start of LocalLI.

  SmallVector<SUnit*,8> GlobalUses;

  MachineInstr *FirstLocalDef =

    LIS->getInstructionFromIndex(LocalLI->beginIndex());

  SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);

  for (const SDep &Pred : GlobalSU->Preds) {

    if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)

      continue;

    if (Pred.getSUnit() == FirstLocalSU)

      continue;

    if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))

      return;

    GlobalUses.push_back(Pred.getSUnit());

  }

  LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");

  // Add the weak edges.

  for (SUnit *LU : LocalUses) {

    LLVM_DEBUG(dbgs() << "  Local use SU(" << LU->NodeNum << ") -> SU("

                      << GlobalSU->NodeNum << ")\n");

    DAG->addEdge(GlobalSU, SDep(LU, SDep::Weak));

  }

  for (SUnit *GU : GlobalUses) {

    LLVM_DEBUG(dbgs() << "  Global use SU(" << GU->NodeNum << ") -> SU("

                      << FirstLocalSU->NodeNum << ")\n");

    DAG->addEdge(FirstLocalSU, SDep(GU, SDep::Weak));

  }

}


/// Callback from DAG postProcessing to create weak edges to encourage

/// copy elimination.

void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {

  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);

  assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");


  MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());

  if (FirstPos == DAG->end())

    return;

  RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);

  RegionEndIdx = DAG->getLIS()->getInstructionIndex(

      *priorNonDebug(DAG->end(), DAG->begin()));


  for (SUnit &SU : DAG->SUnits) {

    if (!SU.getInstr()->isCopy())

      continue;


    constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));

  }

}


//===----------------------------------------------------------------------===//

// MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler

// and possibly other custom schedulers.

//===----------------------------------------------------------------------===//


static const unsigned InvalidCycle = ~0U;


SchedBoundary::~SchedBoundary() { delete HazardRec; }


/// Given a Count of resource usage and a Latency value, return true if a

/// SchedBoundary becomes resource limited.

/// If we are checking after scheduling a node, we should return true when

/// we just reach the resource limit.

static bool checkResourceLimit(unsigned LFactor, unsigned Count,

                               unsigned Latency, bool AfterSchedNode) {

  int ResCntFactor = (int)(Count - (Latency * LFactor));

  if (AfterSchedNode)

    return ResCntFactor >= (int)LFactor;

  else

    return ResCntFactor > (int)LFactor;

}


void SchedBoundary::reset() {

  // A new HazardRec is created for each DAG and owned by SchedBoundary.

  // Destroying and reconstructing it is very expensive though. So keep

  // invalid, placeholder HazardRecs.

  if (HazardRec && HazardRec->isEnabled()) {

    delete HazardRec;

    HazardRec = nullptr;

  }

  Available.clear();

  Pending.clear();

  CheckPending = false;

  CurrCycle = 0;

  CurrMOps = 0;

  MinReadyCycle = std::numeric_limits<unsigned>::max();

  ExpectedLatency = 0;

  DependentLatency = 0;

  RetiredMOps = 0;

  MaxExecutedResCount = 0;

  ZoneCritResIdx = 0;

  IsResourceLimited = false;

  ReservedCycles.clear();

  ReservedResourceSegments.clear();

  ReservedCyclesIndex.clear();

  ResourceGroupSubUnitMasks.clear();

#if LLVM_ENABLE_ABI_BREAKING_CHECKS

  // Track the maximum number of stall cycles that could arise either from the

  // latency of a DAG edge or the number of cycles that a processor resource is

  // reserved (SchedBoundary::ReservedCycles).

  MaxObservedStall = 0;

#endif

  // Reserve a zero-count for invalid CritResIdx.

  ExecutedResCounts.resize(1);

  assert(!ExecutedResCounts[0] && "nonzero count for bad resource");

}


void SchedRemainder::

init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {

  reset();

  if (!SchedModel->hasInstrSchedModel())

    return;

  RemainingCounts.resize(SchedModel->getNumProcResourceKinds());

  for (SUnit &SU : DAG->SUnits) {

    const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);

    RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)

      * SchedModel->getMicroOpFactor();

    for (TargetSchedModel::ProcResIter

           PI = SchedModel->getWriteProcResBegin(SC),

           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {

      unsigned PIdx = PI->ProcResourceIdx;

      unsigned Factor = SchedModel->getResourceFactor(PIdx);

      assert(PI->ReleaseAtCycle >= PI->AcquireAtCycle);

      RemainingCounts[PIdx] +=

          (Factor * (PI->ReleaseAtCycle - PI->AcquireAtCycle));

    }

  }

}


void SchedBoundary::

init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {

  reset();

  DAG = dag;

  SchedModel = smodel;

  Rem = rem;

  if (SchedModel->hasInstrSchedModel()) {

    unsigned ResourceCount = SchedModel->getNumProcResourceKinds();

    ReservedCyclesIndex.resize(ResourceCount);

    ExecutedResCounts.resize(ResourceCount);

    ResourceGroupSubUnitMasks.resize(ResourceCount, APInt(ResourceCount, 0));

    unsigned NumUnits = 0;


    for (unsigned i = 0; i < ResourceCount; ++i) {

      ReservedCyclesIndex[i] = NumUnits;

      NumUnits += SchedModel->getProcResource(i)->NumUnits;

      if (isUnbufferedGroup(i)) {

        auto SubUnits = SchedModel->getProcResource(i)->SubUnitsIdxBegin;

        for (unsigned U = 0, UE = SchedModel->getProcResource(i)->NumUnits;

             U != UE; ++U)

          ResourceGroupSubUnitMasks[i].setBit(SubUnits[U]);

      }

    }


    ReservedCycles.resize(NumUnits, InvalidCycle);

  }

}


/// Compute the stall cycles based on this SUnit's ready time. Heuristics treat

/// these "soft stalls" differently than the hard stall cycles based on CPU

/// resources and computed by checkHazard(). A fully in-order model

/// (MicroOpBufferSize==0) will not make use of this since instructions are not

/// available for scheduling until they are ready. However, a weaker in-order

/// model may use this for heuristics. For example, if a processor has in-order

/// behavior when reading certain resources, this may come into play.

unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {

  if (!SU->isUnbuffered)

    return 0;


  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);

  if (ReadyCycle > CurrCycle)

    return ReadyCycle - CurrCycle;

  return 0;

}


/// Compute the next cycle at which the given processor resource unit

/// can be scheduled.

unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx,

                                                       unsigned ReleaseAtCycle,

                                                       unsigned AcquireAtCycle) {

  if (SchedModel && SchedModel->enableIntervals()) {

    if (isTop())

      return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromTop(

          CurrCycle, AcquireAtCycle, ReleaseAtCycle);


    return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromBottom(

        CurrCycle, AcquireAtCycle, ReleaseAtCycle);

  }


  unsigned NextUnreserved = ReservedCycles[InstanceIdx];

  // If this resource has never been used, always return cycle zero.

  if (NextUnreserved == InvalidCycle)

    return CurrCycle;

  // For bottom-up scheduling add the cycles needed for the current operation.

  if (!isTop())

    NextUnreserved = std::max(CurrCycle, NextUnreserved + ReleaseAtCycle);

  return NextUnreserved;

}


/// Compute the next cycle at which the given processor resource can be

/// scheduled.  Returns the next cycle and the index of the processor resource

/// instance in the reserved cycles vector.

std::pair<unsigned, unsigned>

SchedBoundary::getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx,

                                    unsigned ReleaseAtCycle,

                                    unsigned AcquireAtCycle) {

  if (MischedDetailResourceBooking) {

    LLVM_DEBUG(dbgs() << "  Resource booking (@" << CurrCycle << "c): \n");

    LLVM_DEBUG(dumpReservedCycles());

    LLVM_DEBUG(dbgs() << "  getNextResourceCycle (@" << CurrCycle << "c): \n");

  }

  unsigned MinNextUnreserved = InvalidCycle;

  unsigned InstanceIdx = 0;

  unsigned StartIndex = ReservedCyclesIndex[PIdx];

  unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits;

  assert(NumberOfInstances > 0 &&

         "Cannot have zero instances of a ProcResource");


  if (isUnbufferedGroup(PIdx)) {

    // If any subunits are used by the instruction, report that the

    // subunits of the resource group are available at the first cycle

    // in which the unit is available, effectively removing the group

    // record from hazarding and basing the hazarding decisions on the

    // subunit records. Otherwise, choose the first available instance

    // from among the subunits.  Specifications which assign cycles to

    // both the subunits and the group or which use an unbuffered

    // group with buffered subunits will appear to schedule

    // strangely. In the first case, the additional cycles for the

    // group will be ignored.  In the second, the group will be

    // ignored entirely.

    for (const MCWriteProcResEntry &PE :

         make_range(SchedModel->getWriteProcResBegin(SC),

                    SchedModel->getWriteProcResEnd(SC)))

      if (ResourceGroupSubUnitMasks[PIdx][PE.ProcResourceIdx])

        return std::make_pair(getNextResourceCycleByInstance(

                                  StartIndex, ReleaseAtCycle, AcquireAtCycle),

                              StartIndex);


    auto SubUnits = SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin;

    for (unsigned I = 0, End = NumberOfInstances; I < End; ++I) {

      unsigned NextUnreserved, NextInstanceIdx;

      std::tie(NextUnreserved, NextInstanceIdx) =

          getNextResourceCycle(SC, SubUnits[I], ReleaseAtCycle, AcquireAtCycle);

      if (MinNextUnreserved > NextUnreserved) {

        InstanceIdx = NextInstanceIdx;

        MinNextUnreserved = NextUnreserved;

      }

    }

    return std::make_pair(MinNextUnreserved, InstanceIdx);

  }


  for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;

       ++I) {

    unsigned NextUnreserved =

        getNextResourceCycleByInstance(I, ReleaseAtCycle, AcquireAtCycle);

    if (MischedDetailResourceBooking)

      LLVM_DEBUG(dbgs() << "    Instance " << I - StartIndex << " available @"

                        << NextUnreserved << "c\n");

    if (MinNextUnreserved > NextUnreserved) {

      InstanceIdx = I;

      MinNextUnreserved = NextUnreserved;

    }

  }

  if (MischedDetailResourceBooking)

    LLVM_DEBUG(dbgs() << "    selecting " << SchedModel->getResourceName(PIdx)

                      << "[" << InstanceIdx - StartIndex << "]"

                      << " available @" << MinNextUnreserved << "c"

                      << "\n");

  return std::make_pair(MinNextUnreserved, InstanceIdx);

}


/// Does this SU have a hazard within the current instruction group.

///

/// The scheduler supports two modes of hazard recognition. The first is the

/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that

/// supports highly complicated in-order reservation tables

/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.

///

/// The second is a streamlined mechanism that checks for hazards based on

/// simple counters that the scheduler itself maintains. It explicitly checks

/// for instruction dispatch limitations, including the number of micro-ops that

/// can dispatch per cycle.

///

/// TODO: Also check whether the SU must start a new group.

bool SchedBoundary::checkHazard(SUnit *SU) {

  if (HazardRec->isEnabled()

      && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {

    return true;

  }


  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());

  if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {

    LLVM_DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="

                      << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');

    return true;

  }


  if (CurrMOps > 0 &&

      ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||

       (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {

    LLVM_DEBUG(dbgs() << "  hazard: SU(" << SU->NodeNum << ") must "

                      << (isTop() ? "begin" : "end") << " group\n");

    return true;

  }


  if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {

    const MCSchedClassDesc *SC = DAG->getSchedClass(SU);

    for (const MCWriteProcResEntry &PE :

          make_range(SchedModel->getWriteProcResBegin(SC),

                     SchedModel->getWriteProcResEnd(SC))) {

      unsigned ResIdx = PE.ProcResourceIdx;

      unsigned ReleaseAtCycle = PE.ReleaseAtCycle;

      unsigned AcquireAtCycle = PE.AcquireAtCycle;

      unsigned NRCycle, InstanceIdx;

      std::tie(NRCycle, InstanceIdx) =

          getNextResourceCycle(SC, ResIdx, ReleaseAtCycle, AcquireAtCycle);

      if (NRCycle > CurrCycle) {

#if LLVM_ENABLE_ABI_BREAKING_CHECKS

        MaxObservedStall = std::max(ReleaseAtCycle, MaxObservedStall);

#endif

        LLVM_DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") "

                          << SchedModel->getResourceName(ResIdx)

                          << '[' << InstanceIdx - ReservedCyclesIndex[ResIdx]  << ']'

                          << "=" << NRCycle << "c\n");

        return true;

      }

    }

  }

  return false;

}


// Find the unscheduled node in ReadySUs with the highest latency.

unsigned SchedBoundary::

findMaxLatency(ArrayRef<SUnit*> ReadySUs) {

  SUnit *LateSU = nullptr;

  unsigned RemLatency = 0;

  for (SUnit *SU : ReadySUs) {

    unsigned L = getUnscheduledLatency(SU);

    if (L > RemLatency) {

      RemLatency = L;

      LateSU = SU;

    }

  }

  if (LateSU) {

    LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("

                      << LateSU->NodeNum << ") " << RemLatency << "c\n");

  }

  return RemLatency;

}


// Count resources in this zone and the remaining unscheduled

// instruction. Return the max count, scaled. Set OtherCritIdx to the critical

// resource index, or zero if the zone is issue limited.

unsigned SchedBoundary::

getOtherResourceCount(unsigned &OtherCritIdx) {

  OtherCritIdx = 0;

  if (!SchedModel->hasInstrSchedModel())

    return 0;


  unsigned OtherCritCount = Rem->RemIssueCount

    + (RetiredMOps * SchedModel->getMicroOpFactor());

  LLVM_DEBUG(dbgs() << "  " << Available.getName() << " + Remain MOps: "

                    << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');

  for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();

       PIdx != PEnd; ++PIdx) {

    unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];

    if (OtherCount > OtherCritCount) {

      OtherCritCount = OtherCount;

      OtherCritIdx = PIdx;

    }

  }

  if (OtherCritIdx) {

    LLVM_DEBUG(

        dbgs() << "  " << Available.getName() << " + Remain CritRes: "

               << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)

               << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");

  }

  return OtherCritCount;

}


void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue,

                                unsigned Idx) {

  assert(SU->getInstr() && "Scheduled SUnit must have instr");


#if LLVM_ENABLE_ABI_BREAKING_CHECKS

  // ReadyCycle was been bumped up to the CurrCycle when this node was

  // scheduled, but CurrCycle may have been eagerly advanced immediately after

  // scheduling, so may now be greater than ReadyCycle.

  if (ReadyCycle > CurrCycle)

    MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);

#endif


  if (ReadyCycle < MinReadyCycle)

    MinReadyCycle = ReadyCycle;


  // Check for interlocks first. For the purpose of other heuristics, an

  // instruction that cannot issue appears as if it's not in the ReadyQueue.

  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;

  bool HazardDetected = (!IsBuffered && ReadyCycle > CurrCycle) ||

                        checkHazard(SU) || (Available.size() >= ReadyListLimit);


  if (!HazardDetected) {

    Available.push(SU);


    if (InPQueue)

      Pending.remove(Pending.begin() + Idx);

    return;

  }


  if (!InPQueue)

    Pending.push(SU);

}


/// Move the boundary of scheduled code by one cycle.

void SchedBoundary::bumpCycle(unsigned NextCycle) {

  if (SchedModel->getMicroOpBufferSize() == 0) {

    assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&

           "MinReadyCycle uninitialized");

    if (MinReadyCycle > NextCycle)

      NextCycle = MinReadyCycle;

  }

  // Update the current micro-ops, which will issue in the next cycle.

  unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);

  CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;


  // Decrement DependentLatency based on the next cycle.

  if ((NextCycle - CurrCycle) > DependentLatency)

    DependentLatency = 0;

  else

    DependentLatency -= (NextCycle - CurrCycle);


  if (!HazardRec->isEnabled()) {

    // Bypass HazardRec virtual calls.

    CurrCycle = NextCycle;

  } else {

    // Bypass getHazardType calls in case of long latency.

    for (; CurrCycle != NextCycle; ++CurrCycle) {

      if (isTop())

        HazardRec->AdvanceCycle();

      else

        HazardRec->RecedeCycle();

    }

  }

  CheckPending = true;

  IsResourceLimited =

      checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),

                         getScheduledLatency(), true);


  LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()

                    << '\n');

}


void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {

  ExecutedResCounts[PIdx] += Count;

  if (ExecutedResCounts[PIdx] > MaxExecutedResCount)

    MaxExecutedResCount = ExecutedResCounts[PIdx];

}


/// Add the given processor resource to this scheduled zone.

///

/// \param ReleaseAtCycle indicates the number of consecutive (non-pipelined)

/// cycles during which this resource is released.

///

/// \param AcquireAtCycle indicates the number of consecutive (non-pipelined)

/// cycles at which the resource is aquired after issue (assuming no stalls).

///

/// \return the next cycle at which the instruction may execute without

/// oversubscribing resources.

unsigned SchedBoundary::countResource(const MCSchedClassDesc *SC, unsigned PIdx,

                                      unsigned ReleaseAtCycle,

                                      unsigned NextCycle,

                                      unsigned AcquireAtCycle) {

  unsigned Factor = SchedModel->getResourceFactor(PIdx);

  unsigned Count = Factor * (ReleaseAtCycle- AcquireAtCycle);

  LLVM_DEBUG(dbgs() << "  " << SchedModel->getResourceName(PIdx) << " +"

                    << ReleaseAtCycle << "x" << Factor << "u\n");


  // Update Executed resources counts.

  incExecutedResources(PIdx, Count);

  assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");

  Rem->RemainingCounts[PIdx] -= Count;


  // Check if this resource exceeds the current critical resource. If so, it

  // becomes the critical resource.

  if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {

    ZoneCritResIdx = PIdx;

    LLVM_DEBUG(dbgs() << "  *** Critical resource "

                      << SchedModel->getResourceName(PIdx) << ": "

                      << getResourceCount(PIdx) / SchedModel->getLatencyFactor()

                      << "c\n");

  }

  // For reserved resources, record the highest cycle using the resource.

  unsigned NextAvailable, InstanceIdx;

  std::tie(NextAvailable, InstanceIdx) =

      getNextResourceCycle(SC, PIdx, ReleaseAtCycle, AcquireAtCycle);

  if (NextAvailable > CurrCycle) {

    LLVM_DEBUG(dbgs() << "  Resource conflict: "

                      << SchedModel->getResourceName(PIdx)

                      << '[' << InstanceIdx - ReservedCyclesIndex[PIdx]  << ']'

                      << " reserved until @" << NextAvailable << "\n");

  }

  return NextAvailable;

}


/// Move the boundary of scheduled code by one SUnit.

void SchedBoundary::bumpNode(SUnit *SU) {

  // Update the reservation table.

  if (HazardRec->isEnabled()) {

    if (!isTop() && SU->isCall) {

      // Calls are scheduled with their preceding instructions. For bottom-up

      // scheduling, clear the pipeline state before emitting.

      HazardRec->Reset();

    }

    HazardRec->EmitInstruction(SU);

    // Scheduling an instruction may have made pending instructions available.

    CheckPending = true;

  }

  // checkHazard should prevent scheduling multiple instructions per cycle that

  // exceed the issue width.

  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);

  unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());

  assert(

      (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&

      "Cannot schedule this instruction's MicroOps in the current cycle.");


  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);

  LLVM_DEBUG(dbgs() << "  Ready @" << ReadyCycle << "c\n");


  unsigned NextCycle = CurrCycle;

  switch (SchedModel->getMicroOpBufferSize()) {

  case 0:

    assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");

    break;

  case 1:

    if (ReadyCycle > NextCycle) {

      NextCycle = ReadyCycle;

      LLVM_DEBUG(dbgs() << "  *** Stall until: " << ReadyCycle << "\n");

    }

    break;

  default:

    // We don't currently model the OOO reorder buffer, so consider all

    // scheduled MOps to be "retired". We do loosely model in-order resource

    // latency. If this instruction uses an in-order resource, account for any

    // likely stall cycles.

    if (SU->isUnbuffered && ReadyCycle > NextCycle)

      NextCycle = ReadyCycle;

    break;

  }

  RetiredMOps += IncMOps;


  // Update resource counts and critical resource.

  if (SchedModel->hasInstrSchedModel()) {

    unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();

    assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");

    Rem->RemIssueCount -= DecRemIssue;

    if (ZoneCritResIdx) {

      // Scale scheduled micro-ops for comparing with the critical resource.

      unsigned ScaledMOps =

        RetiredMOps * SchedModel->getMicroOpFactor();


      // If scaled micro-ops are now more than the previous critical resource by

      // a full cycle, then micro-ops issue becomes critical.

      if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))

          >= (int)SchedModel->getLatencyFactor()) {

        ZoneCritResIdx = 0;

        LLVM_DEBUG(dbgs() << "  *** Critical resource NumMicroOps: "

                          << ScaledMOps / SchedModel->getLatencyFactor()

                          << "c\n");

      }

    }

    for (TargetSchedModel::ProcResIter

           PI = SchedModel->getWriteProcResBegin(SC),

           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {

      unsigned RCycle =

          countResource(SC, PI->ProcResourceIdx, PI->ReleaseAtCycle, NextCycle,

                        PI->AcquireAtCycle);

      if (RCycle > NextCycle)

        NextCycle = RCycle;

    }

    if (SU->hasReservedResource) {

      // For reserved resources, record the highest cycle using the resource.

      // For top-down scheduling, this is the cycle in which we schedule this

      // instruction plus the number of cycles the operations reserves the

      // resource. For bottom-up is it simply the instruction's cycle.

      for (TargetSchedModel::ProcResIter

             PI = SchedModel->getWriteProcResBegin(SC),

             PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {

        unsigned PIdx = PI->ProcResourceIdx;

        if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {


          if (SchedModel && SchedModel->enableIntervals()) {

            unsigned ReservedUntil, InstanceIdx;

            std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(

                SC, PIdx, PI->ReleaseAtCycle, PI->AcquireAtCycle);

            if (isTop()) {

              ReservedResourceSegments[InstanceIdx].add(

                  ResourceSegments::getResourceIntervalTop(

                      NextCycle, PI->AcquireAtCycle, PI->ReleaseAtCycle),

                  MIResourceCutOff);

            } else {

              ReservedResourceSegments[InstanceIdx].add(

                  ResourceSegments::getResourceIntervalBottom(

                      NextCycle, PI->AcquireAtCycle, PI->ReleaseAtCycle),

                  MIResourceCutOff);

            }

          } else {


            unsigned ReservedUntil, InstanceIdx;

            std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(

                SC, PIdx, PI->ReleaseAtCycle, PI->AcquireAtCycle);

            if (isTop()) {

              ReservedCycles[InstanceIdx] =

                  std::max(ReservedUntil, NextCycle + PI->ReleaseAtCycle);

            } else

              ReservedCycles[InstanceIdx] = NextCycle;

          }

        }

      }

    }

  }

  // Update ExpectedLatency and DependentLatency.

  unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;

  unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;

  if (SU->getDepth() > TopLatency) {

    TopLatency = SU->getDepth();

    LLVM_DEBUG(dbgs() << "  " << Available.getName() << " TopLatency SU("

                      << SU->NodeNum << ") " << TopLatency << "c\n");

  }

  if (SU->getHeight() > BotLatency) {

    BotLatency = SU->getHeight();

    LLVM_DEBUG(dbgs() << "  " << Available.getName() << " BotLatency SU("

                      << SU->NodeNum << ") " << BotLatency << "c\n");

  }

  // If we stall for any reason, bump the cycle.

  if (NextCycle > CurrCycle)

    bumpCycle(NextCycle);

  else

    // After updating ZoneCritResIdx and ExpectedLatency, check if we're

    // resource limited. If a stall occurred, bumpCycle does this.

    IsResourceLimited =

        checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),

                           getScheduledLatency(), true);


  // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle

  // resets CurrMOps. Loop to handle instructions with more MOps than issue in

  // one cycle.  Since we commonly reach the max MOps here, opportunistically

  // bump the cycle to avoid uselessly checking everything in the readyQ.

  CurrMOps += IncMOps;


  // Bump the cycle count for issue group constraints.

  // This must be done after NextCycle has been adjust for all other stalls.

  // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set

  // currCycle to X.

  if ((isTop() &&  SchedModel->mustEndGroup(SU->getInstr())) ||

      (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {

    LLVM_DEBUG(dbgs() << "  Bump cycle to " << (isTop() ? "end" : "begin")

                      << " group\n");

    bumpCycle(++NextCycle);

  }


  while (CurrMOps >= SchedModel->getIssueWidth()) {

    LLVM_DEBUG(dbgs() << "  *** Max MOps " << CurrMOps << " at cycle "

                      << CurrCycle << '\n');

    bumpCycle(++NextCycle);

  }

  LLVM_DEBUG(dumpScheduledState());

}


/// Release pending ready nodes in to the available queue. This makes them

/// visible to heuristics.

void SchedBoundary::releasePending() {

  // If the available queue is empty, it is safe to reset MinReadyCycle.

  if (Available.empty())

    MinReadyCycle = std::numeric_limits<unsigned>::max();


  // Check to see if any of the pending instructions are ready to issue.  If

  // so, add them to the available queue.

  for (unsigned I = 0, E = Pending.size(); I < E; ++I) {

    SUnit *SU = *(Pending.begin() + I);

    unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;


    if (ReadyCycle < MinReadyCycle)

      MinReadyCycle = ReadyCycle;


    if (Available.size() >= ReadyListLimit)

      break;


    releaseNode(SU, ReadyCycle, true, I);

    if (E != Pending.size()) {

      --I;

      --E;

    }

  }

  CheckPending = false;

}


/// Remove SU from the ready set for this boundary.

void SchedBoundary::removeReady(SUnit *SU) {

  if (Available.isInQueue(SU))

    Available.remove(Available.find(SU));

  else {

    assert(Pending.isInQueue(SU) && "bad ready count");

    Pending.remove(Pending.find(SU));

  }

}


/// If this queue only has one ready candidate, return it. As a side effect,

/// defer any nodes that now hit a hazard, and advance the cycle until at least

/// one node is ready. If multiple instructions are ready, return NULL.

SUnit *SchedBoundary::pickOnlyChoice() {

  if (CheckPending)

    releasePending();


  // Defer any ready instrs that now have a hazard.

  for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {

    if (checkHazard(*I)) {

      Pending.push(*I);

      I = Available.remove(I);

      continue;

    }

    ++I;

  }

  for (unsigned i = 0; Available.empty(); ++i) {

//  FIXME: Re-enable assert once PR20057 is resolved.

//    assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&

//           "permanent hazard");

    (void)i;

    bumpCycle(CurrCycle + 1);

    releasePending();

  }


  LLVM_DEBUG(Pending.dump());

  LLVM_DEBUG(Available.dump());


  if (Available.size() == 1)

    return *Available.begin();

  return nullptr;

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Dump the content of the \ref ReservedCycles vector for the

/// resources that are used in the basic block.

///

LLVM_DUMP_METHOD void SchedBoundary::dumpReservedCycles() const {

  if (!SchedModel->hasInstrSchedModel())

    return;


  unsigned ResourceCount = SchedModel->getNumProcResourceKinds();

  unsigned StartIdx = 0;


  for (unsigned ResIdx = 0; ResIdx < ResourceCount; ++ResIdx) {

    const unsigned NumUnits = SchedModel->getProcResource(ResIdx)->NumUnits;

    std::string ResName = SchedModel->getResourceName(ResIdx);

    for (unsigned UnitIdx = 0; UnitIdx < NumUnits; ++UnitIdx) {

      dbgs() << ResName << "(" << UnitIdx << ") = ";

      if (SchedModel && SchedModel->enableIntervals()) {

        if (ReservedResourceSegments.count(StartIdx + UnitIdx))

          dbgs() << ReservedResourceSegments.at(StartIdx + UnitIdx);

        else

          dbgs() << "{ }\n";

      } else

        dbgs() << ReservedCycles[StartIdx + UnitIdx] << "\n";

    }

    StartIdx += NumUnits;

  }

}


// This is useful information to dump after bumpNode.

// Note that the Queue contents are more useful before pickNodeFromQueue.

LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {

  unsigned ResFactor;

  unsigned ResCount;

  if (ZoneCritResIdx) {

    ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);

    ResCount = getResourceCount(ZoneCritResIdx);

  } else {

    ResFactor = SchedModel->getMicroOpFactor();

    ResCount = RetiredMOps * ResFactor;

  }

  unsigned LFactor = SchedModel->getLatencyFactor();

  dbgs() << Available.getName() << " @" << CurrCycle << "c\n"

         << "  Retired: " << RetiredMOps;

  dbgs() << "\n  Executed: " << getExecutedCount() / LFactor << "c";

  dbgs() << "\n  Critical: " << ResCount / LFactor << "c, "

         << ResCount / ResFactor << " "

         << SchedModel->getResourceName(ZoneCritResIdx)

         << "\n  ExpectedLatency: " << ExpectedLatency << "c\n"

         << (IsResourceLimited ? "  - Resource" : "  - Latency")

         << " limited.\n";

  if (MISchedDumpReservedCycles)

    dumpReservedCycles();

}

#endif


//===----------------------------------------------------------------------===//

// GenericScheduler - Generic implementation of MachineSchedStrategy.

//===----------------------------------------------------------------------===//


void GenericSchedulerBase::SchedCandidate::

initResourceDelta(const ScheduleDAGMI *DAG,

                  const TargetSchedModel *SchedModel) {

  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)

    return;


  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);

  for (TargetSchedModel::ProcResIter

         PI = SchedModel->getWriteProcResBegin(SC),

         PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {

    if (PI->ProcResourceIdx == Policy.ReduceResIdx)

      ResDelta.CritResources += PI->ReleaseAtCycle;

    if (PI->ProcResourceIdx == Policy.DemandResIdx)

      ResDelta.DemandedResources += PI->ReleaseAtCycle;

  }

}


/// Compute remaining latency. We need this both to determine whether the

/// overall schedule has become latency-limited and whether the instructions

/// outside this zone are resource or latency limited.

///

/// The "dependent" latency is updated incrementally during scheduling as the

/// max height/depth of scheduled nodes minus the cycles since it was

/// scheduled:

///   DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone

///

/// The "independent" latency is the max ready queue depth:

///   ILat = max N.depth for N in Available|Pending

///

/// RemainingLatency is the greater of independent and dependent latency.

///

/// These computations are expensive, especially in DAGs with many edges, so

/// only do them if necessary.

static unsigned computeRemLatency(SchedBoundary &CurrZone) {

  unsigned RemLatency = CurrZone.getDependentLatency();

  RemLatency = std::max(RemLatency,

                        CurrZone.findMaxLatency(CurrZone.Available.elements()));

  RemLatency = std::max(RemLatency,

                        CurrZone.findMaxLatency(CurrZone.Pending.elements()));

  return RemLatency;

}


/// Returns true if the current cycle plus remaning latency is greater than

/// the critical path in the scheduling region.

bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,

                                               SchedBoundary &CurrZone,

                                               bool ComputeRemLatency,

                                               unsigned &RemLatency) const {

  // The current cycle is already greater than the critical path, so we are

  // already latency limited and don't need to compute the remaining latency.

  if (CurrZone.getCurrCycle() > Rem.CriticalPath)

    return true;


  // If we haven't scheduled anything yet, then we aren't latency limited.

  if (CurrZone.getCurrCycle() == 0)

    return false;


  if (ComputeRemLatency)

    RemLatency = computeRemLatency(CurrZone);


  return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;

}


/// Set the CandPolicy given a scheduling zone given the current resources and

/// latencies inside and outside the zone.

void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,

                                     SchedBoundary &CurrZone,

                                     SchedBoundary *OtherZone) {

  // Apply preemptive heuristics based on the total latency and resources

  // inside and outside this zone. Potential stalls should be considered before

  // following this policy.


  // Compute the critical resource outside the zone.

  unsigned OtherCritIdx = 0;

  unsigned OtherCount =

    OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;


  bool OtherResLimited = false;

  unsigned RemLatency = 0;

  bool RemLatencyComputed = false;

  if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {

    RemLatency = computeRemLatency(CurrZone);

    RemLatencyComputed = true;

    OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),

                                         OtherCount, RemLatency, false);

  }


  // Schedule aggressively for latency in PostRA mode. We don't check for

  // acyclic latency during PostRA, and highly out-of-order processors will

  // skip PostRA scheduling.

  if (!OtherResLimited &&

      (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,

                                       RemLatency))) {

    Policy.ReduceLatency |= true;

    LLVM_DEBUG(dbgs() << "  " << CurrZone.Available.getName()

                      << " RemainingLatency " << RemLatency << " + "

                      << CurrZone.getCurrCycle() << "c > CritPath "

                      << Rem.CriticalPath << "\n");

  }

  // If the same resource is limiting inside and outside the zone, do nothing.

  if (CurrZone.getZoneCritResIdx() == OtherCritIdx)

    return;


  LLVM_DEBUG(if (CurrZone.isResourceLimited()) {

    dbgs() << "  " << CurrZone.Available.getName() << " ResourceLimited: "

           << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";

  } if (OtherResLimited) dbgs()

                 << "  RemainingLimit: "

                 << SchedModel->getResourceName(OtherCritIdx) << "\n";

             if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()

             << "  Latency limited both directions.\n");


  if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)

    Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();


  if (OtherResLimited)

    Policy.DemandResIdx = OtherCritIdx;

}


#ifndef NDEBUG

const char *GenericSchedulerBase::getReasonStr(

  GenericSchedulerBase::CandReason Reason) {

  switch (Reason) {

  case NoCand:         return "NOCAND    ";

  case Only1:          return "ONLY1     ";

  case PhysReg:        return "PHYS-REG  ";

  case RegExcess:      return "REG-EXCESS";

  case RegCritical:    return "REG-CRIT  ";

  case Stall:          return "STALL     ";

  case Cluster:        return "CLUSTER   ";

  case Weak:           return "WEAK      ";

  case RegMax:         return "REG-MAX   ";

  case ResourceReduce: return "RES-REDUCE";

  case ResourceDemand: return "RES-DEMAND";

  case TopDepthReduce: return "TOP-DEPTH ";

  case TopPathReduce:  return "TOP-PATH  ";

  case BotHeightReduce:return "BOT-HEIGHT";

  case BotPathReduce:  return "BOT-PATH  ";

  case NextDefUse:     return "DEF-USE   ";

  case NodeOrder:      return "ORDER     ";

  };

  llvm_unreachable("Unknown reason!");

}


void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {

  PressureChange P;

  unsigned ResIdx = 0;

  unsigned Latency = 0;

  switch (Cand.Reason) {

  default:

    break;

  case RegExcess:

    P = Cand.RPDelta.Excess;

    break;

  case RegCritical:

    P = Cand.RPDelta.CriticalMax;

    break;

  case RegMax:

    P = Cand.RPDelta.CurrentMax;

    break;

  case ResourceReduce:

    ResIdx = Cand.Policy.ReduceResIdx;

    break;

  case ResourceDemand:

    ResIdx = Cand.Policy.DemandResIdx;

    break;

  case TopDepthReduce:

    Latency = Cand.SU->getDepth();

    break;

  case TopPathReduce:

    Latency = Cand.SU->getHeight();

    break;

  case BotHeightReduce:

    Latency = Cand.SU->getHeight();

    break;

  case BotPathReduce:

    Latency = Cand.SU->getDepth();

    break;

  }

  dbgs() << "  Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);

  if (P.isValid())

    dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())

           << ":" << P.getUnitInc() << " ";

  else

    dbgs() << "      ";

  if (ResIdx)

    dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";

  else

    dbgs() << "         ";

  if (Latency)

    dbgs() << " " << Latency << " cycles ";

  else

    dbgs() << "          ";

  dbgs() << '\n';

}

#endif


namespace llvm {

/// Return true if this heuristic determines order.

/// TODO: Consider refactor return type of these functions as integer or enum,

/// as we may need to differentiate whether TryCand is better than Cand.

bool tryLess(int TryVal, int CandVal,

             GenericSchedulerBase::SchedCandidate &TryCand,

             GenericSchedulerBase::SchedCandidate &Cand,

             GenericSchedulerBase::CandReason Reason) {

  if (TryVal < CandVal) {

    TryCand.Reason = Reason;

    return true;

  }

  if (TryVal > CandVal) {

    if (Cand.Reason > Reason)

      Cand.Reason = Reason;

    return true;

  }

  return false;

}


bool tryGreater(int TryVal, int CandVal,

                GenericSchedulerBase::SchedCandidate &TryCand,

                GenericSchedulerBase::SchedCandidate &Cand,

                GenericSchedulerBase::CandReason Reason) {

  if (TryVal > CandVal) {

    TryCand.Reason = Reason;

    return true;

  }

  if (TryVal < CandVal) {

    if (Cand.Reason > Reason)

      Cand.Reason = Reason;

    return true;

  }

  return false;

}


bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,

                GenericSchedulerBase::SchedCandidate &Cand,

                SchedBoundary &Zone) {

  if (Zone.isTop()) {

    // Prefer the candidate with the lesser depth, but only if one of them has

    // depth greater than the total latency scheduled so far, otherwise either

    // of them could be scheduled now with no stall.

    if (std::max(TryCand.SU->getDepth(), Cand.SU->getDepth()) >

        Zone.getScheduledLatency()) {

      if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),

                  TryCand, Cand, GenericSchedulerBase::TopDepthReduce))

        return true;

    }

    if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),

                   TryCand, Cand, GenericSchedulerBase::TopPathReduce))

      return true;

  } else {

    // Prefer the candidate with the lesser height, but only if one of them has

    // height greater than the total latency scheduled so far, otherwise either

    // of them could be scheduled now with no stall.

    if (std::max(TryCand.SU->getHeight(), Cand.SU->getHeight()) >

        Zone.getScheduledLatency()) {

      if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),

                  TryCand, Cand, GenericSchedulerBase::BotHeightReduce))

        return true;

    }

    if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),

                   TryCand, Cand, GenericSchedulerBase::BotPathReduce))

      return true;

  }

  return false;

}

} // end namespace llvm


static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {

  LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")

                    << GenericSchedulerBase::getReasonStr(Reason) << '\n');

}


static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {

  tracePick(Cand.Reason, Cand.AtTop);

}


void GenericScheduler::initialize(ScheduleDAGMI *dag) {

  assert(dag->hasVRegLiveness() &&

         "(PreRA)GenericScheduler needs vreg liveness");

  DAG = static_cast<ScheduleDAGMILive*>(dag);

  SchedModel = DAG->getSchedModel();

  TRI = DAG->TRI;


  if (RegionPolicy.ComputeDFSResult)

    DAG->computeDFSResult();


  Rem.init(DAG, SchedModel);

  Top.init(DAG, SchedModel, &Rem);

  Bot.init(DAG, SchedModel, &Rem);


  // Initialize resource counts.


  // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or

  // are disabled, then these HazardRecs will be disabled.

  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();

  if (!Top.HazardRec) {

    Top.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);

  }

  if (!Bot.HazardRec) {

    Bot.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);

  }

  TopCand.SU = nullptr;

  BotCand.SU = nullptr;

}


/// Initialize the per-region scheduling policy.

void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,

                                  MachineBasicBlock::iterator End,

                                  unsigned NumRegionInstrs) {

  const MachineFunction &MF = *Begin->getMF();

  const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();


  // Avoid setting up the register pressure tracker for small regions to save

  // compile time. As a rough heuristic, only track pressure when the number of

  // schedulable instructions exceeds half the allocatable integer register file

  // that is the largest legal integer regiser type.

  RegionPolicy.ShouldTrackPressure = true;

  for (unsigned VT = MVT::i64; VT > (unsigned)MVT::i1; --VT) {

    MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;

    if (TLI->isTypeLegal(LegalIntVT)) {

      unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(

        TLI->getRegClassFor(LegalIntVT));

      RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);

      break;

    }

  }


  // For generic targets, we default to bottom-up, because it's simpler and more

  // compile-time optimizations have been implemented in that direction.

  RegionPolicy.OnlyBottomUp = true;


  // Allow the subtarget to override default policy.

  MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);


  // After subtarget overrides, apply command line options.

  if (!EnableRegPressure) {

    RegionPolicy.ShouldTrackPressure = false;

    RegionPolicy.ShouldTrackLaneMasks = false;

  }


  if (PreRADirection == MISched::TopDown) {

    RegionPolicy.OnlyTopDown = true;

    RegionPolicy.OnlyBottomUp = false;

  } else if (PreRADirection == MISched::BottomUp) {

    RegionPolicy.OnlyTopDown = false;

    RegionPolicy.OnlyBottomUp = true;

  } else if (PreRADirection == MISched::Bidirectional) {

    RegionPolicy.OnlyBottomUp = false;

    RegionPolicy.OnlyTopDown = false;

  }

}


void GenericScheduler::dumpPolicy() const {

  // Cannot completely remove virtual function even in release mode.

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

  dbgs() << "GenericScheduler RegionPolicy: "

         << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure

         << " OnlyTopDown=" << RegionPolicy.OnlyTopDown

         << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp

         << "\n";

#endif

}


/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic

/// critical path by more cycles than it takes to drain the instruction buffer.

/// We estimate an upper bounds on in-flight instructions as:

///

/// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )

/// InFlightIterations = AcyclicPath / CyclesPerIteration

/// InFlightResources = InFlightIterations * LoopResources

///

/// TODO: Check execution resources in addition to IssueCount.

void GenericScheduler::checkAcyclicLatency() {

  if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)

    return;


  // Scaled number of cycles per loop iteration.

  unsigned IterCount =

    std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),

             Rem.RemIssueCount);

  // Scaled acyclic critical path.

  unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();

  // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop

  unsigned InFlightCount =

    (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;

  unsigned BufferLimit =

    SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();


  Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;


  LLVM_DEBUG(

      dbgs() << "IssueCycles="

             << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "

             << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()

             << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount

             << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()

             << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";

      if (Rem.IsAcyclicLatencyLimited) dbgs() << "  ACYCLIC LATENCY LIMIT\n");

}


void GenericScheduler::registerRoots() {

  Rem.CriticalPath = DAG->ExitSU.getDepth();


  // Some roots may not feed into ExitSU. Check all of them in case.

  for (const SUnit *SU : Bot.Available) {

    if (SU->getDepth() > Rem.CriticalPath)

      Rem.CriticalPath = SU->getDepth();

  }

  LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');

  if (DumpCriticalPathLength) {

    errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";

  }


  if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {

    Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();

    checkAcyclicLatency();

  }

}


namespace llvm {

bool tryPressure(const PressureChange &TryP,

                 const PressureChange &CandP,

                 GenericSchedulerBase::SchedCandidate &TryCand,

                 GenericSchedulerBase::SchedCandidate &Cand,

                 GenericSchedulerBase::CandReason Reason,

                 const TargetRegisterInfo *TRI,

                 const MachineFunction &MF) {

  // If one candidate decreases and the other increases, go with it.

  // Invalid candidates have UnitInc==0.

  if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,

                 Reason)) {

    return true;

  }

  // Do not compare the magnitude of pressure changes between top and bottom

  // boundary.

  if (Cand.AtTop != TryCand.AtTop)

    return false;


  // If both candidates affect the same set in the same boundary, go with the

  // smallest increase.

  unsigned TryPSet = TryP.getPSetOrMax();

  unsigned CandPSet = CandP.getPSetOrMax();

  if (TryPSet == CandPSet) {

    return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,

                   Reason);

  }


  int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :

                                 std::numeric_limits<int>::max();


  int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :

                                   std::numeric_limits<int>::max();


  // If the candidates are decreasing pressure, reverse priority.

  if (TryP.getUnitInc() < 0)

    std::swap(TryRank, CandRank);

  return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);

}


unsigned getWeakLeft(const SUnit *SU, bool isTop) {

  return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;

}


/// Minimize physical register live ranges. Regalloc wants them adjacent to

/// their physreg def/use.

///

/// FIXME: This is an unnecessary check on the critical path. Most are root/leaf

/// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled

/// with the operation that produces or consumes the physreg. We'll do this when

/// regalloc has support for parallel copies.

int biasPhysReg(const SUnit *SU, bool isTop) {

  const MachineInstr *MI = SU->getInstr();


  if (MI->isCopy()) {

    unsigned ScheduledOper = isTop ? 1 : 0;

    unsigned UnscheduledOper = isTop ? 0 : 1;

    // If we have already scheduled the physreg produce/consumer, immediately

    // schedule the copy.

    if (MI->getOperand(ScheduledOper).getReg().isPhysical())

      return 1;

    // If the physreg is at the boundary, defer it. Otherwise schedule it

    // immediately to free the dependent. We can hoist the copy later.

    bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;

    if (MI->getOperand(UnscheduledOper).getReg().isPhysical())

      return AtBoundary ? -1 : 1;

  }


  if (MI->isMoveImmediate()) {

    // If we have a move immediate and all successors have been assigned, bias

    // towards scheduling this later. Make sure all register defs are to

    // physical registers.

    bool DoBias = true;

    for (const MachineOperand &Op : MI->defs()) {

      if (Op.isReg() && !Op.getReg().isPhysical()) {

        DoBias = false;

        break;

      }

    }


    if (DoBias)

      return isTop ? -1 : 1;

  }


  return 0;

}

} // end namespace llvm


void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,

                                     bool AtTop,

                                     const RegPressureTracker &RPTracker,

                                     RegPressureTracker &TempTracker) {

  Cand.SU = SU;

  Cand.AtTop = AtTop;

  if (DAG->isTrackingPressure()) {

    if (AtTop) {

      TempTracker.getMaxDownwardPressureDelta(

        Cand.SU->getInstr(),

        Cand.RPDelta,

        DAG->getRegionCriticalPSets(),

        DAG->getRegPressure().MaxSetPressure);

    } else {

      if (VerifyScheduling) {

        TempTracker.getMaxUpwardPressureDelta(

          Cand.SU->getInstr(),

          &DAG->getPressureDiff(Cand.SU),

          Cand.RPDelta,

          DAG->getRegionCriticalPSets(),

          DAG->getRegPressure().MaxSetPressure);

      } else {

        RPTracker.getUpwardPressureDelta(

          Cand.SU->getInstr(),

          DAG->getPressureDiff(Cand.SU),

          Cand.RPDelta,

          DAG->getRegionCriticalPSets(),

          DAG->getRegPressure().MaxSetPressure);

      }

    }

  }

  LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs()

             << "  Try  SU(" << Cand.SU->NodeNum << ") "

             << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":"

             << Cand.RPDelta.Excess.getUnitInc() << "\n");

}


/// Apply a set of heuristics to a new candidate. Heuristics are currently

/// hierarchical. This may be more efficient than a graduated cost model because

/// we don't need to evaluate all aspects of the model for each node in the

/// queue. But it's really done to make the heuristics easier to debug and

/// statistically analyze.

///

/// \param Cand provides the policy and current best candidate.

/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.

/// \param Zone describes the scheduled zone that we are extending, or nullptr

///             if Cand is from a different zone than TryCand.

/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)

bool GenericScheduler::tryCandidate(SchedCandidate &Cand,

                                    SchedCandidate &TryCand,

                                    SchedBoundary *Zone) const {

  // Initialize the candidate if needed.

  if (!Cand.isValid()) {

    TryCand.Reason = NodeOrder;

    return true;

  }


  // Bias PhysReg Defs and copies to their uses and defined respectively.

  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),

                 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))

    return TryCand.Reason != NoCand;


  // Avoid exceeding the target's limit.

  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,

                                               Cand.RPDelta.Excess,

                                               TryCand, Cand, RegExcess, TRI,

                                               DAG->MF))

    return TryCand.Reason != NoCand;


  // Avoid increasing the max critical pressure in the scheduled region.

  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,

                                               Cand.RPDelta.CriticalMax,

                                               TryCand, Cand, RegCritical, TRI,

                                               DAG->MF))

    return TryCand.Reason != NoCand;


  // We only compare a subset of features when comparing nodes between

  // Top and Bottom boundary. Some properties are simply incomparable, in many

  // other instances we should only override the other boundary if something

  // is a clear good pick on one boundary. Skip heuristics that are more

  // "tie-breaking" in nature.

  bool SameBoundary = Zone != nullptr;

  if (SameBoundary) {

    // For loops that are acyclic path limited, aggressively schedule for

    // latency. Within an single cycle, whenever CurrMOps > 0, allow normal

    // heuristics to take precedence.

    if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&

        tryLatency(TryCand, Cand, *Zone))

      return TryCand.Reason != NoCand;


    // Prioritize instructions that read unbuffered resources by stall cycles.

    if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),

                Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))

      return TryCand.Reason != NoCand;

  }


  // Keep clustered nodes together to encourage downstream peephole

  // optimizations which may reduce resource requirements.

  //

  // This is a best effort to set things up for a post-RA pass. Optimizations

  // like generating loads of multiple registers should ideally be done within

  // the scheduler pass by combining the loads during DAG postprocessing.

  const SUnit *CandNextClusterSU =

    Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();

  const SUnit *TryCandNextClusterSU =

    TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();

  if (tryGreater(TryCand.SU == TryCandNextClusterSU,

                 Cand.SU == CandNextClusterSU,

                 TryCand, Cand, Cluster))

    return TryCand.Reason != NoCand;


  if (SameBoundary) {

    // Weak edges are for clustering and other constraints.

    if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),

                getWeakLeft(Cand.SU, Cand.AtTop),

                TryCand, Cand, Weak))

      return TryCand.Reason != NoCand;

  }


  // Avoid increasing the max pressure of the entire region.

  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,

                                               Cand.RPDelta.CurrentMax,

                                               TryCand, Cand, RegMax, TRI,

                                               DAG->MF))

    return TryCand.Reason != NoCand;


  if (SameBoundary) {

    // Avoid critical resource consumption and balance the schedule.

    TryCand.initResourceDelta(DAG, SchedModel);

    if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,

                TryCand, Cand, ResourceReduce))

      return TryCand.Reason != NoCand;

    if (tryGreater(TryCand.ResDelta.DemandedResources,

                   Cand.ResDelta.DemandedResources,

                   TryCand, Cand, ResourceDemand))

      return TryCand.Reason != NoCand;


    // Avoid serializing long latency dependence chains.

    // For acyclic path limited loops, latency was already checked above.

    if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&

        !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))

      return TryCand.Reason != NoCand;


    // Fall through to original instruction order.

    if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)

        || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {

      TryCand.Reason = NodeOrder;

      return true;

    }

  }


  return false;

}


/// Pick the best candidate from the queue.

///

/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during

/// DAG building. To adjust for the current scheduling location we need to

/// maintain the number of vreg uses remaining to be top-scheduled.

void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,

                                         const CandPolicy &ZonePolicy,

                                         const RegPressureTracker &RPTracker,

                                         SchedCandidate &Cand) {

  // getMaxPressureDelta temporarily modifies the tracker.

  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);


  ReadyQueue &Q = Zone.Available;

  for (SUnit *SU : Q) {


    SchedCandidate TryCand(ZonePolicy);

    initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);

    // Pass SchedBoundary only when comparing nodes from the same boundary.

    SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;

    if (tryCandidate(Cand, TryCand, ZoneArg)) {

      // Initialize resource delta if needed in case future heuristics query it.

      if (TryCand.ResDelta == SchedResourceDelta())

        TryCand.initResourceDelta(DAG, SchedModel);

      Cand.setBest(TryCand);

      LLVM_DEBUG(traceCandidate(Cand));

    }

  }

}


/// Pick the best candidate node from either the top or bottom queue.

SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {

  // Schedule as far as possible in the direction of no choice. This is most

  // efficient, but also provides the best heuristics for CriticalPSets.

  if (SUnit *SU = Bot.pickOnlyChoice()) {

    IsTopNode = false;

    tracePick(Only1, false);

    return SU;

  }

  if (SUnit *SU = Top.pickOnlyChoice()) {

    IsTopNode = true;

    tracePick(Only1, true);

    return SU;

  }

  // Set the bottom-up policy based on the state of the current bottom zone and

  // the instructions outside the zone, including the top zone.

  CandPolicy BotPolicy;

  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);

  // Set the top-down policy based on the state of the current top zone and

  // the instructions outside the zone, including the bottom zone.

  CandPolicy TopPolicy;

  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);


  // See if BotCand is still valid (because we previously scheduled from Top).

  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");

  if (!BotCand.isValid() || BotCand.SU->isScheduled ||

      BotCand.Policy != BotPolicy) {

    BotCand.reset(CandPolicy());

    pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);

    assert(BotCand.Reason != NoCand && "failed to find the first candidate");

  } else {

    LLVM_DEBUG(traceCandidate(BotCand));

#ifndef NDEBUG

    if (VerifyScheduling) {

      SchedCandidate TCand;

      TCand.reset(CandPolicy());

      pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);

      assert(TCand.SU == BotCand.SU &&

             "Last pick result should correspond to re-picking right now");

    }

#endif

  }


  // Check if the top Q has a better candidate.

  LLVM_DEBUG(dbgs() << "Picking from Top:\n");

  if (!TopCand.isValid() || TopCand.SU->isScheduled ||

      TopCand.Policy != TopPolicy) {

    TopCand.reset(CandPolicy());

    pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);

    assert(TopCand.Reason != NoCand && "failed to find the first candidate");

  } else {

    LLVM_DEBUG(traceCandidate(TopCand));

#ifndef NDEBUG

    if (VerifyScheduling) {

      SchedCandidate TCand;

      TCand.reset(CandPolicy());

      pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);

      assert(TCand.SU == TopCand.SU &&

             "Last pick result should correspond to re-picking right now");

    }

#endif

  }


  // Pick best from BotCand and TopCand.

  assert(BotCand.isValid());

  assert(TopCand.isValid());

  SchedCandidate Cand = BotCand;

  TopCand.Reason = NoCand;

  if (tryCandidate(Cand, TopCand, nullptr)) {

    Cand.setBest(TopCand);

    LLVM_DEBUG(traceCandidate(Cand));

  }


  IsTopNode = Cand.AtTop;

  tracePick(Cand);

  return Cand.SU;

}


/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.

SUnit *GenericScheduler::pickNode(bool &IsTopNode) {

  if (DAG->top() == DAG->bottom()) {

    assert(Top.Available.empty() && Top.Pending.empty() &&

           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");

    return nullptr;

  }

  SUnit *SU;

  do {

    if (RegionPolicy.OnlyTopDown) {

      SU = Top.pickOnlyChoice();

      if (!SU) {

        CandPolicy NoPolicy;

        TopCand.reset(NoPolicy);

        pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);

        assert(TopCand.Reason != NoCand && "failed to find a candidate");

        tracePick(TopCand);

        SU = TopCand.SU;

      }

      IsTopNode = true;

    } else if (RegionPolicy.OnlyBottomUp) {

      SU = Bot.pickOnlyChoice();

      if (!SU) {

        CandPolicy NoPolicy;

        BotCand.reset(NoPolicy);

        pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);

        assert(BotCand.Reason != NoCand && "failed to find a candidate");

        tracePick(BotCand);

        SU = BotCand.SU;

      }

      IsTopNode = false;

    } else {

      SU = pickNodeBidirectional(IsTopNode);

    }

  } while (SU->isScheduled);


  // If IsTopNode, then SU is in Top.Available and must be removed. Otherwise,

  // if isTopReady(), then SU is in either Top.Available or Top.Pending.

  // If !IsTopNode, then SU is in Bot.Available and must be removed. Otherwise,

  // if isBottomReady(), then SU is in either Bot.Available or Bot.Pending.

  //

  // It is coincidental when !IsTopNode && isTopReady or when IsTopNode &&

  // isBottomReady. That is, it didn't factor into the decision to choose SU

  // because it isTopReady or isBottomReady, respectively. In fact, if the

  // RegionPolicy is OnlyTopDown or OnlyBottomUp, then the Bot queues and Top

  // queues respectivley contain the original roots and don't get updated when

  // picking a node. So if SU isTopReady on a OnlyBottomUp pick, then it was

  // because we schduled everything but the top roots. Conversley, if SU

  // isBottomReady on OnlyTopDown, then it was because we scheduled everything

  // but the bottom roots. If its in a queue even coincidentally, it should be

  // removed so it does not get re-picked in a subsequent pickNode call.

  if (SU->isTopReady())

    Top.removeReady(SU);

  if (SU->isBottomReady())

    Bot.removeReady(SU);


  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "

                    << *SU->getInstr());

  return SU;

}


void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) {

  MachineBasicBlock::iterator InsertPos = SU->getInstr();

  if (!isTop)

    ++InsertPos;

  SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;


  // Find already scheduled copies with a single physreg dependence and move

  // them just above the scheduled instruction.

  for (SDep &Dep : Deps) {

    if (Dep.getKind() != SDep::Data ||

        !Register::isPhysicalRegister(Dep.getReg()))

      continue;

    SUnit *DepSU = Dep.getSUnit();

    if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)

      continue;

    MachineInstr *Copy = DepSU->getInstr();

    if (!Copy->isCopy() && !Copy->isMoveImmediate())

      continue;

    LLVM_DEBUG(dbgs() << "  Rescheduling physreg copy ";

               DAG->dumpNode(*Dep.getSUnit()));

    DAG->moveInstruction(Copy, InsertPos);

  }

}


/// Update the scheduler's state after scheduling a node. This is the same node

/// that was just returned by pickNode(). However, ScheduleDAGMILive needs to

/// update it's state based on the current cycle before MachineSchedStrategy

/// does.

///

/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling

/// them here. See comments in biasPhysReg.

void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {

  if (IsTopNode) {

    SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());

    Top.bumpNode(SU);

    if (SU->hasPhysRegUses)

      reschedulePhysReg(SU, true);

  } else {

    SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());

    Bot.bumpNode(SU);

    if (SU->hasPhysRegDefs)

      reschedulePhysReg(SU, false);

  }

}


/// Create the standard converging machine scheduler. This will be used as the

/// default scheduler if the target does not set a default.

ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {

  ScheduleDAGMILive *DAG =

      new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));

  // Register DAG post-processors.

  //

  // FIXME: extend the mutation API to allow earlier mutations to instantiate

  // data and pass it to later mutations. Have a single mutation that gathers

  // the interesting nodes in one pass.

  DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));


  const TargetSubtargetInfo &STI = C->MF->getSubtarget();

  // Add MacroFusion mutation if fusions are not empty.

  const auto &MacroFusions = STI.getMacroFusions();

  if (!MacroFusions.empty())

    DAG->addMutation(createMacroFusionDAGMutation(MacroFusions));

  return DAG;

}


static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {

  return createGenericSchedLive(C);

}


static MachineSchedRegistry

GenericSchedRegistry("converge", "Standard converging scheduler.",

                     createConvergingSched);


//===----------------------------------------------------------------------===//

// PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.

//===----------------------------------------------------------------------===//


void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {

  DAG = Dag;

  SchedModel = DAG->getSchedModel();

  TRI = DAG->TRI;


  Rem.init(DAG, SchedModel);

  Top.init(DAG, SchedModel, &Rem);

  Bot.init(DAG, SchedModel, &Rem);


  // Initialize the HazardRecognizers. If itineraries don't exist, are empty,

  // or are disabled, then these HazardRecs will be disabled.

  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();

  if (!Top.HazardRec) {

    Top.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);

  }

  if (!Bot.HazardRec) {

    Bot.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);

  }

}


void PostGenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,

                                      MachineBasicBlock::iterator End,

                                      unsigned NumRegionInstrs) {

  const MachineFunction &MF = *Begin->getMF();


  // Default to top-down because it was implemented first and existing targets

  // expect that behavior by default.

  RegionPolicy.OnlyTopDown = true;

  RegionPolicy.OnlyBottomUp = false;


  // Allow the subtarget to override default policy.

  MF.getSubtarget().overridePostRASchedPolicy(RegionPolicy, NumRegionInstrs);


  // After subtarget overrides, apply command line options.

  if (PostRADirection == MISched::TopDown) {

    RegionPolicy.OnlyTopDown = true;

    RegionPolicy.OnlyBottomUp = false;

  } else if (PostRADirection == MISched::BottomUp) {

    RegionPolicy.OnlyTopDown = false;

    RegionPolicy.OnlyBottomUp = true;

  } else if (PostRADirection == MISched::Bidirectional) {

    RegionPolicy.OnlyBottomUp = false;

    RegionPolicy.OnlyTopDown = false;

  }

}


void PostGenericScheduler::registerRoots() {

  Rem.CriticalPath = DAG->ExitSU.getDepth();


  // Some roots may not feed into ExitSU. Check all of them in case.

  for (const SUnit *SU : Bot.Available) {

    if (SU->getDepth() > Rem.CriticalPath)

      Rem.CriticalPath = SU->getDepth();

  }

  LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');

  if (DumpCriticalPathLength) {

    errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";

  }

}


/// Apply a set of heuristics to a new candidate for PostRA scheduling.

///

/// \param Cand provides the policy and current best candidate.

/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.

/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)

bool PostGenericScheduler::tryCandidate(SchedCandidate &Cand,

                                        SchedCandidate &TryCand) {

  // Initialize the candidate if needed.

  if (!Cand.isValid()) {

    TryCand.Reason = NodeOrder;

    return true;

  }


  // Prioritize instructions that read unbuffered resources by stall cycles.

  if (tryLess(Top.getLatencyStallCycles(TryCand.SU),

              Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))

    return TryCand.Reason != NoCand;


  // Keep clustered nodes together.

  const SUnit *CandNextClusterSU =

      Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();

  const SUnit *TryCandNextClusterSU =

      TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();

  if (tryGreater(TryCand.SU == TryCandNextClusterSU,

                 Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster))

    return TryCand.Reason != NoCand;


  // Avoid critical resource consumption and balance the schedule.

  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,

              TryCand, Cand, ResourceReduce))

    return TryCand.Reason != NoCand;

  if (tryGreater(TryCand.ResDelta.DemandedResources,

                 Cand.ResDelta.DemandedResources,

                 TryCand, Cand, ResourceDemand))

    return TryCand.Reason != NoCand;


  // We only compare a subset of features when comparing nodes between

  // Top and Bottom boundary.

  if (Cand.AtTop == TryCand.AtTop) {

    // Avoid serializing long latency dependence chains.

    if (Cand.Policy.ReduceLatency &&

        tryLatency(TryCand, Cand, Cand.AtTop ? Top : Bot))

      return TryCand.Reason != NoCand;

  }


  // Fall through to original instruction order.

  if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {

    TryCand.Reason = NodeOrder;

    return true;

  }


  return false;

}


void PostGenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,

                                             SchedCandidate &Cand) {

  ReadyQueue &Q = Zone.Available;

  for (SUnit *SU : Q) {

    SchedCandidate TryCand(Cand.Policy);

    TryCand.SU = SU;

    TryCand.AtTop = Zone.isTop();

    TryCand.initResourceDelta(DAG, SchedModel);

    if (tryCandidate(Cand, TryCand)) {

      Cand.setBest(TryCand);

      LLVM_DEBUG(traceCandidate(Cand));

    }

  }

}


/// Pick the best candidate node from either the top or bottom queue.

SUnit *PostGenericScheduler::pickNodeBidirectional(bool &IsTopNode) {

  // FIXME: This is similiar to GenericScheduler::pickNodeBidirectional. Factor

  // out common parts.


  // Schedule as far as possible in the direction of no choice. This is most

  // efficient, but also provides the best heuristics for CriticalPSets.

  if (SUnit *SU = Bot.pickOnlyChoice()) {

    IsTopNode = false;

    tracePick(Only1, false);

    return SU;

  }

  if (SUnit *SU = Top.pickOnlyChoice()) {

    IsTopNode = true;

    tracePick(Only1, true);

    return SU;

  }

  // Set the bottom-up policy based on the state of the current bottom zone and

  // the instructions outside the zone, including the top zone.

  CandPolicy BotPolicy;

  setPolicy(BotPolicy, /*IsPostRA=*/true, Bot, &Top);

  // Set the top-down policy based on the state of the current top zone and

  // the instructions outside the zone, including the bottom zone.

  CandPolicy TopPolicy;

  setPolicy(TopPolicy, /*IsPostRA=*/true, Top, &Bot);


  // See if BotCand is still valid (because we previously scheduled from Top).

  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");

  if (!BotCand.isValid() || BotCand.SU->isScheduled ||

      BotCand.Policy != BotPolicy) {

    BotCand.reset(CandPolicy());

    pickNodeFromQueue(Bot, BotCand);

    assert(BotCand.Reason != NoCand && "failed to find the first candidate");

  } else {

    LLVM_DEBUG(traceCandidate(BotCand));

#ifndef NDEBUG

    if (VerifyScheduling) {

      SchedCandidate TCand;

      TCand.reset(CandPolicy());

      pickNodeFromQueue(Bot, BotCand);

      assert(TCand.SU == BotCand.SU &&

             "Last pick result should correspond to re-picking right now");

    }

#endif

  }


  // Check if the top Q has a better candidate.

  LLVM_DEBUG(dbgs() << "Picking from Top:\n");

  if (!TopCand.isValid() || TopCand.SU->isScheduled ||

      TopCand.Policy != TopPolicy) {

    TopCand.reset(CandPolicy());

    pickNodeFromQueue(Top, TopCand);

    assert(TopCand.Reason != NoCand && "failed to find the first candidate");

  } else {

    LLVM_DEBUG(traceCandidate(TopCand));

#ifndef NDEBUG

    if (VerifyScheduling) {

      SchedCandidate TCand;

      TCand.reset(CandPolicy());

      pickNodeFromQueue(Top, TopCand);

      assert(TCand.SU == TopCand.SU &&

             "Last pick result should correspond to re-picking right now");

    }

#endif

  }


  // Pick best from BotCand and TopCand.

  assert(BotCand.isValid());

  assert(TopCand.isValid());

  SchedCandidate Cand = BotCand;

  TopCand.Reason = NoCand;

  if (tryCandidate(Cand, TopCand)) {

    Cand.setBest(TopCand);

    LLVM_DEBUG(traceCandidate(Cand));

  }


  IsTopNode = Cand.AtTop;

  tracePick(Cand);

  return Cand.SU;

}


/// Pick the next node to schedule.

SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {

  if (DAG->top() == DAG->bottom()) {

    assert(Top.Available.empty() && Top.Pending.empty() &&

           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");

    return nullptr;

  }

  SUnit *SU;

  do {

    if (RegionPolicy.OnlyBottomUp) {

      SU = Bot.pickOnlyChoice();

      if (SU) {

        tracePick(Only1, true);

      } else {

        CandPolicy NoPolicy;

        BotCand.reset(NoPolicy);

        // Set the bottom-up policy based on the state of the current bottom

        // zone and the instructions outside the zone, including the top zone.

        setPolicy(BotCand.Policy, /*IsPostRA=*/true, Bot, nullptr);

        pickNodeFromQueue(Bot, BotCand);

        assert(BotCand.Reason != NoCand && "failed to find a candidate");

        tracePick(BotCand);

        SU = BotCand.SU;

      }

      IsTopNode = false;

    } else if (RegionPolicy.OnlyTopDown) {

      SU = Top.pickOnlyChoice();

      if (SU) {

        tracePick(Only1, true);

      } else {

        CandPolicy NoPolicy;

        TopCand.reset(NoPolicy);

        // Set the top-down policy based on the state of the current top zone

        // and the instructions outside the zone, including the bottom zone.

        setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);

        pickNodeFromQueue(Top, TopCand);

        assert(TopCand.Reason != NoCand && "failed to find a candidate");

        tracePick(TopCand);

        SU = TopCand.SU;

      }

      IsTopNode = true;

    } else {

      SU = pickNodeBidirectional(IsTopNode);

    }

  } while (SU->isScheduled);


  if (SU->isTopReady())

    Top.removeReady(SU);

  if (SU->isBottomReady())

    Bot.removeReady(SU);


  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "

                    << *SU->getInstr());

  return SU;

}


/// Called after ScheduleDAGMI has scheduled an instruction and updated

/// scheduled/remaining flags in the DAG nodes.

void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {

  if (IsTopNode) {

    SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());

    Top.bumpNode(SU);

  } else {

    SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());

    Bot.bumpNode(SU);

  }

}


ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {

  ScheduleDAGMI *DAG =

      new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(C),

                        /*RemoveKillFlags=*/true);

  const TargetSubtargetInfo &STI = C->MF->getSubtarget();

  // Add MacroFusion mutation if fusions are not empty.

  const auto &MacroFusions = STI.getMacroFusions();

  if (!MacroFusions.empty())

    DAG->addMutation(createMacroFusionDAGMutation(MacroFusions));

  return DAG;

}


//===----------------------------------------------------------------------===//

// ILP Scheduler. Currently for experimental analysis of heuristics.

//===----------------------------------------------------------------------===//


namespace {


/// Order nodes by the ILP metric.

struct ILPOrder {

  const SchedDFSResult *DFSResult = nullptr;

  const BitVector *ScheduledTrees = nullptr;

  bool MaximizeILP;


  ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}


  /// Apply a less-than relation on node priority.

  ///

  /// (Return true if A comes after B in the Q.)

  bool operator()(const SUnit *A, const SUnit *B) const {

    unsigned SchedTreeA = DFSResult->getSubtreeID(A);

    unsigned SchedTreeB = DFSResult->getSubtreeID(B);

    if (SchedTreeA != SchedTreeB) {

      // Unscheduled trees have lower priority.

      if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))

        return ScheduledTrees->test(SchedTreeB);


      // Trees with shallower connections have lower priority.

      if (DFSResult->getSubtreeLevel(SchedTreeA)

          != DFSResult->getSubtreeLevel(SchedTreeB)) {

        return DFSResult->getSubtreeLevel(SchedTreeA)

          < DFSResult->getSubtreeLevel(SchedTreeB);

      }

    }

    if (MaximizeILP)

      return DFSResult->getILP(A) < DFSResult->getILP(B);

    else

      return DFSResult->getILP(A) > DFSResult->getILP(B);

  }

};


/// Schedule based on the ILP metric.

class ILPScheduler : public MachineSchedStrategy {

  ScheduleDAGMILive *DAG = nullptr;

  ILPOrder Cmp;


  std::vector<SUnit*> ReadyQ;


public:

  ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}


  void initialize(ScheduleDAGMI *dag) override {

    assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");

    DAG = static_cast<ScheduleDAGMILive*>(dag);

    DAG->computeDFSResult();

    Cmp.DFSResult = DAG->getDFSResult();

    Cmp.ScheduledTrees = &DAG->getScheduledTrees();

    ReadyQ.clear();

  }


  void registerRoots() override {

    // Restore the heap in ReadyQ with the updated DFS results.

    std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);

  }


  /// Implement MachineSchedStrategy interface.

  /// -----------------------------------------


  /// Callback to select the highest priority node from the ready Q.

  SUnit *pickNode(bool &IsTopNode) override {

    if (ReadyQ.empty()) return nullptr;

    std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);

    SUnit *SU = ReadyQ.back();

    ReadyQ.pop_back();

    IsTopNode = false;

    LLVM_DEBUG(dbgs() << "Pick node "

                      << "SU(" << SU->NodeNum << ") "

                      << " ILP: " << DAG->getDFSResult()->getILP(SU)

                      << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)

                      << " @"

                      << DAG->getDFSResult()->getSubtreeLevel(

                             DAG->getDFSResult()->getSubtreeID(SU))

                      << '\n'

                      << "Scheduling " << *SU->getInstr());

    return SU;

  }


  /// Scheduler callback to notify that a new subtree is scheduled.

  void scheduleTree(unsigned SubtreeID) override {

    std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);

  }


  /// Callback after a node is scheduled. Mark a newly scheduled tree, notify

  /// DFSResults, and resort the priority Q.

  void schedNode(SUnit *SU, bool IsTopNode) override {

    assert(!IsTopNode && "SchedDFSResult needs bottom-up");

  }


  void releaseTopNode(SUnit *) override { /*only called for top roots*/ }


  void releaseBottomNode(SUnit *SU) override {

    ReadyQ.push_back(SU);

    std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);

  }

};


} // end anonymous namespace


static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {

  return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(true));

}

static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {

  return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(false));

}


static MachineSchedRegistry ILPMaxRegistry(

  "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);

static MachineSchedRegistry ILPMinRegistry(

  "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);


//===----------------------------------------------------------------------===//

// Machine Instruction Shuffler for Correctness Testing

//===----------------------------------------------------------------------===//


#ifndef NDEBUG

namespace {


/// Apply a less-than relation on the node order, which corresponds to the

/// instruction order prior to scheduling. IsReverse implements greater-than.

template<bool IsReverse>

struct SUnitOrder {

  bool operator()(SUnit *A, SUnit *B) const {

    if (IsReverse)

      return A->NodeNum > B->NodeNum;

    else

      return A->NodeNum < B->NodeNum;

  }

};


/// Reorder instructions as much as possible.

class InstructionShuffler : public MachineSchedStrategy {

  bool IsAlternating;

  bool IsTopDown;


  // Using a less-than relation (SUnitOrder<false>) for the TopQ priority

  // gives nodes with a higher number higher priority causing the latest

  // instructions to be scheduled first.

  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>

    TopQ;


  // When scheduling bottom-up, use greater-than as the queue priority.

  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>

    BottomQ;


public:

  InstructionShuffler(bool alternate, bool topdown)

    : IsAlternating(alternate), IsTopDown(topdown) {}


  void initialize(ScheduleDAGMI*) override {

    TopQ.clear();

    BottomQ.clear();

  }


  /// Implement MachineSchedStrategy interface.

  /// -----------------------------------------


  SUnit *pickNode(bool &IsTopNode) override {

    SUnit *SU;

    if (IsTopDown) {

      do {

        if (TopQ.empty()) return nullptr;

        SU = TopQ.top();

        TopQ.pop();

      } while (SU->isScheduled);

      IsTopNode = true;

    } else {

      do {

        if (BottomQ.empty()) return nullptr;

        SU = BottomQ.top();

        BottomQ.pop();

      } while (SU->isScheduled);

      IsTopNode = false;

    }

    if (IsAlternating)

      IsTopDown = !IsTopDown;

    return SU;

  }


  void schedNode(SUnit *SU, bool IsTopNode) override {}


  void releaseTopNode(SUnit *SU) override {

    TopQ.push(SU);

  }

  void releaseBottomNode(SUnit *SU) override {

    BottomQ.push(SU);

  }

};


} // end anonymous namespace


static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {

  bool Alternate =

      PreRADirection != MISched::TopDown && PreRADirection != MISched::BottomUp;

  bool TopDown = PreRADirection != MISched::BottomUp;

  return new ScheduleDAGMILive(

      C, std::make_unique<InstructionShuffler>(Alternate, TopDown));

}


static MachineSchedRegistry ShufflerRegistry(

  "shuffle", "Shuffle machine instructions alternating directions",

  createInstructionShuffler);

#endif // !NDEBUG


//===----------------------------------------------------------------------===//

// GraphWriter support for ScheduleDAGMILive.

//===----------------------------------------------------------------------===//


#ifndef NDEBUG

namespace llvm {


template<> struct GraphTraits<

  ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};


template<>

struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {

  DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}


  static std::string getGraphName(const ScheduleDAG *G) {

    return std::string(G->MF.getName());

  }


  static bool renderGraphFromBottomUp() {

    return true;

  }


  static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G) {

    if (ViewMISchedCutoff == 0)

      return false;

    return (Node->Preds.size() > ViewMISchedCutoff

         || Node->Succs.size() > ViewMISchedCutoff);

  }


  /// If you want to override the dot attributes printed for a particular

  /// edge, override this method.

  static std::string getEdgeAttributes(const SUnit *Node,

                                       SUnitIterator EI,

                                       const ScheduleDAG *Graph) {

    if (EI.isArtificialDep())

      return "color=cyan,style=dashed";

    if (EI.isCtrlDep())

      return "color=blue,style=dashed";

    return "";

  }


  static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {

    std::string Str;

    raw_string_ostream SS(Str);

    const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);

    const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?

      static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;

    SS << "SU:" << SU->NodeNum;

    if (DFS)

      SS << " I:" << DFS->getNumInstrs(SU);

    return Str;

  }


  static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {

    return G->getGraphNodeLabel(SU);

  }


  static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {

    std::string Str("shape=Mrecord");

    const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);

    const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?

      static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;

    if (DFS) {

      Str += ",style=filled,fillcolor=\"#";

      Str += DOT::getColorString(DFS->getSubtreeID(N));

      Str += '"';

    }

    return Str;

  }

};


} // end namespace llvm

#endif // NDEBUG


/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG

/// rendered using 'dot'.

void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {

#ifndef NDEBUG

  ViewGraph(this, Name, false, Title);

#else

  errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "

         << "systems with Graphviz or gv!\n";

#endif  // NDEBUG

}


/// Out-of-line implementation with no arguments is handy for gdb.

void ScheduleDAGMI::viewGraph() {

  viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());

}


/// Sort predicate for the intervals stored in an instance of

/// ResourceSegments. Intervals are always disjoint (no intersection

/// for any pairs of intervals), therefore we can sort the totality of

/// the intervals by looking only at the left boundary.

static bool sortIntervals(const ResourceSegments::IntervalTy &A,

                          const ResourceSegments::IntervalTy &B) {

  return A.first < B.first;

}


unsigned ResourceSegments::getFirstAvailableAt(

    unsigned CurrCycle, unsigned AcquireAtCycle, unsigned ReleaseAtCycle,

    std::function<ResourceSegments::IntervalTy(unsigned, unsigned, unsigned)>

        IntervalBuilder) const {

  assert(std::is_sorted(std::begin(_Intervals), std::end(_Intervals),

                        sortIntervals) &&

         "Cannot execute on an un-sorted set of intervals.");


  // Zero resource usage is allowed by TargetSchedule.td but we do not construct

  // a ResourceSegment interval for that situation.

  if (AcquireAtCycle == ReleaseAtCycle)

    return CurrCycle;


  unsigned RetCycle = CurrCycle;

  ResourceSegments::IntervalTy NewInterval =

      IntervalBuilder(RetCycle, AcquireAtCycle, ReleaseAtCycle);

  for (auto &Interval : _Intervals) {

    if (!intersects(NewInterval, Interval))

      continue;


    // Move the interval right next to the top of the one it

    // intersects.

    assert(Interval.second > NewInterval.first &&

           "Invalid intervals configuration.");

    RetCycle += (unsigned)Interval.second - (unsigned)NewInterval.first;

    NewInterval = IntervalBuilder(RetCycle, AcquireAtCycle, ReleaseAtCycle);

  }

  return RetCycle;

}


void ResourceSegments::add(ResourceSegments::IntervalTy A,

                           const unsigned CutOff) {

  assert(A.first <= A.second && "Cannot add negative resource usage");

  assert(CutOff > 0 && "0-size interval history has no use.");

  // Zero resource usage is allowed by TargetSchedule.td, in the case that the

  // instruction needed the resource to be available but does not use it.

  // However, ResourceSegment represents an interval that is closed on the left

  // and open on the right. It is impossible to represent an empty interval when

  // the left is closed. Do not add it to Intervals.

  if (A.first == A.second)

    return;


  assert(all_of(_Intervals,

                [&A](const ResourceSegments::IntervalTy &Interval) -> bool {

                  return !intersects(A, Interval);

                }) &&

         "A resource is being overwritten");

  _Intervals.push_back(A);


  sortAndMerge();


  // Do not keep the full history of the intervals, just the

  // latest #CutOff.

  while (_Intervals.size() > CutOff)

    _Intervals.pop_front();

}


bool ResourceSegments::intersects(ResourceSegments::IntervalTy A,

                                  ResourceSegments::IntervalTy B) {

  assert(A.first <= A.second && "Invalid interval");

  assert(B.first <= B.second && "Invalid interval");


  // Share one boundary.

  if ((A.first == B.first) || (A.second == B.second))

    return true;


  // full intersersect: [    ***     )  B

  //                        [***)       A

  if ((A.first > B.first) && (A.second < B.second))

    return true;


  // right intersect: [     ***)        B

  //                       [***      )  A

  if ((A.first > B.first) && (A.first < B.second) && (A.second > B.second))

    return true;


  // left intersect:      [***      )  B

  //                 [     ***)        A

  if ((A.first < B.first) && (B.first < A.second) && (B.second > B.first))

    return true;


  return false;

}


void ResourceSegments::sortAndMerge() {

  if (_Intervals.size() <= 1)

    return;


  // First sort the collection.

  _Intervals.sort(sortIntervals);


  // can use next because I have at least 2 elements in the list

  auto next = std::next(std::begin(_Intervals));

  auto E = std::end(_Intervals);

  for (; next != E; ++next) {

    if (std::prev(next)->second >= next->first) {

      next->first = std::prev(next)->first;

      _Intervals.erase(std::prev(next));

      continue;

    }

  }

}

DefMI
MachineInstrBuilder MachineInstrBuilder & DefMI
Definition: AArch64ExpandPseudoInsts.cpp:113

MBB
MachineBasicBlock & MBB
Definition: ARMSLSHardening.cpp:71

AliasAnalysis.h

print
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
Definition: ArchiveWriter.cpp:205

ArrayRef.h

getParent
static const Function * getParent(const Value *V)
Definition: BasicAliasAnalysis.cpp:863

true
basic Basic Alias true
Definition: BasicAliasAnalysis.cpp:1981

BitVector.h
This file implements the BitVector class.

B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

A
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")

E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

Machine
COFF::MachineTypes Machine
Definition: COFFYAML.cpp:390

CommandLine.h

clEnumValN
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:686

Compiler.h

LLVM_DUMP_METHOD
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:622

intersects
static std::optional< ArrayRef< InsnRange >::iterator > intersects(const MachineInstr *StartMI, const MachineInstr *EndMI, const ArrayRef< InsnRange > &Ranges, const InstructionOrdering &Ordering)
Check if the instruction range [StartMI, EndMI] intersects any instruction range in Ranges.
Definition: DbgEntityHistoryCalculator.cpp:114

Idx
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
Definition: DeadArgumentElimination.cpp:353

Debug.h

LLVM_DEBUG
#define LLVM_DEBUG(...)
Definition: Debug.h:106

DenseMap.h
This file defines the DenseMap class.

Name
std::string Name
Definition: ELFObjHandler.cpp:77

End
bool End
Definition: ELF_riscv.cpp:480

rem
expand large div rem
Definition: ExpandLargeDivRem.cpp:177

DEBUG_TYPE
#define DEBUG_TYPE
Definition: GenericCycleImpl.h:31

GraphWriter.h

TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:125

MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:112

InitializePasses.h

LaneBitmask.h
A common definition of LaneBitmask for use in TableGen and CodeGen.

LiveInterval.h

LiveIntervals.h

I
#define I(x, y, z)
Definition: MD5.cpp:58

G
#define G(x, y, z)
Definition: MD5.cpp:56

MachineBasicBlock.h

MachineDominators.h

MachineFunctionPass.h

MachineFunction.h

MachineInstr.h

MachineLoopInfo.h

MachineOperand.h

MachinePassRegistry.h

MachineRegisterInfo.h

isSchedBoundary
static bool isSchedBoundary(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB, MachineFunction *MF, const TargetInstrInfo *TII)
Return true of the given instruction should not be included in a scheduling region.
Definition: MachineScheduler.cpp:513

ILPMaxRegistry
static MachineSchedRegistry ILPMaxRegistry("ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler)

tracePick
static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop)
Definition: MachineScheduler.cpp:3236

EnableMemOpCluster
static cl::opt< bool > EnableMemOpCluster("misched-cluster", cl::Hidden, cl::desc("Enable memop clustering."), cl::init(true))

Scheduler
Machine Instruction Scheduler
Definition: MachineScheduler.cpp:274

nextIfDebug
static MachineBasicBlock::const_iterator nextIfDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator End)
If this iterator is a debug value, increment until reaching the End or a non-debug instruction.
Definition: MachineScheduler.cpp:371

MinSubtreeSize
static const unsigned MinSubtreeSize
Definition: MachineScheduler.cpp:197

InvalidCycle
static const unsigned InvalidCycle
Definition: MachineScheduler.cpp:2226

MISchedSortResourcesInTrace
static cl::opt< bool > MISchedSortResourcesInTrace("misched-sort-resources-in-trace", cl::Hidden, cl::init(true), cl::desc("Sort the resources printed in the dump trace"))

EnableCyclicPath
static cl::opt< bool > EnableCyclicPath("misched-cyclicpath", cl::Hidden, cl::desc("Enable cyclic critical path analysis."), cl::init(true))

postmisched
postmisched
Definition: MachineScheduler.cpp:302

priorNonDebug
static MachineBasicBlock::const_iterator priorNonDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator Beg)
Decrement this iterator until reaching the top or a non-debug instr.
Definition: MachineScheduler.cpp:350

MachineSchedOpt
static cl::opt< MachineSchedRegistry::ScheduleDAGCtor, false, RegisterPassParser< MachineSchedRegistry > > MachineSchedOpt("misched", cl::init(&useDefaultMachineSched), cl::Hidden, cl::desc("Machine instruction scheduler to use"))
MachineSchedOpt allows command line selection of the scheduler.

EnableMachineSched
static cl::opt< bool > EnableMachineSched("enable-misched", cl::desc("Enable the machine instruction scheduling pass."), cl::init(true), cl::Hidden)

computeRemLatency
static unsigned computeRemLatency(SchedBoundary &CurrZone)
Compute remaining latency.
Definition: MachineScheduler.cpp:3002

MISchedCutoff
static cl::opt< unsigned > MISchedCutoff("misched-cutoff", cl::Hidden, cl::desc("Stop scheduling after N instructions"), cl::init(~0U))

SchedOnlyBlock
static cl::opt< unsigned > SchedOnlyBlock("misched-only-block", cl::Hidden, cl::desc("Only schedule this MBB#"))

EnableRegPressure
static cl::opt< bool > EnableRegPressure("misched-regpressure", cl::Hidden, cl::desc("Enable register pressure scheduling."), cl::init(true))

GenericSchedRegistry
static MachineSchedRegistry GenericSchedRegistry("converge", "Standard converging scheduler.", createConvergingSched)

HeaderColWidth
static cl::opt< unsigned > HeaderColWidth("misched-dump-schedule-trace-col-header-width", cl::Hidden, cl::desc("Set width of the columns with " "the resources and schedule units"), cl::init(19))

ForceFastCluster
static cl::opt< bool > ForceFastCluster("force-fast-cluster", cl::Hidden, cl::desc("Switch to fast cluster algorithm with the lost " "of some fusion opportunities"), cl::init(false))

FastClusterThreshold
static cl::opt< unsigned > FastClusterThreshold("fast-cluster-threshold", cl::Hidden, cl::desc("The threshold for fast cluster"), cl::init(1000))

checkResourceLimit
static bool checkResourceLimit(unsigned LFactor, unsigned Count, unsigned Latency, bool AfterSchedNode)
Given a Count of resource usage and a Latency value, return true if a SchedBoundary becomes resource ...
Definition: MachineScheduler.cpp:2234

createInstructionShuffler
static ScheduleDAGInstrs * createInstructionShuffler(MachineSchedContext *C)
Definition: MachineScheduler.cpp:4363

useDefaultMachineSched
static ScheduleDAGInstrs * useDefaultMachineSched(MachineSchedContext *C)
A dummy default scheduler factory indicates whether the scheduler is overridden on the command line.
Definition: MachineScheduler.cpp:323

sortIntervals
static bool sortIntervals(const ResourceSegments::IntervalTy &A, const ResourceSegments::IntervalTy &B)
Sort predicate for the intervals stored in an instance of ResourceSegments.
Definition: MachineScheduler.cpp:4470

ColWidth
static cl::opt< unsigned > ColWidth("misched-dump-schedule-trace-col-width", cl::Hidden, cl::desc("Set width of the columns showing resource booking."), cl::init(5))

DefaultSchedRegistry
static MachineSchedRegistry DefaultSchedRegistry("default", "Use the target's default scheduler choice.", useDefaultMachineSched)

SchedOnlyFunc
static cl::opt< std::string > SchedOnlyFunc("misched-only-func", cl::Hidden, cl::desc("Only schedule this function"))

scheduleTableLegend
static const char * scheduleTableLegend
Definition: MachineScheduler.cpp:991

createConvergingSched
static ScheduleDAGInstrs * createConvergingSched(MachineSchedContext *C)
Definition: MachineScheduler.cpp:3863

ViewMISchedCutoff
static cl::opt< unsigned > ViewMISchedCutoff("view-misched-cutoff", cl::Hidden, cl::desc("Hide nodes with more predecessor/successor than cutoff"))
In some situations a few uninteresting nodes depend on nearly all other nodes in the graph,...

ShufflerRegistry
static MachineSchedRegistry ShufflerRegistry("shuffle", "Shuffle machine instructions alternating directions", createInstructionShuffler)

EnablePostRAMachineSched
static cl::opt< bool > EnablePostRAMachineSched("enable-post-misched", cl::desc("Enable the post-ra machine instruction scheduling pass."), cl::init(true), cl::Hidden)

getSchedRegions
static void getSchedRegions(MachineBasicBlock *MBB, MBBRegionsVector &Regions, bool RegionsTopDown)
Definition: MachineScheduler.cpp:542

MIResourceCutOff
static cl::opt< unsigned > MIResourceCutOff("misched-resource-cutoff", cl::Hidden, cl::desc("Number of intervals to track"), cl::init(10))

createILPMaxScheduler
static ScheduleDAGInstrs * createILPMaxScheduler(MachineSchedContext *C)
Definition: MachineScheduler.cpp:4271

ReadyListLimit
static cl::opt< unsigned > ReadyListLimit("misched-limit", cl::Hidden, cl::desc("Limit ready list to N instructions"), cl::init(256))
Avoid quadratic complexity in unusually large basic blocks by limiting the size of the ready lists.

createILPMinScheduler
static ScheduleDAGInstrs * createILPMinScheduler(MachineSchedContext *C)
Definition: MachineScheduler.cpp:4274

MISchedDumpScheduleTrace
static cl::opt< bool > MISchedDumpScheduleTrace("misched-dump-schedule-trace", cl::Hidden, cl::init(false), cl::desc("Dump resource usage at schedule boundary."))

ILPMinRegistry
static MachineSchedRegistry ILPMinRegistry("ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler)

MachineScheduler.h

TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1945

MachineValueType.h

Interval
std::pair< uint64_t, uint64_t > Interval
Definition: MappedBlockStream.cpp:36

P
#define P(N)

if
if(PassOpts->AAPipeline)
Definition: PassBuilderBindings.cpp:64

INITIALIZE_PASS_DEPENDENCY
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55

INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57

INITIALIZE_PASS_BEGIN
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52

Pass.h

PriorityQueue.h
This file defines the PriorityQueue class.

RegisterClassInfo.h

RegisterPressure.h

assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

isSimple
static bool isSimple(Instruction *I)
Definition: SLPVectorizer.cpp:1137

STLExtras.h
This file contains some templates that are useful if you are working with the STL at all.

ScheduleDAGInstrs.h

ScheduleDAGMutation.h

ScheduleDAG.h

ScheduleDFS.h

ScheduleHazardRecognizer.h

SlotIndexes.h

SmallVector.h
This file defines the SmallVector class.

Statistic.h
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

STATISTIC
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166

TargetFrameLowering.h

TargetInstrInfo.h

initialize
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
Definition: TargetLibraryInfo.cpp:917

TargetLowering.h
This file describes how to lower LLVM code to machine code.

TargetPassConfig.h
Target-Independent Code Generator Pass Configuration Options pass.

TargetRegisterInfo.h

TargetSchedule.h

TargetSubtargetInfo.h

Groups
static const X86InstrFMA3Group Groups[]
Definition: X86InstrFMA3Info.cpp:81

RHS
Value * RHS
Definition: X86PartialReduction.cpp:74

LHS
Value * LHS
Definition: X86PartialReduction.cpp:73

LiveDebugValues::DbgValue
Class recording the (high level) value of a variable.
Definition: InstrRefBasedImpl.h:512

T

llvm::AAResultsWrapperPass
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Definition: AliasAnalysis.h:975

llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:78

llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47

llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75

llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98

llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:256

llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41

llvm::ArrayRef::rend
reverse_iterator rend() const
Definition: ArrayRef.h:160

llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168

llvm::ArrayRef::empty
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:163

llvm::ArrayRef::rbegin
reverse_iterator rbegin() const
Definition: ArrayRef.h:159

llvm::BitVector
Definition: BitVector.h:82

llvm::BitVector::test
bool test(unsigned Idx) const
Definition: BitVector.h:461

llvm::BitVector::resize
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
Definition: BitVector.h:341

llvm::BitVector::clear
void clear()
clear - Removes all bits from the bitvector.
Definition: BitVector.h:335

llvm::BitVector::set
BitVector & set()
Definition: BitVector.h:351

llvm::DWARFExpression::Operation
This class represents an Operation in the Expression.
Definition: DWARFExpression.h:32

llvm::DenseMapBase::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:152

llvm::DenseMap
Definition: DenseMap.h:727

llvm::DstOp
Definition: MachineIRBuilder.h:70

llvm::DstOp::getReg
Register getReg() const
Definition: MachineIRBuilder.h:121

llvm::GenericSchedulerBase::traceCandidate
void traceCandidate(const SchedCandidate &Cand)
Definition: MachineScheduler.cpp:3113

llvm::GenericSchedulerBase::Rem
SchedRemainder Rem
Definition: MachineScheduler.h:1181

llvm::GenericSchedulerBase::setPolicy
void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone)
Set the CandPolicy given a scheduling zone given the current resources and latencies inside and outsi...
Definition: MachineScheduler.cpp:3034

llvm::GenericSchedulerBase::RegionPolicy
MachineSchedPolicy RegionPolicy
Definition: MachineScheduler.h:1179

llvm::GenericSchedulerBase::SchedModel
const TargetSchedModel * SchedModel
Definition: MachineScheduler.h:1176

llvm::GenericSchedulerBase::getReasonStr
static const char * getReasonStr(GenericSchedulerBase::CandReason Reason)
Definition: MachineScheduler.cpp:3089

llvm::GenericSchedulerBase::Context
const MachineSchedContext * Context
Definition: MachineScheduler.h:1175

llvm::GenericSchedulerBase::CandReason
CandReason
Represent the type of SchedCandidate found within a single queue.
Definition: MachineScheduler.h:1080

llvm::GenericSchedulerBase::RegExcess
@ RegExcess
Definition: MachineScheduler.h:1081

llvm::GenericSchedulerBase::RegMax
@ RegMax
Definition: MachineScheduler.h:1082

llvm::GenericSchedulerBase::ResourceDemand
@ ResourceDemand
Definition: MachineScheduler.h:1082

llvm::GenericSchedulerBase::Only1
@ Only1
Definition: MachineScheduler.h:1081

llvm::GenericSchedulerBase::ResourceReduce
@ ResourceReduce
Definition: MachineScheduler.h:1082

llvm::GenericSchedulerBase::Cluster
@ Cluster
Definition: MachineScheduler.h:1081

llvm::GenericSchedulerBase::TopPathReduce
@ TopPathReduce
Definition: MachineScheduler.h:1083

llvm::GenericSchedulerBase::NoCand
@ NoCand
Definition: MachineScheduler.h:1081

llvm::GenericSchedulerBase::BotHeightReduce
@ BotHeightReduce
Definition: MachineScheduler.h:1082

llvm::GenericSchedulerBase::RegCritical
@ RegCritical
Definition: MachineScheduler.h:1081

llvm::GenericSchedulerBase::NodeOrder
@ NodeOrder
Definition: MachineScheduler.h:1083

llvm::GenericSchedulerBase::PhysReg
@ PhysReg
Definition: MachineScheduler.h:1081

llvm::GenericSchedulerBase::Stall
@ Stall
Definition: MachineScheduler.h:1081

llvm::GenericSchedulerBase::BotPathReduce
@ BotPathReduce
Definition: MachineScheduler.h:1082

llvm::GenericSchedulerBase::Weak
@ Weak
Definition: MachineScheduler.h:1081

llvm::GenericSchedulerBase::NextDefUse
@ NextDefUse
Definition: MachineScheduler.h:1083

llvm::GenericSchedulerBase::TopDepthReduce
@ TopDepthReduce
Definition: MachineScheduler.h:1083

llvm::GenericSchedulerBase::TRI
const TargetRegisterInfo * TRI
Definition: MachineScheduler.h:1177

llvm::GenericScheduler::checkAcyclicLatency
void checkAcyclicLatency()
Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic critical path by more cycle...
Definition: MachineScheduler.cpp:3341

llvm::GenericScheduler::tryCandidate
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Apply a set of heuristics to a new candidate.
Definition: MachineScheduler.cpp:3524

llvm::GenericScheduler::dumpPolicy
void dumpPolicy() const override
Definition: MachineScheduler.cpp:3321

llvm::GenericScheduler::initialize
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
Definition: MachineScheduler.cpp:3245

llvm::GenericScheduler::initCandidate
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, RegPressureTracker &TempTracker)
Definition: MachineScheduler.cpp:3476

llvm::GenericScheduler::registerRoots
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
Definition: MachineScheduler.cpp:3369

llvm::GenericScheduler::initPolicy
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Initialize the per-region scheduling policy.
Definition: MachineScheduler.cpp:3275

llvm::GenericScheduler::reschedulePhysReg
void reschedulePhysReg(SUnit *SU, bool isTop)
Definition: MachineScheduler.cpp:3798

llvm::GenericScheduler::pickNode
SUnit * pickNode(bool &IsTopNode) override
Pick the best node to balance the schedule. Implements MachineSchedStrategy.
Definition: MachineScheduler.cpp:3738

llvm::GenericScheduler::pickNodeFromQueue
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Candidate)
Pick the best candidate from the queue.
Definition: MachineScheduler.cpp:3635

llvm::GenericScheduler::schedNode
void schedNode(SUnit *SU, bool IsTopNode) override
Update the scheduler's state after scheduling a node.
Definition: MachineScheduler.cpp:3829

llvm::GenericScheduler::pickNodeBidirectional
SUnit * pickNodeBidirectional(bool &IsTopNode)
Pick the best candidate node from either the top or bottom queue.
Definition: MachineScheduler.cpp:3660

llvm::HexagonInstrInfo::getMemOperandsWithOffsetWidth
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
Get the base register and byte offset of a load/store instr.
Definition: HexagonInstrInfo.cpp:3070

llvm::HexagonInstrInfo::isSchedulingBoundary
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
Test if the given instruction should be considered a scheduling boundary.
Definition: HexagonInstrInfo.cpp:1791

llvm::InstrItineraryData
Itinerary data supplied by a subtarget to be used by a target.
Definition: MCInstrItineraries.h:110

llvm::Instruction
Definition: Instruction.h:68

llvm::LiveInterval
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:687

llvm::LiveIntervalsWrapperPass
Definition: LiveIntervals.h:524

llvm::LiveIntervals
Definition: LiveIntervals.h:55

llvm::LiveIntervals::getInstructionFromIndex
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
Definition: LiveIntervals.h:247

llvm::LiveIntervals::handleMove
void handleMove(MachineInstr &MI, bool UpdateFlags=false)
Call this method to notify LiveIntervals that instruction MI has been moved within a basic block.
Definition: LiveIntervals.cpp:1542

llvm::LiveIntervals::getInstructionIndex
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
Definition: LiveIntervals.h:242

llvm::LiveIntervals::getMBBEndIdx
SlotIndex getMBBEndIdx(const MachineBasicBlock *mbb) const
Return the last index in the given basic block.
Definition: LiveIntervals.h:257

llvm::LiveIntervals::getInterval
LiveInterval & getInterval(Register Reg)
Definition: LiveIntervals.h:130

llvm::LiveQueryResult
Result of a LiveRange query.
Definition: LiveInterval.h:90

llvm::LiveQueryResult::valueIn
VNInfo * valueIn() const
Return the value that is live-in to the instruction.
Definition: LiveInterval.h:105

llvm::LiveRange::Query
LiveQueryResult Query(SlotIndex Idx) const
Query Liveness at Idx.
Definition: LiveInterval.h:542

llvm::LiveRange::end
iterator end()
Definition: LiveInterval.h:216

llvm::LiveRange::getVNInfoBefore
VNInfo * getVNInfoBefore(SlotIndex Idx) const
getVNInfoBefore - Return the VNInfo that is live up to but not necessarily including Idx,...
Definition: LiveInterval.h:429

llvm::LiveRange::begin
iterator begin()
Definition: LiveInterval.h:215

llvm::LiveRange::beginIndex
SlotIndex beginIndex() const
beginIndex - Return the lowest numbered slot covered.
Definition: LiveInterval.h:385

llvm::LiveRange::endIndex
SlotIndex endIndex() const
endNumber - return the maximum point of the range of the whole, exclusive.
Definition: LiveInterval.h:392

llvm::LiveRange::isLocal
bool isLocal(SlotIndex Start, SlotIndex End) const
True iff this segment is a single segment that lies between the specified boundaries,...
Definition: LiveInterval.h:518

llvm::LiveRange::find
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
Definition: LiveInterval.cpp:350

llvm::LocationSize
Definition: MemoryLocation.h:68

llvm::LocationSize::hasValue
bool hasValue() const
Definition: MemoryLocation.h:165

llvm::MVT::SimpleValueType
SimpleValueType
Definition: MachineValueType.h:37

llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:125

llvm::MachineBasicBlock::getNumber
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
Definition: MachineBasicBlock.h:1217

llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:355

llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:357

llvm::MachineBasicBlock::getParent
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Definition: MachineBasicBlock.h:311

llvm::MachineBasicBlock::size
unsigned size() const
Definition: MachineBasicBlock.h:325

llvm::MachineBasicBlock::isSuccessor
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
Definition: MachineBasicBlock.cpp:960

llvm::MachineBasicBlock::splice
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
Definition: MachineBasicBlock.h:1109

llvm::MachineBasicBlock::getName
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
Definition: MachineBasicBlock.cpp:326

llvm::MachineDominatorTreeWrapperPass
Analysis pass which computes a MachineDominatorTree.
Definition: MachineDominators.h:131

llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30

llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:169

llvm::MachineFunction
Definition: MachineFunction.h:258

llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:724

llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:695

llvm::MachineFunction::print
void print(raw_ostream &OS, const SlotIndexes *=nullptr) const
print - Print out the MachineFunction in a format suitable for debugging to the specified stream.
Definition: MachineFunction.cpp:649

llvm::MachineInstrBundleIterator< const MachineInstr >

llvm::MachineInstrBundleIterator::getNonConstIterator
nonconst_iterator getNonConstIterator() const
Definition: MachineInstrBundleIterator.h:276

llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:69

llvm::MachineInstr::isCopy
bool isCopy() const
Definition: MachineInstr.h:1438

llvm::MachineInstr::mayLoad
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
Definition: MachineInstr.h:1143

llvm::MachineInstr::mayStore
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Definition: MachineInstr.h:1156

llvm::MachineLoopInfoWrapperPass
Definition: MachineLoopInfo.h:156

llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:48

llvm::MachinePassRegistry
MachinePassRegistry - Track the registration of machine passes.
Definition: MachinePassRegistry.h:73

llvm::MachineRegisterInfo::getNumVirtRegs
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
Definition: MachineRegisterInfo.h:796

llvm::MachineSchedRegistry
MachineSchedRegistry provides a selection of available machine instruction schedulers.
Definition: MachineScheduler.h:156

llvm::MachineSchedRegistry::Registry
static MachinePassRegistry< ScheduleDAGCtor > Registry
Definition: MachineScheduler.h:163

llvm::MachineSchedRegistry::ScheduleDAGCtor
ScheduleDAGInstrs *(*)(MachineSchedContext *) ScheduleDAGCtor
Definition: MachineScheduler.h:158

llvm::MachineSchedStrategy
MachineSchedStrategy - Interface to the scheduling algorithm used by ScheduleDAGMI.
Definition: MachineScheduler.h:219

llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65

llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24

llvm::PostGenericScheduler::initPolicy
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Optionally override the per-region scheduling policy.
Definition: MachineScheduler.cpp:3895

llvm::PostGenericScheduler::tryCandidate
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand)
Apply a set of heuristics to a new candidate for PostRA scheduling.
Definition: MachineScheduler.cpp:3940

llvm::PostGenericScheduler::schedNode
void schedNode(SUnit *SU, bool IsTopNode) override
Called after ScheduleDAGMI has scheduled an instruction and updated scheduled/remaining flags in the ...
Definition: MachineScheduler.cpp:4143

llvm::PostGenericScheduler::pickNodeFromQueue
void pickNodeFromQueue(SchedBoundary &Zone, SchedCandidate &Cand)
Definition: MachineScheduler.cpp:3989

llvm::PostGenericScheduler::initialize
void initialize(ScheduleDAGMI *Dag) override
Initialize the strategy after building the DAG for a new region.
Definition: MachineScheduler.cpp:3875

llvm::PostGenericScheduler::pickNodeBidirectional
SUnit * pickNodeBidirectional(bool &IsTopNode)
Pick the best candidate node from either the top or bottom queue.
Definition: MachineScheduler.cpp:4005

llvm::PostGenericScheduler::registerRoots
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
Definition: MachineScheduler.cpp:3921

llvm::PostGenericScheduler::pickNode
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule.
Definition: MachineScheduler.cpp:4086

llvm::PressureChange
Capture a change in pressure for a single pressure set.
Definition: RegisterPressure.h:102

llvm::PressureChange::getUnitInc
int getUnitInc() const
Definition: RegisterPressure.h:124

llvm::PressureChange::getPSetOrMax
unsigned getPSetOrMax() const
Definition: RegisterPressure.h:120

llvm::PressureChange::isValid
bool isValid() const
Definition: RegisterPressure.h:112

llvm::PressureChange::getPSet
unsigned getPSet() const
Definition: RegisterPressure.h:114

llvm::PressureDiff
List of PressureChanges in order of increasing, unique PSetID.
Definition: RegisterPressure.h:140

llvm::PressureDiff::dump
void dump(const TargetRegisterInfo &TRI) const
Definition: RegisterPressure.cpp:125

llvm::PressureDiff::addPressureChange
void addPressureChange(Register RegUnit, bool IsDec, const MachineRegisterInfo *MRI)
Add a change in pressure to the pressure diff of a given instruction.
Definition: RegisterPressure.cpp:661

llvm::PressureDiffs::clear
void clear()
Definition: RegisterPressure.h:208

llvm::PriorityQueue
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Definition: PriorityQueue.h:28

llvm::PriorityQueue::clear
void clear()
clear - Erase all elements from the queue.
Definition: PriorityQueue.h:76

llvm::ReadyQueue
Helpers for implementing custom MachineSchedStrategy classes.
Definition: MachineScheduler.h:549

llvm::ReadyQueue::push
void push(SUnit *SU)
Definition: MachineScheduler.h:580

llvm::ReadyQueue::end
iterator end()
Definition: MachineScheduler.h:574

llvm::ReadyQueue::find
iterator find(SUnit *SU)
Definition: MachineScheduler.h:578

llvm::ReadyQueue::begin
iterator begin()
Definition: MachineScheduler.h:572

llvm::ReadyQueue::elements
ArrayRef< SUnit * > elements()
Definition: MachineScheduler.h:576

llvm::ReadyQueue::dump
void dump() const
Definition: MachineScheduler.cpp:670

llvm::ReadyQueue::clear
void clear()
Definition: MachineScheduler.h:566

llvm::ReadyQueue::isInQueue
bool isInQueue(SUnit *SU) const
Definition: MachineScheduler.h:562

llvm::ReadyQueue::iterator
std::vector< SUnit * >::iterator iterator
Definition: MachineScheduler.h:570

llvm::ReadyQueue::empty
bool empty() const
Definition: MachineScheduler.h:564

llvm::ReadyQueue::getName
StringRef getName() const
Definition: MachineScheduler.h:559

llvm::ReadyQueue::size
unsigned size() const
Definition: MachineScheduler.h:568

llvm::ReadyQueue::remove
iterator remove(iterator I)
Definition: MachineScheduler.h:585

llvm::RegPressureTracker
Track the current register pressure at some position in the instruction stream, and remember the high...
Definition: RegisterPressure.h:358

llvm::RegPressureTracker::closeRegion
void closeRegion()
Finalize the region boundaries and recored live ins and live outs.
Definition: RegisterPressure.cpp:342

llvm::RegPressureTracker::setPos
void setPos(MachineBasicBlock::const_iterator Pos)
Definition: RegisterPressure.h:420

llvm::RegPressureTracker::getLiveThru
ArrayRef< unsigned > getLiveThru() const
Definition: RegisterPressure.h:455

llvm::RegPressureTracker::closeBottom
void closeBottom()
Set the boundary for the bottom of the region and summarize live outs.
Definition: RegisterPressure.cpp:330

llvm::RegPressureTracker::recede
void recede(SmallVectorImpl< VRegMaskOrUnit > *LiveUses=nullptr)
Recede across the previous instruction.
Definition: RegisterPressure.cpp:862

llvm::RegPressureTracker::getPressure
RegisterPressure & getPressure()
Get the resulting register pressure over the traversed region.
Definition: RegisterPressure.h:459

llvm::RegPressureTracker::addLiveRegs
void addLiveRegs(ArrayRef< VRegMaskOrUnit > Regs)
Force liveness of virtual registers or physical register units.
Definition: RegisterPressure.cpp:696

llvm::RegPressureTracker::recedeSkipDebugValues
void recedeSkipDebugValues()
Recede until we find an instruction which is not a DebugValue.
Definition: RegisterPressure.cpp:841

llvm::RegPressureTracker::getMaxUpwardPressureDelta
void getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction bottom-up.
Definition: RegisterPressure.cpp:1086

llvm::RegPressureTracker::initLiveThru
void initLiveThru(const RegPressureTracker &RPTracker)
Initialize the LiveThru pressure set based on the untied defs found in RPTracker.
Definition: RegisterPressure.cpp:358

llvm::RegPressureTracker::dump
void dump() const
Definition: RegisterPressure.cpp:116

llvm::RegPressureTracker::init
void init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos, bool TrackLaneMasks, bool TrackUntiedDefs)
Setup the RegPressureTracker.
Definition: RegisterPressure.cpp:262

llvm::RegPressureTracker::getPos
MachineBasicBlock::const_iterator getPos() const
Get the MI position corresponding to this register pressure.
Definition: RegisterPressure.h:414

llvm::RegPressureTracker::closeTop
void closeTop()
Set the boundary for the top of the region and summarize live ins.
Definition: RegisterPressure.cpp:318

llvm::RegPressureTracker::getMaxDownwardPressureDelta
void getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction top-down.
Definition: RegisterPressure.cpp:1335

llvm::RegPressureTracker::advance
void advance()
Advance across the current instruction.
Definition: RegisterPressure.cpp:936

llvm::RegPressureTracker::reset
void reset()
Definition: RegisterPressure.cpp:242

llvm::RegPressureTracker::getRegSetPressureAtPos
const std::vector< unsigned > & getRegSetPressureAtPos() const
Get the register set pressure at the current position, which may be less than the pressure across the...
Definition: RegisterPressure.h:464

llvm::RegPressureTracker::getUpwardPressureDelta
void getUpwardPressureDelta(const MachineInstr *MI, PressureDiff &PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit) const
This is the fast version of querying register pressure that does not directly depend on current liven...
Definition: RegisterPressure.cpp:1154

llvm::RegisterClassInfo
Definition: RegisterClassInfo.h:29

llvm::RegisterClassInfo::getNumAllocatableRegs
unsigned getNumAllocatableRegs(const TargetRegisterClass *RC) const
getNumAllocatableRegs - Returns the number of actually allocatable registers in RC in the current fun...
Definition: RegisterClassInfo.h:94

llvm::RegisterClassInfo::getRegPressureSetLimit
unsigned getRegPressureSetLimit(unsigned Idx) const
Get the register unit limit for the given pressure set index.
Definition: RegisterClassInfo.h:146

llvm::RegisterOperands
List of registers defined and used by a machine instruction.
Definition: RegisterPressure.h:166

llvm::RegisterOperands::collect
void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
Definition: RegisterPressure.cpp:565

llvm::RegisterOperands::adjustLaneLiveness
void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the VReg...
Definition: RegisterPressure.cpp:596

llvm::RegisterOperands::detectDeadDefs
void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS)
Use liveness information to find dead defs not marked with a dead flag and move them to the DeadDefs ...
Definition: RegisterPressure.cpp:576

llvm::RegisterPassParser
RegisterPassParser class - Handle the addition of new machine passes.
Definition: MachinePassRegistry.h:138

llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19

llvm::Register::isVirtual
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91

llvm::Register::isPhysicalRegister
static constexpr bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:65

llvm::ResourceSegments::add
void add(IntervalTy A, const unsigned CutOff=10)
Adds an interval [a, b) to the collection of the instance.
Definition: MachineScheduler.cpp:4505

llvm::ResourceSegments::getResourceIntervalBottom
static IntervalTy getResourceIntervalBottom(unsigned C, unsigned AcquireAtCycle, unsigned ReleaseAtCycle)
These function return the interval used by a resource in bottom and top scheduling.
Definition: MachineScheduler.h:726

llvm::ResourceSegments::intersects
static bool intersects(IntervalTy A, IntervalTy B)
Checks whether intervals intersect.
Definition: MachineScheduler.cpp:4532

llvm::ResourceSegments::IntervalTy
std::pair< int64_t, int64_t > IntervalTy
Represents an interval of discrete integer values closed on the left and open on the right: [a,...
Definition: MachineScheduler.h:640

llvm::ResourceSegments::getResourceIntervalTop
static IntervalTy getResourceIntervalTop(unsigned C, unsigned AcquireAtCycle, unsigned ReleaseAtCycle)
Definition: MachineScheduler.h:731

llvm::SDep
Scheduling dependency.
Definition: ScheduleDAG.h:49

llvm::SDep::getSUnit
SUnit * getSUnit() const
Definition: ScheduleDAG.h:498

llvm::SDep::getKind
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:504

llvm::SDep::Anti
@ Anti
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:54

llvm::SDep::Data
@ Data
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:53

llvm::SDep::isWeak
bool isWeak() const
Tests if this a weak dependence.
Definition: ScheduleDAG.h:194

llvm::SDep::Cluster
@ Cluster
Weak DAG edge linking a chain of clustered instrs.
Definition: ScheduleDAG.h:74

llvm::SDep::Artificial
@ Artificial
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:72

llvm::SDep::Weak
@ Weak
Arbitrary weak DAG edge.
Definition: ScheduleDAG.h:73

llvm::SDep::getLatency
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
Definition: ScheduleDAG.h:142

llvm::SDep::isArtificial
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
Definition: ScheduleDAG.h:200

llvm::SDep::isCtrl
bool isCtrl() const
Shorthand for getKind() != SDep::Data.
Definition: ScheduleDAG.h:161

llvm::SDep::getReg
unsigned getReg() const
Returns the register associated with this edge.
Definition: ScheduleDAG.h:218

llvm::SDep::isCluster
bool isCluster() const
Tests if this is an Order dependence that is marked as "cluster", meaning it is artificial and wants ...
Definition: ScheduleDAG.h:206

llvm::SUnitIterator
Definition: ScheduleDAG.h:643

llvm::SUnitIterator::isArtificialDep
bool isArtificialDep() const
Definition: ScheduleDAG.h:686

llvm::SUnitIterator::isCtrlDep
bool isCtrlDep() const
Tests if this is not an SDep::Data dependence.
Definition: ScheduleDAG.h:683

llvm::SUnit
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242

llvm::SUnit::isCall
bool isCall
Is a function call.
Definition: ScheduleDAG.h:287

llvm::SUnit::TopReadyCycle
unsigned TopReadyCycle
Cycle relative to start when node is ready.
Definition: ScheduleDAG.h:278

llvm::SUnit::NodeNum
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:270

llvm::SUnit::NumSuccsLeft
unsigned NumSuccsLeft
Definition: ScheduleDAG.h:275

llvm::SUnit::biasCriticalPath
void biasCriticalPath()
Orders this node's predecessor edges such that the critical path edge occurs first.
Definition: ScheduleDAG.cpp:325

llvm::SUnit::isUnbuffered
bool isUnbuffered
Uses an unbuffered resource.
Definition: ScheduleDAG.h:300

llvm::SUnit::getHeight
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has n...
Definition: ScheduleDAG.h:424

llvm::SUnit::Latency
unsigned short Latency
Node latency.
Definition: ScheduleDAG.h:303

llvm::SUnit::getDepth
unsigned getDepth() const
Returns the depth of this node, which is the length of the maximum path up to any node which has no p...
Definition: ScheduleDAG.h:416

llvm::SUnit::isScheduled
bool isScheduled
True once scheduled.
Definition: ScheduleDAG.h:296

llvm::SUnit::NumPredsLeft
unsigned NumPredsLeft
Definition: ScheduleDAG.h:274

llvm::SUnit::hasPhysRegDefs
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:292

llvm::SUnit::BotReadyCycle
unsigned BotReadyCycle
Cycle relative to end when node is ready.
Definition: ScheduleDAG.h:279

llvm::SUnit::Succs
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:263

llvm::SUnit::hasReservedResource
bool hasReservedResource
Uses a reserved resource.
Definition: ScheduleDAG.h:301

llvm::SUnit::WeakPredsLeft
unsigned WeakPredsLeft
Definition: ScheduleDAG.h:276

llvm::SUnit::isBottomReady
bool isBottomReady() const
Definition: ScheduleDAG.h:467

llvm::SUnit::hasPhysRegUses
bool hasPhysRegUses
Has physreg uses.
Definition: ScheduleDAG.h:291

llvm::SUnit::isTopReady
bool isTopReady() const
Definition: ScheduleDAG.h:464

llvm::SUnit::Preds
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:262

llvm::SUnit::WeakSuccsLeft
unsigned WeakSuccsLeft
Definition: ScheduleDAG.h:277

llvm::SUnit::getInstr
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:390

llvm::SchedBoundary
Each Scheduling boundary is associated with ready queues.
Definition: MachineScheduler.h:841

llvm::SchedBoundary::getNextResourceCycleByInstance
unsigned getNextResourceCycleByInstance(unsigned InstanceIndex, unsigned ReleaseAtCycle, unsigned AcquireAtCycle)
Compute the next cycle at which the given processor resource unit can be scheduled.
Definition: MachineScheduler.cpp:2347

llvm::SchedBoundary::releasePending
void releasePending()
Release pending ready nodes in to the available queue.
Definition: MachineScheduler.cpp:2840

llvm::SchedBoundary::getDependentLatency
unsigned getDependentLatency() const
Definition: MachineScheduler.h:976

llvm::SchedBoundary::getScheduledLatency
unsigned getScheduledLatency() const
Get the number of latency cycles "covered" by the scheduled instructions.
Definition: MachineScheduler.h:981

llvm::SchedBoundary::incExecutedResources
void incExecutedResources(unsigned PIdx, unsigned Count)
Definition: MachineScheduler.cpp:2622

llvm::SchedBoundary::isResourceLimited
bool isResourceLimited() const
Definition: MachineScheduler.h:1012

llvm::SchedBoundary::SchedModel
const TargetSchedModel * SchedModel
Definition: MachineScheduler.h:851

llvm::SchedBoundary::getExecutedCount
unsigned getExecutedCount() const
Get a scaled count for the minimum execution time of the scheduled micro-ops that are ready to execut...
Definition: MachineScheduler.h:1004

llvm::SchedBoundary::getLatencyStallCycles
unsigned getLatencyStallCycles(SUnit *SU)
Get the difference between the given SUnit's ready time and the current cycle.
Definition: MachineScheduler.cpp:2335

llvm::SchedBoundary::findMaxLatency
unsigned findMaxLatency(ArrayRef< SUnit * > ReadySUs)
Definition: MachineScheduler.cpp:2503

llvm::SchedBoundary::DAG
ScheduleDAGMI * DAG
Definition: MachineScheduler.h:850

llvm::SchedBoundary::dumpReservedCycles
void dumpReservedCycles() const
Dump the state of the information that tracks resource usage.
Definition: MachineScheduler.cpp:2914

llvm::SchedBoundary::isTop
bool isTop() const
Definition: MachineScheduler.h:965

llvm::SchedBoundary::getOtherResourceCount
unsigned getOtherResourceCount(unsigned &OtherCritIdx)
Definition: MachineScheduler.cpp:2524

llvm::SchedBoundary::Rem
SchedRemainder * Rem
Definition: MachineScheduler.h:852

llvm::SchedBoundary::bumpNode
void bumpNode(SUnit *SU)
Move the boundary of scheduled code by one SUnit.
Definition: MachineScheduler.cpp:2675

llvm::SchedBoundary::getCriticalCount
unsigned getCriticalCount() const
Get the scaled count of scheduled micro-ops and resources, including executed resources.
Definition: MachineScheduler.h:995

llvm::SchedBoundary::pickOnlyChoice
SUnit * pickOnlyChoice()
Call this before applying any other heuristics to the Available queue.
Definition: MachineScheduler.cpp:2879

llvm::SchedBoundary::releaseNode
void releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue, unsigned Idx=0)
Release SU to make it ready.
Definition: MachineScheduler.cpp:2550

llvm::SchedBoundary::countResource
unsigned countResource(const MCSchedClassDesc *SC, unsigned PIdx, unsigned Cycles, unsigned ReadyCycle, unsigned StartAtCycle)
Add the given processor resource to this scheduled zone.
Definition: MachineScheduler.cpp:2638

llvm::SchedBoundary::~SchedBoundary
~SchedBoundary()
Definition: MachineScheduler.cpp:2228

llvm::SchedBoundary::HazardRec
ScheduleHazardRecognizer * HazardRec
Definition: MachineScheduler.h:857

llvm::SchedBoundary::isUnbufferedGroup
bool isUnbufferedGroup(unsigned PIdx) const
Definition: MachineScheduler.h:1027

llvm::SchedBoundary::Available
ReadyQueue Available
Definition: MachineScheduler.h:854

llvm::SchedBoundary::init
void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem)
Definition: MachineScheduler.cpp:2301

llvm::SchedBoundary::getResourceCount
unsigned getResourceCount(unsigned ResIdx) const
Definition: MachineScheduler.h:989

llvm::SchedBoundary::bumpCycle
void bumpCycle(unsigned NextCycle)
Move the boundary of scheduled code by one cycle.
Definition: MachineScheduler.cpp:2584

llvm::SchedBoundary::getCurrMOps
unsigned getCurrMOps() const
Micro-ops issued in the current cycle.
Definition: MachineScheduler.h:973

llvm::SchedBoundary::getCurrCycle
unsigned getCurrCycle() const
Number of cycles to issue the instructions scheduled in this zone.
Definition: MachineScheduler.h:970

llvm::SchedBoundary::Pending
ReadyQueue Pending
Definition: MachineScheduler.h:855

llvm::SchedBoundary::reset
void reset()
Definition: MachineScheduler.cpp:2243

llvm::SchedBoundary::checkHazard
bool checkHazard(SUnit *SU)
Does this SU have a hazard within the current instruction group.
Definition: MachineScheduler.cpp:2454

llvm::SchedBoundary::getNextResourceCycle
std::pair< unsigned, unsigned > getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx, unsigned ReleaseAtCycle, unsigned AcquireAtCycle)
Compute the next cycle at which the given processor resource can be scheduled.
Definition: MachineScheduler.cpp:2373

llvm::SchedBoundary::dumpScheduledState
void dumpScheduledState() const
Definition: MachineScheduler.cpp:2940

llvm::SchedBoundary::removeReady
void removeReady(SUnit *SU)
Remove SU from the ready set for this boundary.
Definition: MachineScheduler.cpp:2867

llvm::SchedBoundary::getZoneCritResIdx
unsigned getZoneCritResIdx() const
Definition: MachineScheduler.h:1009

llvm::SchedBoundary::getUnscheduledLatency
unsigned getUnscheduledLatency(SUnit *SU) const
Definition: MachineScheduler.h:985

llvm::SchedDFSResult
Compute the values of each DAG node for various metrics during DFS.
Definition: ScheduleDFS.h:65

llvm::SchedDFSResult::getNumInstrs
unsigned getNumInstrs(const SUnit *SU) const
Get the number of instructions in the given subtree and its children.
Definition: ScheduleDFS.h:145

llvm::SchedDFSResult::getSubtreeID
unsigned getSubtreeID(const SUnit *SU) const
Get the ID of the subtree the given DAG node belongs to.
Definition: ScheduleDFS.h:169

llvm::SchedDFSResult::clear
void clear()
Clear the results.
Definition: ScheduleDFS.h:128

llvm::SchedDFSResult::getILP
ILPValue getILP(const SUnit *SU) const
Get the ILP value for a DAG node.
Definition: ScheduleDFS.h:158

llvm::SchedDFSResult::compute
void compute(ArrayRef< SUnit > SUnits)
Compute various metrics for the DAG with given roots.
Definition: ScheduleDAGInstrs.cpp:1465

llvm::SchedDFSResult::getNumSubtrees
unsigned getNumSubtrees() const
The number of subtrees detected in this DAG.
Definition: ScheduleDFS.h:163

llvm::SchedDFSResult::getSubtreeLevel
unsigned getSubtreeLevel(unsigned SubtreeID) const
Get the connection level of a subtree.
Definition: ScheduleDFS.h:180

llvm::SchedDFSResult::resize
void resize(unsigned NumSUnits)
Initialize the result data with the size of the DAG.
Definition: ScheduleDFS.h:136

llvm::SchedDFSResult::scheduleTree
void scheduleTree(unsigned SubtreeID)
Scheduler callback to update SubtreeConnectLevels when a tree is initially scheduled.
Definition: ScheduleDAGInstrs.cpp:1511

llvm::ScheduleDAGInstrs
A ScheduleDAG for scheduling lists of MachineInstr.
Definition: ScheduleDAGInstrs.h:114

llvm::ScheduleDAGInstrs::finishBlock
virtual void finishBlock()
Cleans up after scheduling in the given block.
Definition: ScheduleDAGInstrs.cpp:185

llvm::ScheduleDAGInstrs::end
MachineBasicBlock::iterator end() const
Returns an iterator to the bottom of the current scheduling region.
Definition: ScheduleDAGInstrs.h:287

llvm::ScheduleDAGInstrs::BB
MachineBasicBlock * BB
The block in which to insert instructions.
Definition: ScheduleDAGInstrs.h:140

llvm::ScheduleDAGInstrs::FirstDbgValue
MachineInstr * FirstDbgValue
Definition: ScheduleDAGInstrs.h:256

llvm::ScheduleDAGInstrs::startBlock
virtual void startBlock(MachineBasicBlock *BB)
Prepares to perform scheduling in the given block.
Definition: ScheduleDAGInstrs.cpp:181

llvm::ScheduleDAGInstrs::getSchedModel
const TargetSchedModel * getSchedModel() const
Gets the machine model for instruction scheduling.
Definition: ScheduleDAGInstrs.h:269

llvm::ScheduleDAGInstrs::RegionEnd
MachineBasicBlock::iterator RegionEnd
The end of the range to be scheduled.
Definition: ScheduleDAGInstrs.h:146

llvm::ScheduleDAGInstrs::getSchedClass
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and cache a resolved scheduling class for an SUnit.
Definition: ScheduleDAGInstrs.h:272

llvm::ScheduleDAGInstrs::DbgValues
DbgValueVector DbgValues
Remember instruction that precedes DBG_VALUE.
Definition: ScheduleDAGInstrs.h:255

llvm::ScheduleDAGInstrs::addEdge
bool addEdge(SUnit *SuccSU, const SDep &PredDep)
Add a DAG edge to the given SU with the given predecessor dependence data.
Definition: ScheduleDAGInstrs.cpp:1218

llvm::ScheduleDAGInstrs::DumpDirection
DumpDirection
The direction that should be used to dump the scheduled Sequence.
Definition: ScheduleDAGInstrs.h:188

llvm::ScheduleDAGInstrs::BottomUp
@ BottomUp
Definition: ScheduleDAGInstrs.h:190

llvm::ScheduleDAGInstrs::TopDown
@ TopDown
Definition: ScheduleDAGInstrs.h:189

llvm::ScheduleDAGInstrs::Bidirectional
@ Bidirectional
Definition: ScheduleDAGInstrs.h:191

llvm::ScheduleDAGInstrs::TrackLaneMasks
bool TrackLaneMasks
Whether lane masks should get tracked.
Definition: ScheduleDAGInstrs.h:134

llvm::ScheduleDAGInstrs::dumpNode
void dumpNode(const SUnit &SU) const override
Definition: ScheduleDAGInstrs.cpp:1174

llvm::ScheduleDAGInstrs::IsReachable
bool IsReachable(SUnit *SU, SUnit *TargetSU)
IsReachable - Checks if SU is reachable from TargetSU.
Definition: ScheduleDAGInstrs.h:279

llvm::ScheduleDAGInstrs::DumpDir
DumpDirection DumpDir
Definition: ScheduleDAGInstrs.h:198

llvm::ScheduleDAGInstrs::begin
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
Definition: ScheduleDAGInstrs.h:284

llvm::ScheduleDAGInstrs::buildSchedGraph
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
Definition: ScheduleDAGInstrs.cpp:735

llvm::ScheduleDAGInstrs::getSUnit
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
Definition: ScheduleDAGInstrs.h:397

llvm::ScheduleDAGInstrs::SchedModel
TargetSchedModel SchedModel
TargetSchedModel provides an interface to the machine model.
Definition: ScheduleDAGInstrs.h:120

llvm::ScheduleDAGInstrs::canAddEdge
bool canAddEdge(SUnit *SuccSU, SUnit *PredSU)
True if an edge can be added from PredSU to SuccSU without creating a cycle.
Definition: ScheduleDAGInstrs.cpp:1214

llvm::ScheduleDAGInstrs::RegionBegin
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
Definition: ScheduleDAGInstrs.h:143

llvm::ScheduleDAGInstrs::enterRegion
virtual void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs)
Initialize the DAG and common scheduler state for a new scheduling region.
Definition: ScheduleDAGInstrs.cpp:190

llvm::ScheduleDAGInstrs::dump
void dump() const override
Definition: ScheduleDAGInstrs.cpp:1185

llvm::ScheduleDAGInstrs::setDumpDirection
void setDumpDirection(DumpDirection D)
Definition: ScheduleDAGInstrs.h:195

llvm::ScheduleDAGMILive
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
Definition: MachineScheduler.h:407

llvm::ScheduleDAGMILive::scheduleMI
void scheduleMI(SUnit *SU, bool IsTopNode)
Move an instruction and update register pressure.
Definition: MachineScheduler.cpp:1645

llvm::ScheduleDAGMILive::schedule
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
Definition: MachineScheduler.cpp:1455

llvm::ScheduleDAGMILive::VRegUses
VReg2SUnitMultiMap VRegUses
Maps vregs to the SUnits of their uses in the current scheduling region.
Definition: MachineScheduler.h:419

llvm::ScheduleDAGMILive::computeDFSResult
void computeDFSResult()
Compute a DFSResult after DAG building is complete, and before any queue comparisons.
Definition: MachineScheduler.cpp:1538

llvm::ScheduleDAGMILive::getPressureDiff
PressureDiff & getPressureDiff(const SUnit *SU)
Definition: MachineScheduler.h:475

llvm::ScheduleDAGMILive::DFSResult
SchedDFSResult * DFSResult
Information about DAG subtrees.
Definition: MachineScheduler.h:413

llvm::ScheduleDAGMILive::enterRegion
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
Definition: MachineScheduler.cpp:1230

llvm::ScheduleDAGMILive::initQueues
void initQueues(ArrayRef< SUnit * > TopRoots, ArrayRef< SUnit * > BotRoots)
Release ExitSU predecessors and setup scheduler queues.
Definition: MachineScheduler.cpp:1635

llvm::ScheduleDAGMILive::ShouldTrackLaneMasks
bool ShouldTrackLaneMasks
Definition: MachineScheduler.h:428

llvm::ScheduleDAGMILive::BotRPTracker
RegPressureTracker BotRPTracker
Definition: MachineScheduler.h:443

llvm::ScheduleDAGMILive::buildDAGWithRegPressure
void buildDAGWithRegPressure()
Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking enabled.
Definition: MachineScheduler.cpp:1515

llvm::ScheduleDAGMILive::RegionCriticalPSets
std::vector< PressureChange > RegionCriticalPSets
List of pressure sets that exceed the target's pressure limit before scheduling, listed in increasing...
Definition: MachineScheduler.h:435

llvm::ScheduleDAGMILive::updateScheduledPressure
void updateScheduledPressure(const SUnit *SU, const std::vector< unsigned > &NewMaxPressure)
Definition: MachineScheduler.cpp:1327

llvm::ScheduleDAGMILive::SUPressureDiffs
PressureDiffs SUPressureDiffs
Definition: MachineScheduler.h:424

llvm::ScheduleDAGMILive::computeCyclicCriticalPath
unsigned computeCyclicCriticalPath()
Compute the cyclic critical path through the DAG.
Definition: MachineScheduler.cpp:1574

llvm::ScheduleDAGMILive::initRegPressure
void initRegPressure()
Definition: MachineScheduler.cpp:1252

llvm::ScheduleDAGMILive::updatePressureDiffs
void updatePressureDiffs(ArrayRef< VRegMaskOrUnit > LiveUses)
Update the PressureDiff array for liveness after scheduling this instruction.
Definition: MachineScheduler.cpp:1355

llvm::ScheduleDAGMILive::collectVRegUses
void collectVRegUses(SUnit &SU)
Definition: MachineScheduler.cpp:1188

llvm::ScheduleDAGMILive::RegClassInfo
RegisterClassInfo * RegClassInfo
Definition: MachineScheduler.h:409

llvm::ScheduleDAGMILive::getDFSResult
const SchedDFSResult * getDFSResult() const
Return a non-null DFS result if the scheduling strategy initialized it.
Definition: MachineScheduler.h:487

llvm::ScheduleDAGMILive::RPTracker
RegPressureTracker RPTracker
Definition: MachineScheduler.h:430

llvm::ScheduleDAGMILive::ShouldTrackPressure
bool ShouldTrackPressure
Register pressure in this region computed by initRegPressure.
Definition: MachineScheduler.h:427

llvm::ScheduleDAGMILive::~ScheduleDAGMILive
~ScheduleDAGMILive() override
Definition: MachineScheduler.cpp:1184

llvm::ScheduleDAGMILive::dump
void dump() const override
Definition: MachineScheduler.cpp:1422

llvm::ScheduleDAGMILive::getScheduledTrees
BitVector & getScheduledTrees()
Definition: MachineScheduler.h:489

llvm::ScheduleDAGMILive::LiveRegionEnd
MachineBasicBlock::iterator LiveRegionEnd
Definition: MachineScheduler.h:416

llvm::ScheduleDAGMILive::ScheduledTrees
BitVector ScheduledTrees
Definition: MachineScheduler.h:414

llvm::ScheduleDAGMILive::TopRPTracker
RegPressureTracker TopRPTracker
Definition: MachineScheduler.h:439

llvm::ScheduleDAGMI
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
Definition: MachineScheduler.h:285

llvm::ScheduleDAGMI::dumpSchedule
void dumpSchedule() const
dump the scheduled Sequence.
Definition: MachineScheduler.cpp:1157

llvm::ScheduleDAGMI::SchedImpl
std::unique_ptr< MachineSchedStrategy > SchedImpl
Definition: MachineScheduler.h:289

llvm::ScheduleDAGMI::startBlock
void startBlock(MachineBasicBlock *bb) override
Prepares to perform scheduling in the given block.
Definition: MachineScheduler.cpp:761

llvm::ScheduleDAGMI::releasePred
void releasePred(SUnit *SU, SDep *PredEdge)
ReleasePred - Decrement the NumSuccsLeft count of a predecessor.
Definition: MachineScheduler.cpp:728

llvm::ScheduleDAGMI::initQueues
void initQueues(ArrayRef< SUnit * > TopRoots, ArrayRef< SUnit * > BotRoots)
Release ExitSU predecessors and setup scheduler queues.
Definition: MachineScheduler.cpp:929

llvm::ScheduleDAGMI::addMutation
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
Definition: MachineScheduler.h:337

llvm::ScheduleDAGMI::moveInstruction
void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos)
Change the position of an instruction within the basic block and update live ranges and region bounda...
Definition: MachineScheduler.cpp:797

llvm::ScheduleDAGMI::releasePredecessors
void releasePredecessors(SUnit *SU)
releasePredecessors - Call releasePred on each of SU's predecessors.
Definition: MachineScheduler.cpp:756

llvm::ScheduleDAGMI::postProcessDAG
void postProcessDAG()
Apply each ScheduleDAGMutation step in order.
Definition: MachineScheduler.cpp:904

llvm::ScheduleDAGMI::NextClusterSucc
const SUnit * NextClusterSucc
Definition: MachineScheduler.h:302

llvm::ScheduleDAGMI::dumpScheduleTraceTopDown
void dumpScheduleTraceTopDown() const
Print execution trace of the schedule top-down or bottom-up.
Definition: MachineScheduler.cpp:993

llvm::ScheduleDAGMI::NextClusterPred
const SUnit * NextClusterPred
Record the next node in a scheduled cluster.
Definition: MachineScheduler.h:301

llvm::ScheduleDAGMI::checkSchedLimit
bool checkSchedLimit()
Definition: MachineScheduler.cpp:815

llvm::ScheduleDAGMI::top
MachineBasicBlock::iterator top() const
Definition: MachineScheduler.h:342

llvm::ScheduleDAGMI::schedule
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
Definition: MachineScheduler.cpp:830

llvm::ScheduleDAGMI::findRootsAndBiasEdges
void findRootsAndBiasEdges(SmallVectorImpl< SUnit * > &TopRoots, SmallVectorImpl< SUnit * > &BotRoots)
Definition: MachineScheduler.cpp:910

llvm::ScheduleDAGMI::bottom
MachineBasicBlock::iterator bottom() const
Definition: MachineScheduler.h:343

llvm::ScheduleDAGMI::CurrentBottom
MachineBasicBlock::iterator CurrentBottom
The bottom of the unscheduled zone.
Definition: MachineScheduler.h:298

llvm::ScheduleDAGMI::hasVRegLiveness
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
Definition: MachineScheduler.h:330

llvm::ScheduleDAGMI::enterRegion
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
Definition: MachineScheduler.cpp:775

llvm::ScheduleDAGMI::AA
AAResults * AA
Definition: MachineScheduler.h:287

llvm::ScheduleDAGMI::getLIS
LiveIntervals * getLIS() const
Definition: MachineScheduler.h:327

llvm::ScheduleDAGMI::viewGraph
void viewGraph() override
Out-of-line implementation with no arguments is handy for gdb.
Definition: MachineScheduler.cpp:4462

llvm::ScheduleDAGMI::releaseSucc
void releaseSucc(SUnit *SU, SDep *SuccEdge)
ReleaseSucc - Decrement the NumPredsLeft count of a successor.
Definition: MachineScheduler.cpp:691

llvm::ScheduleDAGMI::dumpScheduleTraceBottomUp
void dumpScheduleTraceBottomUp() const
Definition: MachineScheduler.cpp:1074

llvm::ScheduleDAGMI::~ScheduleDAGMI
~ScheduleDAGMI() override

llvm::ScheduleDAGMI::finishBlock
void finishBlock() override
Cleans up after scheduling in the given block.
Definition: MachineScheduler.cpp:766

llvm::ScheduleDAGMI::LIS
LiveIntervals * LIS
Definition: MachineScheduler.h:288

llvm::ScheduleDAGMI::getNextClusterPred
const SUnit * getNextClusterPred() const
Definition: MachineScheduler.h:364

llvm::ScheduleDAGMI::updateQueues
void updateQueues(SUnit *SU, bool IsTopNode)
Update scheduler DAG and queues after scheduling an instruction.
Definition: MachineScheduler.cpp:959

llvm::ScheduleDAGMI::placeDebugValues
void placeDebugValues()
Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
Definition: MachineScheduler.cpp:970

llvm::ScheduleDAGMI::CurrentTop
MachineBasicBlock::iterator CurrentTop
The top of the unscheduled zone.
Definition: MachineScheduler.h:295

llvm::ScheduleDAGMI::releaseSuccessors
void releaseSuccessors(SUnit *SU)
releaseSuccessors - Call releaseSucc on each of SU's successors.
Definition: MachineScheduler.cpp:719

llvm::ScheduleDAGMI::getNextClusterSucc
const SUnit * getNextClusterSucc() const
Definition: MachineScheduler.h:366

llvm::ScheduleDAGMI::Mutations
std::vector< std::unique_ptr< ScheduleDAGMutation > > Mutations
Ordered list of DAG postprocessing steps.
Definition: MachineScheduler.h:292

llvm::ScheduleDAGMutation
Mutate the DAG as a postpass after normal DAG building.
Definition: ScheduleDAGMutation.h:22

llvm::ScheduleDAG
Definition: ScheduleDAG.h:572

llvm::ScheduleDAG::MRI
MachineRegisterInfo & MRI
Virtual/real register map.
Definition: ScheduleDAG.h:578

llvm::ScheduleDAG::TII
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:575

llvm::ScheduleDAG::SUnits
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:579

llvm::ScheduleDAG::TRI
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:576

llvm::ScheduleDAG::EntrySU
SUnit EntrySU
Special node for the region entry.
Definition: ScheduleDAG.h:580

llvm::ScheduleDAG::MF
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:577

llvm::ScheduleDAG::dumpNodeAll
void dumpNodeAll(const SUnit &SU) const
Definition: ScheduleDAG.cpp:365

llvm::ScheduleDAG::ExitSU
SUnit ExitSU
Special node for the region exit.
Definition: ScheduleDAG.h:581

llvm::ScheduleHazardRecognizer::RecedeCycle
virtual void RecedeCycle()
RecedeCycle - This callback is invoked whenever the next bottom-up instruction to be scheduled cannot...
Definition: ScheduleHazardRecognizer.h:109

llvm::ScheduleHazardRecognizer::Reset
virtual void Reset()
Reset - This callback is invoked when a new block of instructions is about to be schedule.
Definition: ScheduleHazardRecognizer.h:67

llvm::ScheduleHazardRecognizer::EmitInstruction
virtual void EmitInstruction(SUnit *)
EmitInstruction - This callback is invoked when an instruction is emitted, to advance the hazard stat...
Definition: ScheduleHazardRecognizer.h:71

llvm::ScheduleHazardRecognizer::NoHazard
@ NoHazard
Definition: ScheduleHazardRecognizer.h:38

llvm::ScheduleHazardRecognizer::AdvanceCycle
virtual void AdvanceCycle()
AdvanceCycle - This callback is invoked whenever the next top-down instruction to be scheduled cannot...
Definition: ScheduleHazardRecognizer.h:104

llvm::ScheduleHazardRecognizer::getHazardType
virtual HazardType getHazardType(SUnit *, int Stalls=0)
getHazardType - Return the hazard type of emitting this node.
Definition: ScheduleHazardRecognizer.h:60

llvm::ScheduleHazardRecognizer::isEnabled
bool isEnabled() const
Definition: ScheduleHazardRecognizer.h:45

llvm::SlotIndex
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:65

llvm::SlotIndex::isSameInstr
static bool isSameInstr(SlotIndex A, SlotIndex B)
isSameInstr - Return true if A and B refer to the same instruction.
Definition: SlotIndexes.h:176

llvm::SlotIndex::getRegSlot
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
Definition: SlotIndexes.h:237

llvm::SlotIndexesWrapperPass
Definition: SlotIndexes.h:663

llvm::SmallVectorBase::size
size_t size() const
Definition: SmallVector.h:78

llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573

llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:610

llvm::SmallVectorImpl::resize
void resize(size_type N)
Definition: SmallVector.h:638

llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition: SmallVector.h:413

llvm::SmallVectorTemplateCommon::end
iterator end()
Definition: SmallVector.h:269

llvm::SmallVectorTemplateCommon::const_reverse_iterator
std::reverse_iterator< const_iterator > const_reverse_iterator
Definition: SmallVector.h:254

llvm::SmallVectorTemplateCommon::begin
iterator begin()
Definition: SmallVector.h:267

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196

llvm::SparseMultiSet::find
iterator find(const KeyT &Key)
Find an element by its key.
Definition: SparseMultiSet.h:375

llvm::SparseMultiSet::clear
void clear()
Clears the set.
Definition: SparseMultiSet.h:342

llvm::SparseMultiSet::end
iterator end()
Returns an iterator past this container.
Definition: SparseMultiSet.h:319

llvm::SparseMultiSet::insert
iterator insert(const ValueT &Val)
Insert a new element at the tail of the subset list.
Definition: SparseMultiSet.h:419

llvm::SparseMultiSet< VReg2SUnit, VirtReg2IndexFunctor >::iterator
iterator_base< SparseMultiSet * > iterator
Definition: SparseMultiSet.h:311

llvm::SparseMultiSet::setUniverse
void setUniverse(unsigned U)
Set the universe size which determines the largest key the set can hold.
Definition: SparseMultiSet.h:202

llvm::SrcOp
Definition: MachineIRBuilder.h:142

llvm::SrcOp::getReg
Register getReg() const
Definition: MachineIRBuilder.h:194

llvm::TargetFrameLowering
Information about stack frame layout on the target.
Definition: TargetFrameLowering.h:45

llvm::TargetFrameLowering::StackGrowsDown
@ StackGrowsDown
Definition: TargetFrameLowering.h:49

llvm::TargetFrameLowering::getStackGrowthDirection
StackDirection getStackGrowthDirection() const
getStackGrowthDirection - Return the direction the stack grows
Definition: TargetFrameLowering.h:94

llvm::TargetInstrInfo
TargetInstrInfo - Interface to description of machine instruction set.
Definition: TargetInstrInfo.h:112

llvm::TargetInstrInfo::CreateTargetMIHazardRecognizer
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
Definition: TargetInstrInfo.cpp:1413

llvm::TargetLoweringBase::getRegClassFor
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
Definition: TargetLowering.h:1042

llvm::TargetLoweringBase::isTypeLegal
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
Definition: TargetLowering.h:1093

llvm::TargetLowering
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Definition: TargetLowering.h:3780

llvm::TargetPassConfig
Target-Independent Code Generator Pass Configuration Options.
Definition: TargetPassConfig.h:85

llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:235

llvm::TargetRegisterInfo::getRegPressureSetName
virtual const char * getRegPressureSetName(unsigned Idx) const =0
Get the name of this register unit pressure set.

llvm::TargetSchedModel
Provide an instruction scheduling machine model to CodeGen passes.
Definition: TargetSchedule.h:30

llvm::TargetSchedModel::getResourceName
const char * getResourceName(unsigned PIdx) const
Definition: TargetSchedule.h:122

llvm::TargetSchedModel::mustEndGroup
bool mustEndGroup(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return true if current group must end.
Definition: TargetSchedule.cpp:83

llvm::TargetSchedModel::getIssueWidth
unsigned getIssueWidth() const
Maximum number of micro-ops that may be scheduled per cycle.
Definition: TargetSchedule.h:98

llvm::TargetSchedModel::getMicroOpFactor
unsigned getMicroOpFactor() const
Multiply number of micro-ops by this factor to normalize it relative to other resources.
Definition: TargetSchedule.h:149

llvm::TargetSchedModel::getWriteProcResEnd
ProcResIter getWriteProcResEnd(const MCSchedClassDesc *SC) const
Definition: TargetSchedule.h:137

llvm::TargetSchedModel::hasInstrSchedModel
bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model.
Definition: TargetSchedule.cpp:42

llvm::TargetSchedModel::mustBeginGroup
bool mustBeginGroup(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return true if new group must begin.
Definition: TargetSchedule.cpp:72

llvm::TargetSchedModel::getLatencyFactor
unsigned getLatencyFactor() const
Multiply cycle count by this factor to normalize it relative to other resources.
Definition: TargetSchedule.h:155

llvm::TargetSchedModel::getResourceFactor
unsigned getResourceFactor(unsigned ResIdx) const
Multiply the number of units consumed for a resource by this factor to normalize it relative to other...
Definition: TargetSchedule.h:143

llvm::TargetSchedModel::getMicroOpBufferSize
unsigned getMicroOpBufferSize() const
Number of micro-ops that may be buffered for OOO execution.
Definition: TargetSchedule.h:160

llvm::TargetSchedModel::getNumMicroOps
unsigned getNumMicroOps(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return the number of issue slots required for this MI.
Definition: TargetSchedule.cpp:94

llvm::TargetSchedModel::getProcResource
const MCProcResourceDesc * getProcResource(unsigned PIdx) const
Get a processor resource by ID for convenience.
Definition: TargetSchedule.h:117

llvm::TargetSchedModel::getNumProcResourceKinds
unsigned getNumProcResourceKinds() const
Get the number of kinds of resources for this target.
Definition: TargetSchedule.h:112

llvm::TargetSchedModel::getInstrItineraries
const InstrItineraryData * getInstrItineraries() const
Definition: TargetSchedule.h:82

llvm::TargetSchedModel::enableIntervals
bool enableIntervals() const
Definition: TargetSchedule.cpp:343

llvm::TargetSchedModel::getWriteProcResBegin
ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const
Definition: TargetSchedule.h:133

llvm::TargetSubtargetInfo
TargetSubtargetInfo - Generic base class for all target subtargets.
Definition: TargetSubtargetInfo.h:63

llvm::TargetSubtargetInfo::getMacroFusions
virtual std::vector< MacroFusionPredTy > getMacroFusions() const
Get the list of MacroFusion predicates.
Definition: TargetSubtargetInfo.h:345

llvm::TargetSubtargetInfo::enableMachineScheduler
virtual bool enableMachineScheduler() const
True if the subtarget should run MachineScheduler after aggressive coalescing.
Definition: TargetSubtargetInfo.cpp:36

llvm::TargetSubtargetInfo::overridePostRASchedPolicy
virtual void overridePostRASchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const
Override generic post-ra scheduling policy within a region.
Definition: TargetSubtargetInfo.h:243

llvm::TargetSubtargetInfo::overrideSchedPolicy
virtual void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const
Override generic scheduling policy within a region.
Definition: TargetSubtargetInfo.h:233

llvm::TargetSubtargetInfo::enablePostRAMachineScheduler
virtual bool enablePostRAMachineScheduler() const
True if the subtarget should run a machine scheduler after register allocation.
Definition: TargetSubtargetInfo.cpp:53

llvm::TargetSubtargetInfo::getFrameLowering
virtual const TargetFrameLowering * getFrameLowering() const
Definition: TargetSubtargetInfo.h:98

llvm::TargetSubtargetInfo::getInstrInfo
virtual const TargetInstrInfo * getInstrInfo() const
Definition: TargetSubtargetInfo.h:97

llvm::TargetSubtargetInfo::getTargetLowering
virtual const TargetLowering * getTargetLowering() const
Definition: TargetSubtargetInfo.h:101

llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81

llvm::VNInfo
VNInfo - Value Number Information.
Definition: LiveInterval.h:53

llvm::VNInfo::def
SlotIndex def
The index of the defining instruction.
Definition: LiveInterval.h:61

llvm::VNInfo::isPHIDef
bool isPHIDef() const
Returns true if this value is defined by a PHI instruction (or was, PHI instructions may have been el...
Definition: LiveInterval.h:78

llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition: CommandLine.h:399

llvm::cl::opt
Definition: CommandLine.h:1423

llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52

llvm::raw_string_ostream
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:661

unsigned

iterator_range.h
This provides a very simple, boring adaptor for a begin and end iterator into a range type.

ErrorHandling.h

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143

false
Definition: StackSlotColoring.cpp:193

llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34

llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24

llvm::DOT::getColorString
StringRef getColorString(unsigned NodeNumber)
Get a color string for this node number.
Definition: GraphWriter.cpp:91

llvm::MISched::Bidirectional
@ Bidirectional
Definition: MachineScheduler.h:107

llvm::MISched::BottomUp
@ BottomUp
Definition: MachineScheduler.h:106

llvm::MISched::Unspecified
@ Unspecified
Definition: MachineScheduler.h:104

llvm::MISched::TopDown
@ TopDown
Definition: MachineScheduler.h:105

llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:371

llvm::X86::FirstMacroFusionInstKind::Cmp
@ Cmp

llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:137

llvm::cl::apply
void apply(Opt *O, const Mod &M, const Mods &... Ms)
Definition: CommandLine.h:1309

llvm::cl::values
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:711

llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443

llvm::codeview::FrameCookieKind::Copy
@ Copy

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18

llvm::Offset
@ Offset
Definition: DWP.cpp:480

llvm::operator<
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:361

llvm::stable_sort
void stable_sort(R &&Range)
Definition: STLExtras.h:2037

llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739

llvm::PrintDAGs
cl::opt< bool > PrintDAGs

llvm::getWeakLeft
unsigned getWeakLeft(const SUnit *SU, bool isTop)
Definition: MachineScheduler.cpp:3428

llvm::createMacroFusionDAGMutation
std::unique_ptr< ScheduleDAGMutation > createMacroFusionDAGMutation(ArrayRef< MacroFusionPredTy > Predicates, bool BranchOnly=false)
Create a DAG scheduling mutation to pair instructions back to back for instructions that benefit acco...
Definition: MacroFusion.cpp:215

llvm::right_justify
FormattedString right_justify(StringRef Str, unsigned Width)
right_justify - add spaces before string so total output is Width characters.
Definition: Format.h:153

llvm::PostRADirection
cl::opt< MISched::Direction > PostRADirection("misched-postra-direction", cl::Hidden, cl::desc("Post reg-alloc list scheduling direction"), cl::init(MISched::Unspecified), cl::values(clEnumValN(MISched::TopDown, "topdown", "Force top-down post reg-alloc list scheduling"), clEnumValN(MISched::BottomUp, "bottomup", "Force bottom-up post reg-alloc list scheduling"), clEnumValN(MISched::Bidirectional, "bidirectional", "Force bidirectional post reg-alloc list scheduling")))

llvm::Latency
@ Latency
Definition: SIMachineScheduler.h:34

llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:77

llvm::PrintLaneMask
Printable PrintLaneMask(LaneBitmask LaneMask)
Create Printable object to print LaneBitmasks on a raw_ostream.
Definition: LaneBitmask.h:92

llvm::MISchedDumpReservedCycles
cl::opt< bool > MISchedDumpReservedCycles("misched-dump-reserved-cycles", cl::Hidden, cl::init(false), cl::desc("Dump resource usage at schedule boundary."))

llvm::initializePostMachineSchedulerPass
void initializePostMachineSchedulerPass(PassRegistry &)

llvm::VerifyScheduling
cl::opt< bool > VerifyScheduling

llvm::MachineSchedulerID
char & MachineSchedulerID
MachineScheduler - This pass schedules machine instructions.
Definition: MachineScheduler.cpp:264

llvm::PostMachineSchedulerID
char & PostMachineSchedulerID
PostMachineScheduler - This pass schedules machine instructions postRA.
Definition: MachineScheduler.cpp:295

llvm::createGenericSchedLive
ScheduleDAGMILive * createGenericSchedLive(MachineSchedContext *C)
Create the standard converging machine scheduler.
Definition: MachineScheduler.cpp:3845

llvm::ViewMISchedDAGs
cl::opt< bool > ViewMISchedDAGs

llvm::tryPressure
bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason, const TargetRegisterInfo *TRI, const MachineFunction &MF)
Definition: MachineScheduler.cpp:3389

llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1664

llvm::printVRegOrUnit
Printable printVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *TRI)
Create Printable object to print virtual registers and physical registers on a raw_ostream.
Definition: TargetRegisterInfo.cpp:161

llvm::createStoreClusterDAGMutation
std::unique_ptr< ScheduleDAGMutation > createStoreClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, bool ReorderWhileClustering=false)
If ReorderWhileClustering is set to true, no attempt will be made to reduce reordering due to store c...
Definition: MachineScheduler.cpp:1829

llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163

llvm::DumpCriticalPathLength
cl::opt< bool > DumpCriticalPathLength("misched-dcpl", cl::Hidden, cl::desc("Print critical path length to stdout"))

llvm::tryLatency
bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary &Zone)
Definition: MachineScheduler.cpp:3202

llvm::errs
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition: raw_ostream.cpp:907

llvm::initializeMachineSchedulerPass
void initializeMachineSchedulerPass(PassRegistry &)

llvm::createGenericSchedPostRA
ScheduleDAGMI * createGenericSchedPostRA(MachineSchedContext *C)
Create a generic scheduler with no vreg liveness or DAG mutation passes.
Definition: MachineScheduler.cpp:4153

llvm::left_justify
FormattedString left_justify(StringRef Str, unsigned Width)
left_justify - append spaces after string so total output is Width characters.
Definition: Format.h:146

llvm::PreRADirection
cl::opt< MISched::Direction > PreRADirection

llvm::createLoadClusterDAGMutation
std::unique_ptr< ScheduleDAGMutation > createLoadClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, bool ReorderWhileClustering=false)
If ReorderWhileClustering is set to true, no attempt will be made to reduce reordering due to store c...
Definition: MachineScheduler.cpp:1820

llvm::tryGreater
bool tryGreater(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Definition: MachineScheduler.cpp:3186

llvm::ViewGraph
void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
ViewGraph - Emit a dot graph, run 'dot', run gv on the postscript file, then cleanup.
Definition: GraphWriter.h:427

llvm::dumpRegSetPressure
void dumpRegSetPressure(ArrayRef< unsigned > SetPressure, const TargetRegisterInfo *TRI)
Definition: RegisterPressure.cpp:80

llvm::tryLess
bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Return true if this heuristic determines order.
Definition: MachineScheduler.cpp:3170

llvm::createCopyConstrainDAGMutation
std::unique_ptr< ScheduleDAGMutation > createCopyConstrainDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
Definition: MachineScheduler.cpp:2052

llvm::printReg
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Definition: TargetRegisterInfo.cpp:107

llvm::printMBBReference
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Definition: MachineBasicBlock.cpp:122

llvm::MischedDetailResourceBooking
cl::opt< bool > MischedDetailResourceBooking("misched-detail-resource-booking", cl::Hidden, cl::init(false), cl::desc("Show details of invoking getNextResoufceCycle."))

llvm::biasPhysReg
int biasPhysReg(const SUnit *SU, bool isTop)
Minimize physical register live ranges.
Definition: MachineScheduler.cpp:3439

std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860

raw_ostream.h

N
#define N

llvm::DOTGraphTraits< ScheduleDAGMI * >::getNodeDescription
static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G)
Definition: MachineScheduler.cpp:4429

llvm::DOTGraphTraits< ScheduleDAGMI * >::getEdgeAttributes
static std::string getEdgeAttributes(const SUnit *Node, SUnitIterator EI, const ScheduleDAG *Graph)
If you want to override the dot attributes printed for a particular edge, override this method.
Definition: MachineScheduler.cpp:4407

llvm::DOTGraphTraits< ScheduleDAGMI * >::getGraphName
static std::string getGraphName(const ScheduleDAG *G)
Definition: MachineScheduler.cpp:4390

llvm::DOTGraphTraits< ScheduleDAGMI * >::getNodeLabel
static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G)
Definition: MachineScheduler.cpp:4417

llvm::DOTGraphTraits< ScheduleDAGMI * >::isNodeHidden
static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G)
Definition: MachineScheduler.cpp:4398

llvm::DOTGraphTraits< ScheduleDAGMI * >::DOTGraphTraits
DOTGraphTraits(bool isSimple=false)
Definition: MachineScheduler.cpp:4388

llvm::DOTGraphTraits< ScheduleDAGMI * >::getNodeAttributes
static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G)
Definition: MachineScheduler.cpp:4433

llvm::DOTGraphTraits< ScheduleDAGMI * >::renderGraphFromBottomUp
static bool renderGraphFromBottomUp()
Definition: MachineScheduler.cpp:4394

llvm::DOTGraphTraits
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
Definition: DOTGraphTraits.h:166

llvm::DefaultDOTGraphTraits
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits ...
Definition: DOTGraphTraits.h:28

llvm::GenericSchedulerBase::CandPolicy
Policy for scheduling the next instruction in the candidate's zone.
Definition: MachineScheduler.h:1090

llvm::GenericSchedulerBase::CandPolicy::ReduceResIdx
unsigned ReduceResIdx
Definition: MachineScheduler.h:1092

llvm::GenericSchedulerBase::CandPolicy::DemandResIdx
unsigned DemandResIdx
Definition: MachineScheduler.h:1093

llvm::GenericSchedulerBase::CandPolicy::ReduceLatency
bool ReduceLatency
Definition: MachineScheduler.h:1091

llvm::GenericSchedulerBase::SchedCandidate
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of p...
Definition: MachineScheduler.h:1128

llvm::GenericSchedulerBase::SchedCandidate::setBest
void setBest(SchedCandidate &Best)
Definition: MachineScheduler.h:1161

llvm::GenericSchedulerBase::SchedCandidate::SU
SUnit * SU
Definition: MachineScheduler.h:1132

llvm::GenericSchedulerBase::SchedCandidate::reset
void reset(const CandPolicy &NewPolicy)
Definition: MachineScheduler.h:1149

llvm::GenericSchedulerBase::SchedCandidate::initResourceDelta
void initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
Definition: MachineScheduler.cpp:2970

llvm::GenericSchedulerBase::SchedCandidate::RPDelta
RegPressureDelta RPDelta
Definition: MachineScheduler.h:1141

llvm::GenericSchedulerBase::SchedCandidate::AtTop
bool AtTop
Definition: MachineScheduler.h:1138

llvm::GenericSchedulerBase::SchedCandidate::ResDelta
SchedResourceDelta ResDelta
Definition: MachineScheduler.h:1144

llvm::GenericSchedulerBase::SchedCandidate::isValid
bool isValid() const
Definition: MachineScheduler.h:1158

llvm::GenericSchedulerBase::SchedCandidate::Reason
CandReason Reason
Definition: MachineScheduler.h:1135

llvm::GenericSchedulerBase::SchedCandidate::Policy
CandPolicy Policy
Definition: MachineScheduler.h:1129

llvm::GenericSchedulerBase::SchedResourceDelta
Status of an instruction's critical resource consumption.
Definition: MachineScheduler.h:1108

llvm::GenericSchedulerBase::SchedResourceDelta::CritResources
unsigned CritResources
Definition: MachineScheduler.h:1110

llvm::GenericSchedulerBase::SchedResourceDelta::DemandedResources
unsigned DemandedResources
Definition: MachineScheduler.h:1113

llvm::GraphTraits
Definition: GraphTraits.h:38

llvm::LaneBitmask::getNone
static constexpr LaneBitmask getNone()
Definition: LaneBitmask.h:81

llvm::MCProcResourceDesc::BufferSize
int BufferSize
Definition: MCSchedule.h:51

llvm::MCProcResourceDesc::Name
const char * Name
Definition: MCSchedule.h:35

llvm::MCProcResourceDesc::SubUnitsIdxBegin
const unsigned * SubUnitsIdxBegin
Definition: MCSchedule.h:56

llvm::MCProcResourceDesc::NumUnits
unsigned NumUnits
Definition: MCSchedule.h:36

llvm::MCSchedClassDesc
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition: MCSchedule.h:121

llvm::MCWriteProcResEntry
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition: MCSchedule.h:66

llvm::MachineSchedContext
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
Definition: MachineScheduler.h:136

llvm::MachineSchedContext::RegClassInfo
RegisterClassInfo * RegClassInfo
Definition: MachineScheduler.h:144

llvm::MachineSchedContext::~MachineSchedContext
virtual ~MachineSchedContext()
Definition: MachineScheduler.cpp:212

llvm::MachineSchedContext::MachineSchedContext
MachineSchedContext()
Definition: MachineScheduler.cpp:208

llvm::MachineSchedPolicy::OnlyTopDown
bool OnlyTopDown
Definition: MachineScheduler.h:201

llvm::MachineSchedPolicy::DisableLatencyHeuristic
bool DisableLatencyHeuristic
Definition: MachineScheduler.h:206

llvm::MachineSchedPolicy::OnlyBottomUp
bool OnlyBottomUp
Definition: MachineScheduler.h:202

llvm::MachineSchedPolicy::ComputeDFSResult
bool ComputeDFSResult
Definition: MachineScheduler.h:209

llvm::MachineSchedPolicy::ShouldTrackPressure
bool ShouldTrackPressure
Definition: MachineScheduler.h:194

llvm::MachineSchedPolicy::ShouldTrackLaneMasks
bool ShouldTrackLaneMasks
Track LaneMasks to allow reordering of independent subregister writes of the same vreg.
Definition: MachineScheduler.h:197

llvm::MemOp
Definition: TargetLowering.h:115

llvm::RegPressureDelta::CriticalMax
PressureChange CriticalMax
Definition: RegisterPressure.h:242

llvm::RegPressureDelta::CurrentMax
PressureChange CurrentMax
Definition: RegisterPressure.h:243

llvm::RegPressureDelta::Excess
PressureChange Excess
Definition: RegisterPressure.h:241

llvm::RegionPressure
RegisterPressure computed within a region of instructions delimited by TopPos and BottomPos.
Definition: RegisterPressure.h:82

llvm::RegisterPressure::LiveOutRegs
SmallVector< VRegMaskOrUnit, 8 > LiveOutRegs
Definition: RegisterPressure.h:53

llvm::RegisterPressure::LiveInRegs
SmallVector< VRegMaskOrUnit, 8 > LiveInRegs
List of live in virtual registers or physical register units.
Definition: RegisterPressure.h:52

llvm::RegisterPressure::MaxSetPressure
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
Definition: RegisterPressure.h:49

llvm::SchedRemainder
Summarize the unscheduled region.
Definition: MachineScheduler.h:597

llvm::SchedRemainder::init
void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
Definition: MachineScheduler.cpp:2279

llvm::SchedRemainder::RemIssueCount
unsigned RemIssueCount
Definition: MachineScheduler.h:603

llvm::SchedRemainder::CriticalPath
unsigned CriticalPath
Definition: MachineScheduler.h:599

llvm::SchedRemainder::CyclicCritPath
unsigned CyclicCritPath
Definition: MachineScheduler.h:600

llvm::SchedRemainder::RemainingCounts
SmallVector< unsigned, 16 > RemainingCounts
Definition: MachineScheduler.h:608

llvm::SchedRemainder::IsAcyclicLatencyLimited
bool IsAcyclicLatencyLimited
Definition: MachineScheduler.h:605

llvm::SchedRemainder::reset
void reset()
Definition: MachineScheduler.h:612

llvm::VReg2SUnit
An individual mapping from virtual register number to SUnit.
Definition: ScheduleDAGInstrs.h:52

llvm::VRegMaskOrUnit
Definition: RegisterPressure.h:38

llvm::cl::desc
Definition: CommandLine.h:409