doxygen/MachineScheduler_8cpp_source.html

//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

// MachineScheduler schedules machine instructions after phi elimination. It

// preserves LiveIntervals so it can be invoked before register allocation.

//

//===----------------------------------------------------------------------===//


#include "llvm/CodeGen/MachineScheduler.h"

#include "llvm/ADT/ArrayRef.h"

#include "llvm/ADT/BitVector.h"

#include "llvm/ADT/DenseMap.h"

#include "llvm/ADT/PriorityQueue.h"

#include "llvm/ADT/STLExtras.h"

#include "llvm/ADT/SmallVector.h"

#include "llvm/ADT/Statistic.h"

#include "llvm/ADT/iterator_range.h"

#include "llvm/Analysis/AliasAnalysis.h"

#include "llvm/CodeGen/LiveInterval.h"

#include "llvm/CodeGen/LiveIntervals.h"

#include "llvm/CodeGen/MachineBasicBlock.h"

#include "llvm/CodeGen/MachineDominators.h"

#include "llvm/CodeGen/MachineFunction.h"

#include "llvm/CodeGen/MachineFunctionPass.h"

#include "llvm/CodeGen/MachineInstr.h"

#include "llvm/CodeGen/MachineLoopInfo.h"

#include "llvm/CodeGen/MachineOperand.h"

#include "llvm/CodeGen/MachinePassRegistry.h"

#include "llvm/CodeGen/MachineRegisterInfo.h"

#include "llvm/CodeGen/RegisterClassInfo.h"

#include "llvm/CodeGen/RegisterPressure.h"

#include "llvm/CodeGen/ScheduleDAG.h"

#include "llvm/CodeGen/ScheduleDAGInstrs.h"

#include "llvm/CodeGen/ScheduleDAGMutation.h"

#include "llvm/CodeGen/ScheduleDFS.h"

#include "llvm/CodeGen/ScheduleHazardRecognizer.h"

#include "llvm/CodeGen/SlotIndexes.h"

#include "llvm/CodeGen/TargetFrameLowering.h"

#include "llvm/CodeGen/TargetInstrInfo.h"

#include "llvm/CodeGen/TargetLowering.h"

#include "llvm/CodeGen/TargetPassConfig.h"

#include "llvm/CodeGen/TargetRegisterInfo.h"

#include "llvm/CodeGen/TargetSchedule.h"

#include "llvm/CodeGen/TargetSubtargetInfo.h"

#include "llvm/CodeGenTypes/MachineValueType.h"

#include "llvm/Config/llvm-config.h"

#include "llvm/InitializePasses.h"

#include "llvm/MC/LaneBitmask.h"

#include "llvm/Pass.h"

#include "llvm/Support/CommandLine.h"

#include "llvm/Support/Compiler.h"

#include "llvm/Support/Debug.h"

#include "llvm/Support/ErrorHandling.h"

#include "llvm/Support/GraphWriter.h"

#include "llvm/Support/raw_ostream.h"

#include <algorithm>

#include <cassert>

#include <cstdint>

#include <iterator>

#include <limits>

#include <memory>

#include <string>

#include <tuple>

#include <utility>

#include <vector>


using namespace llvm;


#define DEBUG_TYPE "machine-scheduler"


STATISTIC(NumClustered, "Number of load/store pairs clustered");


namespace llvm {


cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,

                           cl::desc("Force top-down list scheduling"));

cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,

                            cl::desc("Force bottom-up list scheduling"));

namespace MISchedPostRASched {

enum Direction {

  TopDown,

  BottomUp,

  Bidirectional,

};

} // end namespace MISchedPostRASched

cl::opt<MISchedPostRASched::Direction> PostRADirection(

    "misched-postra-direction", cl::Hidden,

    cl::desc("Post reg-alloc list scheduling direction"),

    // Default to top-down because it was implemented first and existing targets

    // expect that behavior by default.

    cl::init(MISchedPostRASched::TopDown),

    cl::values(

        clEnumValN(MISchedPostRASched::TopDown, "topdown",

                   "Force top-down post reg-alloc list scheduling"),

        clEnumValN(MISchedPostRASched::BottomUp, "bottomup",

                   "Force bottom-up post reg-alloc list scheduling"),

        clEnumValN(MISchedPostRASched::Bidirectional, "bidirectional",

                   "Force bidirectional post reg-alloc list scheduling")));

cl::opt<bool>

DumpCriticalPathLength("misched-dcpl", cl::Hidden,

                       cl::desc("Print critical path length to stdout"));


cl::opt<bool> VerifyScheduling(

    "verify-misched", cl::Hidden,

    cl::desc("Verify machine instrs before and after machine scheduling"));


#ifndef NDEBUG

cl::opt<bool> ViewMISchedDAGs(

    "view-misched-dags", cl::Hidden,

    cl::desc("Pop up a window to show MISched dags after they are processed"));

cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,

                        cl::desc("Print schedule DAGs"));

cl::opt<bool> MISchedDumpReservedCycles(

    "misched-dump-reserved-cycles", cl::Hidden, cl::init(false),

    cl::desc("Dump resource usage at schedule boundary."));

cl::opt<bool> MischedDetailResourceBooking(

    "misched-detail-resource-booking", cl::Hidden, cl::init(false),

    cl::desc("Show details of invoking getNextResoufceCycle."));

#else

const bool ViewMISchedDAGs = false;

const bool PrintDAGs = false;

const bool MischedDetailResourceBooking = false;

#ifdef LLVM_ENABLE_DUMP

const bool MISchedDumpReservedCycles = false;

#endif // LLVM_ENABLE_DUMP

#endif // NDEBUG


} // end namespace llvm


#ifndef NDEBUG

/// In some situations a few uninteresting nodes depend on nearly all other

/// nodes in the graph, provide a cutoff to hide them.

static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,

  cl::desc("Hide nodes with more predecessor/successor than cutoff"));


static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,

  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));


static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,

  cl::desc("Only schedule this function"));

static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,

                                        cl::desc("Only schedule this MBB#"));

#endif // NDEBUG


/// Avoid quadratic complexity in unusually large basic blocks by limiting the

/// size of the ready lists.

static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,

  cl::desc("Limit ready list to N instructions"), cl::init(256));


static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,

  cl::desc("Enable register pressure scheduling."), cl::init(true));


static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,

  cl::desc("Enable cyclic critical path analysis."), cl::init(true));


static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,

                                        cl::desc("Enable memop clustering."),

                                        cl::init(true));

static cl::opt<bool>

    ForceFastCluster("force-fast-cluster", cl::Hidden,

                     cl::desc("Switch to fast cluster algorithm with the lost "

                              "of some fusion opportunities"),

                     cl::init(false));

static cl::opt<unsigned>

    FastClusterThreshold("fast-cluster-threshold", cl::Hidden,

                         cl::desc("The threshold for fast cluster"),

                         cl::init(1000));


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

static cl::opt<bool> MISchedDumpScheduleTrace(

    "misched-dump-schedule-trace", cl::Hidden, cl::init(false),

    cl::desc("Dump resource usage at schedule boundary."));

static cl::opt<unsigned>

    HeaderColWidth("misched-dump-schedule-trace-col-header-width", cl::Hidden,

                   cl::desc("Set width of the columns with "

                            "the resources and schedule units"),

                   cl::init(19));

static cl::opt<unsigned>

    ColWidth("misched-dump-schedule-trace-col-width", cl::Hidden,

             cl::desc("Set width of the columns showing resource booking."),

             cl::init(5));

static cl::opt<bool> MISchedSortResourcesInTrace(

    "misched-sort-resources-in-trace", cl::Hidden, cl::init(true),

    cl::desc("Sort the resources printed in the dump trace"));

#endif


static cl::opt<unsigned>

    MIResourceCutOff("misched-resource-cutoff", cl::Hidden,

                     cl::desc("Number of intervals to track"), cl::init(10));


// DAG subtrees must have at least this many nodes.

static const unsigned MinSubtreeSize = 8;


// Pin the vtables to this file.

void MachineSchedStrategy::anchor() {}


void ScheduleDAGMutation::anchor() {}


//===----------------------------------------------------------------------===//

// Machine Instruction Scheduling Pass and Registry

//===----------------------------------------------------------------------===//


MachineSchedContext::MachineSchedContext() {

  RegClassInfo = new RegisterClassInfo();

}


MachineSchedContext::~MachineSchedContext() {

  delete RegClassInfo;

}


namespace {


/// Base class for a machine scheduler class that can run at any point.

class MachineSchedulerBase : public MachineSchedContext,

                             public MachineFunctionPass {

public:

  MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}


  void print(raw_ostream &O, const Module* = nullptr) const override;


protected:

  void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);

};


/// MachineScheduler runs after coalescing and before register allocation.

class MachineScheduler : public MachineSchedulerBase {

public:

  MachineScheduler();


  void getAnalysisUsage(AnalysisUsage &AU) const override;


  bool runOnMachineFunction(MachineFunction&) override;


  static char ID; // Class identification, replacement for typeinfo


protected:

  ScheduleDAGInstrs *createMachineScheduler();

};


/// PostMachineScheduler runs after shortly before code emission.

class PostMachineScheduler : public MachineSchedulerBase {

public:

  PostMachineScheduler();


  void getAnalysisUsage(AnalysisUsage &AU) const override;


  bool runOnMachineFunction(MachineFunction&) override;


  static char ID; // Class identification, replacement for typeinfo


protected:

  ScheduleDAGInstrs *createPostMachineScheduler();

};


} // end anonymous namespace


char MachineScheduler::ID = 0;


char &llvm::MachineSchedulerID = MachineScheduler::ID;


INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,

                      "Machine Instruction Scheduler", false, false)

INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)

INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)

INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)

INITIALIZE_PASS_DEPENDENCY(SlotIndexes)

INITIALIZE_PASS_DEPENDENCY(LiveIntervals)

INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,

                    "Machine Instruction Scheduler", false, false)


MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {

  initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());

}


void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {

  AU.setPreservesCFG();

  AU.addRequired<MachineDominatorTree>();

  AU.addRequired<MachineLoopInfo>();

  AU.addRequired<AAResultsWrapperPass>();

  AU.addRequired<TargetPassConfig>();

  AU.addRequired<SlotIndexes>();

  AU.addPreserved<SlotIndexes>();

  AU.addRequired<LiveIntervals>();

  AU.addPreserved<LiveIntervals>();

  MachineFunctionPass::getAnalysisUsage(AU);

}


char PostMachineScheduler::ID = 0;


char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;


INITIALIZE_PASS_BEGIN(PostMachineScheduler, "postmisched",

                      "PostRA Machine Instruction Scheduler", false, false)

INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)

INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)

INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)

INITIALIZE_PASS_END(PostMachineScheduler, "postmisched",

                    "PostRA Machine Instruction Scheduler", false, false)


PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {

  initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());

}


void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {

  AU.setPreservesCFG();

  AU.addRequired<MachineDominatorTree>();

  AU.addRequired<MachineLoopInfo>();

  AU.addRequired<AAResultsWrapperPass>();

  AU.addRequired<TargetPassConfig>();

  MachineFunctionPass::getAnalysisUsage(AU);

}


MachinePassRegistry<MachineSchedRegistry::ScheduleDAGCtor>

    MachineSchedRegistry::Registry;


/// A dummy default scheduler factory indicates whether the scheduler

/// is overridden on the command line.

static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {

  return nullptr;

}


/// MachineSchedOpt allows command line selection of the scheduler.

static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,

               RegisterPassParser<MachineSchedRegistry>>

MachineSchedOpt("misched",

                cl::init(&useDefaultMachineSched), cl::Hidden,

                cl::desc("Machine instruction scheduler to use"));


static MachineSchedRegistry

DefaultSchedRegistry("default", "Use the target's default scheduler choice.",

                     useDefaultMachineSched);


static cl::opt<bool> EnableMachineSched(

    "enable-misched",

    cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),

    cl::Hidden);


static cl::opt<bool> EnablePostRAMachineSched(

    "enable-post-misched",

    cl::desc("Enable the post-ra machine instruction scheduling pass."),

    cl::init(true), cl::Hidden);


/// Decrement this iterator until reaching the top or a non-debug instr.

static MachineBasicBlock::const_iterator

priorNonDebug(MachineBasicBlock::const_iterator I,

              MachineBasicBlock::const_iterator Beg) {

  assert(I != Beg && "reached the top of the region, cannot decrement");

  while (--I != Beg) {

    if (!I->isDebugOrPseudoInstr())

      break;

  }

  return I;

}


/// Non-const version.

static MachineBasicBlock::iterator

priorNonDebug(MachineBasicBlock::iterator I,

              MachineBasicBlock::const_iterator Beg) {

  return priorNonDebug(MachineBasicBlock::const_iterator(I), Beg)

      .getNonConstIterator();

}


/// If this iterator is a debug value, increment until reaching the End or a

/// non-debug instruction.

static MachineBasicBlock::const_iterator

nextIfDebug(MachineBasicBlock::const_iterator I,

            MachineBasicBlock::const_iterator End) {

  for(; I != End; ++I) {

    if (!I->isDebugOrPseudoInstr())

      break;

  }

  return I;

}


/// Non-const version.

static MachineBasicBlock::iterator

nextIfDebug(MachineBasicBlock::iterator I,

            MachineBasicBlock::const_iterator End) {

  return nextIfDebug(MachineBasicBlock::const_iterator(I), End)

      .getNonConstIterator();

}


/// Instantiate a ScheduleDAGInstrs that will be owned by the caller.

ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {

  // Select the scheduler, or set the default.

  MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;

  if (Ctor != useDefaultMachineSched)

    return Ctor(this);


  // Get the default scheduler set by the target for this function.

  ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);

  if (Scheduler)

    return Scheduler;


  // Default to GenericScheduler.

  return createGenericSchedLive(this);

}


/// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by

/// the caller. We don't have a command line option to override the postRA

/// scheduler. The Target must configure it.

ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {

  // Get the postRA scheduler set by the target for this function.

  ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);

  if (Scheduler)

    return Scheduler;


  // Default to GenericScheduler.

  return createGenericSchedPostRA(this);

}


/// Top-level MachineScheduler pass driver.

///

/// Visit blocks in function order. Divide each block into scheduling regions

/// and visit them bottom-up. Visiting regions bottom-up is not required, but is

/// consistent with the DAG builder, which traverses the interior of the

/// scheduling regions bottom-up.

///

/// This design avoids exposing scheduling boundaries to the DAG builder,

/// simplifying the DAG builder's support for "special" target instructions.

/// At the same time the design allows target schedulers to operate across

/// scheduling boundaries, for example to bundle the boundary instructions

/// without reordering them. This creates complexity, because the target

/// scheduler must update the RegionBegin and RegionEnd positions cached by

/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler

/// design would be to split blocks at scheduling boundaries, but LLVM has a

/// general bias against block splitting purely for implementation simplicity.

bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {

  if (skipFunction(mf.getFunction()))

    return false;


  if (EnableMachineSched.getNumOccurrences()) {

    if (!EnableMachineSched)

      return false;

  } else if (!mf.getSubtarget().enableMachineScheduler())

    return false;


  LLVM_DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));


  // Initialize the context of the pass.

  MF = &mf;

  MLI = &getAnalysis<MachineLoopInfo>();

  MDT = &getAnalysis<MachineDominatorTree>();

  PassConfig = &getAnalysis<TargetPassConfig>();

  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();


  LIS = &getAnalysis<LiveIntervals>();


  if (VerifyScheduling) {

    LLVM_DEBUG(LIS->dump());

    MF->verify(this, "Before machine scheduling.");

  }

  RegClassInfo->runOnMachineFunction(*MF);


  // Instantiate the selected scheduler for this target, function, and

  // optimization level.

  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());

  ScheduleDAGMI::DumpDirection D;

  if (ForceTopDown)

    D = ScheduleDAGMI::DumpDirection::TopDown;

  else if (ForceBottomUp)

    D = ScheduleDAGMI::DumpDirection::BottomUp;

  else

    D = ScheduleDAGMI::DumpDirection::Bidirectional;

  Scheduler->setDumpDirection(D);

  scheduleRegions(*Scheduler, false);


  LLVM_DEBUG(LIS->dump());

  if (VerifyScheduling)

    MF->verify(this, "After machine scheduling.");

  return true;

}


bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {

  if (skipFunction(mf.getFunction()))

    return false;


  if (EnablePostRAMachineSched.getNumOccurrences()) {

    if (!EnablePostRAMachineSched)

      return false;

  } else if (!mf.getSubtarget().enablePostRAMachineScheduler()) {

    LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");

    return false;

  }

  LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));


  // Initialize the context of the pass.

  MF = &mf;

  MLI = &getAnalysis<MachineLoopInfo>();

  PassConfig = &getAnalysis<TargetPassConfig>();

  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();


  if (VerifyScheduling)

    MF->verify(this, "Before post machine scheduling.");


  // Instantiate the selected scheduler for this target, function, and

  // optimization level.

  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());

  ScheduleDAGMI::DumpDirection D;

  if (PostRADirection == MISchedPostRASched::TopDown)

    D = ScheduleDAGMI::DumpDirection::TopDown;

  else if (PostRADirection == MISchedPostRASched::BottomUp)

    D = ScheduleDAGMI::DumpDirection::BottomUp;

  else

    D = ScheduleDAGMI::DumpDirection::Bidirectional;

  Scheduler->setDumpDirection(D);

  scheduleRegions(*Scheduler, true);


  if (VerifyScheduling)

    MF->verify(this, "After post machine scheduling.");

  return true;

}


/// Return true of the given instruction should not be included in a scheduling

/// region.

///

/// MachineScheduler does not currently support scheduling across calls. To

/// handle calls, the DAG builder needs to be modified to create register

/// anti/output dependencies on the registers clobbered by the call's regmask

/// operand. In PreRA scheduling, the stack pointer adjustment already prevents

/// scheduling across calls. In PostRA scheduling, we need the isCall to enforce

/// the boundary, but there would be no benefit to postRA scheduling across

/// calls this late anyway.

static bool isSchedBoundary(MachineBasicBlock::iterator MI,

                            MachineBasicBlock *MBB,

                            MachineFunction *MF,

                            const TargetInstrInfo *TII) {

  return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);

}


/// A region of an MBB for scheduling.

namespace {

struct SchedRegion {

  /// RegionBegin is the first instruction in the scheduling region, and

  /// RegionEnd is either MBB->end() or the scheduling boundary after the

  /// last instruction in the scheduling region. These iterators cannot refer

  /// to instructions outside of the identified scheduling region because

  /// those may be reordered before scheduling this region.

  MachineBasicBlock::iterator RegionBegin;

  MachineBasicBlock::iterator RegionEnd;

  unsigned NumRegionInstrs;


  SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E,

              unsigned N) :

    RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}

};

} // end anonymous namespace


using MBBRegionsVector = SmallVector<SchedRegion, 16>;


static void

getSchedRegions(MachineBasicBlock *MBB,

                MBBRegionsVector &Regions,

                bool RegionsTopDown) {

  MachineFunction *MF = MBB->getParent();

  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();


  MachineBasicBlock::iterator I = nullptr;

  for(MachineBasicBlock::iterator RegionEnd = MBB->end();

      RegionEnd != MBB->begin(); RegionEnd = I) {


    // Avoid decrementing RegionEnd for blocks with no terminator.

    if (RegionEnd != MBB->end() ||

        isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {

      --RegionEnd;

    }


    // The next region starts above the previous region. Look backward in the

    // instruction stream until we find the nearest boundary.

    unsigned NumRegionInstrs = 0;

    I = RegionEnd;

    for (;I != MBB->begin(); --I) {

      MachineInstr &MI = *std::prev(I);

      if (isSchedBoundary(&MI, &*MBB, MF, TII))

        break;

      if (!MI.isDebugOrPseudoInstr()) {

        // MBB::size() uses instr_iterator to count. Here we need a bundle to

        // count as a single instruction.

        ++NumRegionInstrs;

      }

    }


    // It's possible we found a scheduling region that only has debug

    // instructions. Don't bother scheduling these.

    if (NumRegionInstrs != 0)

      Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));

  }


  if (RegionsTopDown)

    std::reverse(Regions.begin(), Regions.end());

}


/// Main driver for both MachineScheduler and PostMachineScheduler.

void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,

                                           bool FixKillFlags) {

  // Visit all machine basic blocks.

  //

  // TODO: Visit blocks in global postorder or postorder within the bottom-up

  // loop tree. Then we can optionally compute global RegPressure.

  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();

       MBB != MBBEnd; ++MBB) {


    Scheduler.startBlock(&*MBB);


#ifndef NDEBUG

    if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())

      continue;

    if (SchedOnlyBlock.getNumOccurrences()

        && (int)SchedOnlyBlock != MBB->getNumber())

      continue;

#endif


    // Break the block into scheduling regions [I, RegionEnd). RegionEnd

    // points to the scheduling boundary at the bottom of the region. The DAG

    // does not include RegionEnd, but the region does (i.e. the next

    // RegionEnd is above the previous RegionBegin). If the current block has

    // no terminator then RegionEnd == MBB->end() for the bottom region.

    //

    // All the regions of MBB are first found and stored in MBBRegions, which

    // will be processed (MBB) top-down if initialized with true.

    //

    // The Scheduler may insert instructions during either schedule() or

    // exitRegion(), even for empty regions. So the local iterators 'I' and

    // 'RegionEnd' are invalid across these calls. Instructions must not be

    // added to other regions than the current one without updating MBBRegions.


    MBBRegionsVector MBBRegions;

    getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());

    for (const SchedRegion &R : MBBRegions) {

      MachineBasicBlock::iterator I = R.RegionBegin;

      MachineBasicBlock::iterator RegionEnd = R.RegionEnd;

      unsigned NumRegionInstrs = R.NumRegionInstrs;


      // Notify the scheduler of the region, even if we may skip scheduling

      // it. Perhaps it still needs to be bundled.

      Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);


      // Skip empty scheduling regions (0 or 1 schedulable instructions).

      if (I == RegionEnd || I == std::prev(RegionEnd)) {

        // Close the current region. Bundle the terminator if needed.

        // This invalidates 'RegionEnd' and 'I'.

        Scheduler.exitRegion();

        continue;

      }

      LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");

      LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)

                        << " " << MBB->getName() << "\n  From: " << *I

                        << "    To: ";

                 if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;

                 else dbgs() << "End\n";

                 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');

      if (DumpCriticalPathLength) {

        errs() << MF->getName();

        errs() << ":%bb. " << MBB->getNumber();

        errs() << " " << MBB->getName() << " \n";

      }


      // Schedule a region: possibly reorder instructions.

      // This invalidates the original region iterators.

      Scheduler.schedule();


      // Close the current region.

      Scheduler.exitRegion();

    }

    Scheduler.finishBlock();

    // FIXME: Ideally, no further passes should rely on kill flags. However,

    // thumb2 size reduction is currently an exception, so the PostMIScheduler

    // needs to do this.

    if (FixKillFlags)

      Scheduler.fixupKills(*MBB);

  }

  Scheduler.finalizeSchedule();

}


void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {

  // unimplemented

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

LLVM_DUMP_METHOD void ReadyQueue::dump() const {

  dbgs() << "Queue " << Name << ": ";

  for (const SUnit *SU : Queue)

    dbgs() << SU->NodeNum << " ";

  dbgs() << "\n";

}

#endif


//===----------------------------------------------------------------------===//

// ScheduleDAGMI - Basic machine instruction scheduling. This is

// independent of PreRA/PostRA scheduling and involves no extra book-keeping for

// virtual registers.

// ===----------------------------------------------------------------------===/


// Provide a vtable anchor.

ScheduleDAGMI::~ScheduleDAGMI() = default;


/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When

/// NumPredsLeft reaches zero, release the successor node.

///

/// FIXME: Adjust SuccSU height based on MinLatency.

void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {

  SUnit *SuccSU = SuccEdge->getSUnit();


  if (SuccEdge->isWeak()) {

    --SuccSU->WeakPredsLeft;

    if (SuccEdge->isCluster())

      NextClusterSucc = SuccSU;

    return;

  }

#ifndef NDEBUG

  if (SuccSU->NumPredsLeft == 0) {

    dbgs() << "*** Scheduling failed! ***\n";

    dumpNode(*SuccSU);

    dbgs() << " has been released too many times!\n";

    llvm_unreachable(nullptr);

  }

#endif

  // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,

  // CurrCycle may have advanced since then.

  if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())

    SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();


  --SuccSU->NumPredsLeft;

  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)

    SchedImpl->releaseTopNode(SuccSU);

}


/// releaseSuccessors - Call releaseSucc on each of SU's successors.

void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {

  for (SDep &Succ : SU->Succs)

    releaseSucc(SU, &Succ);

}


/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When

/// NumSuccsLeft reaches zero, release the predecessor node.

///

/// FIXME: Adjust PredSU height based on MinLatency.

void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {

  SUnit *PredSU = PredEdge->getSUnit();


  if (PredEdge->isWeak()) {

    --PredSU->WeakSuccsLeft;

    if (PredEdge->isCluster())

      NextClusterPred = PredSU;

    return;

  }

#ifndef NDEBUG

  if (PredSU->NumSuccsLeft == 0) {

    dbgs() << "*** Scheduling failed! ***\n";

    dumpNode(*PredSU);

    dbgs() << " has been released too many times!\n";

    llvm_unreachable(nullptr);

  }

#endif

  // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,

  // CurrCycle may have advanced since then.

  if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())

    PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();


  --PredSU->NumSuccsLeft;

  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)

    SchedImpl->releaseBottomNode(PredSU);

}


/// releasePredecessors - Call releasePred on each of SU's predecessors.

void ScheduleDAGMI::releasePredecessors(SUnit *SU) {

  for (SDep &Pred : SU->Preds)

    releasePred(SU, &Pred);

}


void ScheduleDAGMI::startBlock(MachineBasicBlock *bb) {

  ScheduleDAGInstrs::startBlock(bb);

  SchedImpl->enterMBB(bb);

}


void ScheduleDAGMI::finishBlock() {

  SchedImpl->leaveMBB();

  ScheduleDAGInstrs::finishBlock();

}


/// enterRegion - Called back from PostMachineScheduler::runOnMachineFunction

/// after crossing a scheduling boundary. [begin, end) includes all instructions

/// in the region, including the boundary itself and single-instruction regions

/// that don't get scheduled.

void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,

                                     MachineBasicBlock::iterator begin,

                                     MachineBasicBlock::iterator end,

                                     unsigned regioninstrs)

{

  ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);


  SchedImpl->initPolicy(begin, end, regioninstrs);

}


/// This is normally called from the main scheduler loop but may also be invoked

/// by the scheduling strategy to perform additional code motion.

void ScheduleDAGMI::moveInstruction(

  MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {

  // Advance RegionBegin if the first instruction moves down.

  if (&*RegionBegin == MI)

    ++RegionBegin;


  // Update the instruction stream.

  BB->splice(InsertPos, BB, MI);


  // Update LiveIntervals

  if (LIS)

    LIS->handleMove(*MI, /*UpdateFlags=*/true);


  // Recede RegionBegin if an instruction moves above the first.

  if (RegionBegin == InsertPos)

    RegionBegin = MI;

}


bool ScheduleDAGMI::checkSchedLimit() {

#if LLVM_ENABLE_ABI_BREAKING_CHECKS && !defined(NDEBUG)

  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {

    CurrentTop = CurrentBottom;

    return false;

  }

  ++NumInstrsScheduled;

#endif

  return true;

}


/// Per-region scheduling driver, called back from

/// PostMachineScheduler::runOnMachineFunction. This is a simplified driver

/// that does not consider liveness or register pressure. It is useful for

/// PostRA scheduling and potentially other custom schedulers.

void ScheduleDAGMI::schedule() {

  LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");

  LLVM_DEBUG(SchedImpl->dumpPolicy());


  // Build the DAG.

  buildSchedGraph(AA);


  postProcessDAG();


  SmallVector<SUnit*, 8> TopRoots, BotRoots;

  findRootsAndBiasEdges(TopRoots, BotRoots);


  LLVM_DEBUG(dump());

  if (PrintDAGs) dump();

  if (ViewMISchedDAGs) viewGraph();


  // Initialize the strategy before modifying the DAG.

  // This may initialize a DFSResult to be used for queue priority.

  SchedImpl->initialize(this);


  // Initialize ready queues now that the DAG and priority data are finalized.

  initQueues(TopRoots, BotRoots);


  bool IsTopNode = false;

  while (true) {

    LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");

    SUnit *SU = SchedImpl->pickNode(IsTopNode);

    if (!SU) break;


    assert(!SU->isScheduled && "Node already scheduled");

    if (!checkSchedLimit())

      break;


    MachineInstr *MI = SU->getInstr();

    if (IsTopNode) {

      assert(SU->isTopReady() && "node still has unscheduled dependencies");

      if (&*CurrentTop == MI)

        CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);

      else

        moveInstruction(MI, CurrentTop);

    } else {

      assert(SU->isBottomReady() && "node still has unscheduled dependencies");

      MachineBasicBlock::iterator priorII =

        priorNonDebug(CurrentBottom, CurrentTop);

      if (&*priorII == MI)

        CurrentBottom = priorII;

      else {

        if (&*CurrentTop == MI)

          CurrentTop = nextIfDebug(++CurrentTop, priorII);

        moveInstruction(MI, CurrentBottom);

        CurrentBottom = MI;

      }

    }

    // Notify the scheduling strategy before updating the DAG.

    // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues

    // runs, it can then use the accurate ReadyCycle time to determine whether

    // newly released nodes can move to the readyQ.

    SchedImpl->schedNode(SU, IsTopNode);


    updateQueues(SU, IsTopNode);

  }

  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");


  placeDebugValues();


  LLVM_DEBUG({

    dbgs() << "*** Final schedule for "

           << printMBBReference(*begin()->getParent()) << " ***\n";

    dumpSchedule();

    dbgs() << '\n';

  });

}


/// Apply each ScheduleDAGMutation step in order.

void ScheduleDAGMI::postProcessDAG() {

  for (auto &m : Mutations)

    m->apply(this);

}


void ScheduleDAGMI::

findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,

                      SmallVectorImpl<SUnit*> &BotRoots) {

  for (SUnit &SU : SUnits) {

    assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");


    // Order predecessors so DFSResult follows the critical path.

    SU.biasCriticalPath();


    // A SUnit is ready to top schedule if it has no predecessors.

    if (!SU.NumPredsLeft)

      TopRoots.push_back(&SU);

    // A SUnit is ready to bottom schedule if it has no successors.

    if (!SU.NumSuccsLeft)

      BotRoots.push_back(&SU);

  }

  ExitSU.biasCriticalPath();

}


/// Identify DAG roots and setup scheduler queues.

void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,

                               ArrayRef<SUnit*> BotRoots) {

  NextClusterSucc = nullptr;

  NextClusterPred = nullptr;


  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.

  //

  // Nodes with unreleased weak edges can still be roots.

  // Release top roots in forward order.

  for (SUnit *SU : TopRoots)

    SchedImpl->releaseTopNode(SU);


  // Release bottom roots in reverse order so the higher priority nodes appear

  // first. This is more natural and slightly more efficient.

  for (SmallVectorImpl<SUnit*>::const_reverse_iterator

         I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {

    SchedImpl->releaseBottomNode(*I);

  }


  releaseSuccessors(&EntrySU);

  releasePredecessors(&ExitSU);


  SchedImpl->registerRoots();


  // Advance past initial DebugValues.

  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);

  CurrentBottom = RegionEnd;

}


/// Update scheduler queues after scheduling an instruction.

void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {

  // Release dependent instructions for scheduling.

  if (IsTopNode)

    releaseSuccessors(SU);

  else

    releasePredecessors(SU);


  SU->isScheduled = true;

}


/// Reinsert any remaining debug_values, just like the PostRA scheduler.

void ScheduleDAGMI::placeDebugValues() {

  // If first instruction was a DBG_VALUE then put it back.

  if (FirstDbgValue) {

    BB->splice(RegionBegin, BB, FirstDbgValue);

    RegionBegin = FirstDbgValue;

  }


  for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator

         DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {

    std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);

    MachineInstr *DbgValue = P.first;

    MachineBasicBlock::iterator OrigPrevMI = P.second;

    if (&*RegionBegin == DbgValue)

      ++RegionBegin;

    BB->splice(std::next(OrigPrevMI), BB, DbgValue);

    if (RegionEnd != BB->end() && OrigPrevMI == &*RegionEnd)

      RegionEnd = DbgValue;

  }

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

static const char *scheduleTableLegend = "  i: issue\n  x: resource booked";


LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceTopDown() const {

  // Bail off when there is no schedule model to query.

  if (!SchedModel.hasInstrSchedModel())

    return;


  //  Nothing to show if there is no or just one instruction.

  if (BB->size() < 2)

    return;


  dbgs() << " * Schedule table (TopDown):\n";

  dbgs() << scheduleTableLegend << "\n";

  const unsigned FirstCycle = getSUnit(&*(std::begin(*this)))->TopReadyCycle;

  unsigned LastCycle = getSUnit(&*(std::prev(std::end(*this))))->TopReadyCycle;

  for (MachineInstr &MI : *this) {

    SUnit *SU = getSUnit(&MI);

    if (!SU)

      continue;

    const MCSchedClassDesc *SC = getSchedClass(SU);

    for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),

                                       PE = SchedModel.getWriteProcResEnd(SC);

         PI != PE; ++PI) {

      if (SU->TopReadyCycle + PI->ReleaseAtCycle - 1 > LastCycle)

        LastCycle = SU->TopReadyCycle + PI->ReleaseAtCycle - 1;

    }

  }

  // Print the header with the cycles

  dbgs() << llvm::left_justify("Cycle", HeaderColWidth);

  for (unsigned C = FirstCycle; C <= LastCycle; ++C)

    dbgs() << llvm::left_justify("| " + std::to_string(C), ColWidth);

  dbgs() << "|\n";


  for (MachineInstr &MI : *this) {

    SUnit *SU = getSUnit(&MI);

    if (!SU) {

      dbgs() << "Missing SUnit\n";

      continue;

    }

    std::string NodeName("SU(");

    NodeName += std::to_string(SU->NodeNum) + ")";

    dbgs() << llvm::left_justify(NodeName, HeaderColWidth);

    unsigned C = FirstCycle;

    for (; C <= LastCycle; ++C) {

      if (C == SU->TopReadyCycle)

        dbgs() << llvm::left_justify("| i", ColWidth);

      else

        dbgs() << llvm::left_justify("|", ColWidth);

    }

    dbgs() << "|\n";

    const MCSchedClassDesc *SC = getSchedClass(SU);


    SmallVector<MCWriteProcResEntry, 4> ResourcesIt(

        make_range(SchedModel.getWriteProcResBegin(SC),

                   SchedModel.getWriteProcResEnd(SC)));


    if (MISchedSortResourcesInTrace)

      llvm::stable_sort(ResourcesIt,

                        [](const MCWriteProcResEntry &LHS,

                           const MCWriteProcResEntry &RHS) -> bool {

                          return LHS.AcquireAtCycle < RHS.AcquireAtCycle ||

                                 (LHS.AcquireAtCycle == RHS.AcquireAtCycle &&

                                  LHS.ReleaseAtCycle < RHS.ReleaseAtCycle);

                        });

    for (const MCWriteProcResEntry &PI : ResourcesIt) {

      C = FirstCycle;

      const std::string ResName =

          SchedModel.getResourceName(PI.ProcResourceIdx);

      dbgs() << llvm::right_justify(ResName + " ", HeaderColWidth);

      for (; C < SU->TopReadyCycle + PI.AcquireAtCycle; ++C) {

        dbgs() << llvm::left_justify("|", ColWidth);

      }

      for (unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E;

           ++I, ++C)

        dbgs() << llvm::left_justify("| x", ColWidth);

      while (C++ <= LastCycle)

        dbgs() << llvm::left_justify("|", ColWidth);

      // Place end char

      dbgs() << "| \n";

    }

  }

}


LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceBottomUp() const {

  // Bail off when there is no schedule model to query.

  if (!SchedModel.hasInstrSchedModel())

    return;


  //  Nothing to show if there is no or just one instruction.

  if (BB->size() < 2)

    return;


  dbgs() << " * Schedule table (BottomUp):\n";

  dbgs() << scheduleTableLegend << "\n";


  const int FirstCycle = getSUnit(&*(std::begin(*this)))->BotReadyCycle;

  int LastCycle = getSUnit(&*(std::prev(std::end(*this))))->BotReadyCycle;

  for (MachineInstr &MI : *this) {

    SUnit *SU = getSUnit(&MI);

    if (!SU)

      continue;

    const MCSchedClassDesc *SC = getSchedClass(SU);

    for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),

                                       PE = SchedModel.getWriteProcResEnd(SC);

         PI != PE; ++PI) {

      if ((int)SU->BotReadyCycle - PI->ReleaseAtCycle + 1 < LastCycle)

        LastCycle = (int)SU->BotReadyCycle - PI->ReleaseAtCycle + 1;

    }

  }

  // Print the header with the cycles

  dbgs() << llvm::left_justify("Cycle", HeaderColWidth);

  for (int C = FirstCycle; C >= LastCycle; --C)

    dbgs() << llvm::left_justify("| " + std::to_string(C), ColWidth);

  dbgs() << "|\n";


  for (MachineInstr &MI : *this) {

    SUnit *SU = getSUnit(&MI);

    if (!SU) {

      dbgs() << "Missing SUnit\n";

      continue;

    }

    std::string NodeName("SU(");

    NodeName += std::to_string(SU->NodeNum) + ")";

    dbgs() << llvm::left_justify(NodeName, HeaderColWidth);

    int C = FirstCycle;

    for (; C >= LastCycle; --C) {

      if (C == (int)SU->BotReadyCycle)

        dbgs() << llvm::left_justify("| i", ColWidth);

      else

        dbgs() << llvm::left_justify("|", ColWidth);

    }

    dbgs() << "|\n";

    const MCSchedClassDesc *SC = getSchedClass(SU);

    SmallVector<MCWriteProcResEntry, 4> ResourcesIt(

        make_range(SchedModel.getWriteProcResBegin(SC),

                   SchedModel.getWriteProcResEnd(SC)));


    if (MISchedSortResourcesInTrace)

      llvm::stable_sort(ResourcesIt,

                        [](const MCWriteProcResEntry &LHS,

                           const MCWriteProcResEntry &RHS) -> bool {

                          return LHS.AcquireAtCycle < RHS.AcquireAtCycle ||

                                 (LHS.AcquireAtCycle == RHS.AcquireAtCycle &&

                                  LHS.ReleaseAtCycle < RHS.ReleaseAtCycle);

                        });

    for (const MCWriteProcResEntry &PI : ResourcesIt) {

      C = FirstCycle;

      const std::string ResName =

          SchedModel.getResourceName(PI.ProcResourceIdx);

      dbgs() << llvm::right_justify(ResName + " ", HeaderColWidth);

      for (; C > ((int)SU->BotReadyCycle - (int)PI.AcquireAtCycle); --C) {

        dbgs() << llvm::left_justify("|", ColWidth);

      }

      for (unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E;

           ++I, --C)

        dbgs() << llvm::left_justify("| x", ColWidth);

      while (C-- >= LastCycle)

        dbgs() << llvm::left_justify("|", ColWidth);

      // Place end char

      dbgs() << "| \n";

    }

  }

}

#endif


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {

  if (MISchedDumpScheduleTrace) {

    if (DumpDir == DumpDirection::TopDown)

      dumpScheduleTraceTopDown();

    else if (DumpDir == DumpDirection::BottomUp)

      dumpScheduleTraceBottomUp();

    else if (DumpDir == DumpDirection::Bidirectional) {

      dbgs() << "* Schedule table (Bidirectional): not implemented\n";

    } else {

      dbgs() << "* Schedule table: DumpDirection not set.\n";

    }

  }


  for (MachineInstr &MI : *this) {

    if (SUnit *SU = getSUnit(&MI))

      dumpNode(*SU);

    else

      dbgs() << "Missing SUnit\n";

  }

}

#endif


//===----------------------------------------------------------------------===//

// ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals

// preservation.

//===----------------------------------------------------------------------===//


ScheduleDAGMILive::~ScheduleDAGMILive() {

  delete DFSResult;

}


void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {

  const MachineInstr &MI = *SU.getInstr();

  for (const MachineOperand &MO : MI.operands()) {

    if (!MO.isReg())

      continue;

    if (!MO.readsReg())

      continue;

    if (TrackLaneMasks && !MO.isUse())

      continue;


    Register Reg = MO.getReg();

    if (!Reg.isVirtual())

      continue;


    // Ignore re-defs.

    if (TrackLaneMasks) {

      bool FoundDef = false;

      for (const MachineOperand &MO2 : MI.all_defs()) {

        if (MO2.getReg() == Reg && !MO2.isDead()) {

          FoundDef = true;

          break;

        }

      }

      if (FoundDef)

        continue;

    }


    // Record this local VReg use.

    VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);

    for (; UI != VRegUses.end(); ++UI) {

      if (UI->SU == &SU)

        break;

    }

    if (UI == VRegUses.end())

      VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU));

  }

}


/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after

/// crossing a scheduling boundary. [begin, end) includes all instructions in

/// the region, including the boundary itself and single-instruction regions

/// that don't get scheduled.

void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,

                                MachineBasicBlock::iterator begin,

                                MachineBasicBlock::iterator end,

                                unsigned regioninstrs)

{

  // ScheduleDAGMI initializes SchedImpl's per-region policy.

  ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);


  // For convenience remember the end of the liveness region.

  LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);


  SUPressureDiffs.clear();


  ShouldTrackPressure = SchedImpl->shouldTrackPressure();

  ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();


  assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&

         "ShouldTrackLaneMasks requires ShouldTrackPressure");

}


// Setup the register pressure trackers for the top scheduled and bottom

// scheduled regions.

void ScheduleDAGMILive::initRegPressure() {

  VRegUses.clear();

  VRegUses.setUniverse(MRI.getNumVirtRegs());

  for (SUnit &SU : SUnits)

    collectVRegUses(SU);


  TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,

                    ShouldTrackLaneMasks, false);

  BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,

                    ShouldTrackLaneMasks, false);


  // Close the RPTracker to finalize live ins.

  RPTracker.closeRegion();


  LLVM_DEBUG(RPTracker.dump());


  // Initialize the live ins and live outs.

  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);

  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);


  // Close one end of the tracker so we can call

  // getMaxUpward/DownwardPressureDelta before advancing across any

  // instructions. This converts currently live regs into live ins/outs.

  TopRPTracker.closeTop();

  BotRPTracker.closeBottom();


  BotRPTracker.initLiveThru(RPTracker);

  if (!BotRPTracker.getLiveThru().empty()) {

    TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());

    LLVM_DEBUG(dbgs() << "Live Thru: ";

               dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));

  };


  // For each live out vreg reduce the pressure change associated with other

  // uses of the same vreg below the live-out reaching def.

  updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);


  // Account for liveness generated by the region boundary.

  if (LiveRegionEnd != RegionEnd) {

    SmallVector<RegisterMaskPair, 8> LiveUses;

    BotRPTracker.recede(&LiveUses);

    updatePressureDiffs(LiveUses);

  }


  LLVM_DEBUG(dbgs() << "Top Pressure:\n";

             dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);

             dbgs() << "Bottom Pressure:\n";

             dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI););


  assert((BotRPTracker.getPos() == RegionEnd ||

          (RegionEnd->isDebugInstr() &&

           BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) &&

         "Can't find the region bottom");


  // Cache the list of excess pressure sets in this region. This will also track

  // the max pressure in the scheduled code for these sets.

  RegionCriticalPSets.clear();

  const std::vector<unsigned> &RegionPressure =

    RPTracker.getPressure().MaxSetPressure;

  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {

    unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);

    if (RegionPressure[i] > Limit) {

      LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit

                        << " Actual " << RegionPressure[i] << "\n");

      RegionCriticalPSets.push_back(PressureChange(i));

    }

  }

  LLVM_DEBUG(dbgs() << "Excess PSets: ";

             for (const PressureChange &RCPS

                  : RegionCriticalPSets) dbgs()

             << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";

             dbgs() << "\n");

}


void ScheduleDAGMILive::

updateScheduledPressure(const SUnit *SU,

                        const std::vector<unsigned> &NewMaxPressure) {

  const PressureDiff &PDiff = getPressureDiff(SU);

  unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();

  for (const PressureChange &PC : PDiff) {

    if (!PC.isValid())

      break;

    unsigned ID = PC.getPSet();

    while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)

      ++CritIdx;

    if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {

      if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()

          && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())

        RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);

    }

    unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);

    if (NewMaxPressure[ID] >= Limit - 2) {

      LLVM_DEBUG(dbgs() << "  " << TRI->getRegPressureSetName(ID) << ": "

                        << NewMaxPressure[ID]

                        << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")

                        << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]

                        << " livethru)\n");

    }

  }

}


/// Update the PressureDiff array for liveness after scheduling this

/// instruction.

void ScheduleDAGMILive::updatePressureDiffs(

    ArrayRef<RegisterMaskPair> LiveUses) {

  for (const RegisterMaskPair &P : LiveUses) {

    Register Reg = P.RegUnit;

    /// FIXME: Currently assuming single-use physregs.

    if (!Reg.isVirtual())

      continue;


    if (ShouldTrackLaneMasks) {

      // If the register has just become live then other uses won't change

      // this fact anymore => decrement pressure.

      // If the register has just become dead then other uses make it come

      // back to life => increment pressure.

      bool Decrement = P.LaneMask.any();


      for (const VReg2SUnit &V2SU

           : make_range(VRegUses.find(Reg), VRegUses.end())) {

        SUnit &SU = *V2SU.SU;

        if (SU.isScheduled || &SU == &ExitSU)

          continue;


        PressureDiff &PDiff = getPressureDiff(&SU);

        PDiff.addPressureChange(Reg, Decrement, &MRI);

        LLVM_DEBUG(dbgs() << "  UpdateRegP: SU(" << SU.NodeNum << ") "

                          << printReg(Reg, TRI) << ':'

                          << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();

                   dbgs() << "              to "; PDiff.dump(*TRI););

      }

    } else {

      assert(P.LaneMask.any());

      LLVM_DEBUG(dbgs() << "  LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");

      // This may be called before CurrentBottom has been initialized. However,

      // BotRPTracker must have a valid position. We want the value live into the

      // instruction or live out of the block, so ask for the previous

      // instruction's live-out.

      const LiveInterval &LI = LIS->getInterval(Reg);

      VNInfo *VNI;

      MachineBasicBlock::const_iterator I =

        nextIfDebug(BotRPTracker.getPos(), BB->end());

      if (I == BB->end())

        VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));

      else {

        LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));

        VNI = LRQ.valueIn();

      }

      // RegisterPressureTracker guarantees that readsReg is true for LiveUses.

      assert(VNI && "No live value at use.");

      for (const VReg2SUnit &V2SU

           : make_range(VRegUses.find(Reg), VRegUses.end())) {

        SUnit *SU = V2SU.SU;

        // If this use comes before the reaching def, it cannot be a last use,

        // so decrease its pressure change.

        if (!SU->isScheduled && SU != &ExitSU) {

          LiveQueryResult LRQ =

              LI.Query(LIS->getInstructionIndex(*SU->getInstr()));

          if (LRQ.valueIn() == VNI) {

            PressureDiff &PDiff = getPressureDiff(SU);

            PDiff.addPressureChange(Reg, true, &MRI);

            LLVM_DEBUG(dbgs() << "  UpdateRegP: SU(" << SU->NodeNum << ") "

                              << *SU->getInstr();

                       dbgs() << "              to "; PDiff.dump(*TRI););

          }

        }

      }

    }

  }

}


void ScheduleDAGMILive::dump() const {

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

  if (EntrySU.getInstr() != nullptr)

    dumpNodeAll(EntrySU);

  for (const SUnit &SU : SUnits) {

    dumpNodeAll(SU);

    if (ShouldTrackPressure) {

      dbgs() << "  Pressure Diff      : ";

      getPressureDiff(&SU).dump(*TRI);

    }

    dbgs() << "  Single Issue       : ";

    if (SchedModel.mustBeginGroup(SU.getInstr()) &&

        SchedModel.mustEndGroup(SU.getInstr()))

      dbgs() << "true;";

    else

      dbgs() << "false;";

    dbgs() << '\n';

  }

  if (ExitSU.getInstr() != nullptr)

    dumpNodeAll(ExitSU);

#endif

}


/// schedule - Called back from MachineScheduler::runOnMachineFunction

/// after setting up the current scheduling region. [RegionBegin, RegionEnd)

/// only includes instructions that have DAG nodes, not scheduling boundaries.

///

/// This is a skeletal driver, with all the functionality pushed into helpers,

/// so that it can be easily extended by experimental schedulers. Generally,

/// implementing MachineSchedStrategy should be sufficient to implement a new

/// scheduling algorithm. However, if a scheduler further subclasses

/// ScheduleDAGMILive then it will want to override this virtual method in order

/// to update any specialized state.

void ScheduleDAGMILive::schedule() {

  LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");

  LLVM_DEBUG(SchedImpl->dumpPolicy());

  buildDAGWithRegPressure();


  postProcessDAG();


  SmallVector<SUnit*, 8> TopRoots, BotRoots;

  findRootsAndBiasEdges(TopRoots, BotRoots);


  // Initialize the strategy before modifying the DAG.

  // This may initialize a DFSResult to be used for queue priority.

  SchedImpl->initialize(this);


  LLVM_DEBUG(dump());

  if (PrintDAGs) dump();

  if (ViewMISchedDAGs) viewGraph();


  // Initialize ready queues now that the DAG and priority data are finalized.

  initQueues(TopRoots, BotRoots);


  bool IsTopNode = false;

  while (true) {

    LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");

    SUnit *SU = SchedImpl->pickNode(IsTopNode);

    if (!SU) break;


    assert(!SU->isScheduled && "Node already scheduled");

    if (!checkSchedLimit())

      break;


    scheduleMI(SU, IsTopNode);


    if (DFSResult) {

      unsigned SubtreeID = DFSResult->getSubtreeID(SU);

      if (!ScheduledTrees.test(SubtreeID)) {

        ScheduledTrees.set(SubtreeID);

        DFSResult->scheduleTree(SubtreeID);

        SchedImpl->scheduleTree(SubtreeID);

      }

    }


    // Notify the scheduling strategy after updating the DAG.

    SchedImpl->schedNode(SU, IsTopNode);


    updateQueues(SU, IsTopNode);

  }

  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");


  placeDebugValues();


  LLVM_DEBUG({

    dbgs() << "*** Final schedule for "

           << printMBBReference(*begin()->getParent()) << " ***\n";

    dumpSchedule();

    dbgs() << '\n';

  });

}


/// Build the DAG and setup three register pressure trackers.

void ScheduleDAGMILive::buildDAGWithRegPressure() {

  if (!ShouldTrackPressure) {

    RPTracker.reset();

    RegionCriticalPSets.clear();

    buildSchedGraph(AA);

    return;

  }


  // Initialize the register pressure tracker used by buildSchedGraph.

  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,

                 ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);


  // Account for liveness generate by the region boundary.

  if (LiveRegionEnd != RegionEnd)

    RPTracker.recede();


  // Build the DAG, and compute current register pressure.

  buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);


  // Initialize top/bottom trackers after computing region pressure.

  initRegPressure();

}


void ScheduleDAGMILive::computeDFSResult() {

  if (!DFSResult)

    DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);

  DFSResult->clear();

  ScheduledTrees.clear();

  DFSResult->resize(SUnits.size());

  DFSResult->compute(SUnits);

  ScheduledTrees.resize(DFSResult->getNumSubtrees());

}


/// Compute the max cyclic critical path through the DAG. The scheduling DAG

/// only provides the critical path for single block loops. To handle loops that

/// span blocks, we could use the vreg path latencies provided by

/// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently

/// available for use in the scheduler.

///

/// The cyclic path estimation identifies a def-use pair that crosses the back

/// edge and considers the depth and height of the nodes. For example, consider

/// the following instruction sequence where each instruction has unit latency

/// and defines an eponymous virtual register:

///

/// a->b(a,c)->c(b)->d(c)->exit

///

/// The cyclic critical path is a two cycles: b->c->b

/// The acyclic critical path is four cycles: a->b->c->d->exit

/// LiveOutHeight = height(c) = len(c->d->exit) = 2

/// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3

/// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4

/// LiveInDepth = depth(b) = len(a->b) = 1

///

/// LiveOutDepth - LiveInDepth = 3 - 1 = 2

/// LiveInHeight - LiveOutHeight = 4 - 2 = 2

/// CyclicCriticalPath = min(2, 2) = 2

///

/// This could be relevant to PostRA scheduling, but is currently implemented

/// assuming LiveIntervals.

unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {

  // This only applies to single block loop.

  if (!BB->isSuccessor(BB))

    return 0;


  unsigned MaxCyclicLatency = 0;

  // Visit each live out vreg def to find def/use pairs that cross iterations.

  for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {

    Register Reg = P.RegUnit;

    if (!Reg.isVirtual())

      continue;

    const LiveInterval &LI = LIS->getInterval(Reg);

    const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));

    if (!DefVNI)

      continue;


    MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);

    const SUnit *DefSU = getSUnit(DefMI);

    if (!DefSU)

      continue;


    unsigned LiveOutHeight = DefSU->getHeight();

    unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;

    // Visit all local users of the vreg def.

    for (const VReg2SUnit &V2SU

         : make_range(VRegUses.find(Reg), VRegUses.end())) {

      SUnit *SU = V2SU.SU;

      if (SU == &ExitSU)

        continue;


      // Only consider uses of the phi.

      LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));

      if (!LRQ.valueIn()->isPHIDef())

        continue;


      // Assume that a path spanning two iterations is a cycle, which could

      // overestimate in strange cases. This allows cyclic latency to be

      // estimated as the minimum slack of the vreg's depth or height.

      unsigned CyclicLatency = 0;

      if (LiveOutDepth > SU->getDepth())

        CyclicLatency = LiveOutDepth - SU->getDepth();


      unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;

      if (LiveInHeight > LiveOutHeight) {

        if (LiveInHeight - LiveOutHeight < CyclicLatency)

          CyclicLatency = LiveInHeight - LiveOutHeight;

      } else

        CyclicLatency = 0;


      LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("

                        << SU->NodeNum << ") = " << CyclicLatency << "c\n");

      if (CyclicLatency > MaxCyclicLatency)

        MaxCyclicLatency = CyclicLatency;

    }

  }

  LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");

  return MaxCyclicLatency;

}


/// Release ExitSU predecessors and setup scheduler queues. Re-position

/// the Top RP tracker in case the region beginning has changed.

void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots,

                                   ArrayRef<SUnit*> BotRoots) {

  ScheduleDAGMI::initQueues(TopRoots, BotRoots);

  if (ShouldTrackPressure) {

    assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");

    TopRPTracker.setPos(CurrentTop);

  }

}


/// Move an instruction and update register pressure.

void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {

  // Move the instruction to its new location in the instruction stream.

  MachineInstr *MI = SU->getInstr();


  if (IsTopNode) {

    assert(SU->isTopReady() && "node still has unscheduled dependencies");

    if (&*CurrentTop == MI)

      CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);

    else {

      moveInstruction(MI, CurrentTop);

      TopRPTracker.setPos(MI);

    }


    if (ShouldTrackPressure) {

      // Update top scheduled pressure.

      RegisterOperands RegOpers;

      RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);

      if (ShouldTrackLaneMasks) {

        // Adjust liveness and add missing dead+read-undef flags.

        SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();

        RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);

      } else {

        // Adjust for missing dead-def flags.

        RegOpers.detectDeadDefs(*MI, *LIS);

      }


      TopRPTracker.advance(RegOpers);

      assert(TopRPTracker.getPos() == CurrentTop && "out of sync");

      LLVM_DEBUG(dbgs() << "Top Pressure:\n"; dumpRegSetPressure(

                     TopRPTracker.getRegSetPressureAtPos(), TRI););


      updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);

    }

  } else {

    assert(SU->isBottomReady() && "node still has unscheduled dependencies");

    MachineBasicBlock::iterator priorII =

      priorNonDebug(CurrentBottom, CurrentTop);

    if (&*priorII == MI)

      CurrentBottom = priorII;

    else {

      if (&*CurrentTop == MI) {

        CurrentTop = nextIfDebug(++CurrentTop, priorII);

        TopRPTracker.setPos(CurrentTop);

      }

      moveInstruction(MI, CurrentBottom);

      CurrentBottom = MI;

      BotRPTracker.setPos(CurrentBottom);

    }

    if (ShouldTrackPressure) {

      RegisterOperands RegOpers;

      RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);

      if (ShouldTrackLaneMasks) {

        // Adjust liveness and add missing dead+read-undef flags.

        SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();

        RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);

      } else {

        // Adjust for missing dead-def flags.

        RegOpers.detectDeadDefs(*MI, *LIS);

      }


      if (BotRPTracker.getPos() != CurrentBottom)

        BotRPTracker.recedeSkipDebugValues();

      SmallVector<RegisterMaskPair, 8> LiveUses;

      BotRPTracker.recede(RegOpers, &LiveUses);

      assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");

      LLVM_DEBUG(dbgs() << "Bottom Pressure:\n"; dumpRegSetPressure(

                     BotRPTracker.getRegSetPressureAtPos(), TRI););


      updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);

      updatePressureDiffs(LiveUses);

    }

  }

}


//===----------------------------------------------------------------------===//

// BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.

//===----------------------------------------------------------------------===//


namespace {


/// Post-process the DAG to create cluster edges between neighboring

/// loads or between neighboring stores.

class BaseMemOpClusterMutation : public ScheduleDAGMutation {

  struct MemOpInfo {

    SUnit *SU;

    SmallVector<const MachineOperand *, 4> BaseOps;

    int64_t Offset;

    LocationSize Width;

    bool OffsetIsScalable;


    MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps,

              int64_t Offset, bool OffsetIsScalable, LocationSize Width)

        : SU(SU), BaseOps(BaseOps.begin(), BaseOps.end()), Offset(Offset),

          Width(Width), OffsetIsScalable(OffsetIsScalable) {}


    static bool Compare(const MachineOperand *const &A,

                        const MachineOperand *const &B) {

      if (A->getType() != B->getType())

        return A->getType() < B->getType();

      if (A->isReg())

        return A->getReg() < B->getReg();

      if (A->isFI()) {

        const MachineFunction &MF = *A->getParent()->getParent()->getParent();

        const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();

        bool StackGrowsDown = TFI.getStackGrowthDirection() ==

                              TargetFrameLowering::StackGrowsDown;

        return StackGrowsDown ? A->getIndex() > B->getIndex()

                              : A->getIndex() < B->getIndex();

      }


      llvm_unreachable("MemOpClusterMutation only supports register or frame "

                       "index bases.");

    }


    bool operator<(const MemOpInfo &RHS) const {

      // FIXME: Don't compare everything twice. Maybe use C++20 three way

      // comparison instead when it's available.

      if (std::lexicographical_compare(BaseOps.begin(), BaseOps.end(),

                                       RHS.BaseOps.begin(), RHS.BaseOps.end(),

                                       Compare))

        return true;

      if (std::lexicographical_compare(RHS.BaseOps.begin(), RHS.BaseOps.end(),

                                       BaseOps.begin(), BaseOps.end(), Compare))

        return false;

      if (Offset != RHS.Offset)

        return Offset < RHS.Offset;

      return SU->NodeNum < RHS.SU->NodeNum;

    }

  };


  const TargetInstrInfo *TII;

  const TargetRegisterInfo *TRI;

  bool IsLoad;

  bool ReorderWhileClustering;


public:

  BaseMemOpClusterMutation(const TargetInstrInfo *tii,

                           const TargetRegisterInfo *tri, bool IsLoad,

                           bool ReorderWhileClustering)

      : TII(tii), TRI(tri), IsLoad(IsLoad),

        ReorderWhileClustering(ReorderWhileClustering) {}


  void apply(ScheduleDAGInstrs *DAGInstrs) override;


protected:

  void clusterNeighboringMemOps(ArrayRef<MemOpInfo> MemOps, bool FastCluster,

                                ScheduleDAGInstrs *DAG);

  void collectMemOpRecords(std::vector<SUnit> &SUnits,

                           SmallVectorImpl<MemOpInfo> &MemOpRecords);

  bool groupMemOps(ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,

                   DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups);

};


class StoreClusterMutation : public BaseMemOpClusterMutation {

public:

  StoreClusterMutation(const TargetInstrInfo *tii,

                       const TargetRegisterInfo *tri,

                       bool ReorderWhileClustering)

      : BaseMemOpClusterMutation(tii, tri, false, ReorderWhileClustering) {}

};


class LoadClusterMutation : public BaseMemOpClusterMutation {

public:

  LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri,

                      bool ReorderWhileClustering)

      : BaseMemOpClusterMutation(tii, tri, true, ReorderWhileClustering) {}

};


} // end anonymous namespace


namespace llvm {


std::unique_ptr<ScheduleDAGMutation>

createLoadClusterDAGMutation(const TargetInstrInfo *TII,

                             const TargetRegisterInfo *TRI,

                             bool ReorderWhileClustering) {

  return EnableMemOpCluster ? std::make_unique<LoadClusterMutation>(

                                  TII, TRI, ReorderWhileClustering)

                            : nullptr;

}


std::unique_ptr<ScheduleDAGMutation>

createStoreClusterDAGMutation(const TargetInstrInfo *TII,

                              const TargetRegisterInfo *TRI,

                              bool ReorderWhileClustering) {

  return EnableMemOpCluster ? std::make_unique<StoreClusterMutation>(

                                  TII, TRI, ReorderWhileClustering)

                            : nullptr;

}


} // end namespace llvm


// Sorting all the loads/stores first, then for each load/store, checking the

// following load/store one by one, until reach the first non-dependent one and

// call target hook to see if they can cluster.

// If FastCluster is enabled, we assume that, all the loads/stores have been

// preprocessed and now, they didn't have dependencies on each other.

void BaseMemOpClusterMutation::clusterNeighboringMemOps(

    ArrayRef<MemOpInfo> MemOpRecords, bool FastCluster,

    ScheduleDAGInstrs *DAG) {

  // Keep track of the current cluster length and bytes for each SUnit.

  DenseMap<unsigned, std::pair<unsigned, unsigned>> SUnit2ClusterInfo;


  // At this point, `MemOpRecords` array must hold atleast two mem ops. Try to

  // cluster mem ops collected within `MemOpRecords` array.

  for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {

    // Decision to cluster mem ops is taken based on target dependent logic

    auto MemOpa = MemOpRecords[Idx];


    // Seek for the next load/store to do the cluster.

    unsigned NextIdx = Idx + 1;

    for (; NextIdx < End; ++NextIdx)

      // Skip if MemOpb has been clustered already or has dependency with

      // MemOpa.

      if (!SUnit2ClusterInfo.count(MemOpRecords[NextIdx].SU->NodeNum) &&

          (FastCluster ||

           (!DAG->IsReachable(MemOpRecords[NextIdx].SU, MemOpa.SU) &&

            !DAG->IsReachable(MemOpa.SU, MemOpRecords[NextIdx].SU))))

        break;

    if (NextIdx == End)

      continue;


    auto MemOpb = MemOpRecords[NextIdx];

    unsigned ClusterLength = 2;

    unsigned CurrentClusterBytes = MemOpa.Width.getValue().getKnownMinValue() +

                                   MemOpb.Width.getValue().getKnownMinValue();

    if (SUnit2ClusterInfo.count(MemOpa.SU->NodeNum)) {

      ClusterLength = SUnit2ClusterInfo[MemOpa.SU->NodeNum].first + 1;

      CurrentClusterBytes = SUnit2ClusterInfo[MemOpa.SU->NodeNum].second +

                            MemOpb.Width.getValue().getKnownMinValue();

    }


    if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpa.Offset,

                                  MemOpa.OffsetIsScalable, MemOpb.BaseOps,

                                  MemOpb.Offset, MemOpb.OffsetIsScalable,

                                  ClusterLength, CurrentClusterBytes))

      continue;


    SUnit *SUa = MemOpa.SU;

    SUnit *SUb = MemOpb.SU;

    if (!ReorderWhileClustering && SUa->NodeNum > SUb->NodeNum)

      std::swap(SUa, SUb);


    // FIXME: Is this check really required?

    if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster)))

      continue;


    LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("

                      << SUb->NodeNum << ")\n");

    ++NumClustered;


    if (IsLoad) {

      // Copy successor edges from SUa to SUb. Interleaving computation

      // dependent on SUa can prevent load combining due to register reuse.

      // Predecessor edges do not need to be copied from SUb to SUa since

      // nearby loads should have effectively the same inputs.

      for (const SDep &Succ : SUa->Succs) {

        if (Succ.getSUnit() == SUb)

          continue;

        LLVM_DEBUG(dbgs() << "  Copy Succ SU(" << Succ.getSUnit()->NodeNum

                          << ")\n");

        DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));

      }

    } else {

      // Copy predecessor edges from SUb to SUa to avoid the SUnits that

      // SUb dependent on scheduled in-between SUb and SUa. Successor edges

      // do not need to be copied from SUa to SUb since no one will depend

      // on stores.

      // Notice that, we don't need to care about the memory dependency as

      // we won't try to cluster them if they have any memory dependency.

      for (const SDep &Pred : SUb->Preds) {

        if (Pred.getSUnit() == SUa)

          continue;

        LLVM_DEBUG(dbgs() << "  Copy Pred SU(" << Pred.getSUnit()->NodeNum

                          << ")\n");

        DAG->addEdge(SUa, SDep(Pred.getSUnit(), SDep::Artificial));

      }

    }


    SUnit2ClusterInfo[MemOpb.SU->NodeNum] = {ClusterLength,

                                             CurrentClusterBytes};


    LLVM_DEBUG(dbgs() << "  Curr cluster length: " << ClusterLength

                      << ", Curr cluster bytes: " << CurrentClusterBytes

                      << "\n");

  }

}


void BaseMemOpClusterMutation::collectMemOpRecords(

    std::vector<SUnit> &SUnits, SmallVectorImpl<MemOpInfo> &MemOpRecords) {

  for (auto &SU : SUnits) {

    if ((IsLoad && !SU.getInstr()->mayLoad()) ||

        (!IsLoad && !SU.getInstr()->mayStore()))

      continue;


    const MachineInstr &MI = *SU.getInstr();

    SmallVector<const MachineOperand *, 4> BaseOps;

    int64_t Offset;

    bool OffsetIsScalable;

    LocationSize Width = 0;

    if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset,

                                           OffsetIsScalable, Width, TRI)) {

      MemOpRecords.push_back(

          MemOpInfo(&SU, BaseOps, Offset, OffsetIsScalable, Width));


      LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "

                        << Offset << ", OffsetIsScalable: " << OffsetIsScalable

                        << ", Width: " << Width << "\n");

    }

#ifndef NDEBUG

    for (const auto *Op : BaseOps)

      assert(Op);

#endif

  }

}


bool BaseMemOpClusterMutation::groupMemOps(

    ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,

    DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups) {

  bool FastCluster =

      ForceFastCluster ||

      MemOps.size() * DAG->SUnits.size() / 1000 > FastClusterThreshold;


  for (const auto &MemOp : MemOps) {

    unsigned ChainPredID = DAG->SUnits.size();

    if (FastCluster) {

      for (const SDep &Pred : MemOp.SU->Preds) {

        // We only want to cluster the mem ops that have the same ctrl(non-data)

        // pred so that they didn't have ctrl dependency for each other. But for

        // store instrs, we can still cluster them if the pred is load instr.

        if ((Pred.isCtrl() &&

             (IsLoad ||

              (Pred.getSUnit() && Pred.getSUnit()->getInstr()->mayStore()))) &&

            !Pred.isArtificial()) {

          ChainPredID = Pred.getSUnit()->NodeNum;

          break;

        }

      }

    } else

      ChainPredID = 0;


    Groups[ChainPredID].push_back(MemOp);

  }

  return FastCluster;

}


/// Callback from DAG postProcessing to create cluster edges for loads/stores.

void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {

  // Collect all the clusterable loads/stores

  SmallVector<MemOpInfo, 32> MemOpRecords;

  collectMemOpRecords(DAG->SUnits, MemOpRecords);


  if (MemOpRecords.size() < 2)

    return;


  // Put the loads/stores without dependency into the same group with some

  // heuristic if the DAG is too complex to avoid compiling time blow up.

  // Notice that, some fusion pair could be lost with this.

  DenseMap<unsigned, SmallVector<MemOpInfo, 32>> Groups;

  bool FastCluster = groupMemOps(MemOpRecords, DAG, Groups);


  for (auto &Group : Groups) {

    // Sorting the loads/stores, so that, we can stop the cluster as early as

    // possible.

    llvm::sort(Group.second);


    // Trying to cluster all the neighboring loads/stores.

    clusterNeighboringMemOps(Group.second, FastCluster, DAG);

  }

}


//===----------------------------------------------------------------------===//

// CopyConstrain - DAG post-processing to encourage copy elimination.

//===----------------------------------------------------------------------===//


namespace {


/// Post-process the DAG to create weak edges from all uses of a copy to

/// the one use that defines the copy's source vreg, most likely an induction

/// variable increment.

class CopyConstrain : public ScheduleDAGMutation {

  // Transient state.

  SlotIndex RegionBeginIdx;


  // RegionEndIdx is the slot index of the last non-debug instruction in the

  // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.

  SlotIndex RegionEndIdx;


public:

  CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}


  void apply(ScheduleDAGInstrs *DAGInstrs) override;


protected:

  void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);

};


} // end anonymous namespace


namespace llvm {


std::unique_ptr<ScheduleDAGMutation>

createCopyConstrainDAGMutation(const TargetInstrInfo *TII,

                               const TargetRegisterInfo *TRI) {

  return std::make_unique<CopyConstrain>(TII, TRI);

}


} // end namespace llvm


/// constrainLocalCopy handles two possibilities:

/// 1) Local src:

/// I0:     = dst

/// I1: src = ...

/// I2:     = dst

/// I3: dst = src (copy)

/// (create pred->succ edges I0->I1, I2->I1)

///

/// 2) Local copy:

/// I0: dst = src (copy)

/// I1:     = dst

/// I2: src = ...

/// I3:     = dst

/// (create pred->succ edges I1->I2, I3->I2)

///

/// Although the MachineScheduler is currently constrained to single blocks,

/// this algorithm should handle extended blocks. An EBB is a set of

/// contiguously numbered blocks such that the previous block in the EBB is

/// always the single predecessor.

void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {

  LiveIntervals *LIS = DAG->getLIS();

  MachineInstr *Copy = CopySU->getInstr();


  // Check for pure vreg copies.

  const MachineOperand &SrcOp = Copy->getOperand(1);

  Register SrcReg = SrcOp.getReg();

  if (!SrcReg.isVirtual() || !SrcOp.readsReg())

    return;


  const MachineOperand &DstOp = Copy->getOperand(0);

  Register DstReg = DstOp.getReg();

  if (!DstReg.isVirtual() || DstOp.isDead())

    return;


  // Check if either the dest or source is local. If it's live across a back

  // edge, it's not local. Note that if both vregs are live across the back

  // edge, we cannot successfully contrain the copy without cyclic scheduling.

  // If both the copy's source and dest are local live intervals, then we

  // should treat the dest as the global for the purpose of adding

  // constraints. This adds edges from source's other uses to the copy.

  unsigned LocalReg = SrcReg;

  unsigned GlobalReg = DstReg;

  LiveInterval *LocalLI = &LIS->getInterval(LocalReg);

  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {

    LocalReg = DstReg;

    GlobalReg = SrcReg;

    LocalLI = &LIS->getInterval(LocalReg);

    if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))

      return;

  }

  LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);


  // Find the global segment after the start of the local LI.

  LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());

  // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a

  // local live range. We could create edges from other global uses to the local

  // start, but the coalescer should have already eliminated these cases, so

  // don't bother dealing with it.

  if (GlobalSegment == GlobalLI->end())

    return;


  // If GlobalSegment is killed at the LocalLI->start, the call to find()

  // returned the next global segment. But if GlobalSegment overlaps with

  // LocalLI->start, then advance to the next segment. If a hole in GlobalLI

  // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.

  if (GlobalSegment->contains(LocalLI->beginIndex()))

    ++GlobalSegment;


  if (GlobalSegment == GlobalLI->end())

    return;


  // Check if GlobalLI contains a hole in the vicinity of LocalLI.

  if (GlobalSegment != GlobalLI->begin()) {

    // Two address defs have no hole.

    if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,

                               GlobalSegment->start)) {

      return;

    }

    // If the prior global segment may be defined by the same two-address

    // instruction that also defines LocalLI, then can't make a hole here.

    if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,

                               LocalLI->beginIndex())) {

      return;

    }

    // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise

    // it would be a disconnected component in the live range.

    assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&

           "Disconnected LRG within the scheduling region.");

  }

  MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);

  if (!GlobalDef)

    return;


  SUnit *GlobalSU = DAG->getSUnit(GlobalDef);

  if (!GlobalSU)

    return;


  // GlobalDef is the bottom of the GlobalLI hole. Open the hole by

  // constraining the uses of the last local def to precede GlobalDef.

  SmallVector<SUnit*,8> LocalUses;

  const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());

  MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);

  SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);

  for (const SDep &Succ : LastLocalSU->Succs) {

    if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)

      continue;

    if (Succ.getSUnit() == GlobalSU)

      continue;

    if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))

      return;

    LocalUses.push_back(Succ.getSUnit());

  }

  // Open the top of the GlobalLI hole by constraining any earlier global uses

  // to precede the start of LocalLI.

  SmallVector<SUnit*,8> GlobalUses;

  MachineInstr *FirstLocalDef =

    LIS->getInstructionFromIndex(LocalLI->beginIndex());

  SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);

  for (const SDep &Pred : GlobalSU->Preds) {

    if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)

      continue;

    if (Pred.getSUnit() == FirstLocalSU)

      continue;

    if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))

      return;

    GlobalUses.push_back(Pred.getSUnit());

  }

  LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");

  // Add the weak edges.

  for (SUnit *LU : LocalUses) {

    LLVM_DEBUG(dbgs() << "  Local use SU(" << LU->NodeNum << ") -> SU("

                      << GlobalSU->NodeNum << ")\n");

    DAG->addEdge(GlobalSU, SDep(LU, SDep::Weak));

  }

  for (SUnit *GU : GlobalUses) {

    LLVM_DEBUG(dbgs() << "  Global use SU(" << GU->NodeNum << ") -> SU("

                      << FirstLocalSU->NodeNum << ")\n");

    DAG->addEdge(FirstLocalSU, SDep(GU, SDep::Weak));

  }

}


/// Callback from DAG postProcessing to create weak edges to encourage

/// copy elimination.

void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {

  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);

  assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");


  MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());

  if (FirstPos == DAG->end())

    return;

  RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);

  RegionEndIdx = DAG->getLIS()->getInstructionIndex(

      *priorNonDebug(DAG->end(), DAG->begin()));


  for (SUnit &SU : DAG->SUnits) {

    if (!SU.getInstr()->isCopy())

      continue;


    constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));

  }

}


//===----------------------------------------------------------------------===//

// MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler

// and possibly other custom schedulers.

//===----------------------------------------------------------------------===//


static const unsigned InvalidCycle = ~0U;


SchedBoundary::~SchedBoundary() { delete HazardRec; }


/// Given a Count of resource usage and a Latency value, return true if a

/// SchedBoundary becomes resource limited.

/// If we are checking after scheduling a node, we should return true when

/// we just reach the resource limit.

static bool checkResourceLimit(unsigned LFactor, unsigned Count,

                               unsigned Latency, bool AfterSchedNode) {

  int ResCntFactor = (int)(Count - (Latency * LFactor));

  if (AfterSchedNode)

    return ResCntFactor >= (int)LFactor;

  else

    return ResCntFactor > (int)LFactor;

}


void SchedBoundary::reset() {

  // A new HazardRec is created for each DAG and owned by SchedBoundary.

  // Destroying and reconstructing it is very expensive though. So keep

  // invalid, placeholder HazardRecs.

  if (HazardRec && HazardRec->isEnabled()) {

    delete HazardRec;

    HazardRec = nullptr;

  }

  Available.clear();

  Pending.clear();

  CheckPending = false;

  CurrCycle = 0;

  CurrMOps = 0;

  MinReadyCycle = std::numeric_limits<unsigned>::max();

  ExpectedLatency = 0;

  DependentLatency = 0;

  RetiredMOps = 0;

  MaxExecutedResCount = 0;

  ZoneCritResIdx = 0;

  IsResourceLimited = false;

  ReservedCycles.clear();

  ReservedResourceSegments.clear();

  ReservedCyclesIndex.clear();

  ResourceGroupSubUnitMasks.clear();

#if LLVM_ENABLE_ABI_BREAKING_CHECKS

  // Track the maximum number of stall cycles that could arise either from the

  // latency of a DAG edge or the number of cycles that a processor resource is

  // reserved (SchedBoundary::ReservedCycles).

  MaxObservedStall = 0;

#endif

  // Reserve a zero-count for invalid CritResIdx.

  ExecutedResCounts.resize(1);

  assert(!ExecutedResCounts[0] && "nonzero count for bad resource");

}


void SchedRemainder::

init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {

  reset();

  if (!SchedModel->hasInstrSchedModel())

    return;

  RemainingCounts.resize(SchedModel->getNumProcResourceKinds());

  for (SUnit &SU : DAG->SUnits) {

    const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);

    RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)

      * SchedModel->getMicroOpFactor();

    for (TargetSchedModel::ProcResIter

           PI = SchedModel->getWriteProcResBegin(SC),

           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {

      unsigned PIdx = PI->ProcResourceIdx;

      unsigned Factor = SchedModel->getResourceFactor(PIdx);

      assert(PI->ReleaseAtCycle >= PI->AcquireAtCycle);

      RemainingCounts[PIdx] +=

          (Factor * (PI->ReleaseAtCycle - PI->AcquireAtCycle));

    }

  }

}


void SchedBoundary::

init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {

  reset();

  DAG = dag;

  SchedModel = smodel;

  Rem = rem;

  if (SchedModel->hasInstrSchedModel()) {

    unsigned ResourceCount = SchedModel->getNumProcResourceKinds();

    ReservedCyclesIndex.resize(ResourceCount);

    ExecutedResCounts.resize(ResourceCount);

    ResourceGroupSubUnitMasks.resize(ResourceCount, APInt(ResourceCount, 0));

    unsigned NumUnits = 0;


    for (unsigned i = 0; i < ResourceCount; ++i) {

      ReservedCyclesIndex[i] = NumUnits;

      NumUnits += SchedModel->getProcResource(i)->NumUnits;

      if (isUnbufferedGroup(i)) {

        auto SubUnits = SchedModel->getProcResource(i)->SubUnitsIdxBegin;

        for (unsigned U = 0, UE = SchedModel->getProcResource(i)->NumUnits;

             U != UE; ++U)

          ResourceGroupSubUnitMasks[i].setBit(SubUnits[U]);

      }

    }


    ReservedCycles.resize(NumUnits, InvalidCycle);

  }

}


/// Compute the stall cycles based on this SUnit's ready time. Heuristics treat

/// these "soft stalls" differently than the hard stall cycles based on CPU

/// resources and computed by checkHazard(). A fully in-order model

/// (MicroOpBufferSize==0) will not make use of this since instructions are not

/// available for scheduling until they are ready. However, a weaker in-order

/// model may use this for heuristics. For example, if a processor has in-order

/// behavior when reading certain resources, this may come into play.

unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {

  if (!SU->isUnbuffered)

    return 0;


  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);

  if (ReadyCycle > CurrCycle)

    return ReadyCycle - CurrCycle;

  return 0;

}


/// Compute the next cycle at which the given processor resource unit

/// can be scheduled.

unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx,

                                                       unsigned ReleaseAtCycle,

                                                       unsigned AcquireAtCycle) {

  if (SchedModel && SchedModel->enableIntervals()) {

    if (isTop())

      return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromTop(

          CurrCycle, AcquireAtCycle, ReleaseAtCycle);


    return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromBottom(

        CurrCycle, AcquireAtCycle, ReleaseAtCycle);

  }


  unsigned NextUnreserved = ReservedCycles[InstanceIdx];

  // If this resource has never been used, always return cycle zero.

  if (NextUnreserved == InvalidCycle)

    return CurrCycle;

  // For bottom-up scheduling add the cycles needed for the current operation.

  if (!isTop())

    NextUnreserved = std::max(CurrCycle, NextUnreserved + ReleaseAtCycle);

  return NextUnreserved;

}


/// Compute the next cycle at which the given processor resource can be

/// scheduled.  Returns the next cycle and the index of the processor resource

/// instance in the reserved cycles vector.

std::pair<unsigned, unsigned>

SchedBoundary::getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx,

                                    unsigned ReleaseAtCycle,

                                    unsigned AcquireAtCycle) {

  if (MischedDetailResourceBooking) {

    LLVM_DEBUG(dbgs() << "  Resource booking (@" << CurrCycle << "c): \n");

    LLVM_DEBUG(dumpReservedCycles());

    LLVM_DEBUG(dbgs() << "  getNextResourceCycle (@" << CurrCycle << "c): \n");

  }

  unsigned MinNextUnreserved = InvalidCycle;

  unsigned InstanceIdx = 0;

  unsigned StartIndex = ReservedCyclesIndex[PIdx];

  unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits;

  assert(NumberOfInstances > 0 &&

         "Cannot have zero instances of a ProcResource");


  if (isUnbufferedGroup(PIdx)) {

    // If any subunits are used by the instruction, report that the

    // subunits of the resource group are available at the first cycle

    // in which the unit is available, effectively removing the group

    // record from hazarding and basing the hazarding decisions on the

    // subunit records. Otherwise, choose the first available instance

    // from among the subunits.  Specifications which assign cycles to

    // both the subunits and the group or which use an unbuffered

    // group with buffered subunits will appear to schedule

    // strangely. In the first case, the additional cycles for the

    // group will be ignored.  In the second, the group will be

    // ignored entirely.

    for (const MCWriteProcResEntry &PE :

         make_range(SchedModel->getWriteProcResBegin(SC),

                    SchedModel->getWriteProcResEnd(SC)))

      if (ResourceGroupSubUnitMasks[PIdx][PE.ProcResourceIdx])

        return std::make_pair(getNextResourceCycleByInstance(

                                  StartIndex, ReleaseAtCycle, AcquireAtCycle),

                              StartIndex);


    auto SubUnits = SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin;

    for (unsigned I = 0, End = NumberOfInstances; I < End; ++I) {

      unsigned NextUnreserved, NextInstanceIdx;

      std::tie(NextUnreserved, NextInstanceIdx) =

          getNextResourceCycle(SC, SubUnits[I], ReleaseAtCycle, AcquireAtCycle);

      if (MinNextUnreserved > NextUnreserved) {

        InstanceIdx = NextInstanceIdx;

        MinNextUnreserved = NextUnreserved;

      }

    }

    return std::make_pair(MinNextUnreserved, InstanceIdx);

  }


  for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;

       ++I) {

    unsigned NextUnreserved =

        getNextResourceCycleByInstance(I, ReleaseAtCycle, AcquireAtCycle);

    if (MischedDetailResourceBooking)

      LLVM_DEBUG(dbgs() << "    Instance " << I - StartIndex << " available @"

                        << NextUnreserved << "c\n");

    if (MinNextUnreserved > NextUnreserved) {

      InstanceIdx = I;

      MinNextUnreserved = NextUnreserved;

    }

  }

  if (MischedDetailResourceBooking)

    LLVM_DEBUG(dbgs() << "    selecting " << SchedModel->getResourceName(PIdx)

                      << "[" << InstanceIdx - StartIndex << "]"

                      << " available @" << MinNextUnreserved << "c"

                      << "\n");

  return std::make_pair(MinNextUnreserved, InstanceIdx);

}


/// Does this SU have a hazard within the current instruction group.

///

/// The scheduler supports two modes of hazard recognition. The first is the

/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that

/// supports highly complicated in-order reservation tables

/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.

///

/// The second is a streamlined mechanism that checks for hazards based on

/// simple counters that the scheduler itself maintains. It explicitly checks

/// for instruction dispatch limitations, including the number of micro-ops that

/// can dispatch per cycle.

///

/// TODO: Also check whether the SU must start a new group.

bool SchedBoundary::checkHazard(SUnit *SU) {

  if (HazardRec->isEnabled()

      && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {

    return true;

  }


  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());

  if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {

    LLVM_DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="

                      << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');

    return true;

  }


  if (CurrMOps > 0 &&

      ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||

       (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {

    LLVM_DEBUG(dbgs() << "  hazard: SU(" << SU->NodeNum << ") must "

                      << (isTop() ? "begin" : "end") << " group\n");

    return true;

  }


  if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {

    const MCSchedClassDesc *SC = DAG->getSchedClass(SU);

    for (const MCWriteProcResEntry &PE :

          make_range(SchedModel->getWriteProcResBegin(SC),

                     SchedModel->getWriteProcResEnd(SC))) {

      unsigned ResIdx = PE.ProcResourceIdx;

      unsigned ReleaseAtCycle = PE.ReleaseAtCycle;

      unsigned AcquireAtCycle = PE.AcquireAtCycle;

      unsigned NRCycle, InstanceIdx;

      std::tie(NRCycle, InstanceIdx) =

          getNextResourceCycle(SC, ResIdx, ReleaseAtCycle, AcquireAtCycle);

      if (NRCycle > CurrCycle) {

#if LLVM_ENABLE_ABI_BREAKING_CHECKS

        MaxObservedStall = std::max(ReleaseAtCycle, MaxObservedStall);

#endif

        LLVM_DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") "

                          << SchedModel->getResourceName(ResIdx)

                          << '[' << InstanceIdx - ReservedCyclesIndex[ResIdx]  << ']'

                          << "=" << NRCycle << "c\n");

        return true;

      }

    }

  }

  return false;

}


// Find the unscheduled node in ReadySUs with the highest latency.

unsigned SchedBoundary::

findMaxLatency(ArrayRef<SUnit*> ReadySUs) {

  SUnit *LateSU = nullptr;

  unsigned RemLatency = 0;

  for (SUnit *SU : ReadySUs) {

    unsigned L = getUnscheduledLatency(SU);

    if (L > RemLatency) {

      RemLatency = L;

      LateSU = SU;

    }

  }

  if (LateSU) {

    LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("

                      << LateSU->NodeNum << ") " << RemLatency << "c\n");

  }

  return RemLatency;

}


// Count resources in this zone and the remaining unscheduled

// instruction. Return the max count, scaled. Set OtherCritIdx to the critical

// resource index, or zero if the zone is issue limited.

unsigned SchedBoundary::

getOtherResourceCount(unsigned &OtherCritIdx) {

  OtherCritIdx = 0;

  if (!SchedModel->hasInstrSchedModel())

    return 0;


  unsigned OtherCritCount = Rem->RemIssueCount

    + (RetiredMOps * SchedModel->getMicroOpFactor());

  LLVM_DEBUG(dbgs() << "  " << Available.getName() << " + Remain MOps: "

                    << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');

  for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();

       PIdx != PEnd; ++PIdx) {

    unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];

    if (OtherCount > OtherCritCount) {

      OtherCritCount = OtherCount;

      OtherCritIdx = PIdx;

    }

  }

  if (OtherCritIdx) {

    LLVM_DEBUG(

        dbgs() << "  " << Available.getName() << " + Remain CritRes: "

               << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)

               << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");

  }

  return OtherCritCount;

}


void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue,

                                unsigned Idx) {

  assert(SU->getInstr() && "Scheduled SUnit must have instr");


#if LLVM_ENABLE_ABI_BREAKING_CHECKS

  // ReadyCycle was been bumped up to the CurrCycle when this node was

  // scheduled, but CurrCycle may have been eagerly advanced immediately after

  // scheduling, so may now be greater than ReadyCycle.

  if (ReadyCycle > CurrCycle)

    MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);

#endif


  if (ReadyCycle < MinReadyCycle)

    MinReadyCycle = ReadyCycle;


  // Check for interlocks first. For the purpose of other heuristics, an

  // instruction that cannot issue appears as if it's not in the ReadyQueue.

  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;

  bool HazardDetected = (!IsBuffered && ReadyCycle > CurrCycle) ||

                        checkHazard(SU) || (Available.size() >= ReadyListLimit);


  if (!HazardDetected) {

    Available.push(SU);


    if (InPQueue)

      Pending.remove(Pending.begin() + Idx);

    return;

  }


  if (!InPQueue)

    Pending.push(SU);

}


/// Move the boundary of scheduled code by one cycle.

void SchedBoundary::bumpCycle(unsigned NextCycle) {

  if (SchedModel->getMicroOpBufferSize() == 0) {

    assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&

           "MinReadyCycle uninitialized");

    if (MinReadyCycle > NextCycle)

      NextCycle = MinReadyCycle;

  }

  // Update the current micro-ops, which will issue in the next cycle.

  unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);

  CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;


  // Decrement DependentLatency based on the next cycle.

  if ((NextCycle - CurrCycle) > DependentLatency)

    DependentLatency = 0;

  else

    DependentLatency -= (NextCycle - CurrCycle);


  if (!HazardRec->isEnabled()) {

    // Bypass HazardRec virtual calls.

    CurrCycle = NextCycle;

  } else {

    // Bypass getHazardType calls in case of long latency.

    for (; CurrCycle != NextCycle; ++CurrCycle) {

      if (isTop())

        HazardRec->AdvanceCycle();

      else

        HazardRec->RecedeCycle();

    }

  }

  CheckPending = true;

  IsResourceLimited =

      checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),

                         getScheduledLatency(), true);


  LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()

                    << '\n');

}


void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {

  ExecutedResCounts[PIdx] += Count;

  if (ExecutedResCounts[PIdx] > MaxExecutedResCount)

    MaxExecutedResCount = ExecutedResCounts[PIdx];

}


/// Add the given processor resource to this scheduled zone.

///

/// \param ReleaseAtCycle indicates the number of consecutive (non-pipelined)

/// cycles during which this resource is released.

///

/// \param AcquireAtCycle indicates the number of consecutive (non-pipelined)

/// cycles at which the resource is aquired after issue (assuming no stalls).

///

/// \return the next cycle at which the instruction may execute without

/// oversubscribing resources.

unsigned SchedBoundary::countResource(const MCSchedClassDesc *SC, unsigned PIdx,

                                      unsigned ReleaseAtCycle,

                                      unsigned NextCycle,

                                      unsigned AcquireAtCycle) {

  unsigned Factor = SchedModel->getResourceFactor(PIdx);

  unsigned Count = Factor * (ReleaseAtCycle- AcquireAtCycle);

  LLVM_DEBUG(dbgs() << "  " << SchedModel->getResourceName(PIdx) << " +"

                    << ReleaseAtCycle << "x" << Factor << "u\n");


  // Update Executed resources counts.

  incExecutedResources(PIdx, Count);

  assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");

  Rem->RemainingCounts[PIdx] -= Count;


  // Check if this resource exceeds the current critical resource. If so, it

  // becomes the critical resource.

  if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {

    ZoneCritResIdx = PIdx;

    LLVM_DEBUG(dbgs() << "  *** Critical resource "

                      << SchedModel->getResourceName(PIdx) << ": "

                      << getResourceCount(PIdx) / SchedModel->getLatencyFactor()

                      << "c\n");

  }

  // For reserved resources, record the highest cycle using the resource.

  unsigned NextAvailable, InstanceIdx;

  std::tie(NextAvailable, InstanceIdx) =

      getNextResourceCycle(SC, PIdx, ReleaseAtCycle, AcquireAtCycle);

  if (NextAvailable > CurrCycle) {

    LLVM_DEBUG(dbgs() << "  Resource conflict: "

                      << SchedModel->getResourceName(PIdx)

                      << '[' << InstanceIdx - ReservedCyclesIndex[PIdx]  << ']'

                      << " reserved until @" << NextAvailable << "\n");

  }

  return NextAvailable;

}


/// Move the boundary of scheduled code by one SUnit.

void SchedBoundary::bumpNode(SUnit *SU) {

  // Update the reservation table.

  if (HazardRec->isEnabled()) {

    if (!isTop() && SU->isCall) {

      // Calls are scheduled with their preceding instructions. For bottom-up

      // scheduling, clear the pipeline state before emitting.

      HazardRec->Reset();

    }

    HazardRec->EmitInstruction(SU);

    // Scheduling an instruction may have made pending instructions available.

    CheckPending = true;

  }

  // checkHazard should prevent scheduling multiple instructions per cycle that

  // exceed the issue width.

  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);

  unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());

  assert(

      (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&

      "Cannot schedule this instruction's MicroOps in the current cycle.");


  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);

  LLVM_DEBUG(dbgs() << "  Ready @" << ReadyCycle << "c\n");


  unsigned NextCycle = CurrCycle;

  switch (SchedModel->getMicroOpBufferSize()) {

  case 0:

    assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");

    break;

  case 1:

    if (ReadyCycle > NextCycle) {

      NextCycle = ReadyCycle;

      LLVM_DEBUG(dbgs() << "  *** Stall until: " << ReadyCycle << "\n");

    }

    break;

  default:

    // We don't currently model the OOO reorder buffer, so consider all

    // scheduled MOps to be "retired". We do loosely model in-order resource

    // latency. If this instruction uses an in-order resource, account for any

    // likely stall cycles.

    if (SU->isUnbuffered && ReadyCycle > NextCycle)

      NextCycle = ReadyCycle;

    break;

  }

  RetiredMOps += IncMOps;


  // Update resource counts and critical resource.

  if (SchedModel->hasInstrSchedModel()) {

    unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();

    assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");

    Rem->RemIssueCount -= DecRemIssue;

    if (ZoneCritResIdx) {

      // Scale scheduled micro-ops for comparing with the critical resource.

      unsigned ScaledMOps =

        RetiredMOps * SchedModel->getMicroOpFactor();


      // If scaled micro-ops are now more than the previous critical resource by

      // a full cycle, then micro-ops issue becomes critical.

      if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))

          >= (int)SchedModel->getLatencyFactor()) {

        ZoneCritResIdx = 0;

        LLVM_DEBUG(dbgs() << "  *** Critical resource NumMicroOps: "

                          << ScaledMOps / SchedModel->getLatencyFactor()

                          << "c\n");

      }

    }

    for (TargetSchedModel::ProcResIter

           PI = SchedModel->getWriteProcResBegin(SC),

           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {

      unsigned RCycle =

          countResource(SC, PI->ProcResourceIdx, PI->ReleaseAtCycle, NextCycle,

                        PI->AcquireAtCycle);

      if (RCycle > NextCycle)

        NextCycle = RCycle;

    }

    if (SU->hasReservedResource) {

      // For reserved resources, record the highest cycle using the resource.

      // For top-down scheduling, this is the cycle in which we schedule this

      // instruction plus the number of cycles the operations reserves the

      // resource. For bottom-up is it simply the instruction's cycle.

      for (TargetSchedModel::ProcResIter

             PI = SchedModel->getWriteProcResBegin(SC),

             PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {

        unsigned PIdx = PI->ProcResourceIdx;

        if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {


          if (SchedModel && SchedModel->enableIntervals()) {

            unsigned ReservedUntil, InstanceIdx;

            std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(

                SC, PIdx, PI->ReleaseAtCycle, PI->AcquireAtCycle);

            if (isTop()) {

              ReservedResourceSegments[InstanceIdx].add(

                  ResourceSegments::getResourceIntervalTop(

                      NextCycle, PI->AcquireAtCycle, PI->ReleaseAtCycle),

                  MIResourceCutOff);

            } else {

              ReservedResourceSegments[InstanceIdx].add(

                  ResourceSegments::getResourceIntervalBottom(

                      NextCycle, PI->AcquireAtCycle, PI->ReleaseAtCycle),

                  MIResourceCutOff);

            }

          } else {


            unsigned ReservedUntil, InstanceIdx;

            std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(

                SC, PIdx, PI->ReleaseAtCycle, PI->AcquireAtCycle);

            if (isTop()) {

              ReservedCycles[InstanceIdx] =

                  std::max(ReservedUntil, NextCycle + PI->ReleaseAtCycle);

            } else

              ReservedCycles[InstanceIdx] = NextCycle;

          }

        }

      }

    }

  }

  // Update ExpectedLatency and DependentLatency.

  unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;

  unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;

  if (SU->getDepth() > TopLatency) {

    TopLatency = SU->getDepth();

    LLVM_DEBUG(dbgs() << "  " << Available.getName() << " TopLatency SU("

                      << SU->NodeNum << ") " << TopLatency << "c\n");

  }

  if (SU->getHeight() > BotLatency) {

    BotLatency = SU->getHeight();

    LLVM_DEBUG(dbgs() << "  " << Available.getName() << " BotLatency SU("

                      << SU->NodeNum << ") " << BotLatency << "c\n");

  }

  // If we stall for any reason, bump the cycle.

  if (NextCycle > CurrCycle)

    bumpCycle(NextCycle);

  else

    // After updating ZoneCritResIdx and ExpectedLatency, check if we're

    // resource limited. If a stall occurred, bumpCycle does this.

    IsResourceLimited =

        checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),

                           getScheduledLatency(), true);


  // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle

  // resets CurrMOps. Loop to handle instructions with more MOps than issue in

  // one cycle.  Since we commonly reach the max MOps here, opportunistically

  // bump the cycle to avoid uselessly checking everything in the readyQ.

  CurrMOps += IncMOps;


  // Bump the cycle count for issue group constraints.

  // This must be done after NextCycle has been adjust for all other stalls.

  // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set

  // currCycle to X.

  if ((isTop() &&  SchedModel->mustEndGroup(SU->getInstr())) ||

      (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {

    LLVM_DEBUG(dbgs() << "  Bump cycle to " << (isTop() ? "end" : "begin")

                      << " group\n");

    bumpCycle(++NextCycle);

  }


  while (CurrMOps >= SchedModel->getIssueWidth()) {

    LLVM_DEBUG(dbgs() << "  *** Max MOps " << CurrMOps << " at cycle "

                      << CurrCycle << '\n');

    bumpCycle(++NextCycle);

  }

  LLVM_DEBUG(dumpScheduledState());

}


/// Release pending ready nodes in to the available queue. This makes them

/// visible to heuristics.

void SchedBoundary::releasePending() {

  // If the available queue is empty, it is safe to reset MinReadyCycle.

  if (Available.empty())

    MinReadyCycle = std::numeric_limits<unsigned>::max();


  // Check to see if any of the pending instructions are ready to issue.  If

  // so, add them to the available queue.

  for (unsigned I = 0, E = Pending.size(); I < E; ++I) {

    SUnit *SU = *(Pending.begin() + I);

    unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;


    if (ReadyCycle < MinReadyCycle)

      MinReadyCycle = ReadyCycle;


    if (Available.size() >= ReadyListLimit)

      break;


    releaseNode(SU, ReadyCycle, true, I);

    if (E != Pending.size()) {

      --I;

      --E;

    }

  }

  CheckPending = false;

}


/// Remove SU from the ready set for this boundary.

void SchedBoundary::removeReady(SUnit *SU) {

  if (Available.isInQueue(SU))

    Available.remove(Available.find(SU));

  else {

    assert(Pending.isInQueue(SU) && "bad ready count");

    Pending.remove(Pending.find(SU));

  }

}


/// If this queue only has one ready candidate, return it. As a side effect,

/// defer any nodes that now hit a hazard, and advance the cycle until at least

/// one node is ready. If multiple instructions are ready, return NULL.

SUnit *SchedBoundary::pickOnlyChoice() {

  if (CheckPending)

    releasePending();


  // Defer any ready instrs that now have a hazard.

  for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {

    if (checkHazard(*I)) {

      Pending.push(*I);

      I = Available.remove(I);

      continue;

    }

    ++I;

  }

  for (unsigned i = 0; Available.empty(); ++i) {

//  FIXME: Re-enable assert once PR20057 is resolved.

//    assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&

//           "permanent hazard");

    (void)i;

    bumpCycle(CurrCycle + 1);

    releasePending();

  }


  LLVM_DEBUG(Pending.dump());

  LLVM_DEBUG(Available.dump());


  if (Available.size() == 1)

    return *Available.begin();

  return nullptr;

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Dump the content of the \ref ReservedCycles vector for the

/// resources that are used in the basic block.

///

LLVM_DUMP_METHOD void SchedBoundary::dumpReservedCycles() const {

  if (!SchedModel->hasInstrSchedModel())

    return;


  unsigned ResourceCount = SchedModel->getNumProcResourceKinds();

  unsigned StartIdx = 0;


  for (unsigned ResIdx = 0; ResIdx < ResourceCount; ++ResIdx) {

    const unsigned NumUnits = SchedModel->getProcResource(ResIdx)->NumUnits;

    std::string ResName = SchedModel->getResourceName(ResIdx);

    for (unsigned UnitIdx = 0; UnitIdx < NumUnits; ++UnitIdx) {

      dbgs() << ResName << "(" << UnitIdx << ") = ";

      if (SchedModel && SchedModel->enableIntervals()) {

        if (ReservedResourceSegments.count(StartIdx + UnitIdx))

          dbgs() << ReservedResourceSegments.at(StartIdx + UnitIdx);

        else

          dbgs() << "{ }\n";

      } else

        dbgs() << ReservedCycles[StartIdx + UnitIdx] << "\n";

    }

    StartIdx += NumUnits;

  }

}


// This is useful information to dump after bumpNode.

// Note that the Queue contents are more useful before pickNodeFromQueue.

LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {

  unsigned ResFactor;

  unsigned ResCount;

  if (ZoneCritResIdx) {

    ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);

    ResCount = getResourceCount(ZoneCritResIdx);

  } else {

    ResFactor = SchedModel->getMicroOpFactor();

    ResCount = RetiredMOps * ResFactor;

  }

  unsigned LFactor = SchedModel->getLatencyFactor();

  dbgs() << Available.getName() << " @" << CurrCycle << "c\n"

         << "  Retired: " << RetiredMOps;

  dbgs() << "\n  Executed: " << getExecutedCount() / LFactor << "c";

  dbgs() << "\n  Critical: " << ResCount / LFactor << "c, "

         << ResCount / ResFactor << " "

         << SchedModel->getResourceName(ZoneCritResIdx)

         << "\n  ExpectedLatency: " << ExpectedLatency << "c\n"

         << (IsResourceLimited ? "  - Resource" : "  - Latency")

         << " limited.\n";

  if (MISchedDumpReservedCycles)

    dumpReservedCycles();

}

#endif


//===----------------------------------------------------------------------===//

// GenericScheduler - Generic implementation of MachineSchedStrategy.

//===----------------------------------------------------------------------===//


void GenericSchedulerBase::SchedCandidate::

initResourceDelta(const ScheduleDAGMI *DAG,

                  const TargetSchedModel *SchedModel) {

  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)

    return;


  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);

  for (TargetSchedModel::ProcResIter

         PI = SchedModel->getWriteProcResBegin(SC),

         PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {

    if (PI->ProcResourceIdx == Policy.ReduceResIdx)

      ResDelta.CritResources += PI->ReleaseAtCycle;

    if (PI->ProcResourceIdx == Policy.DemandResIdx)

      ResDelta.DemandedResources += PI->ReleaseAtCycle;

  }

}


/// Compute remaining latency. We need this both to determine whether the

/// overall schedule has become latency-limited and whether the instructions

/// outside this zone are resource or latency limited.

///

/// The "dependent" latency is updated incrementally during scheduling as the

/// max height/depth of scheduled nodes minus the cycles since it was

/// scheduled:

///   DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone

///

/// The "independent" latency is the max ready queue depth:

///   ILat = max N.depth for N in Available|Pending

///

/// RemainingLatency is the greater of independent and dependent latency.

///

/// These computations are expensive, especially in DAGs with many edges, so

/// only do them if necessary.

static unsigned computeRemLatency(SchedBoundary &CurrZone) {

  unsigned RemLatency = CurrZone.getDependentLatency();

  RemLatency = std::max(RemLatency,

                        CurrZone.findMaxLatency(CurrZone.Available.elements()));

  RemLatency = std::max(RemLatency,

                        CurrZone.findMaxLatency(CurrZone.Pending.elements()));

  return RemLatency;

}


/// Returns true if the current cycle plus remaning latency is greater than

/// the critical path in the scheduling region.

bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,

                                               SchedBoundary &CurrZone,

                                               bool ComputeRemLatency,

                                               unsigned &RemLatency) const {

  // The current cycle is already greater than the critical path, so we are

  // already latency limited and don't need to compute the remaining latency.

  if (CurrZone.getCurrCycle() > Rem.CriticalPath)

    return true;


  // If we haven't scheduled anything yet, then we aren't latency limited.

  if (CurrZone.getCurrCycle() == 0)

    return false;


  if (ComputeRemLatency)

    RemLatency = computeRemLatency(CurrZone);


  return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;

}


/// Set the CandPolicy given a scheduling zone given the current resources and

/// latencies inside and outside the zone.

void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,

                                     SchedBoundary &CurrZone,

                                     SchedBoundary *OtherZone) {

  // Apply preemptive heuristics based on the total latency and resources

  // inside and outside this zone. Potential stalls should be considered before

  // following this policy.


  // Compute the critical resource outside the zone.

  unsigned OtherCritIdx = 0;

  unsigned OtherCount =

    OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;


  bool OtherResLimited = false;

  unsigned RemLatency = 0;

  bool RemLatencyComputed = false;

  if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {

    RemLatency = computeRemLatency(CurrZone);

    RemLatencyComputed = true;

    OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),

                                         OtherCount, RemLatency, false);

  }


  // Schedule aggressively for latency in PostRA mode. We don't check for

  // acyclic latency during PostRA, and highly out-of-order processors will

  // skip PostRA scheduling.

  if (!OtherResLimited &&

      (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,

                                       RemLatency))) {

    Policy.ReduceLatency |= true;

    LLVM_DEBUG(dbgs() << "  " << CurrZone.Available.getName()

                      << " RemainingLatency " << RemLatency << " + "

                      << CurrZone.getCurrCycle() << "c > CritPath "

                      << Rem.CriticalPath << "\n");

  }

  // If the same resource is limiting inside and outside the zone, do nothing.

  if (CurrZone.getZoneCritResIdx() == OtherCritIdx)

    return;


  LLVM_DEBUG(if (CurrZone.isResourceLimited()) {

    dbgs() << "  " << CurrZone.Available.getName() << " ResourceLimited: "

           << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";

  } if (OtherResLimited) dbgs()

                 << "  RemainingLimit: "

                 << SchedModel->getResourceName(OtherCritIdx) << "\n";

             if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()

             << "  Latency limited both directions.\n");


  if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)

    Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();


  if (OtherResLimited)

    Policy.DemandResIdx = OtherCritIdx;

}


#ifndef NDEBUG

const char *GenericSchedulerBase::getReasonStr(

  GenericSchedulerBase::CandReason Reason) {

  switch (Reason) {

  case NoCand:         return "NOCAND    ";

  case Only1:          return "ONLY1     ";

  case PhysReg:        return "PHYS-REG  ";

  case RegExcess:      return "REG-EXCESS";

  case RegCritical:    return "REG-CRIT  ";

  case Stall:          return "STALL     ";

  case Cluster:        return "CLUSTER   ";

  case Weak:           return "WEAK      ";

  case RegMax:         return "REG-MAX   ";

  case ResourceReduce: return "RES-REDUCE";

  case ResourceDemand: return "RES-DEMAND";

  case TopDepthReduce: return "TOP-DEPTH ";

  case TopPathReduce:  return "TOP-PATH  ";

  case BotHeightReduce:return "BOT-HEIGHT";

  case BotPathReduce:  return "BOT-PATH  ";

  case NextDefUse:     return "DEF-USE   ";

  case NodeOrder:      return "ORDER     ";

  };

  llvm_unreachable("Unknown reason!");

}


void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {

  PressureChange P;

  unsigned ResIdx = 0;

  unsigned Latency = 0;

  switch (Cand.Reason) {

  default:

    break;

  case RegExcess:

    P = Cand.RPDelta.Excess;

    break;

  case RegCritical:

    P = Cand.RPDelta.CriticalMax;

    break;

  case RegMax:

    P = Cand.RPDelta.CurrentMax;

    break;

  case ResourceReduce:

    ResIdx = Cand.Policy.ReduceResIdx;

    break;

  case ResourceDemand:

    ResIdx = Cand.Policy.DemandResIdx;

    break;

  case TopDepthReduce:

    Latency = Cand.SU->getDepth();

    break;

  case TopPathReduce:

    Latency = Cand.SU->getHeight();

    break;

  case BotHeightReduce:

    Latency = Cand.SU->getHeight();

    break;

  case BotPathReduce:

    Latency = Cand.SU->getDepth();

    break;

  }

  dbgs() << "  Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);

  if (P.isValid())

    dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())

           << ":" << P.getUnitInc() << " ";

  else

    dbgs() << "      ";

  if (ResIdx)

    dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";

  else

    dbgs() << "         ";

  if (Latency)

    dbgs() << " " << Latency << " cycles ";

  else

    dbgs() << "          ";

  dbgs() << '\n';

}

#endif


namespace llvm {

/// Return true if this heuristic determines order.

/// TODO: Consider refactor return type of these functions as integer or enum,

/// as we may need to differentiate whether TryCand is better than Cand.

bool tryLess(int TryVal, int CandVal,

             GenericSchedulerBase::SchedCandidate &TryCand,

             GenericSchedulerBase::SchedCandidate &Cand,

             GenericSchedulerBase::CandReason Reason) {

  if (TryVal < CandVal) {

    TryCand.Reason = Reason;

    return true;

  }

  if (TryVal > CandVal) {

    if (Cand.Reason > Reason)

      Cand.Reason = Reason;

    return true;

  }

  return false;

}


bool tryGreater(int TryVal, int CandVal,

                GenericSchedulerBase::SchedCandidate &TryCand,

                GenericSchedulerBase::SchedCandidate &Cand,

                GenericSchedulerBase::CandReason Reason) {

  if (TryVal > CandVal) {

    TryCand.Reason = Reason;

    return true;

  }

  if (TryVal < CandVal) {

    if (Cand.Reason > Reason)

      Cand.Reason = Reason;

    return true;

  }

  return false;

}


bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,

                GenericSchedulerBase::SchedCandidate &Cand,

                SchedBoundary &Zone) {

  if (Zone.isTop()) {

    // Prefer the candidate with the lesser depth, but only if one of them has

    // depth greater than the total latency scheduled so far, otherwise either

    // of them could be scheduled now with no stall.

    if (std::max(TryCand.SU->getDepth(), Cand.SU->getDepth()) >

        Zone.getScheduledLatency()) {

      if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),

                  TryCand, Cand, GenericSchedulerBase::TopDepthReduce))

        return true;

    }

    if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),

                   TryCand, Cand, GenericSchedulerBase::TopPathReduce))

      return true;

  } else {

    // Prefer the candidate with the lesser height, but only if one of them has

    // height greater than the total latency scheduled so far, otherwise either

    // of them could be scheduled now with no stall.

    if (std::max(TryCand.SU->getHeight(), Cand.SU->getHeight()) >

        Zone.getScheduledLatency()) {

      if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),

                  TryCand, Cand, GenericSchedulerBase::BotHeightReduce))

        return true;

    }

    if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),

                   TryCand, Cand, GenericSchedulerBase::BotPathReduce))

      return true;

  }

  return false;

}

} // end namespace llvm


static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {

  LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")

                    << GenericSchedulerBase::getReasonStr(Reason) << '\n');

}


static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {

  tracePick(Cand.Reason, Cand.AtTop);

}


void GenericScheduler::initialize(ScheduleDAGMI *dag) {

  assert(dag->hasVRegLiveness() &&

         "(PreRA)GenericScheduler needs vreg liveness");

  DAG = static_cast<ScheduleDAGMILive*>(dag);

  SchedModel = DAG->getSchedModel();

  TRI = DAG->TRI;


  if (RegionPolicy.ComputeDFSResult)

    DAG->computeDFSResult();


  Rem.init(DAG, SchedModel);

  Top.init(DAG, SchedModel, &Rem);

  Bot.init(DAG, SchedModel, &Rem);


  // Initialize resource counts.


  // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or

  // are disabled, then these HazardRecs will be disabled.

  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();

  if (!Top.HazardRec) {

    Top.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);

  }

  if (!Bot.HazardRec) {

    Bot.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);

  }

  TopCand.SU = nullptr;

  BotCand.SU = nullptr;

}


/// Initialize the per-region scheduling policy.

void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,

                                  MachineBasicBlock::iterator End,

                                  unsigned NumRegionInstrs) {

  const MachineFunction &MF = *Begin->getMF();

  const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();


  // Avoid setting up the register pressure tracker for small regions to save

  // compile time. As a rough heuristic, only track pressure when the number of

  // schedulable instructions exceeds half the allocatable integer register file

  // that is the largest legal integer regiser type.

  RegionPolicy.ShouldTrackPressure = true;

  for (unsigned VT = MVT::i64; VT > (unsigned)MVT::i1; --VT) {

    MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;

    if (TLI->isTypeLegal(LegalIntVT)) {

      unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(

        TLI->getRegClassFor(LegalIntVT));

      RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);

      break;

    }

  }


  // For generic targets, we default to bottom-up, because it's simpler and more

  // compile-time optimizations have been implemented in that direction.

  RegionPolicy.OnlyBottomUp = true;


  // Allow the subtarget to override default policy.

  MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);


  // After subtarget overrides, apply command line options.

  if (!EnableRegPressure) {

    RegionPolicy.ShouldTrackPressure = false;

    RegionPolicy.ShouldTrackLaneMasks = false;

  }


  // Check -misched-topdown/bottomup can force or unforce scheduling direction.

  // e.g. -misched-bottomup=false allows scheduling in both directions.

  assert((!ForceTopDown || !ForceBottomUp) &&

         "-misched-topdown incompatible with -misched-bottomup");

  if (ForceBottomUp.getNumOccurrences() > 0) {

    RegionPolicy.OnlyBottomUp = ForceBottomUp;

    if (RegionPolicy.OnlyBottomUp)

      RegionPolicy.OnlyTopDown = false;

  }

  if (ForceTopDown.getNumOccurrences() > 0) {

    RegionPolicy.OnlyTopDown = ForceTopDown;

    if (RegionPolicy.OnlyTopDown)

      RegionPolicy.OnlyBottomUp = false;

  }

}


void GenericScheduler::dumpPolicy() const {

  // Cannot completely remove virtual function even in release mode.

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

  dbgs() << "GenericScheduler RegionPolicy: "

         << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure

         << " OnlyTopDown=" << RegionPolicy.OnlyTopDown

         << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp

         << "\n";

#endif

}


/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic

/// critical path by more cycles than it takes to drain the instruction buffer.

/// We estimate an upper bounds on in-flight instructions as:

///

/// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )

/// InFlightIterations = AcyclicPath / CyclesPerIteration

/// InFlightResources = InFlightIterations * LoopResources

///

/// TODO: Check execution resources in addition to IssueCount.

void GenericScheduler::checkAcyclicLatency() {

  if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)

    return;


  // Scaled number of cycles per loop iteration.

  unsigned IterCount =

    std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),

             Rem.RemIssueCount);

  // Scaled acyclic critical path.

  unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();

  // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop

  unsigned InFlightCount =

    (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;

  unsigned BufferLimit =

    SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();


  Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;


  LLVM_DEBUG(

      dbgs() << "IssueCycles="

             << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "

             << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()

             << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount

             << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()

             << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";

      if (Rem.IsAcyclicLatencyLimited) dbgs() << "  ACYCLIC LATENCY LIMIT\n");

}


void GenericScheduler::registerRoots() {

  Rem.CriticalPath = DAG->ExitSU.getDepth();


  // Some roots may not feed into ExitSU. Check all of them in case.

  for (const SUnit *SU : Bot.Available) {

    if (SU->getDepth() > Rem.CriticalPath)

      Rem.CriticalPath = SU->getDepth();

  }

  LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');

  if (DumpCriticalPathLength) {

    errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";

  }


  if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {

    Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();

    checkAcyclicLatency();

  }

}


namespace llvm {

bool tryPressure(const PressureChange &TryP,

                 const PressureChange &CandP,

                 GenericSchedulerBase::SchedCandidate &TryCand,

                 GenericSchedulerBase::SchedCandidate &Cand,

                 GenericSchedulerBase::CandReason Reason,

                 const TargetRegisterInfo *TRI,

                 const MachineFunction &MF) {

  // If one candidate decreases and the other increases, go with it.

  // Invalid candidates have UnitInc==0.

  if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,

                 Reason)) {

    return true;

  }

  // Do not compare the magnitude of pressure changes between top and bottom

  // boundary.

  if (Cand.AtTop != TryCand.AtTop)

    return false;


  // If both candidates affect the same set in the same boundary, go with the

  // smallest increase.

  unsigned TryPSet = TryP.getPSetOrMax();

  unsigned CandPSet = CandP.getPSetOrMax();

  if (TryPSet == CandPSet) {

    return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,

                   Reason);

  }


  int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :

                                 std::numeric_limits<int>::max();


  int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :

                                   std::numeric_limits<int>::max();


  // If the candidates are decreasing pressure, reverse priority.

  if (TryP.getUnitInc() < 0)

    std::swap(TryRank, CandRank);

  return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);

}


unsigned getWeakLeft(const SUnit *SU, bool isTop) {

  return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;

}


/// Minimize physical register live ranges. Regalloc wants them adjacent to

/// their physreg def/use.

///

/// FIXME: This is an unnecessary check on the critical path. Most are root/leaf

/// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled

/// with the operation that produces or consumes the physreg. We'll do this when

/// regalloc has support for parallel copies.

int biasPhysReg(const SUnit *SU, bool isTop) {

  const MachineInstr *MI = SU->getInstr();


  if (MI->isCopy()) {

    unsigned ScheduledOper = isTop ? 1 : 0;

    unsigned UnscheduledOper = isTop ? 0 : 1;

    // If we have already scheduled the physreg produce/consumer, immediately

    // schedule the copy.

    if (MI->getOperand(ScheduledOper).getReg().isPhysical())

      return 1;

    // If the physreg is at the boundary, defer it. Otherwise schedule it

    // immediately to free the dependent. We can hoist the copy later.

    bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;

    if (MI->getOperand(UnscheduledOper).getReg().isPhysical())

      return AtBoundary ? -1 : 1;

  }


  if (MI->isMoveImmediate()) {

    // If we have a move immediate and all successors have been assigned, bias

    // towards scheduling this later. Make sure all register defs are to

    // physical registers.

    bool DoBias = true;

    for (const MachineOperand &Op : MI->defs()) {

      if (Op.isReg() && !Op.getReg().isPhysical()) {

        DoBias = false;

        break;

      }

    }


    if (DoBias)

      return isTop ? -1 : 1;

  }


  return 0;

}

} // end namespace llvm


void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,

                                     bool AtTop,

                                     const RegPressureTracker &RPTracker,

                                     RegPressureTracker &TempTracker) {

  Cand.SU = SU;

  Cand.AtTop = AtTop;

  if (DAG->isTrackingPressure()) {

    if (AtTop) {

      TempTracker.getMaxDownwardPressureDelta(

        Cand.SU->getInstr(),

        Cand.RPDelta,

        DAG->getRegionCriticalPSets(),

        DAG->getRegPressure().MaxSetPressure);

    } else {

      if (VerifyScheduling) {

        TempTracker.getMaxUpwardPressureDelta(

          Cand.SU->getInstr(),

          &DAG->getPressureDiff(Cand.SU),

          Cand.RPDelta,

          DAG->getRegionCriticalPSets(),

          DAG->getRegPressure().MaxSetPressure);

      } else {

        RPTracker.getUpwardPressureDelta(

          Cand.SU->getInstr(),

          DAG->getPressureDiff(Cand.SU),

          Cand.RPDelta,

          DAG->getRegionCriticalPSets(),

          DAG->getRegPressure().MaxSetPressure);

      }

    }

  }

  LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs()

             << "  Try  SU(" << Cand.SU->NodeNum << ") "

             << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":"

             << Cand.RPDelta.Excess.getUnitInc() << "\n");

}


/// Apply a set of heuristics to a new candidate. Heuristics are currently

/// hierarchical. This may be more efficient than a graduated cost model because

/// we don't need to evaluate all aspects of the model for each node in the

/// queue. But it's really done to make the heuristics easier to debug and

/// statistically analyze.

///

/// \param Cand provides the policy and current best candidate.

/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.

/// \param Zone describes the scheduled zone that we are extending, or nullptr

///             if Cand is from a different zone than TryCand.

/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)

bool GenericScheduler::tryCandidate(SchedCandidate &Cand,

                                    SchedCandidate &TryCand,

                                    SchedBoundary *Zone) const {

  // Initialize the candidate if needed.

  if (!Cand.isValid()) {

    TryCand.Reason = NodeOrder;

    return true;

  }


  // Bias PhysReg Defs and copies to their uses and defined respectively.

  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),

                 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))

    return TryCand.Reason != NoCand;


  // Avoid exceeding the target's limit.

  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,

                                               Cand.RPDelta.Excess,

                                               TryCand, Cand, RegExcess, TRI,

                                               DAG->MF))

    return TryCand.Reason != NoCand;


  // Avoid increasing the max critical pressure in the scheduled region.

  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,

                                               Cand.RPDelta.CriticalMax,

                                               TryCand, Cand, RegCritical, TRI,

                                               DAG->MF))

    return TryCand.Reason != NoCand;


  // We only compare a subset of features when comparing nodes between

  // Top and Bottom boundary. Some properties are simply incomparable, in many

  // other instances we should only override the other boundary if something

  // is a clear good pick on one boundary. Skip heuristics that are more

  // "tie-breaking" in nature.

  bool SameBoundary = Zone != nullptr;

  if (SameBoundary) {

    // For loops that are acyclic path limited, aggressively schedule for

    // latency. Within an single cycle, whenever CurrMOps > 0, allow normal

    // heuristics to take precedence.

    if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&

        tryLatency(TryCand, Cand, *Zone))

      return TryCand.Reason != NoCand;


    // Prioritize instructions that read unbuffered resources by stall cycles.

    if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),

                Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))

      return TryCand.Reason != NoCand;

  }


  // Keep clustered nodes together to encourage downstream peephole

  // optimizations which may reduce resource requirements.

  //

  // This is a best effort to set things up for a post-RA pass. Optimizations

  // like generating loads of multiple registers should ideally be done within

  // the scheduler pass by combining the loads during DAG postprocessing.

  const SUnit *CandNextClusterSU =

    Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();

  const SUnit *TryCandNextClusterSU =

    TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();

  if (tryGreater(TryCand.SU == TryCandNextClusterSU,

                 Cand.SU == CandNextClusterSU,

                 TryCand, Cand, Cluster))

    return TryCand.Reason != NoCand;


  if (SameBoundary) {

    // Weak edges are for clustering and other constraints.

    if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),

                getWeakLeft(Cand.SU, Cand.AtTop),

                TryCand, Cand, Weak))

      return TryCand.Reason != NoCand;

  }


  // Avoid increasing the max pressure of the entire region.

  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,

                                               Cand.RPDelta.CurrentMax,

                                               TryCand, Cand, RegMax, TRI,

                                               DAG->MF))

    return TryCand.Reason != NoCand;


  if (SameBoundary) {

    // Avoid critical resource consumption and balance the schedule.

    TryCand.initResourceDelta(DAG, SchedModel);

    if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,

                TryCand, Cand, ResourceReduce))

      return TryCand.Reason != NoCand;

    if (tryGreater(TryCand.ResDelta.DemandedResources,

                   Cand.ResDelta.DemandedResources,

                   TryCand, Cand, ResourceDemand))

      return TryCand.Reason != NoCand;


    // Avoid serializing long latency dependence chains.

    // For acyclic path limited loops, latency was already checked above.

    if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&

        !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))

      return TryCand.Reason != NoCand;


    // Fall through to original instruction order.

    if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)

        || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {

      TryCand.Reason = NodeOrder;

      return true;

    }

  }


  return false;

}


/// Pick the best candidate from the queue.

///

/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during

/// DAG building. To adjust for the current scheduling location we need to

/// maintain the number of vreg uses remaining to be top-scheduled.

void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,

                                         const CandPolicy &ZonePolicy,

                                         const RegPressureTracker &RPTracker,

                                         SchedCandidate &Cand) {

  // getMaxPressureDelta temporarily modifies the tracker.

  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);


  ReadyQueue &Q = Zone.Available;

  for (SUnit *SU : Q) {


    SchedCandidate TryCand(ZonePolicy);

    initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);

    // Pass SchedBoundary only when comparing nodes from the same boundary.

    SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;

    if (tryCandidate(Cand, TryCand, ZoneArg)) {

      // Initialize resource delta if needed in case future heuristics query it.

      if (TryCand.ResDelta == SchedResourceDelta())

        TryCand.initResourceDelta(DAG, SchedModel);

      Cand.setBest(TryCand);

      LLVM_DEBUG(traceCandidate(Cand));

    }

  }

}


/// Pick the best candidate node from either the top or bottom queue.

SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {

  // Schedule as far as possible in the direction of no choice. This is most

  // efficient, but also provides the best heuristics for CriticalPSets.

  if (SUnit *SU = Bot.pickOnlyChoice()) {

    IsTopNode = false;

    tracePick(Only1, false);

    return SU;

  }

  if (SUnit *SU = Top.pickOnlyChoice()) {

    IsTopNode = true;

    tracePick(Only1, true);

    return SU;

  }

  // Set the bottom-up policy based on the state of the current bottom zone and

  // the instructions outside the zone, including the top zone.

  CandPolicy BotPolicy;

  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);

  // Set the top-down policy based on the state of the current top zone and

  // the instructions outside the zone, including the bottom zone.

  CandPolicy TopPolicy;

  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);


  // See if BotCand is still valid (because we previously scheduled from Top).

  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");

  if (!BotCand.isValid() || BotCand.SU->isScheduled ||

      BotCand.Policy != BotPolicy) {

    BotCand.reset(CandPolicy());

    pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);

    assert(BotCand.Reason != NoCand && "failed to find the first candidate");

  } else {

    LLVM_DEBUG(traceCandidate(BotCand));

#ifndef NDEBUG

    if (VerifyScheduling) {

      SchedCandidate TCand;

      TCand.reset(CandPolicy());

      pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);

      assert(TCand.SU == BotCand.SU &&

             "Last pick result should correspond to re-picking right now");

    }

#endif

  }


  // Check if the top Q has a better candidate.

  LLVM_DEBUG(dbgs() << "Picking from Top:\n");

  if (!TopCand.isValid() || TopCand.SU->isScheduled ||

      TopCand.Policy != TopPolicy) {

    TopCand.reset(CandPolicy());

    pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);

    assert(TopCand.Reason != NoCand && "failed to find the first candidate");

  } else {

    LLVM_DEBUG(traceCandidate(TopCand));

#ifndef NDEBUG

    if (VerifyScheduling) {

      SchedCandidate TCand;

      TCand.reset(CandPolicy());

      pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);

      assert(TCand.SU == TopCand.SU &&

             "Last pick result should correspond to re-picking right now");

    }

#endif

  }


  // Pick best from BotCand and TopCand.

  assert(BotCand.isValid());

  assert(TopCand.isValid());

  SchedCandidate Cand = BotCand;

  TopCand.Reason = NoCand;

  if (tryCandidate(Cand, TopCand, nullptr)) {

    Cand.setBest(TopCand);

    LLVM_DEBUG(traceCandidate(Cand));

  }


  IsTopNode = Cand.AtTop;

  tracePick(Cand);

  return Cand.SU;

}


/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.

SUnit *GenericScheduler::pickNode(bool &IsTopNode) {

  if (DAG->top() == DAG->bottom()) {

    assert(Top.Available.empty() && Top.Pending.empty() &&

           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");

    return nullptr;

  }

  SUnit *SU;

  do {

    if (RegionPolicy.OnlyTopDown) {

      SU = Top.pickOnlyChoice();

      if (!SU) {

        CandPolicy NoPolicy;

        TopCand.reset(NoPolicy);

        pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);

        assert(TopCand.Reason != NoCand && "failed to find a candidate");

        tracePick(TopCand);

        SU = TopCand.SU;

      }

      IsTopNode = true;

    } else if (RegionPolicy.OnlyBottomUp) {

      SU = Bot.pickOnlyChoice();

      if (!SU) {

        CandPolicy NoPolicy;

        BotCand.reset(NoPolicy);

        pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);

        assert(BotCand.Reason != NoCand && "failed to find a candidate");

        tracePick(BotCand);

        SU = BotCand.SU;

      }

      IsTopNode = false;

    } else {

      SU = pickNodeBidirectional(IsTopNode);

    }

  } while (SU->isScheduled);


  if (SU->isTopReady())

    Top.removeReady(SU);

  if (SU->isBottomReady())

    Bot.removeReady(SU);


  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "

                    << *SU->getInstr());

  return SU;

}


void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) {

  MachineBasicBlock::iterator InsertPos = SU->getInstr();

  if (!isTop)

    ++InsertPos;

  SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;


  // Find already scheduled copies with a single physreg dependence and move

  // them just above the scheduled instruction.

  for (SDep &Dep : Deps) {

    if (Dep.getKind() != SDep::Data ||

        !Register::isPhysicalRegister(Dep.getReg()))

      continue;

    SUnit *DepSU = Dep.getSUnit();

    if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)

      continue;

    MachineInstr *Copy = DepSU->getInstr();

    if (!Copy->isCopy() && !Copy->isMoveImmediate())

      continue;

    LLVM_DEBUG(dbgs() << "  Rescheduling physreg copy ";

               DAG->dumpNode(*Dep.getSUnit()));

    DAG->moveInstruction(Copy, InsertPos);

  }

}


/// Update the scheduler's state after scheduling a node. This is the same node

/// that was just returned by pickNode(). However, ScheduleDAGMILive needs to

/// update it's state based on the current cycle before MachineSchedStrategy

/// does.

///

/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling

/// them here. See comments in biasPhysReg.

void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {

  if (IsTopNode) {

    SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());

    Top.bumpNode(SU);

    if (SU->hasPhysRegUses)

      reschedulePhysReg(SU, true);

  } else {

    SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());

    Bot.bumpNode(SU);

    if (SU->hasPhysRegDefs)

      reschedulePhysReg(SU, false);

  }

}


/// Create the standard converging machine scheduler. This will be used as the

/// default scheduler if the target does not set a default.

ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {

  ScheduleDAGMILive *DAG =

      new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));

  // Register DAG post-processors.

  //

  // FIXME: extend the mutation API to allow earlier mutations to instantiate

  // data and pass it to later mutations. Have a single mutation that gathers

  // the interesting nodes in one pass.

  DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));


  const TargetSubtargetInfo &STI = C->MF->getSubtarget();

  // Add MacroFusion mutation if fusions are not empty.

  const auto &MacroFusions = STI.getMacroFusions();

  if (!MacroFusions.empty())

    DAG->addMutation(createMacroFusionDAGMutation(MacroFusions));

  return DAG;

}


static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {

  return createGenericSchedLive(C);

}


static MachineSchedRegistry

GenericSchedRegistry("converge", "Standard converging scheduler.",

                     createConvergingSched);


//===----------------------------------------------------------------------===//

// PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.

//===----------------------------------------------------------------------===//


void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {

  DAG = Dag;

  SchedModel = DAG->getSchedModel();

  TRI = DAG->TRI;


  Rem.init(DAG, SchedModel);

  Top.init(DAG, SchedModel, &Rem);

  Bot.init(DAG, SchedModel, &Rem);


  // Initialize the HazardRecognizers. If itineraries don't exist, are empty,

  // or are disabled, then these HazardRecs will be disabled.

  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();

  if (!Top.HazardRec) {

    Top.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);

  }

  if (!Bot.HazardRec) {

    Bot.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);

  }

}


void PostGenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,

                                      MachineBasicBlock::iterator End,

                                      unsigned NumRegionInstrs) {

  if (PostRADirection == MISchedPostRASched::TopDown) {

    RegionPolicy.OnlyTopDown = true;

    RegionPolicy.OnlyBottomUp = false;

  } else if (PostRADirection == MISchedPostRASched::BottomUp) {

    RegionPolicy.OnlyTopDown = false;

    RegionPolicy.OnlyBottomUp = true;

  } else if (PostRADirection == MISchedPostRASched::Bidirectional) {

    RegionPolicy.OnlyBottomUp = false;

    RegionPolicy.OnlyTopDown = false;

  }

}


void PostGenericScheduler::registerRoots() {

  Rem.CriticalPath = DAG->ExitSU.getDepth();


  // Some roots may not feed into ExitSU. Check all of them in case.

  for (const SUnit *SU : Bot.Available) {

    if (SU->getDepth() > Rem.CriticalPath)

      Rem.CriticalPath = SU->getDepth();

  }

  LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');

  if (DumpCriticalPathLength) {

    errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";

  }

}


/// Apply a set of heuristics to a new candidate for PostRA scheduling.

///

/// \param Cand provides the policy and current best candidate.

/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.

/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)

bool PostGenericScheduler::tryCandidate(SchedCandidate &Cand,

                                        SchedCandidate &TryCand) {

  // Initialize the candidate if needed.

  if (!Cand.isValid()) {

    TryCand.Reason = NodeOrder;

    return true;

  }


  // Prioritize instructions that read unbuffered resources by stall cycles.

  if (tryLess(Top.getLatencyStallCycles(TryCand.SU),

              Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))

    return TryCand.Reason != NoCand;


  // Keep clustered nodes together.

  if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),

                 Cand.SU == DAG->getNextClusterSucc(),

                 TryCand, Cand, Cluster))

    return TryCand.Reason != NoCand;


  // Avoid critical resource consumption and balance the schedule.

  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,

              TryCand, Cand, ResourceReduce))

    return TryCand.Reason != NoCand;

  if (tryGreater(TryCand.ResDelta.DemandedResources,

                 Cand.ResDelta.DemandedResources,

                 TryCand, Cand, ResourceDemand))

    return TryCand.Reason != NoCand;


  // Avoid serializing long latency dependence chains.

  if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {

    return TryCand.Reason != NoCand;

  }


  // Fall through to original instruction order.

  if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {

    TryCand.Reason = NodeOrder;

    return true;

  }


  return false;

}


void PostGenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,

                                             SchedCandidate &Cand) {

  ReadyQueue &Q = Zone.Available;

  for (SUnit *SU : Q) {

    SchedCandidate TryCand(Cand.Policy);

    TryCand.SU = SU;

    TryCand.AtTop = Zone.isTop();

    TryCand.initResourceDelta(DAG, SchedModel);

    if (tryCandidate(Cand, TryCand)) {

      Cand.setBest(TryCand);

      LLVM_DEBUG(traceCandidate(Cand));

    }

  }

}


/// Pick the best candidate node from either the top or bottom queue.

SUnit *PostGenericScheduler::pickNodeBidirectional(bool &IsTopNode) {

  // FIXME: This is similiar to GenericScheduler::pickNodeBidirectional. Factor

  // out common parts.


  // Schedule as far as possible in the direction of no choice. This is most

  // efficient, but also provides the best heuristics for CriticalPSets.

  if (SUnit *SU = Bot.pickOnlyChoice()) {

    IsTopNode = false;

    tracePick(Only1, false);

    return SU;

  }

  if (SUnit *SU = Top.pickOnlyChoice()) {

    IsTopNode = true;

    tracePick(Only1, true);

    return SU;

  }

  // Set the bottom-up policy based on the state of the current bottom zone and

  // the instructions outside the zone, including the top zone.

  CandPolicy BotPolicy;

  setPolicy(BotPolicy, /*IsPostRA=*/true, Bot, &Top);

  // Set the top-down policy based on the state of the current top zone and

  // the instructions outside the zone, including the bottom zone.

  CandPolicy TopPolicy;

  setPolicy(TopPolicy, /*IsPostRA=*/true, Top, &Bot);


  // See if BotCand is still valid (because we previously scheduled from Top).

  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");

  if (!BotCand.isValid() || BotCand.SU->isScheduled ||

      BotCand.Policy != BotPolicy) {

    BotCand.reset(CandPolicy());

    pickNodeFromQueue(Bot, BotCand);

    assert(BotCand.Reason != NoCand && "failed to find the first candidate");

  } else {

    LLVM_DEBUG(traceCandidate(BotCand));

#ifndef NDEBUG

    if (VerifyScheduling) {

      SchedCandidate TCand;

      TCand.reset(CandPolicy());

      pickNodeFromQueue(Bot, BotCand);

      assert(TCand.SU == BotCand.SU &&

             "Last pick result should correspond to re-picking right now");

    }

#endif

  }


  // Check if the top Q has a better candidate.

  LLVM_DEBUG(dbgs() << "Picking from Top:\n");

  if (!TopCand.isValid() || TopCand.SU->isScheduled ||

      TopCand.Policy != TopPolicy) {

    TopCand.reset(CandPolicy());

    pickNodeFromQueue(Top, TopCand);

    assert(TopCand.Reason != NoCand && "failed to find the first candidate");

  } else {

    LLVM_DEBUG(traceCandidate(TopCand));

#ifndef NDEBUG

    if (VerifyScheduling) {

      SchedCandidate TCand;

      TCand.reset(CandPolicy());

      pickNodeFromQueue(Top, TopCand);

      assert(TCand.SU == TopCand.SU &&

             "Last pick result should correspond to re-picking right now");

    }

#endif

  }


  // Pick best from BotCand and TopCand.

  assert(BotCand.isValid());

  assert(TopCand.isValid());

  SchedCandidate Cand = BotCand;

  TopCand.Reason = NoCand;

  if (tryCandidate(Cand, TopCand)) {

    Cand.setBest(TopCand);

    LLVM_DEBUG(traceCandidate(Cand));

  }


  IsTopNode = Cand.AtTop;

  tracePick(Cand);

  return Cand.SU;

}


/// Pick the next node to schedule.

SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {

  if (DAG->top() == DAG->bottom()) {

    assert(Top.Available.empty() && Top.Pending.empty() &&

           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");

    return nullptr;

  }

  SUnit *SU;

  do {

    if (RegionPolicy.OnlyBottomUp) {

      SU = Bot.pickOnlyChoice();

      if (SU) {

        tracePick(Only1, true);

      } else {

        CandPolicy NoPolicy;

        BotCand.reset(NoPolicy);

        // Set the bottom-up policy based on the state of the current bottom

        // zone and the instructions outside the zone, including the top zone.

        setPolicy(BotCand.Policy, /*IsPostRA=*/true, Bot, nullptr);

        pickNodeFromQueue(Bot, BotCand);

        assert(BotCand.Reason != NoCand && "failed to find a candidate");

        tracePick(BotCand);

        SU = BotCand.SU;

      }

      IsTopNode = false;

    } else if (RegionPolicy.OnlyTopDown) {

      SU = Top.pickOnlyChoice();

      if (SU) {

        tracePick(Only1, true);

      } else {

        CandPolicy NoPolicy;

        TopCand.reset(NoPolicy);

        // Set the top-down policy based on the state of the current top zone

        // and the instructions outside the zone, including the bottom zone.

        setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);

        pickNodeFromQueue(Top, TopCand);

        assert(TopCand.Reason != NoCand && "failed to find a candidate");

        tracePick(TopCand);

        SU = TopCand.SU;

      }

      IsTopNode = true;

    } else {

      SU = pickNodeBidirectional(IsTopNode);

    }

  } while (SU->isScheduled);


  if (SU->isTopReady())

    Top.removeReady(SU);

  if (SU->isBottomReady())

    Bot.removeReady(SU);


  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "

                    << *SU->getInstr());

  return SU;

}


/// Called after ScheduleDAGMI has scheduled an instruction and updated

/// scheduled/remaining flags in the DAG nodes.

void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {

  if (IsTopNode) {

    SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());

    Top.bumpNode(SU);

  } else {

    SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());

    Bot.bumpNode(SU);

  }

}


ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {

  ScheduleDAGMI *DAG =

      new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(C),

                        /*RemoveKillFlags=*/true);

  const TargetSubtargetInfo &STI = C->MF->getSubtarget();

  // Add MacroFusion mutation if fusions are not empty.

  const auto &MacroFusions = STI.getMacroFusions();

  if (!MacroFusions.empty())

    DAG->addMutation(createMacroFusionDAGMutation(MacroFusions));

  return DAG;

}


//===----------------------------------------------------------------------===//

// ILP Scheduler. Currently for experimental analysis of heuristics.

//===----------------------------------------------------------------------===//


namespace {


/// Order nodes by the ILP metric.

struct ILPOrder {

  const SchedDFSResult *DFSResult = nullptr;

  const BitVector *ScheduledTrees = nullptr;

  bool MaximizeILP;


  ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}


  /// Apply a less-than relation on node priority.

  ///

  /// (Return true if A comes after B in the Q.)

  bool operator()(const SUnit *A, const SUnit *B) const {

    unsigned SchedTreeA = DFSResult->getSubtreeID(A);

    unsigned SchedTreeB = DFSResult->getSubtreeID(B);

    if (SchedTreeA != SchedTreeB) {

      // Unscheduled trees have lower priority.

      if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))

        return ScheduledTrees->test(SchedTreeB);


      // Trees with shallower connections have lower priority.

      if (DFSResult->getSubtreeLevel(SchedTreeA)

          != DFSResult->getSubtreeLevel(SchedTreeB)) {

        return DFSResult->getSubtreeLevel(SchedTreeA)

          < DFSResult->getSubtreeLevel(SchedTreeB);

      }

    }

    if (MaximizeILP)

      return DFSResult->getILP(A) < DFSResult->getILP(B);

    else

      return DFSResult->getILP(A) > DFSResult->getILP(B);

  }

};


/// Schedule based on the ILP metric.

class ILPScheduler : public MachineSchedStrategy {

  ScheduleDAGMILive *DAG = nullptr;

  ILPOrder Cmp;


  std::vector<SUnit*> ReadyQ;


public:

  ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}


  void initialize(ScheduleDAGMI *dag) override {

    assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");

    DAG = static_cast<ScheduleDAGMILive*>(dag);

    DAG->computeDFSResult();

    Cmp.DFSResult = DAG->getDFSResult();

    Cmp.ScheduledTrees = &DAG->getScheduledTrees();

    ReadyQ.clear();

  }


  void registerRoots() override {

    // Restore the heap in ReadyQ with the updated DFS results.

    std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);

  }


  /// Implement MachineSchedStrategy interface.

  /// -----------------------------------------


  /// Callback to select the highest priority node from the ready Q.

  SUnit *pickNode(bool &IsTopNode) override {

    if (ReadyQ.empty()) return nullptr;

    std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);

    SUnit *SU = ReadyQ.back();

    ReadyQ.pop_back();

    IsTopNode = false;

    LLVM_DEBUG(dbgs() << "Pick node "

                      << "SU(" << SU->NodeNum << ") "

                      << " ILP: " << DAG->getDFSResult()->getILP(SU)

                      << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)

                      << " @"

                      << DAG->getDFSResult()->getSubtreeLevel(

                             DAG->getDFSResult()->getSubtreeID(SU))

                      << '\n'

                      << "Scheduling " << *SU->getInstr());

    return SU;

  }


  /// Scheduler callback to notify that a new subtree is scheduled.

  void scheduleTree(unsigned SubtreeID) override {

    std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);

  }


  /// Callback after a node is scheduled. Mark a newly scheduled tree, notify

  /// DFSResults, and resort the priority Q.

  void schedNode(SUnit *SU, bool IsTopNode) override {

    assert(!IsTopNode && "SchedDFSResult needs bottom-up");

  }


  void releaseTopNode(SUnit *) override { /*only called for top roots*/ }


  void releaseBottomNode(SUnit *SU) override {

    ReadyQ.push_back(SU);

    std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);

  }

};


} // end anonymous namespace


static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {

  return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(true));

}

static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {

  return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(false));

}


static MachineSchedRegistry ILPMaxRegistry(

  "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);

static MachineSchedRegistry ILPMinRegistry(

  "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);


//===----------------------------------------------------------------------===//

// Machine Instruction Shuffler for Correctness Testing

//===----------------------------------------------------------------------===//


#ifndef NDEBUG

namespace {


/// Apply a less-than relation on the node order, which corresponds to the

/// instruction order prior to scheduling. IsReverse implements greater-than.

template<bool IsReverse>

struct SUnitOrder {

  bool operator()(SUnit *A, SUnit *B) const {

    if (IsReverse)

      return A->NodeNum > B->NodeNum;

    else

      return A->NodeNum < B->NodeNum;

  }

};


/// Reorder instructions as much as possible.

class InstructionShuffler : public MachineSchedStrategy {

  bool IsAlternating;

  bool IsTopDown;


  // Using a less-than relation (SUnitOrder<false>) for the TopQ priority

  // gives nodes with a higher number higher priority causing the latest

  // instructions to be scheduled first.

  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>

    TopQ;


  // When scheduling bottom-up, use greater-than as the queue priority.

  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>

    BottomQ;


public:

  InstructionShuffler(bool alternate, bool topdown)

    : IsAlternating(alternate), IsTopDown(topdown) {}


  void initialize(ScheduleDAGMI*) override {

    TopQ.clear();

    BottomQ.clear();

  }


  /// Implement MachineSchedStrategy interface.

  /// -----------------------------------------


  SUnit *pickNode(bool &IsTopNode) override {

    SUnit *SU;

    if (IsTopDown) {

      do {

        if (TopQ.empty()) return nullptr;

        SU = TopQ.top();

        TopQ.pop();

      } while (SU->isScheduled);

      IsTopNode = true;

    } else {

      do {

        if (BottomQ.empty()) return nullptr;

        SU = BottomQ.top();

        BottomQ.pop();

      } while (SU->isScheduled);

      IsTopNode = false;

    }

    if (IsAlternating)

      IsTopDown = !IsTopDown;

    return SU;

  }


  void schedNode(SUnit *SU, bool IsTopNode) override {}


  void releaseTopNode(SUnit *SU) override {

    TopQ.push(SU);

  }

  void releaseBottomNode(SUnit *SU) override {

    BottomQ.push(SU);

  }

};


} // end anonymous namespace


static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {

  bool Alternate = !ForceTopDown && !ForceBottomUp;

  bool TopDown = !ForceBottomUp;

  assert((TopDown || !ForceTopDown) &&

         "-misched-topdown incompatible with -misched-bottomup");

  return new ScheduleDAGMILive(

      C, std::make_unique<InstructionShuffler>(Alternate, TopDown));

}


static MachineSchedRegistry ShufflerRegistry(

  "shuffle", "Shuffle machine instructions alternating directions",

  createInstructionShuffler);

#endif // !NDEBUG


//===----------------------------------------------------------------------===//

// GraphWriter support for ScheduleDAGMILive.

//===----------------------------------------------------------------------===//


#ifndef NDEBUG

namespace llvm {


template<> struct GraphTraits<

  ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};


template<>

struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {

  DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}


  static std::string getGraphName(const ScheduleDAG *G) {

    return std::string(G->MF.getName());

  }


  static bool renderGraphFromBottomUp() {

    return true;

  }


  static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G) {

    if (ViewMISchedCutoff == 0)

      return false;

    return (Node->Preds.size() > ViewMISchedCutoff

         || Node->Succs.size() > ViewMISchedCutoff);

  }


  /// If you want to override the dot attributes printed for a particular

  /// edge, override this method.

  static std::string getEdgeAttributes(const SUnit *Node,

                                       SUnitIterator EI,

                                       const ScheduleDAG *Graph) {

    if (EI.isArtificialDep())

      return "color=cyan,style=dashed";

    if (EI.isCtrlDep())

      return "color=blue,style=dashed";

    return "";

  }


  static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {

    std::string Str;

    raw_string_ostream SS(Str);

    const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);

    const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?

      static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;

    SS << "SU:" << SU->NodeNum;

    if (DFS)

      SS << " I:" << DFS->getNumInstrs(SU);

    return SS.str();

  }


  static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {

    return G->getGraphNodeLabel(SU);

  }


  static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {

    std::string Str("shape=Mrecord");

    const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);

    const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?

      static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;

    if (DFS) {

      Str += ",style=filled,fillcolor=\"#";

      Str += DOT::getColorString(DFS->getSubtreeID(N));

      Str += '"';

    }

    return Str;

  }

};


} // end namespace llvm

#endif // NDEBUG


/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG

/// rendered using 'dot'.

void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {

#ifndef NDEBUG

  ViewGraph(this, Name, false, Title);

#else

  errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "

         << "systems with Graphviz or gv!\n";

#endif  // NDEBUG

}


/// Out-of-line implementation with no arguments is handy for gdb.

void ScheduleDAGMI::viewGraph() {

  viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());

}


/// Sort predicate for the intervals stored in an instance of

/// ResourceSegments. Intervals are always disjoint (no intersection

/// for any pairs of intervals), therefore we can sort the totality of

/// the intervals by looking only at the left boundary.

static bool sortIntervals(const ResourceSegments::IntervalTy &A,

                          const ResourceSegments::IntervalTy &B) {

  return A.first < B.first;

}


unsigned ResourceSegments::getFirstAvailableAt(

    unsigned CurrCycle, unsigned AcquireAtCycle, unsigned ReleaseAtCycle,

    std::function<ResourceSegments::IntervalTy(unsigned, unsigned, unsigned)>

        IntervalBuilder) const {

  assert(std::is_sorted(std::begin(_Intervals), std::end(_Intervals),

                        sortIntervals) &&

         "Cannot execute on an un-sorted set of intervals.");


  // Zero resource usage is allowed by TargetSchedule.td but we do not construct

  // a ResourceSegment interval for that situation.

  if (AcquireAtCycle == ReleaseAtCycle)

    return CurrCycle;


  unsigned RetCycle = CurrCycle;

  ResourceSegments::IntervalTy NewInterval =

      IntervalBuilder(RetCycle, AcquireAtCycle, ReleaseAtCycle);

  for (auto &Interval : _Intervals) {

    if (!intersects(NewInterval, Interval))

      continue;


    // Move the interval right next to the top of the one it

    // intersects.

    assert(Interval.second > NewInterval.first &&

           "Invalid intervals configuration.");

    RetCycle += (unsigned)Interval.second - (unsigned)NewInterval.first;

    NewInterval = IntervalBuilder(RetCycle, AcquireAtCycle, ReleaseAtCycle);

  }

  return RetCycle;

}


void ResourceSegments::add(ResourceSegments::IntervalTy A,

                           const unsigned CutOff) {

  assert(A.first <= A.second && "Cannot add negative resource usage");

  assert(CutOff > 0 && "0-size interval history has no use.");

  // Zero resource usage is allowed by TargetSchedule.td, in the case that the

  // instruction needed the resource to be available but does not use it.

  // However, ResourceSegment represents an interval that is closed on the left

  // and open on the right. It is impossible to represent an empty interval when

  // the left is closed. Do not add it to Intervals.

  if (A.first == A.second)

    return;


  assert(all_of(_Intervals,

                [&A](const ResourceSegments::IntervalTy &Interval) -> bool {

                  return !intersects(A, Interval);

                }) &&

         "A resource is being overwritten");

  _Intervals.push_back(A);


  sortAndMerge();


  // Do not keep the full history of the intervals, just the

  // latest #CutOff.

  while (_Intervals.size() > CutOff)

    _Intervals.pop_front();

}


bool ResourceSegments::intersects(ResourceSegments::IntervalTy A,

                                  ResourceSegments::IntervalTy B) {

  assert(A.first <= A.second && "Invalid interval");

  assert(B.first <= B.second && "Invalid interval");


  // Share one boundary.

  if ((A.first == B.first) || (A.second == B.second))

    return true;


  // full intersersect: [    ***     )  B

  //                        [***)       A

  if ((A.first > B.first) && (A.second < B.second))

    return true;


  // right intersect: [     ***)        B

  //                       [***      )  A

  if ((A.first > B.first) && (A.first < B.second) && (A.second > B.second))

    return true;


  // left intersect:      [***      )  B

  //                 [     ***)        A

  if ((A.first < B.first) && (B.first < A.second) && (B.second > B.first))

    return true;


  return false;

}


void ResourceSegments::sortAndMerge() {

  if (_Intervals.size() <= 1)

    return;


  // First sort the collection.

  _Intervals.sort(sortIntervals);


  // can use next because I have at least 2 elements in the list

  auto next = std::next(std::begin(_Intervals));

  auto E = std::end(_Intervals);

  for (; next != E; ++next) {

    if (std::prev(next)->second >= next->first) {

      next->first = std::prev(next)->first;

      _Intervals.erase(std::prev(next));

      continue;

    }

  }

}

DefMI
MachineInstrBuilder MachineInstrBuilder & DefMI
Definition: AArch64ExpandPseudoInsts.cpp:111

MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:72

AliasAnalysis.h

print
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
Definition: ArchiveWriter.cpp:205

ArrayRef.h

getParent
static const Function * getParent(const Value *V)
Definition: BasicAliasAnalysis.cpp:849

true
basic Basic Alias true
Definition: BasicAliasAnalysis.cpp:1915

BitVector.h
This file implements the BitVector class.

B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

A
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")

E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

Machine
COFF::MachineTypes Machine
Definition: COFFYAML.cpp:371

CommandLine.h

clEnumValN
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:693

Compiler.h

LLVM_DUMP_METHOD
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:529

intersects
static std::optional< ArrayRef< InsnRange >::iterator > intersects(const MachineInstr *StartMI, const MachineInstr *EndMI, const ArrayRef< InsnRange > &Ranges, const InstructionOrdering &Ordering)
Check if the instruction range [StartMI, EndMI] intersects any instruction range in Ranges.
Definition: DbgEntityHistoryCalculator.cpp:114

Idx
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
Definition: DeadArgumentElimination.cpp:354

Debug.h

LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101

DenseMap.h
This file defines the DenseMap class.

Name
std::string Name
Definition: ELFObjHandler.cpp:77

End
bool End
Definition: ELF_riscv.cpp:480

rem
expand large div rem
Definition: ExpandLargeDivRem.cpp:178

DEBUG_TYPE
#define DEBUG_TYPE
Definition: GenericCycleImpl.h:30

GraphWriter.h

TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:125

MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:113

InitializePasses.h

LaneBitmask.h
A common definition of LaneBitmask for use in TableGen and CodeGen.

LiveInterval.h

LiveIntervals.h

I
#define I(x, y, z)
Definition: MD5.cpp:58

G
#define G(x, y, z)
Definition: MD5.cpp:56

MachineBasicBlock.h

MachineDominators.h

MachineFunctionPass.h

MachineFunction.h

MachineInstr.h

MachineLoopInfo.h

MachineOperand.h

MachinePassRegistry.h

MachineRegisterInfo.h

isSchedBoundary
static bool isSchedBoundary(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB, MachineFunction *MF, const TargetInstrInfo *TII)
Return true of the given instruction should not be included in a scheduling region.
Definition: MachineScheduler.cpp:529

ILPMaxRegistry
static MachineSchedRegistry ILPMaxRegistry("ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler)

tracePick
static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop)
Definition: MachineScheduler.cpp:3237

EnableMemOpCluster
static cl::opt< bool > EnableMemOpCluster("misched-cluster", cl::Hidden, cl::desc("Enable memop clustering."), cl::init(true))

Scheduler
Machine Instruction Scheduler
Definition: MachineScheduler.cpp:274

nextIfDebug
static MachineBasicBlock::const_iterator nextIfDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator End)
If this iterator is a debug value, increment until reaching the End or a non-debug instruction.
Definition: MachineScheduler.cpp:371

MinSubtreeSize
static const unsigned MinSubtreeSize
Definition: MachineScheduler.cpp:197

InvalidCycle
static const unsigned InvalidCycle
Definition: MachineScheduler.cpp:2227

MISchedSortResourcesInTrace
static cl::opt< bool > MISchedSortResourcesInTrace("misched-sort-resources-in-trace", cl::Hidden, cl::init(true), cl::desc("Sort the resources printed in the dump trace"))

EnableCyclicPath
static cl::opt< bool > EnableCyclicPath("misched-cyclicpath", cl::Hidden, cl::desc("Enable cyclic critical path analysis."), cl::init(true))

postmisched
postmisched
Definition: MachineScheduler.cpp:302

priorNonDebug
static MachineBasicBlock::const_iterator priorNonDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator Beg)
Decrement this iterator until reaching the top or a non-debug instr.
Definition: MachineScheduler.cpp:350

MachineSchedOpt
static cl::opt< MachineSchedRegistry::ScheduleDAGCtor, false, RegisterPassParser< MachineSchedRegistry > > MachineSchedOpt("misched", cl::init(&useDefaultMachineSched), cl::Hidden, cl::desc("Machine instruction scheduler to use"))
MachineSchedOpt allows command line selection of the scheduler.

EnableMachineSched
static cl::opt< bool > EnableMachineSched("enable-misched", cl::desc("Enable the machine instruction scheduling pass."), cl::init(true), cl::Hidden)

computeRemLatency
static unsigned computeRemLatency(SchedBoundary &CurrZone)
Compute remaining latency.
Definition: MachineScheduler.cpp:3003

MISchedCutoff
static cl::opt< unsigned > MISchedCutoff("misched-cutoff", cl::Hidden, cl::desc("Stop scheduling after N instructions"), cl::init(~0U))

SchedOnlyBlock
static cl::opt< unsigned > SchedOnlyBlock("misched-only-block", cl::Hidden, cl::desc("Only schedule this MBB#"))

EnableRegPressure
static cl::opt< bool > EnableRegPressure("misched-regpressure", cl::Hidden, cl::desc("Enable register pressure scheduling."), cl::init(true))

GenericSchedRegistry
static MachineSchedRegistry GenericSchedRegistry("converge", "Standard converging scheduler.", createConvergingSched)

HeaderColWidth
static cl::opt< unsigned > HeaderColWidth("misched-dump-schedule-trace-col-header-width", cl::Hidden, cl::desc("Set width of the columns with " "the resources and schedule units"), cl::init(19))

ForceFastCluster
static cl::opt< bool > ForceFastCluster("force-fast-cluster", cl::Hidden, cl::desc("Switch to fast cluster algorithm with the lost " "of some fusion opportunities"), cl::init(false))

FastClusterThreshold
static cl::opt< unsigned > FastClusterThreshold("fast-cluster-threshold", cl::Hidden, cl::desc("The threshold for fast cluster"), cl::init(1000))

checkResourceLimit
static bool checkResourceLimit(unsigned LFactor, unsigned Count, unsigned Latency, bool AfterSchedNode)
Given a Count of resource usage and a Latency value, return true if a SchedBoundary becomes resource ...
Definition: MachineScheduler.cpp:2235

createInstructionShuffler
static ScheduleDAGInstrs * createInstructionShuffler(MachineSchedContext *C)
Definition: MachineScheduler.cpp:4335

useDefaultMachineSched
static ScheduleDAGInstrs * useDefaultMachineSched(MachineSchedContext *C)
A dummy default scheduler factory indicates whether the scheduler is overridden on the command line.
Definition: MachineScheduler.cpp:323

sortIntervals
static bool sortIntervals(const ResourceSegments::IntervalTy &A, const ResourceSegments::IntervalTy &B)
Sort predicate for the intervals stored in an instance of ResourceSegments.
Definition: MachineScheduler.cpp:4443

ColWidth
static cl::opt< unsigned > ColWidth("misched-dump-schedule-trace-col-width", cl::Hidden, cl::desc("Set width of the columns showing resource booking."), cl::init(5))

DefaultSchedRegistry
static MachineSchedRegistry DefaultSchedRegistry("default", "Use the target's default scheduler choice.", useDefaultMachineSched)

SchedOnlyFunc
static cl::opt< std::string > SchedOnlyFunc("misched-only-func", cl::Hidden, cl::desc("Only schedule this function"))

scheduleTableLegend
static const char * scheduleTableLegend
Definition: MachineScheduler.cpp:996

createConvergingSched
static ScheduleDAGInstrs * createConvergingSched(MachineSchedContext *C)
Definition: MachineScheduler.cpp:3853

ViewMISchedCutoff
static cl::opt< unsigned > ViewMISchedCutoff("view-misched-cutoff", cl::Hidden, cl::desc("Hide nodes with more predecessor/successor than cutoff"))
In some situations a few uninteresting nodes depend on nearly all other nodes in the graph,...

ShufflerRegistry
static MachineSchedRegistry ShufflerRegistry("shuffle", "Shuffle machine instructions alternating directions", createInstructionShuffler)

EnablePostRAMachineSched
static cl::opt< bool > EnablePostRAMachineSched("enable-post-misched", cl::desc("Enable the post-ra machine instruction scheduling pass."), cl::init(true), cl::Hidden)

getSchedRegions
static void getSchedRegions(MachineBasicBlock *MBB, MBBRegionsVector &Regions, bool RegionsTopDown)
Definition: MachineScheduler.cpp:557

MIResourceCutOff
static cl::opt< unsigned > MIResourceCutOff("misched-resource-cutoff", cl::Hidden, cl::desc("Number of intervals to track"), cl::init(10))

createILPMaxScheduler
static ScheduleDAGInstrs * createILPMaxScheduler(MachineSchedContext *C)
Definition: MachineScheduler.cpp:4243

ReadyListLimit
static cl::opt< unsigned > ReadyListLimit("misched-limit", cl::Hidden, cl::desc("Limit ready list to N instructions"), cl::init(256))
Avoid quadratic complexity in unusually large basic blocks by limiting the size of the ready lists.

createILPMinScheduler
static ScheduleDAGInstrs * createILPMinScheduler(MachineSchedContext *C)
Definition: MachineScheduler.cpp:4246

MISchedDumpScheduleTrace
static cl::opt< bool > MISchedDumpScheduleTrace("misched-dump-schedule-trace", cl::Hidden, cl::init(false), cl::desc("Dump resource usage at schedule boundary."))

ILPMinRegistry
static MachineSchedRegistry ILPMinRegistry("ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler)

MachineScheduler.h

TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1875

MachineValueType.h

Interval
std::pair< uint64_t, uint64_t > Interval
Definition: MappedBlockStream.cpp:36

P
#define P(N)

if
if(VerifyEach)
Definition: PassBuilderBindings.cpp:71

INITIALIZE_PASS_DEPENDENCY
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55

INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59

INITIALIZE_PASS_BEGIN
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52

Pass.h

PriorityQueue.h
This file defines the PriorityQueue class.

RegisterClassInfo.h

RegisterPressure.h

assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

isSimple
static bool isSimple(Instruction *I)
Definition: SLPVectorizer.cpp:879

STLExtras.h
This file contains some templates that are useful if you are working with the STL at all.

ScheduleDAGInstrs.h

ScheduleDAGMutation.h

ScheduleDAG.h

ScheduleDFS.h

ScheduleHazardRecognizer.h

SlotIndexes.h

SmallVector.h
This file defines the SmallVector class.

Statistic.h
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

STATISTIC
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167

TargetFrameLowering.h

TargetInstrInfo.h

initialize
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
Definition: TargetLibraryInfo.cpp:166

TargetLowering.h
This file describes how to lower LLVM code to machine code.

TargetPassConfig.h
Target-Independent Code Generator Pass Configuration Options pass.

TargetRegisterInfo.h

TargetSchedule.h

TargetSubtargetInfo.h

Groups
static const X86InstrFMA3Group Groups[]
Definition: X86InstrFMA3Info.cpp:73

RHS
Value * RHS
Definition: X86PartialReduction.cpp:76

LHS
Value * LHS
Definition: X86PartialReduction.cpp:75

LiveDebugValues::DbgValue
Class recording the (high level) value of a variable.
Definition: InstrRefBasedImpl.h:480

T

llvm::AAResultsWrapperPass
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Definition: AliasAnalysis.h:960

llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:76

llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47

llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75

llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98

llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:269

llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41

llvm::ArrayRef::rend
reverse_iterator rend() const
Definition: ArrayRef.h:157

llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165

llvm::ArrayRef::empty
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160

llvm::ArrayRef::rbegin
reverse_iterator rbegin() const
Definition: ArrayRef.h:156

llvm::BitVector
Definition: BitVector.h:82

llvm::BitVector::test
bool test(unsigned Idx) const
Definition: BitVector.h:461

llvm::BitVector::resize
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
Definition: BitVector.h:341

llvm::BitVector::clear
void clear()
clear - Removes all bits from the bitvector.
Definition: BitVector.h:335

llvm::BitVector::set
BitVector & set()
Definition: BitVector.h:351

llvm::DWARFExpression::Operation
This class represents an Operation in the Expression.
Definition: DWARFExpression.h:32

llvm::DenseMapBase::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151

llvm::DenseMap
Definition: DenseMap.h:742

llvm::DstOp
Definition: MachineIRBuilder.h:70

llvm::DstOp::getReg
Register getReg() const
Definition: MachineIRBuilder.h:111

llvm::GenericSchedulerBase::traceCandidate
void traceCandidate(const SchedCandidate &Cand)
Definition: MachineScheduler.cpp:3114

llvm::GenericSchedulerBase::Rem
SchedRemainder Rem
Definition: MachineScheduler.h:1170

llvm::GenericSchedulerBase::setPolicy
void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone)
Set the CandPolicy given a scheduling zone given the current resources and latencies inside and outsi...
Definition: MachineScheduler.cpp:3035

llvm::GenericSchedulerBase::SchedModel
const TargetSchedModel * SchedModel
Definition: MachineScheduler.h:1167

llvm::GenericSchedulerBase::getReasonStr
static const char * getReasonStr(GenericSchedulerBase::CandReason Reason)
Definition: MachineScheduler.cpp:3090

llvm::GenericSchedulerBase::Context
const MachineSchedContext * Context
Definition: MachineScheduler.h:1166

llvm::GenericSchedulerBase::CandReason
CandReason
Represent the type of SchedCandidate found within a single queue.
Definition: MachineScheduler.h:1071

llvm::GenericSchedulerBase::RegExcess
@ RegExcess
Definition: MachineScheduler.h:1072

llvm::GenericSchedulerBase::RegMax
@ RegMax
Definition: MachineScheduler.h:1073

llvm::GenericSchedulerBase::ResourceDemand
@ ResourceDemand
Definition: MachineScheduler.h:1073

llvm::GenericSchedulerBase::Only1
@ Only1
Definition: MachineScheduler.h:1072

llvm::GenericSchedulerBase::ResourceReduce
@ ResourceReduce
Definition: MachineScheduler.h:1073

llvm::GenericSchedulerBase::Cluster
@ Cluster
Definition: MachineScheduler.h:1072

llvm::GenericSchedulerBase::TopPathReduce
@ TopPathReduce
Definition: MachineScheduler.h:1074

llvm::GenericSchedulerBase::NoCand
@ NoCand
Definition: MachineScheduler.h:1072

llvm::GenericSchedulerBase::BotHeightReduce
@ BotHeightReduce
Definition: MachineScheduler.h:1073

llvm::GenericSchedulerBase::RegCritical
@ RegCritical
Definition: MachineScheduler.h:1072

llvm::GenericSchedulerBase::NodeOrder
@ NodeOrder
Definition: MachineScheduler.h:1074

llvm::GenericSchedulerBase::PhysReg
@ PhysReg
Definition: MachineScheduler.h:1072

llvm::GenericSchedulerBase::Stall
@ Stall
Definition: MachineScheduler.h:1072

llvm::GenericSchedulerBase::BotPathReduce
@ BotPathReduce
Definition: MachineScheduler.h:1073

llvm::GenericSchedulerBase::Weak
@ Weak
Definition: MachineScheduler.h:1072

llvm::GenericSchedulerBase::NextDefUse
@ NextDefUse
Definition: MachineScheduler.h:1074

llvm::GenericSchedulerBase::TopDepthReduce
@ TopDepthReduce
Definition: MachineScheduler.h:1074

llvm::GenericSchedulerBase::TRI
const TargetRegisterInfo * TRI
Definition: MachineScheduler.h:1168

llvm::GenericScheduler::checkAcyclicLatency
void checkAcyclicLatency()
Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic critical path by more cycle...
Definition: MachineScheduler.cpp:3346

llvm::GenericScheduler::tryCandidate
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Apply a set of heuristics to a new candidate.
Definition: MachineScheduler.cpp:3529

llvm::GenericScheduler::dumpPolicy
void dumpPolicy() const override
Definition: MachineScheduler.cpp:3326

llvm::GenericScheduler::initialize
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
Definition: MachineScheduler.cpp:3246

llvm::GenericScheduler::initCandidate
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, RegPressureTracker &TempTracker)
Definition: MachineScheduler.cpp:3481

llvm::GenericScheduler::registerRoots
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
Definition: MachineScheduler.cpp:3374

llvm::GenericScheduler::initPolicy
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Initialize the per-region scheduling policy.
Definition: MachineScheduler.cpp:3276

llvm::GenericScheduler::reschedulePhysReg
void reschedulePhysReg(SUnit *SU, bool isTop)
Definition: MachineScheduler.cpp:3788

llvm::GenericScheduler::pickNode
SUnit * pickNode(bool &IsTopNode) override
Pick the best node to balance the schedule. Implements MachineSchedStrategy.
Definition: MachineScheduler.cpp:3743

llvm::GenericScheduler::pickNodeFromQueue
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Candidate)
Pick the best candidate from the queue.
Definition: MachineScheduler.cpp:3640

llvm::GenericScheduler::schedNode
void schedNode(SUnit *SU, bool IsTopNode) override
Update the scheduler's state after scheduling a node.
Definition: MachineScheduler.cpp:3819

llvm::GenericScheduler::pickNodeBidirectional
SUnit * pickNodeBidirectional(bool &IsTopNode)
Pick the best candidate node from either the top or bottom queue.
Definition: MachineScheduler.cpp:3665

llvm::HexagonInstrInfo::getMemOperandsWithOffsetWidth
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
Get the base register and byte offset of a load/store instr.
Definition: HexagonInstrInfo.cpp:3071

llvm::HexagonInstrInfo::isSchedulingBoundary
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
Test if the given instruction should be considered a scheduling boundary.
Definition: HexagonInstrInfo.cpp:1792

llvm::InstrItineraryData
Itinerary data supplied by a subtarget to be used by a target.
Definition: MCInstrItineraries.h:110

llvm::Instruction
Definition: Instruction.h:49

llvm::LiveInterval
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:687

llvm::LiveIntervals
Definition: LiveIntervals.h:53

llvm::LiveIntervals::getInstructionFromIndex
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
Definition: LiveIntervals.h:232

llvm::LiveIntervals::handleMove
void handleMove(MachineInstr &MI, bool UpdateFlags=false)
Call this method to notify LiveIntervals that instruction MI has been moved within a basic block.
Definition: LiveIntervals.cpp:1498

llvm::LiveIntervals::getInstructionIndex
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
Definition: LiveIntervals.h:227

llvm::LiveIntervals::getMBBEndIdx
SlotIndex getMBBEndIdx(const MachineBasicBlock *mbb) const
Return the last index in the given basic block.
Definition: LiveIntervals.h:242

llvm::LiveIntervals::getInterval
LiveInterval & getInterval(Register Reg)
Definition: LiveIntervals.h:112

llvm::LiveQueryResult
Result of a LiveRange query.
Definition: LiveInterval.h:90

llvm::LiveQueryResult::valueIn
VNInfo * valueIn() const
Return the value that is live-in to the instruction.
Definition: LiveInterval.h:105

llvm::LiveRange::Query
LiveQueryResult Query(SlotIndex Idx) const
Query Liveness at Idx.
Definition: LiveInterval.h:542

llvm::LiveRange::end
iterator end()
Definition: LiveInterval.h:216

llvm::LiveRange::getVNInfoBefore
VNInfo * getVNInfoBefore(SlotIndex Idx) const
getVNInfoBefore - Return the VNInfo that is live up to but not necessarilly including Idx,...
Definition: LiveInterval.h:429

llvm::LiveRange::begin
iterator begin()
Definition: LiveInterval.h:215

llvm::LiveRange::beginIndex
SlotIndex beginIndex() const
beginIndex - Return the lowest numbered slot covered.
Definition: LiveInterval.h:385

llvm::LiveRange::endIndex
SlotIndex endIndex() const
endNumber - return the maximum point of the range of the whole, exclusive.
Definition: LiveInterval.h:392

llvm::LiveRange::isLocal
bool isLocal(SlotIndex Start, SlotIndex End) const
True iff this segment is a single segment that lies between the specified boundaries,...
Definition: LiveInterval.h:518

llvm::LiveRange::find
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
Definition: LiveInterval.cpp:350

llvm::LocationSize
Definition: MemoryLocation.h:69

llvm::MVT::SimpleValueType
SimpleValueType
Definition: MachineValueType.h:36

llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:102

llvm::MachineBasicBlock::getNumber
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
Definition: MachineBasicBlock.h:1179

llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:329

llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:331

llvm::MachineBasicBlock::getParent
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Definition: MachineBasicBlock.h:285

llvm::MachineBasicBlock::size
unsigned size() const
Definition: MachineBasicBlock.h:299

llvm::MachineBasicBlock::isSuccessor
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
Definition: MachineBasicBlock.cpp:952

llvm::MachineBasicBlock::splice
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
Definition: MachineBasicBlock.h:1071

llvm::MachineBasicBlock::getName
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
Definition: MachineBasicBlock.cpp:318

llvm::MachineDominatorTree
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
Definition: MachineDominators.h:51

llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30

llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:168

llvm::MachineFunction
Definition: MachineFunction.h:259

llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:718

llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:684

llvm::MachineFunction::print
void print(raw_ostream &OS, const SlotIndexes *=nullptr) const
print - Print out the MachineFunction in a format suitable for debugging to the specified stream.
Definition: MachineFunction.cpp:613

llvm::MachineInstrBundleIterator< const MachineInstr >

llvm::MachineInstrBundleIterator::getNonConstIterator
nonconst_iterator getNonConstIterator() const
Definition: MachineInstrBundleIterator.h:276

llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:69

llvm::MachineInstr::isCopy
bool isCopy() const
Definition: MachineInstr.h:1415

llvm::MachineInstr::mayLoad
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
Definition: MachineInstr.h:1125

llvm::MachineInstr::mayStore
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Definition: MachineInstr.h:1138

llvm::MachineLoopInfo
Definition: MachineLoopInfo.h:104

llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:48

llvm::MachinePassRegistry
MachinePassRegistry - Track the registration of machine passes.
Definition: MachinePassRegistry.h:73

llvm::MachineRegisterInfo::getNumVirtRegs
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
Definition: MachineRegisterInfo.h:794

llvm::MachineSchedRegistry
MachineSchedRegistry provides a selection of available machine instruction schedulers.
Definition: MachineScheduler.h:148

llvm::MachineSchedRegistry::Registry
static MachinePassRegistry< ScheduleDAGCtor > Registry
Definition: MachineScheduler.h:155

llvm::MachineSchedRegistry::ScheduleDAGCtor
ScheduleDAGInstrs *(*)(MachineSchedContext *) ScheduleDAGCtor
Definition: MachineScheduler.h:150

llvm::MachineSchedStrategy
MachineSchedStrategy - Interface to the scheduling algorithm used by ScheduleDAGMI.
Definition: MachineScheduler.h:211

llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65

llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24

llvm::PostGenericScheduler::initPolicy
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Optionally override the per-region scheduling policy.
Definition: MachineScheduler.cpp:3885

llvm::PostGenericScheduler::tryCandidate
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand)
Apply a set of heuristics to a new candidate for PostRA scheduling.
Definition: MachineScheduler.cpp:3919

llvm::PostGenericScheduler::schedNode
void schedNode(SUnit *SU, bool IsTopNode) override
Called after ScheduleDAGMI has scheduled an instruction and updated scheduled/remaining flags in the ...
Definition: MachineScheduler.cpp:4115

llvm::PostGenericScheduler::pickNodeFromQueue
void pickNodeFromQueue(SchedBoundary &Zone, SchedCandidate &Cand)
Definition: MachineScheduler.cpp:3961

llvm::PostGenericScheduler::initialize
void initialize(ScheduleDAGMI *Dag) override
Initialize the strategy after building the DAG for a new region.
Definition: MachineScheduler.cpp:3865

llvm::PostGenericScheduler::pickNodeBidirectional
SUnit * pickNodeBidirectional(bool &IsTopNode)
Pick the best candidate node from either the top or bottom queue.
Definition: MachineScheduler.cpp:3977

llvm::PostGenericScheduler::registerRoots
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
Definition: MachineScheduler.cpp:3900

llvm::PostGenericScheduler::pickNode
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule.
Definition: MachineScheduler.cpp:4058

llvm::PressureChange
Capture a change in pressure for a single pressure set.
Definition: RegisterPressure.h:102

llvm::PressureChange::getUnitInc
int getUnitInc() const
Definition: RegisterPressure.h:124

llvm::PressureChange::getPSetOrMax
unsigned getPSetOrMax() const
Definition: RegisterPressure.h:120

llvm::PressureChange::isValid
bool isValid() const
Definition: RegisterPressure.h:112

llvm::PressureChange::getPSet
unsigned getPSet() const
Definition: RegisterPressure.h:114

llvm::PressureDiff
List of PressureChanges in order of increasing, unique PSetID.
Definition: RegisterPressure.h:140

llvm::PressureDiff::dump
void dump(const TargetRegisterInfo &TRI) const
Definition: RegisterPressure.cpp:126

llvm::PressureDiff::addPressureChange
void addPressureChange(Register RegUnit, bool IsDec, const MachineRegisterInfo *MRI)
Add a change in pressure to the pressure diff of a given instruction.
Definition: RegisterPressure.cpp:663

llvm::PressureDiffs::clear
void clear()
Definition: RegisterPressure.h:208

llvm::PriorityQueue
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Definition: PriorityQueue.h:28

llvm::PriorityQueue::clear
void clear()
clear - Erase all elements from the queue.
Definition: PriorityQueue.h:76

llvm::ReadyQueue
Helpers for implementing custom MachineSchedStrategy classes.
Definition: MachineScheduler.h:540

llvm::ReadyQueue::push
void push(SUnit *SU)
Definition: MachineScheduler.h:571

llvm::ReadyQueue::end
iterator end()
Definition: MachineScheduler.h:565

llvm::ReadyQueue::find
iterator find(SUnit *SU)
Definition: MachineScheduler.h:569

llvm::ReadyQueue::begin
iterator begin()
Definition: MachineScheduler.h:563

llvm::ReadyQueue::elements
ArrayRef< SUnit * > elements()
Definition: MachineScheduler.h:567

llvm::ReadyQueue::dump
void dump() const
Definition: MachineScheduler.cpp:685

llvm::ReadyQueue::clear
void clear()
Definition: MachineScheduler.h:557

llvm::ReadyQueue::isInQueue
bool isInQueue(SUnit *SU) const
Definition: MachineScheduler.h:553

llvm::ReadyQueue::iterator
std::vector< SUnit * >::iterator iterator
Definition: MachineScheduler.h:561

llvm::ReadyQueue::empty
bool empty() const
Definition: MachineScheduler.h:555

llvm::ReadyQueue::getName
StringRef getName() const
Definition: MachineScheduler.h:550

llvm::ReadyQueue::size
unsigned size() const
Definition: MachineScheduler.h:559

llvm::ReadyQueue::remove
iterator remove(iterator I)
Definition: MachineScheduler.h:576

llvm::RegPressureTracker
Track the current register pressure at some position in the instruction stream, and remember the high...
Definition: RegisterPressure.h:359

llvm::RegPressureTracker::closeRegion
void closeRegion()
Finalize the region boundaries and recored live ins and live outs.
Definition: RegisterPressure.cpp:343

llvm::RegPressureTracker::recede
void recede(SmallVectorImpl< RegisterMaskPair > *LiveUses=nullptr)
Recede across the previous instruction.
Definition: RegisterPressure.cpp:865

llvm::RegPressureTracker::setPos
void setPos(MachineBasicBlock::const_iterator Pos)
Definition: RegisterPressure.h:421

llvm::RegPressureTracker::getLiveThru
ArrayRef< unsigned > getLiveThru() const
Definition: RegisterPressure.h:456

llvm::RegPressureTracker::closeBottom
void closeBottom()
Set the boundary for the bottom of the region and summarize live outs.
Definition: RegisterPressure.cpp:331

llvm::RegPressureTracker::getPressure
RegisterPressure & getPressure()
Get the resulting register pressure over the traversed region.
Definition: RegisterPressure.h:460

llvm::RegPressureTracker::recedeSkipDebugValues
void recedeSkipDebugValues()
Recede until we find an instruction which is not a DebugValue.
Definition: RegisterPressure.cpp:844

llvm::RegPressureTracker::getMaxUpwardPressureDelta
void getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction bottom-up.
Definition: RegisterPressure.cpp:1095

llvm::RegPressureTracker::initLiveThru
void initLiveThru(const RegPressureTracker &RPTracker)
Initialize the LiveThru pressure set based on the untied defs found in RPTracker.
Definition: RegisterPressure.cpp:359

llvm::RegPressureTracker::dump
void dump() const
Definition: RegisterPressure.cpp:117

llvm::RegPressureTracker::init
void init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos, bool TrackLaneMasks, bool TrackUntiedDefs)
Setup the RegPressureTracker.
Definition: RegisterPressure.cpp:263

llvm::RegPressureTracker::getPos
MachineBasicBlock::const_iterator getPos() const
Get the MI position corresponding to this register pressure.
Definition: RegisterPressure.h:415

llvm::RegPressureTracker::closeTop
void closeTop()
Set the boundary for the top of the region and summarize live ins.
Definition: RegisterPressure.cpp:319

llvm::RegPressureTracker::getMaxDownwardPressureDelta
void getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction top-down.
Definition: RegisterPressure.cpp:1344

llvm::RegPressureTracker::advance
void advance()
Advance across the current instruction.
Definition: RegisterPressure.cpp:939

llvm::RegPressureTracker::reset
void reset()
Definition: RegisterPressure.cpp:243

llvm::RegPressureTracker::getRegSetPressureAtPos
const std::vector< unsigned > & getRegSetPressureAtPos() const
Get the register set pressure at the current position, which may be less than the pressure across the...
Definition: RegisterPressure.h:465

llvm::RegPressureTracker::addLiveRegs
void addLiveRegs(ArrayRef< RegisterMaskPair > Regs)
Force liveness of virtual registers or physical register units.
Definition: RegisterPressure.cpp:698

llvm::RegPressureTracker::getUpwardPressureDelta
void getUpwardPressureDelta(const MachineInstr *MI, PressureDiff &PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit) const
This is the fast version of querying register pressure that does not directly depend on current liven...
Definition: RegisterPressure.cpp:1163

llvm::RegisterClassInfo
Definition: RegisterClassInfo.h:29

llvm::RegisterClassInfo::getNumAllocatableRegs
unsigned getNumAllocatableRegs(const TargetRegisterClass *RC) const
getNumAllocatableRegs - Returns the number of actually allocatable registers in RC in the current fun...
Definition: RegisterClassInfo.h:94

llvm::RegisterClassInfo::getRegPressureSetLimit
unsigned getRegPressureSetLimit(unsigned Idx) const
Get the register unit limit for the given pressure set index.
Definition: RegisterClassInfo.h:146

llvm::RegisterOperands
List of registers defined and used by a machine instruction.
Definition: RegisterPressure.h:166

llvm::RegisterOperands::collect
void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
Definition: RegisterPressure.cpp:567

llvm::RegisterOperands::adjustLaneLiveness
void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the Regi...
Definition: RegisterPressure.cpp:598

llvm::RegisterOperands::detectDeadDefs
void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS)
Use liveness information to find dead defs not marked with a dead flag and move them to the DeadDefs ...
Definition: RegisterPressure.cpp:578

llvm::RegisterPassParser
RegisterPassParser class - Handle the addition of new machine passes.
Definition: MachinePassRegistry.h:138

llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19

llvm::Register::isVirtual
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91

llvm::Register::isPhysicalRegister
static constexpr bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:65

llvm::ResourceSegments::add
void add(IntervalTy A, const unsigned CutOff=10)
Adds an interval [a, b) to the collection of the instance.
Definition: MachineScheduler.cpp:4478

llvm::ResourceSegments::getResourceIntervalBottom
static IntervalTy getResourceIntervalBottom(unsigned C, unsigned AcquireAtCycle, unsigned ReleaseAtCycle)
These function return the interval used by a resource in bottom and top scheduling.
Definition: MachineScheduler.h:717

llvm::ResourceSegments::intersects
static bool intersects(IntervalTy A, IntervalTy B)
Checks whether intervals intersect.
Definition: MachineScheduler.cpp:4505

llvm::ResourceSegments::IntervalTy
std::pair< int64_t, int64_t > IntervalTy
Represents an interval of discrete integer values closed on the left and open on the right: [a,...
Definition: MachineScheduler.h:631

llvm::ResourceSegments::getResourceIntervalTop
static IntervalTy getResourceIntervalTop(unsigned C, unsigned AcquireAtCycle, unsigned ReleaseAtCycle)
Definition: MachineScheduler.h:722

llvm::SDep
Scheduling dependency.
Definition: ScheduleDAG.h:49

llvm::SDep::getSUnit
SUnit * getSUnit() const
Definition: ScheduleDAG.h:480

llvm::SDep::getKind
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:486

llvm::SDep::Anti
@ Anti
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:54

llvm::SDep::Data
@ Data
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:53

llvm::SDep::isWeak
bool isWeak() const
Tests if this a weak dependence.
Definition: ScheduleDAG.h:194

llvm::SDep::Cluster
@ Cluster
Weak DAG edge linking a chain of clustered instrs.
Definition: ScheduleDAG.h:74

llvm::SDep::Artificial
@ Artificial
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:72

llvm::SDep::Weak
@ Weak
Arbitrary weak DAG edge.
Definition: ScheduleDAG.h:73

llvm::SDep::getLatency
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
Definition: ScheduleDAG.h:142

llvm::SDep::isArtificial
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
Definition: ScheduleDAG.h:200

llvm::SDep::isCtrl
bool isCtrl() const
Shorthand for getKind() != SDep::Data.
Definition: ScheduleDAG.h:161

llvm::SDep::getReg
unsigned getReg() const
Returns the register associated with this edge.
Definition: ScheduleDAG.h:218

llvm::SDep::isCluster
bool isCluster() const
Tests if this is an Order dependence that is marked as "cluster", meaning it is artificial and wants ...
Definition: ScheduleDAG.h:206

llvm::SUnitIterator
Definition: ScheduleDAG.h:625

llvm::SUnitIterator::isArtificialDep
bool isArtificialDep() const
Definition: ScheduleDAG.h:668

llvm::SUnitIterator::isCtrlDep
bool isCtrlDep() const
Tests if this is not an SDep::Data dependence.
Definition: ScheduleDAG.h:665

llvm::SUnit
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242

llvm::SUnit::isCall
bool isCall
Is a function call.
Definition: ScheduleDAG.h:275

llvm::SUnit::TopReadyCycle
unsigned TopReadyCycle
Cycle relative to start when node is ready.
Definition: ScheduleDAG.h:299

llvm::SUnit::NodeNum
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:264

llvm::SUnit::NumSuccsLeft
unsigned NumSuccsLeft
Definition: ScheduleDAG.h:269

llvm::SUnit::biasCriticalPath
void biasCriticalPath()
Orders this node's predecessor edges such that the critical path edge occurs first.
Definition: ScheduleDAG.cpp:326

llvm::SUnit::isUnbuffered
bool isUnbuffered
Uses an unbuffered resource.
Definition: ScheduleDAG.h:288

llvm::SUnit::getHeight
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has n...
Definition: ScheduleDAG.h:406

llvm::SUnit::Latency
unsigned short Latency
Node latency.
Definition: ScheduleDAG.h:273

llvm::SUnit::getDepth
unsigned getDepth() const
Returns the depth of this node, which is the length of the maximum path up to any node which has no p...
Definition: ScheduleDAG.h:398

llvm::SUnit::isScheduled
bool isScheduled
True once scheduled.
Definition: ScheduleDAG.h:284

llvm::SUnit::NumPredsLeft
unsigned NumPredsLeft
Definition: ScheduleDAG.h:268

llvm::SUnit::hasPhysRegDefs
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:280

llvm::SUnit::BotReadyCycle
unsigned BotReadyCycle
Cycle relative to end when node is ready.
Definition: ScheduleDAG.h:300

llvm::SUnit::Succs
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:257

llvm::SUnit::hasReservedResource
bool hasReservedResource
Uses a reserved resource.
Definition: ScheduleDAG.h:289

llvm::SUnit::WeakPredsLeft
unsigned WeakPredsLeft
Definition: ScheduleDAG.h:270

llvm::SUnit::isBottomReady
bool isBottomReady() const
Definition: ScheduleDAG.h:449

llvm::SUnit::hasPhysRegUses
bool hasPhysRegUses
Has physreg uses.
Definition: ScheduleDAG.h:279

llvm::SUnit::isTopReady
bool isTopReady() const
Definition: ScheduleDAG.h:446

llvm::SUnit::Preds
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:256

llvm::SUnit::WeakSuccsLeft
unsigned WeakSuccsLeft
Definition: ScheduleDAG.h:271

llvm::SUnit::getInstr
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:373

llvm::SchedBoundary
Each Scheduling boundary is associated with ready queues.
Definition: MachineScheduler.h:832

llvm::SchedBoundary::getNextResourceCycleByInstance
unsigned getNextResourceCycleByInstance(unsigned InstanceIndex, unsigned ReleaseAtCycle, unsigned AcquireAtCycle)
Compute the next cycle at which the given processor resource unit can be scheduled.
Definition: MachineScheduler.cpp:2348

llvm::SchedBoundary::releasePending
void releasePending()
Release pending ready nodes in to the available queue.
Definition: MachineScheduler.cpp:2841

llvm::SchedBoundary::getDependentLatency
unsigned getDependentLatency() const
Definition: MachineScheduler.h:967

llvm::SchedBoundary::getScheduledLatency
unsigned getScheduledLatency() const
Get the number of latency cycles "covered" by the scheduled instructions.
Definition: MachineScheduler.h:972

llvm::SchedBoundary::incExecutedResources
void incExecutedResources(unsigned PIdx, unsigned Count)
Definition: MachineScheduler.cpp:2623

llvm::SchedBoundary::isResourceLimited
bool isResourceLimited() const
Definition: MachineScheduler.h:1003

llvm::SchedBoundary::SchedModel
const TargetSchedModel * SchedModel
Definition: MachineScheduler.h:842

llvm::SchedBoundary::getExecutedCount
unsigned getExecutedCount() const
Get a scaled count for the minimum execution time of the scheduled micro-ops that are ready to execut...
Definition: MachineScheduler.h:995

llvm::SchedBoundary::getLatencyStallCycles
unsigned getLatencyStallCycles(SUnit *SU)
Get the difference between the given SUnit's ready time and the current cycle.
Definition: MachineScheduler.cpp:2336

llvm::SchedBoundary::findMaxLatency
unsigned findMaxLatency(ArrayRef< SUnit * > ReadySUs)
Definition: MachineScheduler.cpp:2504

llvm::SchedBoundary::DAG
ScheduleDAGMI * DAG
Definition: MachineScheduler.h:841

llvm::SchedBoundary::dumpReservedCycles
void dumpReservedCycles() const
Dump the state of the information that tracks resource usage.
Definition: MachineScheduler.cpp:2915

llvm::SchedBoundary::isTop
bool isTop() const
Definition: MachineScheduler.h:956

llvm::SchedBoundary::getOtherResourceCount
unsigned getOtherResourceCount(unsigned &OtherCritIdx)
Definition: MachineScheduler.cpp:2525

llvm::SchedBoundary::Rem
SchedRemainder * Rem
Definition: MachineScheduler.h:843

llvm::SchedBoundary::bumpNode
void bumpNode(SUnit *SU)
Move the boundary of scheduled code by one SUnit.
Definition: MachineScheduler.cpp:2676

llvm::SchedBoundary::getCriticalCount
unsigned getCriticalCount() const
Get the scaled count of scheduled micro-ops and resources, including executed resources.
Definition: MachineScheduler.h:986

llvm::SchedBoundary::pickOnlyChoice
SUnit * pickOnlyChoice()
Call this before applying any other heuristics to the Available queue.
Definition: MachineScheduler.cpp:2880

llvm::SchedBoundary::releaseNode
void releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue, unsigned Idx=0)
Release SU to make it ready.
Definition: MachineScheduler.cpp:2551

llvm::SchedBoundary::countResource
unsigned countResource(const MCSchedClassDesc *SC, unsigned PIdx, unsigned Cycles, unsigned ReadyCycle, unsigned StartAtCycle)
Add the given processor resource to this scheduled zone.
Definition: MachineScheduler.cpp:2639

llvm::SchedBoundary::~SchedBoundary
~SchedBoundary()
Definition: MachineScheduler.cpp:2229

llvm::SchedBoundary::HazardRec
ScheduleHazardRecognizer * HazardRec
Definition: MachineScheduler.h:848

llvm::SchedBoundary::isUnbufferedGroup
bool isUnbufferedGroup(unsigned PIdx) const
Definition: MachineScheduler.h:1018

llvm::SchedBoundary::Available
ReadyQueue Available
Definition: MachineScheduler.h:845

llvm::SchedBoundary::init
void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem)
Definition: MachineScheduler.cpp:2302

llvm::SchedBoundary::getResourceCount
unsigned getResourceCount(unsigned ResIdx) const
Definition: MachineScheduler.h:980

llvm::SchedBoundary::bumpCycle
void bumpCycle(unsigned NextCycle)
Move the boundary of scheduled code by one cycle.
Definition: MachineScheduler.cpp:2585

llvm::SchedBoundary::getCurrMOps
unsigned getCurrMOps() const
Micro-ops issued in the current cycle.
Definition: MachineScheduler.h:964

llvm::SchedBoundary::getCurrCycle
unsigned getCurrCycle() const
Number of cycles to issue the instructions scheduled in this zone.
Definition: MachineScheduler.h:961

llvm::SchedBoundary::Pending
ReadyQueue Pending
Definition: MachineScheduler.h:846

llvm::SchedBoundary::reset
void reset()
Definition: MachineScheduler.cpp:2244

llvm::SchedBoundary::checkHazard
bool checkHazard(SUnit *SU)
Does this SU have a hazard within the current instruction group.
Definition: MachineScheduler.cpp:2455

llvm::SchedBoundary::getNextResourceCycle
std::pair< unsigned, unsigned > getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx, unsigned ReleaseAtCycle, unsigned AcquireAtCycle)
Compute the next cycle at which the given processor resource can be scheduled.
Definition: MachineScheduler.cpp:2374

llvm::SchedBoundary::dumpScheduledState
void dumpScheduledState() const
Definition: MachineScheduler.cpp:2941

llvm::SchedBoundary::removeReady
void removeReady(SUnit *SU)
Remove SU from the ready set for this boundary.
Definition: MachineScheduler.cpp:2868

llvm::SchedBoundary::getZoneCritResIdx
unsigned getZoneCritResIdx() const
Definition: MachineScheduler.h:1000

llvm::SchedBoundary::getUnscheduledLatency
unsigned getUnscheduledLatency(SUnit *SU) const
Definition: MachineScheduler.h:976

llvm::SchedDFSResult
Compute the values of each DAG node for various metrics during DFS.
Definition: ScheduleDFS.h:65

llvm::SchedDFSResult::getNumInstrs
unsigned getNumInstrs(const SUnit *SU) const
Get the number of instructions in the given subtree and its children.
Definition: ScheduleDFS.h:145

llvm::SchedDFSResult::getSubtreeID
unsigned getSubtreeID(const SUnit *SU) const
Get the ID of the subtree the given DAG node belongs to.
Definition: ScheduleDFS.h:169

llvm::SchedDFSResult::clear
void clear()
Clear the results.
Definition: ScheduleDFS.h:128

llvm::SchedDFSResult::getILP
ILPValue getILP(const SUnit *SU) const
Get the ILP value for a DAG node.
Definition: ScheduleDFS.h:158

llvm::SchedDFSResult::compute
void compute(ArrayRef< SUnit > SUnits)
Compute various metrics for the DAG with given roots.
Definition: ScheduleDAGInstrs.cpp:1469

llvm::SchedDFSResult::getNumSubtrees
unsigned getNumSubtrees() const
The number of subtrees detected in this DAG.
Definition: ScheduleDFS.h:163

llvm::SchedDFSResult::getSubtreeLevel
unsigned getSubtreeLevel(unsigned SubtreeID) const
Get the connection level of a subtree.
Definition: ScheduleDFS.h:180

llvm::SchedDFSResult::resize
void resize(unsigned NumSUnits)
Initialize the result data with the size of the DAG.
Definition: ScheduleDFS.h:136

llvm::SchedDFSResult::scheduleTree
void scheduleTree(unsigned SubtreeID)
Scheduler callback to update SubtreeConnectLevels when a tree is initially scheduled.
Definition: ScheduleDAGInstrs.cpp:1515

llvm::ScheduleDAGInstrs
A ScheduleDAG for scheduling lists of MachineInstr.
Definition: ScheduleDAGInstrs.h:121

llvm::ScheduleDAGInstrs::finishBlock
virtual void finishBlock()
Cleans up after scheduling in the given block.
Definition: ScheduleDAGInstrs.cpp:184

llvm::ScheduleDAGInstrs::end
MachineBasicBlock::iterator end() const
Returns an iterator to the bottom of the current scheduling region.
Definition: ScheduleDAGInstrs.h:294

llvm::ScheduleDAGInstrs::BB
MachineBasicBlock * BB
The block in which to insert instructions.
Definition: ScheduleDAGInstrs.h:147

llvm::ScheduleDAGInstrs::FirstDbgValue
MachineInstr * FirstDbgValue
Definition: ScheduleDAGInstrs.h:263

llvm::ScheduleDAGInstrs::startBlock
virtual void startBlock(MachineBasicBlock *BB)
Prepares to perform scheduling in the given block.
Definition: ScheduleDAGInstrs.cpp:180

llvm::ScheduleDAGInstrs::getSchedModel
const TargetSchedModel * getSchedModel() const
Gets the machine model for instruction scheduling.
Definition: ScheduleDAGInstrs.h:276

llvm::ScheduleDAGInstrs::RegionEnd
MachineBasicBlock::iterator RegionEnd
The end of the range to be scheduled.
Definition: ScheduleDAGInstrs.h:153

llvm::ScheduleDAGInstrs::getSchedClass
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and cache a resolved scheduling class for an SUnit.
Definition: ScheduleDAGInstrs.h:279

llvm::ScheduleDAGInstrs::DbgValues
DbgValueVector DbgValues
Remember instruction that precedes DBG_VALUE.
Definition: ScheduleDAGInstrs.h:262

llvm::ScheduleDAGInstrs::addEdge
bool addEdge(SUnit *SuccSU, const SDep &PredDep)
Add a DAG edge to the given SU with the given predecessor dependence data.
Definition: ScheduleDAGInstrs.cpp:1222

llvm::ScheduleDAGInstrs::DumpDirection
DumpDirection
The direction that should be used to dump the scheduled Sequence.
Definition: ScheduleDAGInstrs.h:195

llvm::ScheduleDAGInstrs::BottomUp
@ BottomUp
Definition: ScheduleDAGInstrs.h:197

llvm::ScheduleDAGInstrs::TopDown
@ TopDown
Definition: ScheduleDAGInstrs.h:196

llvm::ScheduleDAGInstrs::Bidirectional
@ Bidirectional
Definition: ScheduleDAGInstrs.h:198

llvm::ScheduleDAGInstrs::TrackLaneMasks
bool TrackLaneMasks
Whether lane masks should get tracked.
Definition: ScheduleDAGInstrs.h:141

llvm::ScheduleDAGInstrs::dumpNode
void dumpNode(const SUnit &SU) const override
Definition: ScheduleDAGInstrs.cpp:1178

llvm::ScheduleDAGInstrs::IsReachable
bool IsReachable(SUnit *SU, SUnit *TargetSU)
IsReachable - Checks if SU is reachable from TargetSU.
Definition: ScheduleDAGInstrs.h:286

llvm::ScheduleDAGInstrs::DumpDir
DumpDirection DumpDir
Definition: ScheduleDAGInstrs.h:205

llvm::ScheduleDAGInstrs::begin
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
Definition: ScheduleDAGInstrs.h:291

llvm::ScheduleDAGInstrs::buildSchedGraph
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
Definition: ScheduleDAGInstrs.cpp:739

llvm::ScheduleDAGInstrs::getSUnit
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
Definition: ScheduleDAGInstrs.h:404

llvm::ScheduleDAGInstrs::SchedModel
TargetSchedModel SchedModel
TargetSchedModel provides an interface to the machine model.
Definition: ScheduleDAGInstrs.h:127

llvm::ScheduleDAGInstrs::canAddEdge
bool canAddEdge(SUnit *SuccSU, SUnit *PredSU)
True if an edge can be added from PredSU to SuccSU without creating a cycle.
Definition: ScheduleDAGInstrs.cpp:1218

llvm::ScheduleDAGInstrs::RegionBegin
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
Definition: ScheduleDAGInstrs.h:150

llvm::ScheduleDAGInstrs::enterRegion
virtual void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs)
Initialize the DAG and common scheduler state for a new scheduling region.
Definition: ScheduleDAGInstrs.cpp:189

llvm::ScheduleDAGInstrs::dump
void dump() const override
Definition: ScheduleDAGInstrs.cpp:1189

llvm::ScheduleDAGMILive
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
Definition: MachineScheduler.h:398

llvm::ScheduleDAGMILive::scheduleMI
void scheduleMI(SUnit *SU, bool IsTopNode)
Move an instruction and update register pressure.
Definition: MachineScheduler.cpp:1651

llvm::ScheduleDAGMILive::schedule
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
Definition: MachineScheduler.cpp:1461

llvm::ScheduleDAGMILive::VRegUses
VReg2SUnitMultiMap VRegUses
Maps vregs to the SUnits of their uses in the current scheduling region.
Definition: MachineScheduler.h:410

llvm::ScheduleDAGMILive::computeDFSResult
void computeDFSResult()
Compute a DFSResult after DAG building is complete, and before any queue comparisons.
Definition: MachineScheduler.cpp:1544

llvm::ScheduleDAGMILive::getPressureDiff
PressureDiff & getPressureDiff(const SUnit *SU)
Definition: MachineScheduler.h:466

llvm::ScheduleDAGMILive::DFSResult
SchedDFSResult * DFSResult
Information about DAG subtrees.
Definition: MachineScheduler.h:404

llvm::ScheduleDAGMILive::enterRegion
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
Definition: MachineScheduler.cpp:1235

llvm::ScheduleDAGMILive::initQueues
void initQueues(ArrayRef< SUnit * > TopRoots, ArrayRef< SUnit * > BotRoots)
Release ExitSU predecessors and setup scheduler queues.
Definition: MachineScheduler.cpp:1641

llvm::ScheduleDAGMILive::updatePressureDiffs
void updatePressureDiffs(ArrayRef< RegisterMaskPair > LiveUses)
Update the PressureDiff array for liveness after scheduling this instruction.
Definition: MachineScheduler.cpp:1360

llvm::ScheduleDAGMILive::ShouldTrackLaneMasks
bool ShouldTrackLaneMasks
Definition: MachineScheduler.h:419

llvm::ScheduleDAGMILive::BotRPTracker
RegPressureTracker BotRPTracker
Definition: MachineScheduler.h:434

llvm::ScheduleDAGMILive::buildDAGWithRegPressure
void buildDAGWithRegPressure()
Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking enabled.
Definition: MachineScheduler.cpp:1521

llvm::ScheduleDAGMILive::RegionCriticalPSets
std::vector< PressureChange > RegionCriticalPSets
List of pressure sets that exceed the target's pressure limit before scheduling, listed in increasing...
Definition: MachineScheduler.h:426

llvm::ScheduleDAGMILive::updateScheduledPressure
void updateScheduledPressure(const SUnit *SU, const std::vector< unsigned > &NewMaxPressure)
Definition: MachineScheduler.cpp:1332

llvm::ScheduleDAGMILive::SUPressureDiffs
PressureDiffs SUPressureDiffs
Definition: MachineScheduler.h:415

llvm::ScheduleDAGMILive::computeCyclicCriticalPath
unsigned computeCyclicCriticalPath()
Compute the cyclic critical path through the DAG.
Definition: MachineScheduler.cpp:1580

llvm::ScheduleDAGMILive::initRegPressure
void initRegPressure()
Definition: MachineScheduler.cpp:1257

llvm::ScheduleDAGMILive::collectVRegUses
void collectVRegUses(SUnit &SU)
Definition: MachineScheduler.cpp:1193

llvm::ScheduleDAGMILive::RegClassInfo
RegisterClassInfo * RegClassInfo
Definition: MachineScheduler.h:400

llvm::ScheduleDAGMILive::getDFSResult
const SchedDFSResult * getDFSResult() const
Return a non-null DFS result if the scheduling strategy initialized it.
Definition: MachineScheduler.h:478

llvm::ScheduleDAGMILive::RPTracker
RegPressureTracker RPTracker
Definition: MachineScheduler.h:421

llvm::ScheduleDAGMILive::ShouldTrackPressure
bool ShouldTrackPressure
Register pressure in this region computed by initRegPressure.
Definition: MachineScheduler.h:418

llvm::ScheduleDAGMILive::~ScheduleDAGMILive
~ScheduleDAGMILive() override
Definition: MachineScheduler.cpp:1189

llvm::ScheduleDAGMILive::dump
void dump() const override
Definition: MachineScheduler.cpp:1428

llvm::ScheduleDAGMILive::getScheduledTrees
BitVector & getScheduledTrees()
Definition: MachineScheduler.h:480

llvm::ScheduleDAGMILive::LiveRegionEnd
MachineBasicBlock::iterator LiveRegionEnd
Definition: MachineScheduler.h:407

llvm::ScheduleDAGMILive::ScheduledTrees
BitVector ScheduledTrees
Definition: MachineScheduler.h:405

llvm::ScheduleDAGMILive::TopRPTracker
RegPressureTracker TopRPTracker
Definition: MachineScheduler.h:430

llvm::ScheduleDAGMI
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
Definition: MachineScheduler.h:276

llvm::ScheduleDAGMI::dumpSchedule
void dumpSchedule() const
dump the scheduled Sequence.
Definition: MachineScheduler.cpp:1162

llvm::ScheduleDAGMI::SchedImpl
std::unique_ptr< MachineSchedStrategy > SchedImpl
Definition: MachineScheduler.h:280

llvm::ScheduleDAGMI::startBlock
void startBlock(MachineBasicBlock *bb) override
Prepares to perform scheduling in the given block.
Definition: MachineScheduler.cpp:776

llvm::ScheduleDAGMI::releasePred
void releasePred(SUnit *SU, SDep *PredEdge)
ReleasePred - Decrement the NumSuccsLeft count of a predecessor.
Definition: MachineScheduler.cpp:743

llvm::ScheduleDAGMI::initQueues
void initQueues(ArrayRef< SUnit * > TopRoots, ArrayRef< SUnit * > BotRoots)
Release ExitSU predecessors and setup scheduler queues.
Definition: MachineScheduler.cpp:934

llvm::ScheduleDAGMI::addMutation
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
Definition: MachineScheduler.h:328

llvm::ScheduleDAGMI::moveInstruction
void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos)
Change the position of an instruction within the basic block and update live ranges and region bounda...
Definition: MachineScheduler.cpp:802

llvm::ScheduleDAGMI::releasePredecessors
void releasePredecessors(SUnit *SU)
releasePredecessors - Call releasePred on each of SU's predecessors.
Definition: MachineScheduler.cpp:771

llvm::ScheduleDAGMI::postProcessDAG
void postProcessDAG()
Apply each ScheduleDAGMutation step in order.
Definition: MachineScheduler.cpp:909

llvm::ScheduleDAGMI::NextClusterSucc
const SUnit * NextClusterSucc
Definition: MachineScheduler.h:293

llvm::ScheduleDAGMI::dumpScheduleTraceTopDown
void dumpScheduleTraceTopDown() const
Print execution trace of the schedule top-down or bottom-up.
Definition: MachineScheduler.cpp:998

llvm::ScheduleDAGMI::NextClusterPred
const SUnit * NextClusterPred
Record the next node in a scheduled cluster.
Definition: MachineScheduler.h:292

llvm::ScheduleDAGMI::checkSchedLimit
bool checkSchedLimit()
Definition: MachineScheduler.cpp:820

llvm::ScheduleDAGMI::top
MachineBasicBlock::iterator top() const
Definition: MachineScheduler.h:333

llvm::ScheduleDAGMI::schedule
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
Definition: MachineScheduler.cpp:835

llvm::ScheduleDAGMI::findRootsAndBiasEdges
void findRootsAndBiasEdges(SmallVectorImpl< SUnit * > &TopRoots, SmallVectorImpl< SUnit * > &BotRoots)
Definition: MachineScheduler.cpp:915

llvm::ScheduleDAGMI::bottom
MachineBasicBlock::iterator bottom() const
Definition: MachineScheduler.h:334

llvm::ScheduleDAGMI::CurrentBottom
MachineBasicBlock::iterator CurrentBottom
The bottom of the unscheduled zone.
Definition: MachineScheduler.h:289

llvm::ScheduleDAGMI::hasVRegLiveness
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
Definition: MachineScheduler.h:321

llvm::ScheduleDAGMI::enterRegion
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
Definition: MachineScheduler.cpp:790

llvm::ScheduleDAGMI::AA
AAResults * AA
Definition: MachineScheduler.h:278

llvm::ScheduleDAGMI::getLIS
LiveIntervals * getLIS() const
Definition: MachineScheduler.h:318

llvm::ScheduleDAGMI::viewGraph
void viewGraph() override
Out-of-line implementation with no arguments is handy for gdb.
Definition: MachineScheduler.cpp:4435

llvm::ScheduleDAGMI::releaseSucc
void releaseSucc(SUnit *SU, SDep *SuccEdge)
ReleaseSucc - Decrement the NumPredsLeft count of a successor.
Definition: MachineScheduler.cpp:706

llvm::ScheduleDAGMI::dumpScheduleTraceBottomUp
void dumpScheduleTraceBottomUp() const
Definition: MachineScheduler.cpp:1079

llvm::ScheduleDAGMI::~ScheduleDAGMI
~ScheduleDAGMI() override

llvm::ScheduleDAGMI::finishBlock
void finishBlock() override
Cleans up after scheduling in the given block.
Definition: MachineScheduler.cpp:781

llvm::ScheduleDAGMI::LIS
LiveIntervals * LIS
Definition: MachineScheduler.h:279

llvm::ScheduleDAGMI::getNextClusterPred
const SUnit * getNextClusterPred() const
Definition: MachineScheduler.h:355

llvm::ScheduleDAGMI::updateQueues
void updateQueues(SUnit *SU, bool IsTopNode)
Update scheduler DAG and queues after scheduling an instruction.
Definition: MachineScheduler.cpp:964

llvm::ScheduleDAGMI::placeDebugValues
void placeDebugValues()
Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
Definition: MachineScheduler.cpp:975

llvm::ScheduleDAGMI::CurrentTop
MachineBasicBlock::iterator CurrentTop
The top of the unscheduled zone.
Definition: MachineScheduler.h:286

llvm::ScheduleDAGMI::releaseSuccessors
void releaseSuccessors(SUnit *SU)
releaseSuccessors - Call releaseSucc on each of SU's successors.
Definition: MachineScheduler.cpp:734

llvm::ScheduleDAGMI::getNextClusterSucc
const SUnit * getNextClusterSucc() const
Definition: MachineScheduler.h:357

llvm::ScheduleDAGMI::Mutations
std::vector< std::unique_ptr< ScheduleDAGMutation > > Mutations
Ordered list of DAG postprocessing steps.
Definition: MachineScheduler.h:283

llvm::ScheduleDAGMutation
Mutate the DAG as a postpass after normal DAG building.
Definition: ScheduleDAGMutation.h:22

llvm::ScheduleDAG
Definition: ScheduleDAG.h:554

llvm::ScheduleDAG::MRI
MachineRegisterInfo & MRI
Virtual/real register map.
Definition: ScheduleDAG.h:560

llvm::ScheduleDAG::TII
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:557

llvm::ScheduleDAG::SUnits
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:561

llvm::ScheduleDAG::TRI
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:558

llvm::ScheduleDAG::EntrySU
SUnit EntrySU
Special node for the region entry.
Definition: ScheduleDAG.h:562

llvm::ScheduleDAG::MF
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:559

llvm::ScheduleDAG::dumpNodeAll
void dumpNodeAll(const SUnit &SU) const
Definition: ScheduleDAG.cpp:364

llvm::ScheduleDAG::ExitSU
SUnit ExitSU
Special node for the region exit.
Definition: ScheduleDAG.h:563

llvm::ScheduleHazardRecognizer::RecedeCycle
virtual void RecedeCycle()
RecedeCycle - This callback is invoked whenever the next bottom-up instruction to be scheduled cannot...
Definition: ScheduleHazardRecognizer.h:109

llvm::ScheduleHazardRecognizer::Reset
virtual void Reset()
Reset - This callback is invoked when a new block of instructions is about to be schedule.
Definition: ScheduleHazardRecognizer.h:67

llvm::ScheduleHazardRecognizer::EmitInstruction
virtual void EmitInstruction(SUnit *)
EmitInstruction - This callback is invoked when an instruction is emitted, to advance the hazard stat...
Definition: ScheduleHazardRecognizer.h:71

llvm::ScheduleHazardRecognizer::NoHazard
@ NoHazard
Definition: ScheduleHazardRecognizer.h:38

llvm::ScheduleHazardRecognizer::AdvanceCycle
virtual void AdvanceCycle()
AdvanceCycle - This callback is invoked whenever the next top-down instruction to be scheduled cannot...
Definition: ScheduleHazardRecognizer.h:104

llvm::ScheduleHazardRecognizer::getHazardType
virtual HazardType getHazardType(SUnit *, int Stalls=0)
getHazardType - Return the hazard type of emitting this node.
Definition: ScheduleHazardRecognizer.h:60

llvm::ScheduleHazardRecognizer::isEnabled
bool isEnabled() const
Definition: ScheduleHazardRecognizer.h:45

llvm::SlotIndex
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:68

llvm::SlotIndex::isSameInstr
static bool isSameInstr(SlotIndex A, SlotIndex B)
isSameInstr - Return true if A and B refer to the same instruction.
Definition: SlotIndexes.h:179

llvm::SlotIndex::getRegSlot
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
Definition: SlotIndexes.h:240

llvm::SlotIndexes
SlotIndexes pass.
Definition: SlotIndexes.h:300

llvm::SmallVectorBase::size
size_t size() const
Definition: SmallVector.h:91

llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586

llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:623

llvm::SmallVectorImpl::resize
void resize(size_type N)
Definition: SmallVector.h:651

llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition: SmallVector.h:426

llvm::SmallVectorTemplateCommon::end
iterator end()
Definition: SmallVector.h:282

llvm::SmallVectorTemplateCommon::const_reverse_iterator
std::reverse_iterator< const_iterator > const_reverse_iterator
Definition: SmallVector.h:267

llvm::SmallVectorTemplateCommon::begin
iterator begin()
Definition: SmallVector.h:280

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209

llvm::SparseMultiSet::find
iterator find(const KeyT &Key)
Find an element by its key.
Definition: SparseMultiSet.h:375

llvm::SparseMultiSet::clear
void clear()
Clears the set.
Definition: SparseMultiSet.h:342

llvm::SparseMultiSet::end
iterator end()
Returns an iterator past this container.
Definition: SparseMultiSet.h:319

llvm::SparseMultiSet::insert
iterator insert(const ValueT &Val)
Insert a new element at the tail of the subset list.
Definition: SparseMultiSet.h:419

llvm::SparseMultiSet< VReg2SUnit, VirtReg2IndexFunctor >::iterator
iterator_base< SparseMultiSet * > iterator
Definition: SparseMultiSet.h:311

llvm::SparseMultiSet::setUniverse
void setUniverse(unsigned U)
Set the universe size which determines the largest key the set can hold.
Definition: SparseMultiSet.h:202

llvm::SrcOp
Definition: MachineIRBuilder.h:131

llvm::SrcOp::getReg
Register getReg() const
Definition: MachineIRBuilder.h:183

llvm::TargetFrameLowering
Information about stack frame layout on the target.
Definition: TargetFrameLowering.h:44

llvm::TargetFrameLowering::StackGrowsDown
@ StackGrowsDown
Definition: TargetFrameLowering.h:48

llvm::TargetFrameLowering::getStackGrowthDirection
StackDirection getStackGrowthDirection() const
getStackGrowthDirection - Return the direction the stack grows
Definition: TargetFrameLowering.h:93

llvm::TargetInstrInfo
TargetInstrInfo - Interface to description of machine instruction set.
Definition: TargetInstrInfo.h:111

llvm::TargetInstrInfo::CreateTargetMIHazardRecognizer
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
Definition: TargetInstrInfo.cpp:1411

llvm::TargetLoweringBase::getRegClassFor
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
Definition: TargetLowering.h:1022

llvm::TargetLoweringBase::isTypeLegal
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
Definition: TargetLowering.h:1073

llvm::TargetLowering
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Definition: TargetLowering.h:3765

llvm::TargetPassConfig
Target-Independent Code Generator Pass Configuration Options.
Definition: TargetPassConfig.h:85

llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:238

llvm::TargetRegisterInfo::getRegPressureSetName
virtual const char * getRegPressureSetName(unsigned Idx) const =0
Get the name of this register unit pressure set.

llvm::TargetSchedModel
Provide an instruction scheduling machine model to CodeGen passes.
Definition: TargetSchedule.h:30

llvm::TargetSchedModel::getResourceName
const char * getResourceName(unsigned PIdx) const
Definition: TargetSchedule.h:122

llvm::TargetSchedModel::mustEndGroup
bool mustEndGroup(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return true if current group must end.
Definition: TargetSchedule.cpp:84

llvm::TargetSchedModel::getIssueWidth
unsigned getIssueWidth() const
Maximum number of micro-ops that may be scheduled per cycle.
Definition: TargetSchedule.h:98

llvm::TargetSchedModel::getMicroOpFactor
unsigned getMicroOpFactor() const
Multiply number of micro-ops by this factor to normalize it relative to other resources.
Definition: TargetSchedule.h:149

llvm::TargetSchedModel::getWriteProcResEnd
ProcResIter getWriteProcResEnd(const MCSchedClassDesc *SC) const
Definition: TargetSchedule.h:137

llvm::TargetSchedModel::hasInstrSchedModel
bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model.
Definition: TargetSchedule.cpp:43

llvm::TargetSchedModel::mustBeginGroup
bool mustBeginGroup(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return true if new group must begin.
Definition: TargetSchedule.cpp:73

llvm::TargetSchedModel::getLatencyFactor
unsigned getLatencyFactor() const
Multiply cycle count by this factor to normalize it relative to other resources.
Definition: TargetSchedule.h:155

llvm::TargetSchedModel::getResourceFactor
unsigned getResourceFactor(unsigned ResIdx) const
Multiply the number of units consumed for a resource by this factor to normalize it relative to other...
Definition: TargetSchedule.h:143

llvm::TargetSchedModel::getMicroOpBufferSize
unsigned getMicroOpBufferSize() const
Number of micro-ops that may be buffered for OOO execution.
Definition: TargetSchedule.h:160

llvm::TargetSchedModel::getNumMicroOps
unsigned getNumMicroOps(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return the number of issue slots required for this MI.
Definition: TargetSchedule.cpp:95

llvm::TargetSchedModel::getProcResource
const MCProcResourceDesc * getProcResource(unsigned PIdx) const
Get a processor resource by ID for convenience.
Definition: TargetSchedule.h:117

llvm::TargetSchedModel::getNumProcResourceKinds
unsigned getNumProcResourceKinds() const
Get the number of kinds of resources for this target.
Definition: TargetSchedule.h:112

llvm::TargetSchedModel::getInstrItineraries
const InstrItineraryData * getInstrItineraries() const
Definition: TargetSchedule.h:82

llvm::TargetSchedModel::enableIntervals
bool enableIntervals() const
Definition: TargetSchedule.cpp:344

llvm::TargetSchedModel::getWriteProcResBegin
ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const
Definition: TargetSchedule.h:133

llvm::TargetSubtargetInfo
TargetSubtargetInfo - Generic base class for all target subtargets.
Definition: TargetSubtargetInfo.h:63

llvm::TargetSubtargetInfo::getMacroFusions
virtual std::vector< MacroFusionPredTy > getMacroFusions() const
Get the list of MacroFusion predicates.
Definition: TargetSubtargetInfo.h:331

llvm::TargetSubtargetInfo::enableMachineScheduler
virtual bool enableMachineScheduler() const
True if the subtarget should run MachineScheduler after aggressive coalescing.
Definition: TargetSubtargetInfo.cpp:35

llvm::TargetSubtargetInfo::overrideSchedPolicy
virtual void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const
Override generic scheduling policy within a region.
Definition: TargetSubtargetInfo.h:229

llvm::TargetSubtargetInfo::enablePostRAMachineScheduler
virtual bool enablePostRAMachineScheduler() const
True if the subtarget should run a machine scheduler after register allocation.
Definition: TargetSubtargetInfo.cpp:52

llvm::TargetSubtargetInfo::getFrameLowering
virtual const TargetFrameLowering * getFrameLowering() const
Definition: TargetSubtargetInfo.h:97

llvm::TargetSubtargetInfo::getInstrInfo
virtual const TargetInstrInfo * getInstrInfo() const
Definition: TargetSubtargetInfo.h:96

llvm::TargetSubtargetInfo::getTargetLowering
virtual const TargetLowering * getTargetLowering() const
Definition: TargetSubtargetInfo.h:100

llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81

llvm::VNInfo
VNInfo - Value Number Information.
Definition: LiveInterval.h:53

llvm::VNInfo::def
SlotIndex def
The index of the defining instruction.
Definition: LiveInterval.h:61

llvm::VNInfo::isPHIDef
bool isPHIDef() const
Returns true if this value is defined by a PHI instruction (or was, PHI instructions may have been el...
Definition: LiveInterval.h:78

llvm::cl::opt
Definition: CommandLine.h:1430

llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52

llvm::raw_string_ostream
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:660

unsigned

iterator_range.h
This provides a very simple, boring adaptor for a begin and end iterator into a range type.

ErrorHandling.h

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143

false
Definition: StackSlotColoring.cpp:184

llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24

llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34

llvm::DOT::getColorString
StringRef getColorString(unsigned NodeNumber)
Get a color string for this node number.
Definition: GraphWriter.cpp:90

llvm::MISchedPostRASched::Direction
Direction
Definition: MachineScheduler.cpp:85

llvm::MISchedPostRASched::TopDown
@ TopDown
Definition: MachineScheduler.cpp:86

llvm::MISchedPostRASched::Bidirectional
@ Bidirectional
Definition: MachineScheduler.cpp:88

llvm::MISchedPostRASched::BottomUp
@ BottomUp
Definition: MachineScheduler.cpp:87

llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:314

llvm::X86::FirstMacroFusionInstKind::Cmp
@ Cmp

llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:138

llvm::cl::apply
void apply(Opt *O, const Mod &M, const Mods &... Ms)
Definition: CommandLine.h:1316

llvm::cl::values
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:718

llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450

llvm::codeview::FrameCookieKind::Copy
@ Copy

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18

llvm::Offset
@ Offset
Definition: DWP.cpp:456

llvm::operator<
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:361

llvm::stable_sort
void stable_sort(R &&Range)
Definition: STLExtras.h:1995

llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722

llvm::PrintDAGs
cl::opt< bool > PrintDAGs

llvm::getWeakLeft
unsigned getWeakLeft(const SUnit *SU, bool isTop)
Definition: MachineScheduler.cpp:3433

llvm::createMacroFusionDAGMutation
std::unique_ptr< ScheduleDAGMutation > createMacroFusionDAGMutation(ArrayRef< MacroFusionPredTy > Predicates, bool BranchOnly=false)
Create a DAG scheduling mutation to pair instructions back to back for instructions that benefit acco...
Definition: MacroFusion.cpp:215

llvm::right_justify
FormattedString right_justify(StringRef Str, unsigned Width)
right_justify - add spaces before string so total output is Width characters.
Definition: Format.h:153

llvm::Latency
@ Latency
Definition: SIMachineScheduler.h:34

llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:76

llvm::PrintLaneMask
Printable PrintLaneMask(LaneBitmask LaneMask)
Create Printable object to print LaneBitmasks on a raw_ostream.
Definition: LaneBitmask.h:92

llvm::MISchedDumpReservedCycles
cl::opt< bool > MISchedDumpReservedCycles("misched-dump-reserved-cycles", cl::Hidden, cl::init(false), cl::desc("Dump resource usage at schedule boundary."))

llvm::initializePostMachineSchedulerPass
void initializePostMachineSchedulerPass(PassRegistry &)

llvm::VerifyScheduling
cl::opt< bool > VerifyScheduling

llvm::MachineSchedulerID
char & MachineSchedulerID
MachineScheduler - This pass schedules machine instructions.
Definition: MachineScheduler.cpp:264

llvm::PostMachineSchedulerID
char & PostMachineSchedulerID
PostMachineScheduler - This pass schedules machine instructions postRA.
Definition: MachineScheduler.cpp:295

llvm::createGenericSchedLive
ScheduleDAGMILive * createGenericSchedLive(MachineSchedContext *C)
Create the standard converging machine scheduler.
Definition: MachineScheduler.cpp:3835

llvm::ViewMISchedDAGs
cl::opt< bool > ViewMISchedDAGs

llvm::tryPressure
bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason, const TargetRegisterInfo *TRI, const MachineFunction &MF)
Definition: MachineScheduler.cpp:3394

llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1647

llvm::printVRegOrUnit
Printable printVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *TRI)
Create Printable object to print virtual registers and physical registers on a raw_ostream.
Definition: TargetRegisterInfo.cpp:162

llvm::createStoreClusterDAGMutation
std::unique_ptr< ScheduleDAGMutation > createStoreClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, bool ReorderWhileClustering=false)
If ReorderWhileClustering is set to true, no attempt will be made to reduce reordering due to store c...
Definition: MachineScheduler.cpp:1833

llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163

llvm::DumpCriticalPathLength
cl::opt< bool > DumpCriticalPathLength("misched-dcpl", cl::Hidden, cl::desc("Print critical path length to stdout"))

llvm::tryLatency
bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary &Zone)
Definition: MachineScheduler.cpp:3203

llvm::PostRADirection
cl::opt< MISchedPostRASched::Direction > PostRADirection("misched-postra-direction", cl::Hidden, cl::desc("Post reg-alloc list scheduling direction"), cl::init(MISchedPostRASched::TopDown), cl::values(clEnumValN(MISchedPostRASched::TopDown, "topdown", "Force top-down post reg-alloc list scheduling"), clEnumValN(MISchedPostRASched::BottomUp, "bottomup", "Force bottom-up post reg-alloc list scheduling"), clEnumValN(MISchedPostRASched::Bidirectional, "bidirectional", "Force bidirectional post reg-alloc list scheduling")))

llvm::ForceBottomUp
cl::opt< bool > ForceBottomUp

llvm::errs
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition: raw_ostream.cpp:908

llvm::initializeMachineSchedulerPass
void initializeMachineSchedulerPass(PassRegistry &)

llvm::createGenericSchedPostRA
ScheduleDAGMI * createGenericSchedPostRA(MachineSchedContext *C)
Create a generic scheduler with no vreg liveness or DAG mutation passes.
Definition: MachineScheduler.cpp:4125

llvm::left_justify
FormattedString left_justify(StringRef Str, unsigned Width)
left_justify - append spaces after string so total output is Width characters.
Definition: Format.h:146

llvm::createLoadClusterDAGMutation
std::unique_ptr< ScheduleDAGMutation > createLoadClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, bool ReorderWhileClustering=false)
If ReorderWhileClustering is set to true, no attempt will be made to reduce reordering due to store c...
Definition: MachineScheduler.cpp:1824

llvm::tryGreater
bool tryGreater(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Definition: MachineScheduler.cpp:3187

llvm::ViewGraph
void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
ViewGraph - Emit a dot graph, run 'dot', run gv on the postscript file, then cleanup.
Definition: GraphWriter.h:427

llvm::ForceTopDown
cl::opt< bool > ForceTopDown

llvm::dumpRegSetPressure
void dumpRegSetPressure(ArrayRef< unsigned > SetPressure, const TargetRegisterInfo *TRI)
Definition: RegisterPressure.cpp:81

llvm::tryLess
bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Return true if this heuristic determines order.
Definition: MachineScheduler.cpp:3171

llvm::createCopyConstrainDAGMutation
std::unique_ptr< ScheduleDAGMutation > createCopyConstrainDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
Definition: MachineScheduler.cpp:2053

llvm::printReg
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Definition: TargetRegisterInfo.cpp:108

llvm::printMBBReference
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Definition: MachineBasicBlock.cpp:120

llvm::MischedDetailResourceBooking
cl::opt< bool > MischedDetailResourceBooking("misched-detail-resource-booking", cl::Hidden, cl::init(false), cl::desc("Show details of invoking getNextResoufceCycle."))

llvm::biasPhysReg
int biasPhysReg(const SUnit *SU, bool isTop)
Minimize physical register live ranges.
Definition: MachineScheduler.cpp:3444

std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860

raw_ostream.h

N
#define N

llvm::DOTGraphTraits< ScheduleDAGMI * >::getNodeDescription
static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G)
Definition: MachineScheduler.cpp:4402

llvm::DOTGraphTraits< ScheduleDAGMI * >::getEdgeAttributes
static std::string getEdgeAttributes(const SUnit *Node, SUnitIterator EI, const ScheduleDAG *Graph)
If you want to override the dot attributes printed for a particular edge, override this method.
Definition: MachineScheduler.cpp:4380

llvm::DOTGraphTraits< ScheduleDAGMI * >::getGraphName
static std::string getGraphName(const ScheduleDAG *G)
Definition: MachineScheduler.cpp:4363

llvm::DOTGraphTraits< ScheduleDAGMI * >::getNodeLabel
static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G)
Definition: MachineScheduler.cpp:4390

llvm::DOTGraphTraits< ScheduleDAGMI * >::isNodeHidden
static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G)
Definition: MachineScheduler.cpp:4371

llvm::DOTGraphTraits< ScheduleDAGMI * >::DOTGraphTraits
DOTGraphTraits(bool isSimple=false)
Definition: MachineScheduler.cpp:4361

llvm::DOTGraphTraits< ScheduleDAGMI * >::getNodeAttributes
static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G)
Definition: MachineScheduler.cpp:4406

llvm::DOTGraphTraits< ScheduleDAGMI * >::renderGraphFromBottomUp
static bool renderGraphFromBottomUp()
Definition: MachineScheduler.cpp:4367

llvm::DOTGraphTraits
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
Definition: DOTGraphTraits.h:166

llvm::DefaultDOTGraphTraits
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits ...
Definition: DOTGraphTraits.h:28

llvm::GenericSchedulerBase::CandPolicy
Policy for scheduling the next instruction in the candidate's zone.
Definition: MachineScheduler.h:1081

llvm::GenericSchedulerBase::CandPolicy::ReduceResIdx
unsigned ReduceResIdx
Definition: MachineScheduler.h:1083

llvm::GenericSchedulerBase::CandPolicy::DemandResIdx
unsigned DemandResIdx
Definition: MachineScheduler.h:1084

llvm::GenericSchedulerBase::CandPolicy::ReduceLatency
bool ReduceLatency
Definition: MachineScheduler.h:1082

llvm::GenericSchedulerBase::SchedCandidate
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of p...
Definition: MachineScheduler.h:1119

llvm::GenericSchedulerBase::SchedCandidate::setBest
void setBest(SchedCandidate &Best)
Definition: MachineScheduler.h:1152

llvm::GenericSchedulerBase::SchedCandidate::SU
SUnit * SU
Definition: MachineScheduler.h:1123

llvm::GenericSchedulerBase::SchedCandidate::reset
void reset(const CandPolicy &NewPolicy)
Definition: MachineScheduler.h:1140

llvm::GenericSchedulerBase::SchedCandidate::initResourceDelta
void initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
Definition: MachineScheduler.cpp:2971

llvm::GenericSchedulerBase::SchedCandidate::RPDelta
RegPressureDelta RPDelta
Definition: MachineScheduler.h:1132

llvm::GenericSchedulerBase::SchedCandidate::AtTop
bool AtTop
Definition: MachineScheduler.h:1129

llvm::GenericSchedulerBase::SchedCandidate::ResDelta
SchedResourceDelta ResDelta
Definition: MachineScheduler.h:1135

llvm::GenericSchedulerBase::SchedCandidate::isValid
bool isValid() const
Definition: MachineScheduler.h:1149

llvm::GenericSchedulerBase::SchedCandidate::Reason
CandReason Reason
Definition: MachineScheduler.h:1126

llvm::GenericSchedulerBase::SchedCandidate::Policy
CandPolicy Policy
Definition: MachineScheduler.h:1120

llvm::GenericSchedulerBase::SchedResourceDelta
Status of an instruction's critical resource consumption.
Definition: MachineScheduler.h:1099

llvm::GenericSchedulerBase::SchedResourceDelta::CritResources
unsigned CritResources
Definition: MachineScheduler.h:1101

llvm::GenericSchedulerBase::SchedResourceDelta::DemandedResources
unsigned DemandedResources
Definition: MachineScheduler.h:1104

llvm::GraphTraits
Definition: GraphTraits.h:37

llvm::LaneBitmask::getNone
static constexpr LaneBitmask getNone()
Definition: LaneBitmask.h:81

llvm::MCProcResourceDesc::BufferSize
int BufferSize
Definition: MCSchedule.h:48

llvm::MCProcResourceDesc::Name
const char * Name
Definition: MCSchedule.h:32

llvm::MCProcResourceDesc::SubUnitsIdxBegin
const unsigned * SubUnitsIdxBegin
Definition: MCSchedule.h:53

llvm::MCProcResourceDesc::NumUnits
unsigned NumUnits
Definition: MCSchedule.h:33

llvm::MCSchedClassDesc
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition: MCSchedule.h:118

llvm::MCWriteProcResEntry
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition: MCSchedule.h:63

llvm::MachineSchedContext
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
Definition: MachineScheduler.h:128

llvm::MachineSchedContext::RegClassInfo
RegisterClassInfo * RegClassInfo
Definition: MachineScheduler.h:136

llvm::MachineSchedContext::~MachineSchedContext
virtual ~MachineSchedContext()
Definition: MachineScheduler.cpp:212

llvm::MachineSchedContext::MachineSchedContext
MachineSchedContext()
Definition: MachineScheduler.cpp:208

llvm::MemOp
Definition: TargetLowering.h:113

llvm::RegPressureDelta::CriticalMax
PressureChange CriticalMax
Definition: RegisterPressure.h:242

llvm::RegPressureDelta::CurrentMax
PressureChange CurrentMax
Definition: RegisterPressure.h:243

llvm::RegPressureDelta::Excess
PressureChange Excess
Definition: RegisterPressure.h:241

llvm::RegionPressure
RegisterPressure computed within a region of instructions delimited by TopPos and BottomPos.
Definition: RegisterPressure.h:82

llvm::RegisterMaskPair
Definition: RegisterPressure.h:38

llvm::RegisterPressure::LiveInRegs
SmallVector< RegisterMaskPair, 8 > LiveInRegs
List of live in virtual registers or physical register units.
Definition: RegisterPressure.h:52

llvm::RegisterPressure::MaxSetPressure
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
Definition: RegisterPressure.h:49

llvm::RegisterPressure::LiveOutRegs
SmallVector< RegisterMaskPair, 8 > LiveOutRegs
Definition: RegisterPressure.h:53

llvm::SchedRemainder
Summarize the unscheduled region.
Definition: MachineScheduler.h:588

llvm::SchedRemainder::init
void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
Definition: MachineScheduler.cpp:2280

llvm::SchedRemainder::RemIssueCount
unsigned RemIssueCount
Definition: MachineScheduler.h:594

llvm::SchedRemainder::CriticalPath
unsigned CriticalPath
Definition: MachineScheduler.h:590

llvm::SchedRemainder::CyclicCritPath
unsigned CyclicCritPath
Definition: MachineScheduler.h:591

llvm::SchedRemainder::RemainingCounts
SmallVector< unsigned, 16 > RemainingCounts
Definition: MachineScheduler.h:599

llvm::SchedRemainder::IsAcyclicLatencyLimited
bool IsAcyclicLatencyLimited
Definition: MachineScheduler.h:596

llvm::SchedRemainder::reset
void reset()
Definition: MachineScheduler.h:603

llvm::VReg2SUnit
An individual mapping from virtual register number to SUnit.
Definition: ScheduleDAGInstrs.h:53

llvm::cl::desc
Definition: CommandLine.h:416