doxygen/MachinePipeliner_8cpp_source.html

//===- MachinePipeliner.cpp - Machine Software Pipeliner Pass -------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

// An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.

//

// This SMS implementation is a target-independent back-end pass. When enabled,

// the pass runs just prior to the register allocation pass, while the machine

// IR is in SSA form. If software pipelining is successful, then the original

// loop is replaced by the optimized loop. The optimized loop contains one or

// more prolog blocks, the pipelined kernel, and one or more epilog blocks. If

// the instructions cannot be scheduled in a given MII, we increase the MII by

// one and try again.

//

// The SMS implementation is an extension of the ScheduleDAGInstrs class. We

// represent loop carried dependences in the DAG as order edges to the Phi

// nodes. We also perform several passes over the DAG to eliminate unnecessary

// edges that inhibit the ability to pipeline. The implementation uses the

// DFAPacketizer class to compute the minimum initiation interval and to check

// where an instruction may be inserted in the pipelined schedule.

//

// In order for the SMS pass to work, several target specific hooks need to be

// implemented to get information about the loop structure and to rewrite

// instructions.

//

//===----------------------------------------------------------------------===//


#include "llvm/CodeGen/MachinePipeliner.h"

#include "llvm/ADT/ArrayRef.h"

#include "llvm/ADT/BitVector.h"

#include "llvm/ADT/DenseMap.h"

#include "llvm/ADT/MapVector.h"

#include "llvm/ADT/PriorityQueue.h"

#include "llvm/ADT/STLExtras.h"

#include "llvm/ADT/SetOperations.h"

#include "llvm/ADT/SetVector.h"

#include "llvm/ADT/SmallPtrSet.h"

#include "llvm/ADT/SmallSet.h"

#include "llvm/ADT/SmallVector.h"

#include "llvm/ADT/Statistic.h"

#include "llvm/ADT/iterator_range.h"

#include "llvm/Analysis/AliasAnalysis.h"

#include "llvm/Analysis/CycleAnalysis.h"

#include "llvm/Analysis/MemoryLocation.h"

#include "llvm/Analysis/OptimizationRemarkEmitter.h"

#include "llvm/Analysis/ValueTracking.h"

#include "llvm/CodeGen/DFAPacketizer.h"

#include "llvm/CodeGen/LiveIntervals.h"

#include "llvm/CodeGen/MachineBasicBlock.h"

#include "llvm/CodeGen/MachineDominators.h"

#include "llvm/CodeGen/MachineFunction.h"

#include "llvm/CodeGen/MachineFunctionPass.h"

#include "llvm/CodeGen/MachineInstr.h"

#include "llvm/CodeGen/MachineInstrBuilder.h"

#include "llvm/CodeGen/MachineLoopInfo.h"

#include "llvm/CodeGen/MachineMemOperand.h"

#include "llvm/CodeGen/MachineOperand.h"

#include "llvm/CodeGen/MachineRegisterInfo.h"

#include "llvm/CodeGen/ModuloSchedule.h"

#include "llvm/CodeGen/Register.h"

#include "llvm/CodeGen/RegisterClassInfo.h"

#include "llvm/CodeGen/RegisterPressure.h"

#include "llvm/CodeGen/ScheduleDAG.h"

#include "llvm/CodeGen/ScheduleDAGMutation.h"

#include "llvm/CodeGen/TargetInstrInfo.h"

#include "llvm/CodeGen/TargetOpcodes.h"

#include "llvm/CodeGen/TargetRegisterInfo.h"

#include "llvm/CodeGen/TargetSubtargetInfo.h"

#include "llvm/Config/llvm-config.h"

#include "llvm/IR/Attributes.h"

#include "llvm/IR/Function.h"

#include "llvm/MC/LaneBitmask.h"

#include "llvm/MC/MCInstrDesc.h"

#include "llvm/MC/MCInstrItineraries.h"

#include "llvm/MC/MCRegisterInfo.h"

#include "llvm/Pass.h"

#include "llvm/Support/CommandLine.h"

#include "llvm/Support/Compiler.h"

#include "llvm/Support/Debug.h"

#include "llvm/Support/MathExtras.h"

#include "llvm/Support/raw_ostream.h"

#include <algorithm>

#include <cassert>

#include <climits>

#include <cstdint>

#include <deque>

#include <functional>

#include <iomanip>

#include <iterator>

#include <map>

#include <memory>

#include <sstream>

#include <tuple>

#include <utility>

#include <vector>


using namespace llvm;


#define DEBUG_TYPE "pipeliner"


// Pass statistics: the number of loops attempted and pipelined, followed by

// one counter for each reason the pipeliner gives up on a loop.

STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline");

STATISTIC(NumPipelined, "Number of loops software pipelined");

STATISTIC(NumNodeOrderIssues, "Number of node order issues found");

STATISTIC(NumFailBranch, "Pipeliner abort due to unknown branch");

STATISTIC(NumFailLoop, "Pipeliner abort due to unsupported loop");

STATISTIC(NumFailPreheader, "Pipeliner abort due to missing preheader");

STATISTIC(NumFailLargeMaxMII, "Pipeliner abort due to MaxMII too large");

STATISTIC(NumFailZeroMII, "Pipeliner abort due to zero MII");

STATISTIC(NumFailNoSchedule, "Pipeliner abort due to no schedule found");

STATISTIC(NumFailZeroStage, "Pipeliner abort due to zero stage");

STATISTIC(NumFailLargeMaxStage, "Pipeliner abort due to too many stages");


/// A command line option to turn software pipelining on or off.

static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true),

                               cl::desc("Enable Software Pipelining"));


/// A command line option to enable SWP at -Os.

static cl::opt<bool> EnableSWPOptSize("enable-pipeliner-opt-size",

                                      cl::desc("Enable SWP at Os."), cl::Hidden,

                                      cl::init(false));


/// A command line argument to limit the minimum initiation interval (MII) for

/// pipelining. schedule() gives up on a loop whose MII exceeds this value; a

/// value of -1 disables the check.

static cl::opt<int> SwpMaxMii("pipeliner-max-mii",

                              cl::desc("Size limit for the MII."),

                              cl::Hidden, cl::init(27));


/// A command line argument to force pipeliner to use specified initiation

/// interval. Only values greater than zero take effect (see setMII and

/// setMAX_II).

static cl::opt<int> SwpForceII("pipeliner-force-ii",

                               cl::desc("Force pipeliner to use specified II."),

                               cl::Hidden, cl::init(-1));


/// A command line argument to limit the number of stages in the pipeline.

static cl::opt<int>

    SwpMaxStages("pipeliner-max-stages",

                 cl::desc("Maximum stages allowed in the generated scheduled."),

                 cl::Hidden, cl::init(3));


/// A command line option to disable the pruning of chain dependences due to

/// an unrelated Phi.

static cl::opt<bool>

    SwpPruneDeps("pipeliner-prune-deps",

                 cl::desc("Prune dependences between unrelated Phi nodes."),

                 cl::Hidden, cl::init(true));


/// A command line option to disable the pruning of loop carried order

/// dependences.

static cl::opt<bool>

    SwpPruneLoopCarried("pipeliner-prune-loop-carried",

                        cl::desc("Prune loop carried order dependences."),

                        cl::Hidden, cl::init(true));


/// Only available when NDEBUG is not defined: the maximum number of loops

/// scheduleLoop() will try. A negative value (the default) means no limit.

#ifndef NDEBUG

static cl::opt<int> SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1));

#endif


/// When set, schedule() forces RecMII to zero before computing the MII. The

/// comment at the use site warns that this can cause correctness problems.

static cl::opt<bool> SwpIgnoreRecMII("pipeliner-ignore-recmii",

                                     cl::ReallyHidden,

                                     cl::desc("Ignore RecMII"));


// NOTE(review): the next two options are not referenced in the visible part

// of this file; presumably they enable extra resource debugging output --

// confirm against their uses.

static cl::opt<bool> SwpShowResMask("pipeliner-show-mask", cl::Hidden,

                                    cl::init(false));

static cl::opt<bool> SwpDebugResource("pipeliner-dbg-res", cl::Hidden,

                                      cl::init(false));


/// When set, schedule() annotates the instructions with the computed schedule

/// and returns instead of expanding the pipelined loop.

static cl::opt<bool> EmitTestAnnotations(

    "pipeliner-annotate-for-testing", cl::Hidden, cl::init(false),

    cl::desc("Instead of emitting the pipelined code, annotate instructions "

             "with the generated schedule for feeding into the "

             "-modulo-schedule-test pass"));


/// When set, and there are no pending instruction changes, schedule() expands

/// the loop with PeelingModuloScheduleExpander instead of

/// ModuloScheduleExpander.

static cl::opt<bool> ExperimentalCodeGen(

    "pipeliner-experimental-cg", cl::Hidden, cl::init(false),

    cl::desc(

        "Use the experimental peeling code generator for software pipelining"));


/// Added to the MII to obtain MAX_II when no II is forced on the command line

/// or by pragma (see setMAX_II).

static cl::opt<int> SwpIISearchRange("pipeliner-ii-search-range",

                                     cl::desc("Range to search for II"),

                                     cl::Hidden, cl::init(10));


/// A command line option to limit the register pressure of the scheduled

/// loop.

static cl::opt<bool>

    LimitRegPressure("pipeliner-register-pressure", cl::Hidden, cl::init(false),

                     cl::desc("Limit register pressure of scheduled loop"));


/// The percentage of the register pressure limit that is left unused as a

/// margin.

static cl::opt<int>

    RegPressureMargin("pipeliner-register-pressure-margin", cl::Hidden,

                      cl::init(5),

                      cl::desc("Margin representing the unused percentage of "

                               "the register pressure limit"));


// The options below are defined without `static` inside namespace llvm, so

// unlike the file-local options above they have external linkage.

// NOTE(review): presumably they are declared `extern` by other pipeliner

// sources -- confirm.

namespace llvm {


/// A command line option to enable the CopyToPhi DAG mutation.

cl::opt<bool> SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden,

                                 cl::init(true),

                                 cl::desc("Enable CopyToPhi DAG Mutation"));


/// A command line argument to force pipeliner to use specified issue

/// width.

cl::opt<int> SwpForceIssueWidth(

    "pipeliner-force-issue-width",

    cl::desc("Force pipeliner to use specified issue width."), cl::Hidden,

    cl::init(-1));


} // end namespace llvm


// Initial value of Circuits::MaxPaths.

// NOTE(review): presumably a bound used while searching for circuits --

// confirm against the class declaration.

unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;

// Pass identifier, exported below through llvm::MachinePipelinerID.

char MachinePipeliner::ID = 0;

#ifndef NDEBUG

// Number of loops tried so far; scheduleLoop() compares it with the

// -pipeliner-max limit.

int MachinePipeliner::NumTries = 0;

#endif

char &llvm::MachinePipelinerID = MachinePipeliner::ID;


// Register the pass under DEBUG_TYPE ("pipeliner") together with the analysis

// passes it lists as dependencies.

INITIALIZE_PASS_BEGIN(MachinePipeliner, DEBUG_TYPE,

                      "Modulo Software Pipelining", false, false)

INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)

INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)

INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)

INITIALIZE_PASS_DEPENDENCY(LiveIntervals)

INITIALIZE_PASS_END(MachinePipeliner, DEBUG_TYPE,

                    "Modulo Software Pipelining", false, false)


/// The "main" function for implementing Swing Modulo Scheduling.
///
/// Returns false without touching the function when the pass is skipped for
/// this function, when pipelining is disabled on the command line or by the
/// subtarget, when the function is optimized for size and
/// -enable-pipeliner-opt-size was not given, or when the DFA is requested but
/// no instruction itineraries are available. Otherwise the analyses used by
/// the rest of the pass are cached and every top-level loop is handed to
/// scheduleLoop(), which recurses into the inner loops itself.
///
/// \returns true if scheduleLoop() reported a change for at least one loop.
bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
  if (skipFunction(mf.getFunction()))
    return false;

  if (!EnableSWP)
    return false;

  // NOTE(review): this consults the command line position of the option
  // rather than its boolean value -- confirm that this is intended.
  if (mf.getFunction().getAttributes().hasFnAttr(Attribute::OptimizeForSize) &&
      !EnableSWPOptSize.getPosition())
    return false;

  if (!mf.getSubtarget().enableMachinePipeliner())
    return false;

  // Cannot pipeline loops without instruction itineraries if we are using
  // DFA for the pipeliner.
  if (mf.getSubtarget().useDFAforSMS() &&
      (!mf.getSubtarget().getInstrItineraryData() ||
       mf.getSubtarget().getInstrItineraryData()->isEmpty()))
    return false;

  MF = &mf;
  MLI = &getAnalysis<MachineLoopInfo>();
  MDT = &getAnalysis<MachineDominatorTree>();
  ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
  TII = MF->getSubtarget().getInstrInfo();
  RegClassInfo.runOnMachineFunction(*MF);

  // Report a modification to the pass manager whenever a loop was rewritten,
  // instead of unconditionally claiming that nothing changed.
  bool Changed = false;
  for (const auto &L : *MLI)
    Changed |= scheduleLoop(*L);

  return Changed;
}


/// Try to software pipeline \p L with the SMS algorithm. The sub-loops of
/// \p L are processed first (recursively); afterwards the pragma options are
/// read, the loop is checked with canPipelineLoop() and, if it qualifies, it
/// is passed on to swingModuloScheduler().
///
/// \returns the change status gathered from the sub-loops when \p L itself is
/// not scheduled, otherwise the result of swingModuloScheduler().
bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
  bool Changed = false;
  for (const auto &SubLoop : L)
    Changed |= scheduleLoop(*SubLoop);

#ifndef NDEBUG
  // Stop trying after reaching the limit (if any).
  if (SwpLoopLimit >= 0) {
    if (NumTries >= SwpLoopLimit)
      return Changed;
    ++NumTries;
  }
#endif

  setPragmaPipelineOptions(L);

  if (canPipelineLoop(L)) {
    ++NumTrytoPipeline;
    Changed = swingModuloScheduler(L);
  } else {
    LLVM_DEBUG(dbgs() << "\n!!! Can not pipeline loop.\n");
    ORE->emit([&]() {
      return MachineOptimizationRemarkMissed(DEBUG_TYPE, "canPipelineLoop",
                                             L.getStartLoc(), L.getHeader())
             << "Failed to pipeline loop";
    });
  }

  // The loop analysis object is released on both paths.
  LI.LoopPipelinerInfo.reset();
  return Changed;
}


/// Read the pipelining hints attached to \p L and store them in
/// disabledByPragma and II_setByPragma. Both are reset first, so hints from a
/// previously processed loop never leak into this one.
///
/// The hints are looked up in the MD_loop metadata on the terminator of the
/// IR basic block that corresponds to the top block of \p L. Two hint names
/// are recognized: "llvm.loop.pipeline.initiationinterval" (its second
/// operand is the requested II) and "llvm.loop.pipeline.disable". If any link
/// in that chain is missing, the reset values are kept.
void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) {
  // Reset the pragma for the next loop in iteration.
  disabledByPragma = false;
  II_setByPragma = 0;

  MachineBasicBlock *LBLK = L.getTopBlock();

  if (LBLK == nullptr)
    return;

  const BasicBlock *BBLK = LBLK->getBasicBlock();
  if (BBLK == nullptr)
    return;

  const Instruction *TI = BBLK->getTerminator();
  if (TI == nullptr)
    return;

  MDNode *LoopID = TI->getMetadata(LLVMContext::MD_loop);
  if (LoopID == nullptr)
    return;

  assert(LoopID->getNumOperands() > 0 && "requires atleast one operand");
  assert(LoopID->getOperand(0) == LoopID && "invalid loop");

  // Operand 0 is the self reference checked above; the hints follow it.
  for (const MDOperand &MDO : llvm::drop_begin(LoopID->operands())) {
    MDNode *MD = dyn_cast<MDNode>(MDO);

    // Skip operands that are not nodes, and nodes without operands: reading
    // operand 0 of an empty node would index out of range.
    if (MD == nullptr || MD->getNumOperands() == 0)
      continue;

    MDString *S = dyn_cast<MDString>(MD->getOperand(0));

    if (S == nullptr)
      continue;

    if (S->getString() == "llvm.loop.pipeline.initiationinterval") {
      assert(MD->getNumOperands() == 2 &&
             "Pipeline initiation interval hint metadata should have two operands.");
      II_setByPragma =
          mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
      assert(II_setByPragma >= 1 && "Pipeline initiation interval must be positive.");
    } else if (S->getString() == "llvm.loop.pipeline.disable") {
      disabledByPragma = true;
    }
  }
}


/// Decide whether \p L is a candidate for software pipelining.
///
/// The checks run in this order, and each failure emits an analysis remark:
/// the loop must consist of a single basic block, must not be disabled by
/// pragma, must end in a branch that the target can analyze, must be accepted
/// by TargetInstrInfo::analyzeLoopForPipelining, and must have a preheader.
/// As a side effect the branch and loop fields of LI are filled in, and on
/// success the phi nodes of the header are preprocessed.
///
/// \returns true if every check passed.
bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
  // All remarks of this function share the same pass name, remark name and
  // location; only the message differs.
  auto MakeRemark = [&L]() {
    return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
                                             L.getStartLoc(), L.getHeader());
  };

  if (L.getNumBlocks() != 1) {
    ORE->emit([&]() {
      return MakeRemark() << "Not a single basic block: "
                          << ore::NV("NumBlocks", L.getNumBlocks());
    });
    return false;
  }

  if (disabledByPragma) {
    ORE->emit([&]() { return MakeRemark() << "Disabled by Pragma."; });
    return false;
  }

  // Check if the branch can't be understood because we can't do pipelining
  // if that's the case.
  LI.TBB = nullptr;
  LI.FBB = nullptr;
  LI.BrCond.clear();
  const bool BranchNotAnalyzable =
      TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond);
  if (BranchNotAnalyzable) {
    LLVM_DEBUG(dbgs() << "Unable to analyzeBranch, can NOT pipeline Loop\n");
    NumFailBranch++;
    ORE->emit(
        [&]() { return MakeRemark() << "The branch can't be understood"; });
    return false;
  }

  LI.LoopInductionVar = nullptr;
  LI.LoopCompare = nullptr;
  LI.LoopPipelinerInfo = TII->analyzeLoopForPipelining(L.getTopBlock());
  if (!LI.LoopPipelinerInfo) {
    LLVM_DEBUG(dbgs() << "Unable to analyzeLoop, can NOT pipeline Loop\n");
    NumFailLoop++;
    ORE->emit([&]() {
      return MakeRemark() << "The loop structure is not supported";
    });
    return false;
  }

  if (!L.getLoopPreheader()) {
    LLVM_DEBUG(dbgs() << "Preheader not found, can NOT pipeline Loop\n");
    NumFailPreheader++;
    ORE->emit([&]() { return MakeRemark() << "No loop preheader found"; });
    return false;
  }

  // Remove any subregisters from inputs to phi nodes.
  preprocessPhiNodes(*L.getHeader());
  return true;
}


/// Rewrite the phi nodes of \p B so that no incoming value carries a
/// subregister index. For each such operand a COPY of the subregister into a
/// fresh virtual register (of the phi result's register class) is inserted
/// before the first terminator of the corresponding predecessor block and
/// registered with the slot indexes; the phi operand is then redirected to
/// the new register.
void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) {
  MachineRegisterInfo &RegInfo = MF->getRegInfo();
  SlotIndexes &Indexes = *getAnalysis<LiveIntervals>().getSlotIndexes();

  for (MachineInstr &Phi : B.phis()) {
    MachineOperand &Result = Phi.getOperand(0);
    assert(Result.getSubReg() == 0);
    auto *ResultRC = RegInfo.getRegClass(Result.getReg());

    // After the result, the operands come in (value, predecessor) pairs.
    for (unsigned OpIdx = 1, NumOps = Phi.getNumOperands(); OpIdx != NumOps;
         OpIdx += 2) {
      MachineOperand &Incoming = Phi.getOperand(OpIdx);
      if (Incoming.getSubReg() != 0) {
        // If the operand uses a subregister, replace it with a new register
        // without subregisters, and generate a copy to the new register.
        Register FullReg = RegInfo.createVirtualRegister(ResultRC);
        MachineBasicBlock &Pred = *Phi.getOperand(OpIdx + 1).getMBB();
        MachineBasicBlock::iterator InsertPt = Pred.getFirstTerminator();
        const DebugLoc &DL = Pred.findDebugLoc(InsertPt);
        auto Copy =
            BuildMI(Pred, InsertPt, DL, TII->get(TargetOpcode::COPY), FullReg)
                .addReg(Incoming.getReg(), getRegState(Incoming),
                        Incoming.getSubReg());
        Indexes.insertMachineInstrInMaps(*Copy);
        Incoming.setReg(FullReg);
        Incoming.setSubReg(0);
      }
    }
  }
}


/// Run the SMS algorithm on the single-block loop \p L. The algorithm
/// consists of the following main steps:
/// 1. Computation and analysis of the dependence graph.
/// 2. Ordering of the nodes (instructions).
/// 3. Attempt to Schedule the loop.
///
/// \returns whether the scheduler reports a new schedule for the loop.
bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
  assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");

  SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo,
                        II_setByPragma, LI.LoopPipelinerInfo.get());

  MachineBasicBlock *LoopBB = L.getHeader();
  // The kernel should not include any terminator instructions.  These
  // will be added back later.
  SMS.startBlock(LoopBB);

  // The number of 'real' instructions is the block size minus the number of
  // positions from the first terminator to the end of the block.
  MachineBasicBlock::iterator FirstTerm = LoopBB->getFirstTerminator();
  MachineBasicBlock::iterator BlockEnd = LoopBB->instr_end();
  unsigned NumRealInstrs = LoopBB->size() - std::distance(FirstTerm, BlockEnd);

  SMS.enterRegion(LoopBB, LoopBB->begin(), FirstTerm, NumRealInstrs);
  SMS.schedule();
  SMS.exitRegion();

  SMS.finishBlock();
  return SMS.hasNewSchedule();
}


/// Declare the analyses this pass relies on: alias analysis (also marked as

/// preserved), machine loop info, the machine dominator tree, live intervals

/// and the machine optimization remark emitter. The base class then adds its

/// own requirements.

void MachinePipeliner::getAnalysisUsage(AnalysisUsage &AU) const {

  AU.addRequired<AAResultsWrapperPass>();

  AU.addPreserved<AAResultsWrapperPass>();

  AU.addRequired<MachineLoopInfo>();

  AU.addRequired<MachineDominatorTree>();

  AU.addRequired<LiveIntervals>();

  AU.addRequired<MachineOptimizationRemarkEmitterPass>();

  MachineFunctionPass::getAnalysisUsage(AU);

}


/// Set the minimum initiation interval. A positive -pipeliner-force-ii value
/// has the highest priority, followed by a positive II requested by pragma;
/// otherwise the larger of \p ResMII and \p RecMII is used.
void SwingSchedulerDAG::setMII(unsigned ResMII, unsigned RecMII) {
  if (SwpForceII > 0) {
    MII = SwpForceII;
    return;
  }
  if (II_setByPragma > 0) {
    MII = II_setByPragma;
    return;
  }
  MII = std::max(ResMII, RecMII);
}


/// Set the largest initiation interval to consider. A positive
/// -pipeliner-force-ii value has the highest priority, followed by a positive
/// II requested by pragma; otherwise it is the current MII plus
/// -pipeliner-ii-search-range.
void SwingSchedulerDAG::setMAX_II() {
  if (SwpForceII > 0) {
    MAX_II = SwpForceII;
    return;
  }
  if (II_setByPragma > 0) {
    MAX_II = II_setByPragma;
    return;
  }
  MAX_II = MII + SwpIISearchRange;
}


/// We override the schedule function in ScheduleDAGInstrs to implement the

/// scheduling part of the Swing Modulo Scheduling algorithm.

///

/// Outline, as visible in this function: the dependence graph is built and

/// post-processed, circuits are collected into node sets, and MII / MAX_II

/// are derived from ResMII and RecMII. The function returns early -- bumping

/// a statistic and emitting an analysis remark -- when the MII is zero or

/// exceeds -pipeliner-max-mii, when schedulePipeline() finds no schedule, or

/// when the schedule has zero stages or more than -pipeliner-max-stages

/// stages. Otherwise the schedule is packaged as a ModuloSchedule and either

/// annotated for testing or expanded into the pipelined loop.

void SwingSchedulerDAG::schedule() {

  AliasAnalysis *AA = &Pass.getAnalysis<AAResultsWrapperPass>().getAAResults();

  buildSchedGraph(AA);

  addLoopCarriedDependences(AA);

  updatePhiDependences();

  Topo.InitDAGTopologicalSorting();

  changeDependences();

  postProcessDAG();

  LLVM_DEBUG(dump());


  NodeSetType NodeSets;

  findCircuits(NodeSets);

  // Keep a copy of the circuits as found: NodeSets is reordered further down

  // (stable_sort), while checkValidNodeOrder() is given this copy.

  NodeSetType Circuits = NodeSets;


  // Calculate the MII.

  unsigned ResMII = calculateResMII();

  unsigned RecMII = calculateRecMII(NodeSets);


  fuseRecs(NodeSets);


  // This flag is used for testing and can cause correctness problems.

  if (SwpIgnoreRecMII)

    RecMII = 0;


  setMII(ResMII, RecMII);

  setMAX_II();


  LLVM_DEBUG(dbgs() << "MII = " << MII << " MAX_II = " << MAX_II

                    << " (rec=" << RecMII << ", res=" << ResMII << ")\n");


  // Can't schedule a loop without a valid MII.

  if (MII == 0) {

    LLVM_DEBUG(dbgs() << "Invalid Minimal Initiation Interval: 0\n");

    NumFailZeroMII++;

    Pass.ORE->emit([&]() {

      return MachineOptimizationRemarkAnalysis(

                 DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())

             << "Invalid Minimal Initiation Interval: 0";

    });

    return;

  }


  // Don't pipeline large loops. A limit of -1 disables this check.

  // NOTE(review): the remark below concatenates "." and "Refer to ..." with

  // no space in between, unlike the -pipeliner-max-stages remark further

  // down; left untouched because the text is runtime output.

  if (SwpMaxMii != -1 && (int)MII > SwpMaxMii) {

    LLVM_DEBUG(dbgs() << "MII > " << SwpMaxMii

                      << ", we don't pipeline large loops\n");

    NumFailLargeMaxMII++;

    Pass.ORE->emit([&]() {

      return MachineOptimizationRemarkAnalysis(

                 DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())

             << "Minimal Initiation Interval too large: "

             << ore::NV("MII", (int)MII) << " > "

             << ore::NV("SwpMaxMii", SwpMaxMii) << "."

             << "Refer to -pipeliner-max-mii.";

    });

    return;

  }


  computeNodeFunctions(NodeSets);


  registerPressureFilter(NodeSets);


  colocateNodeSets(NodeSets);


  checkNodeSets(NodeSets);


  LLVM_DEBUG({

    for (auto &I : NodeSets) {

      dbgs() << "  Rec NodeSet ";

      I.dump();

    }

  });


  // Order the node sets with std::greater<NodeSet>, keeping the relative

  // order of sets that compare equal.

  llvm::stable_sort(NodeSets, std::greater<NodeSet>());


  groupRemainingNodes(NodeSets);


  removeDuplicateNodes(NodeSets);


  LLVM_DEBUG({

    for (auto &I : NodeSets) {

      dbgs() << "  NodeSet ";

      I.dump();

    }

  });


  computeNodeOrder(NodeSets);


  // check for node order issues

  checkValidNodeOrder(Circuits);


  SMSchedule Schedule(Pass.MF, this);

  Scheduled = schedulePipeline(Schedule);


  if (!Scheduled){

    LLVM_DEBUG(dbgs() << "No schedule found, return\n");

    NumFailNoSchedule++;

    Pass.ORE->emit([&]() {

      return MachineOptimizationRemarkAnalysis(

                 DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())

             << "Unable to find schedule";

    });

    return;

  }


  unsigned numStages = Schedule.getMaxStageCount();

  // No need to generate pipeline if there are no overlapped iterations.

  if (numStages == 0) {

    LLVM_DEBUG(dbgs() << "No overlapped iterations, skip.\n");

    NumFailZeroStage++;

    Pass.ORE->emit([&]() {

      return MachineOptimizationRemarkAnalysis(

                 DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())

             << "No need to pipeline - no overlapped iterations in schedule.";

    });

    return;

  }

  // Check that the maximum stage count does not exceed the user-defined

  // limit. A negative limit disables this check.

  if (SwpMaxStages > -1 && (int)numStages > SwpMaxStages) {

    LLVM_DEBUG(dbgs() << "numStages:" << numStages << ">" << SwpMaxStages

                      << " : too many stages, abort\n");

    NumFailLargeMaxStage++;

    Pass.ORE->emit([&]() {

      return MachineOptimizationRemarkAnalysis(

                 DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())

             << "Too many stages in schedule: "

             << ore::NV("numStages", (int)numStages) << " > "

             << ore::NV("SwpMaxStages", SwpMaxStages)

             << ". Refer to -pipeliner-max-stages.";

    });

    return;

  }


  // The success remark is emitted here, before the loop is actually

  // rewritten below.

  Pass.ORE->emit([&]() {

    return MachineOptimizationRemark(DEBUG_TYPE, "schedule", Loop.getStartLoc(),

                                     Loop.getHeader())

           << "Pipelined succesfully!";

  });


  // Generate the schedule as a ModuloSchedule.

  DenseMap<MachineInstr *, int> Cycles, Stages;

  std::vector<MachineInstr *> OrderedInsts;

  // Walk the schedule cycle by cycle, recording the instruction order along

  // with the cycle and stage of every instruction.

  for (int Cycle = Schedule.getFirstCycle(); Cycle <= Schedule.getFinalCycle();

       ++Cycle) {

    for (SUnit *SU : Schedule.getInstructions(Cycle)) {

      OrderedInsts.push_back(SU->getInstr());

      Cycles[SU->getInstr()] = Cycle;

      Stages[SU->getInstr()] = Schedule.stageScheduled(SU);

    }

  }

  DenseMap<MachineInstr *, std::pair<unsigned, int64_t>> NewInstrChanges;

  // Each key of NewMIs takes over the cycle and stage of the instruction it

  // is mapped to, and the InstrChanges entry of its own SUnit is copied.

  for (auto &KV : NewMIs) {

    Cycles[KV.first] = Cycles[KV.second];

    Stages[KV.first] = Stages[KV.second];

    NewInstrChanges[KV.first] = InstrChanges[getSUnit(KV.first)];

  }


  ModuloSchedule MS(MF, &Loop, std::move(OrderedInsts), std::move(Cycles),

                    std::move(Stages));

  // In annotate-for-testing mode the loop is only annotated; the function

  // returns without expanding it and without counting it as pipelined.

  if (EmitTestAnnotations) {

    assert(NewInstrChanges.empty() &&

           "Cannot serialize a schedule with InstrChanges!");

    ModuloScheduleTestAnnotater MSTI(MF, MS);

    MSTI.annotate();

    return;

  }

  // The experimental code generator can't work if there are InstChanges, so

  // the regular expander is used in that case even when the option is set.

  if (ExperimentalCodeGen && NewInstrChanges.empty()) {

    PeelingModuloScheduleExpander MSE(MF, MS, &LIS);

    MSE.expand();

  } else {

    ModuloScheduleExpander MSE(MF, MS, LIS, std::move(NewInstrChanges));

    MSE.expand();

    MSE.cleanup();

  }

  ++NumPipelined;

}


/// Clean up after the software pipeliner runs: delete the machine
/// instructions stored as values in NewMIs, empty the map, and let the base
/// class finish the block.
void SwingSchedulerDAG::finishBlock() {
  for (auto &Entry : NewMIs) {
    MF.deleteMachineInstr(Entry.second);
  }
  NewMIs.clear();

  // Call the superclass.
  ScheduleDAGInstrs::finishBlock();
}


/// Return the register values for the operands of a Phi instruction.
/// \p InitVal receives the value coming from a block other than \p Loop, and
/// \p LoopVal the value coming from \p Loop itself. This function assumes the
/// instruction is a Phi and asserts that both values were found.
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop,
                       unsigned &InitVal, unsigned &LoopVal) {
  assert(Phi.isPHI() && "Expecting a Phi.");

  InitVal = 0;
  LoopVal = 0;
  // Operands after the result come in (value, predecessor block) pairs.
  const unsigned NumOps = Phi.getNumOperands();
  for (unsigned Idx = 1; Idx != NumOps; Idx += 2) {
    const bool FromLoop = Phi.getOperand(Idx + 1).getMBB() == Loop;
    if (FromLoop)
      LoopVal = Phi.getOperand(Idx).getReg();
    else
      InitVal = Phi.getOperand(Idx).getReg();
  }

  assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
}


/// Return the Phi register value that comes from the loop block \p LoopBB,
/// or 0 if no incoming value of \p Phi is associated with that block.
static unsigned getLoopPhiReg(const MachineInstr &Phi,
                              const MachineBasicBlock *LoopBB) {
  const unsigned NumOps = Phi.getNumOperands();
  unsigned Idx = 1;
  // Operands after the result come in (value, predecessor block) pairs.
  while (Idx != NumOps) {
    if (Phi.getOperand(Idx + 1).getMBB() == LoopBB)
      return Phi.getOperand(Idx).getReg();
    Idx += 2;
  }
  return 0;
}


/// Return true if SUb can be reached from SUa following the chain edges,
/// i.e. successor edges of kind SDep::Order. The search is an iterative
/// worklist traversal that visits every node at most once.
static bool isSuccOrder(SUnit *SUa, SUnit *SUb) {
  SmallPtrSet<SUnit *, 8> Seen;
  SmallVector<SUnit *, 8> Pending;
  Pending.push_back(SUa);

  do {
    const SUnit *Current = Pending.pop_back_val();
    for (const auto &Edge : Current->Succs) {
      SUnit *Next = Edge.getSUnit();
      // Only order edges are followed.
      if (Edge.getKind() != SDep::Order)
        continue;
      if (Seen.count(Next))
        continue;
      if (Next == SUb)
        return true;
      Pending.push_back(Next);
      Seen.insert(Next);
    }
  } while (!Pending.empty());

  return false;
}


/// Return true if the instruction causes a chain between memory
/// references before and after it. That is the case for calls, instructions
/// that may raise an FP exception or have unmodeled side effects, and
/// ordered memory references other than dereferenceable invariant loads.
static bool isDependenceBarrier(MachineInstr &MI) {
  if (MI.isCall() || MI.mayRaiseFPException() || MI.hasUnmodeledSideEffects())
    return true;

  if (!MI.hasOrderedMemoryRef())
    return false;

  // An ordered reference is a barrier unless it is a load that is also a
  // dereferenceable invariant load.
  return !(MI.mayLoad() && MI.isDereferenceableInvariantLoad());
}


/// Return the underlying objects for the memory references of an instruction.

/// This function calls the code in ValueTracking, but first checks that the

/// instruction has a memory operand.

static void getUnderlyingObjects(const MachineInstr *MI,

                                 SmallVectorImpl<const Value *> &Objs) {

  if (!MI->hasOneMemOperand())

    return;

  MachineMemOperand *MM = *MI->memoperands_begin();

  if (!MM->getValue())

    return;

  getUnderlyingObjects(MM->getValue(), Objs);

  for (const Value *V : Objs) {

    if (!isIdentifiedObject(V)) {

      Objs.clear();

      return;

    }

  }

}


/// Add a chain edge between a load and store if the store can be an
/// alias of the load on a subsequent iteration, i.e., a loop carried
/// dependence. This code is very similar to the code in ScheduleDAGInstrs
/// but that code doesn't create loop carried dependences.
///
/// The nodes are visited in order. Loads seen so far are remembered per
/// underlying object (or under a shared "unknown" value when no identified
/// object is available), and a dependence barrier forgets them all. Each
/// store is compared with the pending loads on the same object that are not
/// already ordered before it; when the pair may conflict, a Barrier edge with
/// latency 1 is added from the load to the store.
///
/// \param AA alias analysis used for the final, most expensive check. When it
/// is null the edge is added conservatively.
void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
  MapVector<const Value *, SmallVector<SUnit *, 4>> PendingLoads;
  Value *UnknownValue =
      UndefValue::get(Type::getVoidTy(MF.getFunction().getContext()));

  // Make \p Load a predecessor of \p Store through a Barrier edge of
  // latency 1.
  auto AddLoopCarriedEdge = [](SUnit &Store, SUnit *Load) {
    SDep Dep(Load, SDep::Barrier);
    Dep.setLatency(1);
    Store.addPred(Dep);
  };

  for (auto &SU : SUnits) {
    MachineInstr &MI = *SU.getInstr();
    if (isDependenceBarrier(MI))
      PendingLoads.clear();
    else if (MI.mayLoad()) {
      SmallVector<const Value *, 4> Objs;
      ::getUnderlyingObjects(&MI, Objs);
      if (Objs.empty())
        Objs.push_back(UnknownValue);
      for (const auto *V : Objs)
        PendingLoads[V].push_back(&SU);
    } else if (MI.mayStore()) {
      SmallVector<const Value *, 4> Objs;
      ::getUnderlyingObjects(&MI, Objs);
      if (Objs.empty())
        Objs.push_back(UnknownValue);
      for (const auto *V : Objs) {
        auto I = PendingLoads.find(V);
        if (I == PendingLoads.end())
          continue;
        for (auto *Load : I->second) {
          // Nothing to add if the store is already ordered after the load.
          if (isSuccOrder(Load, &SU))
            continue;
          MachineInstr &LdMI = *Load->getInstr();
          // First, perform the cheaper check that compares the base register.
          // If they are the same and the load offset is less than the store
          // offset, then mark the dependence as loop carried potentially.
          const MachineOperand *BaseOp1, *BaseOp2;
          int64_t Offset1, Offset2;
          bool Offset1IsScalable, Offset2IsScalable;
          if (TII->getMemOperandWithOffset(LdMI, BaseOp1, Offset1,
                                           Offset1IsScalable, TRI) &&
              TII->getMemOperandWithOffset(MI, BaseOp2, Offset2,
                                           Offset2IsScalable, TRI)) {
            // Compare the offsets at their full 64-bit width; narrowing them
            // to int first can invert the result for large offsets.
            if (BaseOp1->isIdenticalTo(*BaseOp2) &&
                Offset1IsScalable == Offset2IsScalable &&
                Offset1 < Offset2) {
              assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI) &&
                     "What happened to the chain edge?");
              AddLoopCarriedEdge(SU, Load);
              continue;
            }
          }
          // Second, the more expensive check that uses alias analysis on the
          // base registers. If they alias, and the load offset is less than
          // the store offset, then mark the dependence as loop carried.
          if (!AA) {
            AddLoopCarriedEdge(SU, Load);
            continue;
          }
          MachineMemOperand *MMO1 = *LdMI.memoperands_begin();
          MachineMemOperand *MMO2 = *MI.memoperands_begin();
          // Without IR values there is nothing to query; be conservative.
          if (!MMO1->getValue() || !MMO2->getValue()) {
            AddLoopCarriedEdge(SU, Load);
            continue;
          }
          if (MMO1->getValue() == MMO2->getValue() &&
              MMO1->getOffset() <= MMO2->getOffset()) {
            AddLoopCarriedEdge(SU, Load);
            continue;
          }
          if (!AA->isNoAlias(
                  MemoryLocation::getAfter(MMO1->getValue(), MMO1->getAAInfo()),
                  MemoryLocation::getAfter(MMO2->getValue(),
                                           MMO2->getAAInfo())))
            AddLoopCarriedEdge(SU, Load);
        }
      }
    }
  }
}


/// Update the phi dependences to the DAG because ScheduleDAGInstrs no longer

/// processes dependences for PHIs. This function adds true dependences

/// from a PHI to a use, and a loop carried dependence from the use to the

/// PHI. The loop carried dependence is represented as an anti dependence

/// edge. This function also removes chain dependences between unrelated

/// PHIs.

void SwingSchedulerDAG::updatePhiDependences() {

  SmallVector<SDep, 4> RemoveDeps;

  const TargetSubtargetInfo &ST = MF.getSubtarget<TargetSubtargetInfo>();


  // Iterate over each DAG node.

  for (SUnit &I : SUnits) {

    RemoveDeps.clear();

    // Set to true if the instruction has an operand defined by a Phi.

    unsigned HasPhiUse = 0;

    unsigned HasPhiDef = 0;

    MachineInstr *MI = I.getInstr();

    // Iterate over each operand, and we process the definitions.

    for (const MachineOperand &MO : MI->operands()) {

      if (!MO.isReg())

        continue;

      Register Reg = MO.getReg();

      if (MO.isDef()) {

        // If the register is used by a Phi, then create an anti dependence.

        for (MachineRegisterInfo::use_instr_iterator

                 UI = MRI.use_instr_begin(Reg),

                 UE = MRI.use_instr_end();

             UI != UE; ++UI) {

          MachineInstr *UseMI = &*UI;

          SUnit *SU = getSUnit(UseMI);

          if (SU != nullptr && UseMI->isPHI()) {

            if (!MI->isPHI()) {

              SDep Dep(SU, SDep::Anti, Reg);

              Dep.setLatency(1);

              I.addPred(Dep);

            } else {

              HasPhiDef = Reg;

              // Add a chain edge to a dependent Phi that isn't an existing

              // predecessor.

              if (SU->NodeNum < I.NodeNum && !I.isPred(SU))

                I.addPred(SDep(SU, SDep::Barrier));

            }

          }

        }

      } else if (MO.isUse()) {

        // If the register is defined by a Phi, then create a true dependence.

        MachineInstr *DefMI = MRI.getUniqueVRegDef(Reg);

        if (DefMI == nullptr)

          continue;

        SUnit *SU = getSUnit(DefMI);

        if (SU != nullptr && DefMI->isPHI()) {

          if (!MI->isPHI()) {

            SDep Dep(SU, SDep::Data, Reg);

            Dep.setLatency(0);

            ST.adjustSchedDependency(SU, 0, &I, MO.getOperandNo(), Dep,

                                     &SchedModel);

            I.addPred(Dep);

          } else {

            HasPhiUse = Reg;

            // Add a chain edge to a dependent Phi that isn't an existing

            // predecessor.

            if (SU->NodeNum < I.NodeNum && !I.isPred(SU))

              I.addPred(SDep(SU, SDep::Barrier));

          }

        }

      }

    }

    // Remove order dependences from an unrelated Phi.

    if (!SwpPruneDeps)

      continue;

    for (auto &PI : I.Preds) {

      MachineInstr *PMI = PI.getSUnit()->getInstr();

      if (PMI->isPHI() && PI.getKind() == SDep::Order) {

        if (I.getInstr()->isPHI()) {

          if (PMI->getOperand(0).getReg() == HasPhiUse)

            continue;

          if (getLoopPhiReg(*PMI, PMI->getParent()) == HasPhiDef)

            continue;

        }

        RemoveDeps.push_back(PI);

      }

    }

    for (int i = 0, e = RemoveDeps.size(); i != e; ++i)

      I.removePred(RemoveDeps[i]);

  }

}


/// Iterate over each DAG node and see if we can change any dependences

/// in order to reduce the recurrence MII.

void SwingSchedulerDAG::changeDependences() {

  // See if an instruction can use a value from the previous iteration.

  // If so, we update the base and offset of the instruction and change

  // the dependences.

  for (SUnit &I : SUnits) {

    unsigned BasePos = 0, OffsetPos = 0, NewBase = 0;

    int64_t NewOffset = 0;

    if (!canUseLastOffsetValue(I.getInstr(), BasePos, OffsetPos, NewBase,

                               NewOffset))

      continue;


    // Get the MI and SUnit for the instruction that defines the original base.

    Register OrigBase = I.getInstr()->getOperand(BasePos).getReg();

    MachineInstr *DefMI = MRI.getUniqueVRegDef(OrigBase);

    if (!DefMI)

      continue;

    SUnit *DefSU = getSUnit(DefMI);

    if (!DefSU)

      continue;

    // Get the MI and SUnit for the instruction that defins the new base.

    MachineInstr *LastMI = MRI.getUniqueVRegDef(NewBase);

    if (!LastMI)

      continue;

    SUnit *LastSU = getSUnit(LastMI);

    if (!LastSU)

      continue;


    if (Topo.IsReachable(&I, LastSU))

      continue;


    // Remove the dependence. The value now depends on a prior iteration.

    SmallVector<SDep, 4> Deps;

    for (const SDep &P : I.Preds)

      if (P.getSUnit() == DefSU)

        Deps.push_back(P);

    for (int i = 0, e = Deps.size(); i != e; i++) {

      Topo.RemovePred(&I, Deps[i].getSUnit());

      I.removePred(Deps[i]);

    }

    // Remove the chain dependence between the instructions.

    Deps.clear();

    for (auto &P : LastSU->Preds)

      if (P.getSUnit() == &I && P.getKind() == SDep::Order)

        Deps.push_back(P);

    for (int i = 0, e = Deps.size(); i != e; i++) {

      Topo.RemovePred(LastSU, Deps[i].getSUnit());

      LastSU->removePred(Deps[i]);

    }


    // Add a dependence between the new instruction and the instruction

    // that defines the new base.

    SDep Dep(&I, SDep::Anti, NewBase);

    Topo.AddPred(LastSU, &I);

    LastSU->addPred(Dep);


    // Remember the base and offset information so that we can update the

    // instruction during code generation.

    InstrChanges[&I] = std::make_pair(NewBase, NewOffset);

  }

}


/// Create an instruction stream that represents a single iteration and stage of

/// each instruction. This function differs from SMSchedule::finalizeSchedule in

/// that this doesn't have any side-effect to SwingSchedulerDAG. That is, this

/// function is an approximation of SMSchedule::finalizeSchedule with all

/// non-const operations removed.

static void computeScheduledInsts(const SwingSchedulerDAG *SSD,

                                  SMSchedule &Schedule,

                                  std::vector<MachineInstr *> &OrderedInsts,

                                  DenseMap<MachineInstr *, unsigned> &Stages) {

  DenseMap<int, std::deque<SUnit *>> Instrs;


  // Move all instructions to the first stage from the later stages.

  for (int Cycle = Schedule.getFirstCycle(); Cycle <= Schedule.getFinalCycle();

       ++Cycle) {

    for (int Stage = 0, LastStage = Schedule.getMaxStageCount();

         Stage <= LastStage; ++Stage) {

      for (SUnit *SU : llvm::reverse(Schedule.getInstructions(

               Cycle + Stage * Schedule.getInitiationInterval()))) {

        Instrs[Cycle].push_front(SU);

      }

    }

  }


  for (int Cycle = Schedule.getFirstCycle(); Cycle <= Schedule.getFinalCycle();

       ++Cycle) {

    std::deque<SUnit *> &CycleInstrs = Instrs[Cycle];

    CycleInstrs = Schedule.reorderInstructions(SSD, CycleInstrs);

    for (SUnit *SU : CycleInstrs) {

      MachineInstr *MI = SU->getInstr();

      OrderedInsts.push_back(MI);

      Stages[MI] = Schedule.stageScheduled(SU);

    }

  }

}


namespace {


// FuncUnitSorter - Comparison operator used to sort instructions by

// the number of functional unit choices.

struct FuncUnitSorter {

  const InstrItineraryData *InstrItins;

  const MCSubtargetInfo *STI;

  DenseMap<InstrStage::FuncUnits, unsigned> Resources;


  FuncUnitSorter(const TargetSubtargetInfo &TSI)

      : InstrItins(TSI.getInstrItineraryData()), STI(&TSI) {}


  // Compute the number of functional unit alternatives needed

  // at each stage, and take the minimum value. We prioritize the

  // instructions by the least number of choices first.

  unsigned minFuncUnits(const MachineInstr *Inst,

                        InstrStage::FuncUnits &F) const {

    unsigned SchedClass = Inst->getDesc().getSchedClass();

    unsigned min = UINT_MAX;

    if (InstrItins && !InstrItins->isEmpty()) {

      for (const InstrStage &IS :

           make_range(InstrItins->beginStage(SchedClass),

                      InstrItins->endStage(SchedClass))) {

        InstrStage::FuncUnits funcUnits = IS.getUnits();

        unsigned numAlternatives = llvm::popcount(funcUnits);

        if (numAlternatives < min) {

          min = numAlternatives;

          F = funcUnits;

        }

      }

      return min;

    }

    if (STI && STI->getSchedModel().hasInstrSchedModel()) {

      const MCSchedClassDesc *SCDesc =

          STI->getSchedModel().getSchedClassDesc(SchedClass);

      if (!SCDesc->isValid())

        // No valid Schedule Class Desc for schedClass, should be

        // Pseudo/PostRAPseudo

        return min;


      for (const MCWriteProcResEntry &PRE :

           make_range(STI->getWriteProcResBegin(SCDesc),

                      STI->getWriteProcResEnd(SCDesc))) {

        if (!PRE.ReleaseAtCycle)

          continue;

        const MCProcResourceDesc *ProcResource =

            STI->getSchedModel().getProcResource(PRE.ProcResourceIdx);

        unsigned NumUnits = ProcResource->NumUnits;

        if (NumUnits < min) {

          min = NumUnits;

          F = PRE.ProcResourceIdx;

        }

      }

      return min;

    }

    llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");

  }


  // Compute the critical resources needed by the instruction. This

  // function records the functional units needed by instructions that

  // must use only one functional unit. We use this as a tie breaker

  // for computing the resource MII. The instrutions that require

  // the same, highly used, functional unit have high priority.

  void calcCriticalResources(MachineInstr &MI) {

    unsigned SchedClass = MI.getDesc().getSchedClass();

    if (InstrItins && !InstrItins->isEmpty()) {

      for (const InstrStage &IS :

           make_range(InstrItins->beginStage(SchedClass),

                      InstrItins->endStage(SchedClass))) {

        InstrStage::FuncUnits FuncUnits = IS.getUnits();

        if (llvm::popcount(FuncUnits) == 1)

          Resources[FuncUnits]++;

      }

      return;

    }

    if (STI && STI->getSchedModel().hasInstrSchedModel()) {

      const MCSchedClassDesc *SCDesc =

          STI->getSchedModel().getSchedClassDesc(SchedClass);

      if (!SCDesc->isValid())

        // No valid Schedule Class Desc for schedClass, should be

        // Pseudo/PostRAPseudo

        return;


      for (const MCWriteProcResEntry &PRE :

           make_range(STI->getWriteProcResBegin(SCDesc),

                      STI->getWriteProcResEnd(SCDesc))) {

        if (!PRE.ReleaseAtCycle)

          continue;

        Resources[PRE.ProcResourceIdx]++;

      }

      return;

    }

    llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");

  }


  /// Return true if IS1 has less priority than IS2.

  bool operator()(const MachineInstr *IS1, const MachineInstr *IS2) const {

    InstrStage::FuncUnits F1 = 0, F2 = 0;

    unsigned MFUs1 = minFuncUnits(IS1, F1);

    unsigned MFUs2 = minFuncUnits(IS2, F2);

    if (MFUs1 == MFUs2)

      return Resources.lookup(F1) < Resources.lookup(F2);

    return MFUs1 > MFUs2;

  }

};


/// Calculate the maximum register pressure of the scheduled instruction stream
/// and report whether it exceeds the per-pressure-set limits.
///
/// Usage: construct the detector for the loop block, call init() once to
/// compute the schedule-independent data (register operands, live-in pressure,
/// pressure set limits), then call detect() for each candidate schedule.
class HighRegisterPressureDetector {
  // The loop body block being pipelined.
  MachineBasicBlock *OrigMBB;
  const MachineFunction &MF;
  const MachineRegisterInfo &MRI;
  const TargetRegisterInfo *TRI;

  // Number of register pressure sets reported by TRI.
  const unsigned PSetNum;

  // Indexed by PSet ID
  // InitSetPressure takes into account the register pressure of live-in
  // registers. It does not depend on how the loop is scheduled, so it is
  // enough to calculate it once at the beginning.
  std::vector<unsigned> InitSetPressure;

  // Indexed by PSet ID
  // Upper limit for each register pressure set
  std::vector<unsigned> PressureSetLimit;

  // Register operands collected for each non-debug instruction of OrigMBB
  // (filled by init()).
  DenseMap<MachineInstr *, RegisterOperands> ROMap;

  // Maps an instruction to the registers whose last use it is.
  using Instr2LastUsesTy = DenseMap<MachineInstr *, SmallDenseSet<Register, 4>>;

public:
  using OrderedInstsTy = std::vector<MachineInstr *>;
  using Instr2StageTy = DenseMap<MachineInstr *, unsigned>;

private:
  // Print the pressures to dbgs() as "[p0 p1 ...]" ("[]" when empty).
  static void dumpRegisterPressures(const std::vector<unsigned> &Pressures) {
    if (Pressures.size() == 0) {
      dbgs() << "[]";
    } else {
      char Prefix = '[';
      for (unsigned P : Pressures) {
        dbgs() << Prefix << P;
        Prefix = ' ';
      }
      dbgs() << ']';
    }
  }

  // Print Reg and the IDs of the pressure sets it belongs to on dbgs().
  void dumpPSet(Register Reg) const {
    dbgs() << "Reg=" << printReg(Reg, TRI, 0, &MRI) << " PSet=";
    for (auto PSetIter = MRI.getPressureSets(Reg); PSetIter.isValid();
         ++PSetIter) {
      dbgs() << *PSetIter << ' ';
    }
    dbgs() << '\n';
  }

  // Add the weight of Reg to every pressure set Reg belongs to.
  void increaseRegisterPressure(std::vector<unsigned> &Pressure,
                                Register Reg) const {
    auto PSetIter = MRI.getPressureSets(Reg);
    unsigned Weight = PSetIter.getWeight();
    for (; PSetIter.isValid(); ++PSetIter)
      Pressure[*PSetIter] += Weight;
  }

  // Subtract the weight of Reg from every pressure set Reg belongs to. The
  // pressure of each set must be at least the weight.
  void decreaseRegisterPressure(std::vector<unsigned> &Pressure,
                                Register Reg) const {
    auto PSetIter = MRI.getPressureSets(Reg);
    unsigned Weight = PSetIter.getWeight();
    for (; PSetIter.isValid(); ++PSetIter) {
      auto &P = Pressure[*PSetIter];
      assert(P >= Weight &&
             "register pressure must be greater than or equal weight");
      P -= Weight;
    }
  }

  // Return true if Reg is fixed one, for example, stack pointer
  bool isFixedRegister(Register Reg) const {
    return Reg.isPhysical() && TRI->isFixedRegister(MF, Reg.asMCReg());
  }

  // Return true if Reg is a virtual register whose definition is in OrigMBB.
  // NOTE(review): dereferences the result of getVRegDef without a null check;
  // presumably every register passed here has a definition -- confirm.
  bool isDefinedInThisLoop(Register Reg) const {
    return Reg.isVirtual() && MRI.getVRegDef(Reg)->getParent() == OrigMBB;
  }

  // Search for live-in variables. They are factored into the register pressure
  // from the beginning. Live-in variables used by every iteration should be
  // considered as alive throughout the loop. For example, the variable `c` in
  // following code. \code
  //   int c = ...;
  //   for (int i = 0; i < n; i++)
  //     a[i] += b[i] + c;
  // \endcode
  void computeLiveIn() {
    DenseSet<Register> Used;
    for (auto &MI : *OrigMBB) {
      if (MI.isDebugInstr())
        continue;
      for (auto &Use : ROMap[&MI].Uses) {
        auto Reg = Use.RegUnit;
        // Ignore the variable that appears only on one side of phi instruction
        // because it's used only at the first iteration.
        if (MI.isPHI() && Reg != getLoopPhiReg(MI, OrigMBB))
          continue;
        if (isFixedRegister(Reg))
          continue;
        if (isDefinedInThisLoop(Reg))
          continue;
        Used.insert(Reg);
      }
    }

    // Each live-in register is counted once, however often it is used.
    for (auto LiveIn : Used)
      increaseRegisterPressure(InitSetPressure, LiveIn);
  }

  // Calculate the upper limit of each pressure set
  // NOTE(review): the RCI parameter is not used in this function.
  void computePressureSetLimit(const RegisterClassInfo &RCI) {
    for (unsigned PSet = 0; PSet < PSetNum; PSet++)
      PressureSetLimit[PSet] = TRI->getRegPressureSetLimit(MF, PSet);

    // We assume fixed registers, such as stack pointer, are already in use.
    // Therefore subtracting the weight of the fixed registers from the limit of
    // each pressure set in advance.
    SmallDenseSet<Register, 8> FixedRegs;
    for (const TargetRegisterClass *TRC : TRI->regclasses()) {
      for (const MCPhysReg Reg : *TRC)
        if (isFixedRegister(Reg))
          FixedRegs.insert(Reg);
    }

    // NOTE(review): Reg is a physical register here, while the callee is
    // named getRegUnitPressureSets -- confirm whether a register unit is
    // expected instead of a register.
    LLVM_DEBUG({
      for (auto Reg : FixedRegs) {
        dbgs() << printReg(Reg, TRI, 0, &MRI) << ": [";
        const int *Sets = TRI->getRegUnitPressureSets(Reg);
        for (; *Sets != -1; Sets++) {
          dbgs() << TRI->getRegPressureSetName(*Sets) << ", ";
        }
        dbgs() << "]\n";
      }
    });

    for (auto Reg : FixedRegs) {
      LLVM_DEBUG(dbgs() << "fixed register: " << printReg(Reg, TRI, 0, &MRI)
                        << "\n");
      // NOTE(review): same question as above -- getPressureSets is given a
      // physical register; confirm it should not be called per register unit.
      auto PSetIter = MRI.getPressureSets(Reg);
      unsigned Weight = PSetIter.getWeight();
      for (; PSetIter.isValid(); ++PSetIter) {
        unsigned &Limit = PressureSetLimit[*PSetIter];
        assert(Limit >= Weight &&
               "register pressure limit must be greater than or equal weight");
        Limit -= Weight;
        LLVM_DEBUG(dbgs() << "PSet=" << *PSetIter << " Limit=" << Limit
                          << " (decreased by " << Weight << ")\n");
      }
    }
  }

  // There are two patterns of last-use.
  //   - by an instruction of the current iteration
  //   - by a phi instruction of the next iteration (loop carried value)
  //
  // Furthermore, following two groups of instructions are executed
  // simultaneously
  //   - next iteration's phi instructions in i-th stage
  //   - current iteration's instructions in i+1-th stage
  //
  // This function calculates the last-use of each register while taking into
  // account the above two patterns. The result maps each instruction to the
  // set of registers for which it is the last use.
  Instr2LastUsesTy computeLastUses(const OrderedInstsTy &OrderedInsts,
                                   Instr2StageTy &Stages) const {
    // We treat virtual registers that are defined and used in this loop.
    // Following virtual register will be ignored
    //   - live-in one
    //   - defined but not used in the loop (potentially live-out)
    DenseSet<Register> TargetRegs;
    const auto UpdateTargetRegs = [this, &TargetRegs](Register Reg) {
      if (isDefinedInThisLoop(Reg))
        TargetRegs.insert(Reg);
    };
    for (MachineInstr *MI : OrderedInsts) {
      if (MI->isPHI()) {
        // For a phi only the register returned by getLoopPhiReg is considered.
        Register Reg = getLoopPhiReg(*MI, OrigMBB);
        UpdateTargetRegs(Reg);
      } else {
        for (auto &Use : ROMap.find(MI)->getSecond().Uses)
          UpdateTargetRegs(Use.RegUnit);
      }
    }

    // Rank of an instruction: its stage, plus one for a phi (a phi of the
    // next iteration runs together with the following stage, see above).
    const auto InstrScore = [&Stages](MachineInstr *MI) {
      return Stages[MI] + MI->isPHI();
    };

    // Walk the instructions backwards; for each target register keep the user
    // with the highest score. A later candidate replaces the recorded one only
    // when its score is strictly greater.
    DenseMap<Register, MachineInstr *> LastUseMI;
    for (MachineInstr *MI : llvm::reverse(OrderedInsts)) {
      for (auto &Use : ROMap.find(MI)->getSecond().Uses) {
        auto Reg = Use.RegUnit;
        if (!TargetRegs.contains(Reg))
          continue;
        auto Ite = LastUseMI.find(Reg);
        if (Ite == LastUseMI.end()) {
          LastUseMI[Reg] = MI;
        } else {
          MachineInstr *Orig = Ite->second;
          MachineInstr *New = MI;
          if (InstrScore(Orig) < InstrScore(New))
            LastUseMI[Reg] = New;
        }
      }
    }

    // Invert the map: instruction -> registers it uses last.
    Instr2LastUsesTy LastUses;
    for (auto &Entry : LastUseMI)
      LastUses[Entry.second].insert(Entry.first);
    return LastUses;
  }

  // Compute the maximum register pressure of the kernel. We'll simulate #Stage
  // iterations and check the register pressure at the point where all stages
  // overlapping.
  //
  // An example of unrolled loop where #Stage is 4..
  // Iter   i+0 i+1 i+2 i+3
  // ------------------------
  // Stage   0
  // Stage   1   0
  // Stage   2   1   0
  // Stage   3   2   1   0  <- All stages overlap
  //
  // Returns, for each pressure set, the maximum pressure observed during the
  // simulation (never below InitSetPressure).
  std::vector<unsigned>
  computeMaxSetPressure(const OrderedInstsTy &OrderedInsts,
                        Instr2StageTy &Stages,
                        const unsigned StageCount) const {
    using RegSetTy = SmallDenseSet<Register, 16>;

    // Indexed by #Iter. To treat "local" variables of each stage separately, we
    // manage the liveness of the registers independently by iterations.
    SmallVector<RegSetTy> LiveRegSets(StageCount);

    auto CurSetPressure = InitSetPressure;
    auto MaxSetPressure = InitSetPressure;
    auto LastUses = computeLastUses(OrderedInsts, Stages);

    LLVM_DEBUG({
      dbgs() << "Ordered instructions:\n";
      for (MachineInstr *MI : OrderedInsts) {
        dbgs() << "Stage " << Stages[MI] << ": ";
        MI->dump();
      }
    });

    // Mark Reg live in RegSet and raise the current pressure, unless Reg is
    // invalid, fixed, or already in the set.
    const auto InsertReg = [this, &CurSetPressure](RegSetTy &RegSet,
                                                   Register Reg) {
      if (!Reg.isValid() || isFixedRegister(Reg))
        return;

      bool Inserted = RegSet.insert(Reg).second;
      if (!Inserted)
        return;

      LLVM_DEBUG(dbgs() << "insert " << printReg(Reg, TRI, 0, &MRI) << "\n");
      increaseRegisterPressure(CurSetPressure, Reg);
      LLVM_DEBUG(dumpPSet(Reg));
    };

    // Remove Reg from RegSet and lower the current pressure, unless Reg is
    // invalid, fixed, or not in the set.
    const auto EraseReg = [this, &CurSetPressure](RegSetTy &RegSet,
                                                  Register Reg) {
      if (!Reg.isValid() || isFixedRegister(Reg))
        return;

      // live-in register
      if (!RegSet.contains(Reg))
        return;

      LLVM_DEBUG(dbgs() << "erase " << printReg(Reg, TRI, 0, &MRI) << "\n");
      RegSet.erase(Reg);
      decreaseRegisterPressure(CurSetPressure, Reg);
      LLVM_DEBUG(dumpPSet(Reg));
    };

    // In step I, an instruction of stage Stage belongs to iteration I - Stage;
    // instructions whose stage has not started yet (I < Stage) are skipped.
    for (unsigned I = 0; I < StageCount; I++) {
      for (MachineInstr *MI : OrderedInsts) {
        const auto Stage = Stages[MI];
        if (I < Stage)
          continue;

        const unsigned Iter = I - Stage;

        // Definitions become live in the iteration the instruction belongs to.
        for (auto &Def : ROMap.find(MI)->getSecond().Defs)
          InsertReg(LiveRegSets[Iter], Def.RegUnit);

        // A last use by a phi ends the live range held by the previous
        // iteration (nothing to do for iteration 0); a last use by any other
        // instruction ends it in the same iteration.
        for (auto LastUse : LastUses[MI]) {
          if (MI->isPHI()) {
            if (Iter != 0)
              EraseReg(LiveRegSets[Iter - 1], LastUse);
          } else {
            EraseReg(LiveRegSets[Iter], LastUse);
          }
        }

        // Track the running maximum after every simulated instruction.
        for (unsigned PSet = 0; PSet < PSetNum; PSet++)
          MaxSetPressure[PSet] =
              std::max(MaxSetPressure[PSet], CurSetPressure[PSet]);

        LLVM_DEBUG({
          dbgs() << "CurSetPressure=";
          dumpRegisterPressures(CurSetPressure);
          dbgs() << " iter=" << Iter << " stage=" << Stage << ":";
          MI->dump();
        });
      }
    }

    return MaxSetPressure;
  }

public:
  // Set up the detector for the loop block OrigMBB of function MF. Both
  // pressure vectors are sized to the number of pressure sets and zeroed;
  // call init() before detect().
  HighRegisterPressureDetector(MachineBasicBlock *OrigMBB,
                               const MachineFunction &MF)
      : OrigMBB(OrigMBB), MF(MF), MRI(MF.getRegInfo()),
        TRI(MF.getSubtarget().getRegisterInfo()),
        PSetNum(TRI->getNumRegPressureSets()), InitSetPressure(PSetNum, 0),
        PressureSetLimit(PSetNum, 0) {}

  // Used to calculate register pressure, which is independent of loop
  // scheduling: collects the register operands of every non-debug
  // instruction, then computes the live-in pressure and the set limits.
  void init(const RegisterClassInfo &RCI) {
    for (MachineInstr &MI : *OrigMBB) {
      if (MI.isDebugInstr())
        continue;
      ROMap[&MI].collect(MI, *TRI, MRI, false, true);
    }

    computeLiveIn();
    computePressureSetLimit(RCI);
  }

  // Calculate the maximum register pressures of the loop and check if they
  // exceed the limit. Returns true when, for some pressure set, the maximum
  // pressure plus a margin of RegPressureMargin percent of the limit is above
  // the limit. MaxStage + 1 stages are simulated.
  bool detect(const SwingSchedulerDAG *SSD, SMSchedule &Schedule,
              const unsigned MaxStage) const {
    assert(0 <= RegPressureMargin && RegPressureMargin <= 100 &&
           "the percentage of the margin must be between 0 to 100");

    OrderedInstsTy OrderedInsts;
    Instr2StageTy Stages;
    computeScheduledInsts(SSD, Schedule, OrderedInsts, Stages);
    const auto MaxSetPressure =
        computeMaxSetPressure(OrderedInsts, Stages, MaxStage + 1);

    LLVM_DEBUG({
      dbgs() << "Dump MaxSetPressure:\n";
      for (unsigned I = 0; I < MaxSetPressure.size(); I++) {
        dbgs() << format("MaxSetPressure[%d]=%d\n", I, MaxSetPressure[I]);
      }
      dbgs() << '\n';
    });

    for (unsigned PSet = 0; PSet < PSetNum; PSet++) {
      unsigned Limit = PressureSetLimit[PSet];
      unsigned Margin = Limit * RegPressureMargin / 100;
      LLVM_DEBUG(dbgs() << "PSet=" << PSet << " Limit=" << Limit
                        << " Margin=" << Margin << "\n");
      if (Limit < MaxSetPressure[PSet] + Margin) {
        LLVM_DEBUG(
            dbgs()
            << "Rejected the schedule because of too high register pressure\n");
        return true;
      }
    }
    return false;
  }
};


} // end anonymous namespace


/// Calculate the resource constrained minimum initiation interval for the

/// specified loop. We use the DFA to model the resources needed for

/// each instruction, and we ignore dependences. A different DFA is created

/// for each cycle that is required. When adding a new instruction, we attempt

/// to add it to each existing DFA, until a legal space is found. If the

/// instruction cannot be reserved in an existing DFA, we create a new one.

unsigned SwingSchedulerDAG::calculateResMII() {

  LLVM_DEBUG(dbgs() << "calculateResMII:\n");

  ResourceManager RM(&MF.getSubtarget(), this);

  return RM.calculateResMII();

}


/// Calculate the recurrence-constrained minimum initiation interval.
/// For every non-empty circuit c, the II has to satisfy
/// delay(c) - II * distance(c) <= 0; the smallest such II is stored in the
/// circuit (setRecMII), and the maximum over all circuits is returned (0 when
/// there is no non-empty circuit). The distance of every circuit is taken to
/// be 1 here.
unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
  unsigned MaxRecMII = 0;

  for (NodeSet &Circuit : NodeSets) {
    if (Circuit.empty())
      continue;

    const unsigned Delay = Circuit.getLatency();
    const unsigned Distance = 1;

    // ii = ceil(delay / distance)
    const unsigned CircuitMII = (Delay + Distance - 1) / Distance;
    Circuit.setRecMII(CircuitMII);
    MaxRecMII = std::max(MaxRecMII, CircuitMII);
  }

  return MaxRecMII;
}


/// Swap all the anti dependences in the DAG. That means it is no longer a DAG,

/// but we do this to find the circuits, and then change them back.

static void swapAntiDependences(std::vector<SUnit> &SUnits) {

  SmallVector<std::pair<SUnit *, SDep>, 8> DepsAdded;

  for (SUnit &SU : SUnits) {

    for (SDep &Pred : SU.Preds)

      if (Pred.getKind() == SDep::Anti)

        DepsAdded.push_back(std::make_pair(&SU, Pred));

  }

  for (std::pair<SUnit *, SDep> &P : DepsAdded) {

    // Remove this anti dependency and add one in the reverse direction.

    SUnit *SU = P.first;

    SDep &D = P.second;

    SUnit *TargetSU = D.getSUnit();

    unsigned Reg = D.getReg();

    unsigned Lat = D.getLatency();

    SU->removePred(D);

    SDep Dep(SU, SDep::Anti, Reg);

    Dep.setLatency(Lat);

    TargetSU->addPred(Dep);

  }

}


/// Create the adjacency structure of the nodes in the graph.
///
/// For each node i, AdjK[i] receives the node numbers of its successors
/// (without duplicates, skipping boundary nodes, artificial edges and anti
/// edges that do not go to a Phi), plus back-edges for loop carried chain
/// dependences between a store and a load, plus back-edges for chains of
/// output dependences.
///
/// \param DAG Used to query whether a predecessor edge is loop carried.
void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
    SwingSchedulerDAG *DAG) {
  // Nodes already added to the adjacency list of the node being processed.
  BitVector Added(SUnits.size());
  // Maps the last node of an output dependence chain seen so far to the first
  // node of that chain.
  DenseMap<int, int> OutputDeps;
  for (int i = 0, e = SUnits.size(); i != e; ++i) {
    Added.reset();
    // Add any successor to the adjacency matrix and exclude duplicates.
    for (auto &SI : SUnits[i].Succs) {
      // Only create a back-edge on the first and last nodes of a dependence
      // chain. This records any chains and adds them later.
      if (SI.getKind() == SDep::Output) {
        int N = SI.getSUnit()->NodeNum;
        int BackEdge = i;
        // If node i is itself the end of a recorded chain, extend that chain:
        // the entry is re-keyed to N and keeps the chain's first node.
        auto Dep = OutputDeps.find(BackEdge);
        if (Dep != OutputDeps.end()) {
          BackEdge = Dep->second;
          OutputDeps.erase(Dep);
        }
        OutputDeps[N] = BackEdge;
      }
      // Do not process a boundary node, an artificial node.
      // A back-edge is processed only if it goes to a Phi.
      if (SI.getSUnit()->isBoundaryNode() || SI.isArtificial() ||
          (SI.getKind() == SDep::Anti && !SI.getSUnit()->getInstr()->isPHI()))
        continue;
      int N = SI.getSUnit()->NodeNum;
      if (!Added.test(N)) {
        AdjK[i].push_back(N);
        Added.set(N);
      }
    }
    // A chain edge between a store and a load is treated as a back-edge in the
    // adjacency matrix.
    for (auto &PI : SUnits[i].Preds) {
      // Only loop carried predecessors of a node that may store qualify.
      if (!SUnits[i].getInstr()->mayStore() ||
          !DAG->isLoopCarriedDep(&SUnits[i], PI, false))
        continue;
      if (PI.getKind() == SDep::Order && PI.getSUnit()->getInstr()->mayLoad()) {
        int N = PI.getSUnit()->NodeNum;
        if (!Added.test(N)) {
          AdjK[i].push_back(N);
          Added.set(N);
        }
      }
    }
  }
  // Add back-edges in the adjacency matrix for the output dependences.
  // NOTE(review): Added is reset per node in the loop above, so at this point
  // it holds the state left by the last node processed -- confirm that this is
  // the intended duplicate filter for the entries of OutputDeps.
  for (auto &OD : OutputDeps)
    if (!Added.test(OD.second)) {
      AdjK[OD.first].push_back(OD.second);
      Added.set(OD.second);
    }
}


/// Identify an elementary circuit in the dependence graph starting at the

/// specified node.

bool SwingSchedulerDAG::Circuits::circuit(int V, int S, NodeSetType &NodeSets,

                                          bool HasBackedge) {

  SUnit *SV = &SUnits[V];

  bool F = false;

  Stack.insert(SV);

  Blocked.set(V);


  for (auto W : AdjK[V]) {

    if (NumPaths > MaxPaths)

      break;

    if (W < S)

      continue;

    if (W == S) {

      if (!HasBackedge)

        NodeSets.push_back(NodeSet(Stack.begin(), Stack.end()));

      F = true;

      ++NumPaths;

      break;

    } else if (!Blocked.test(W)) {

      if (circuit(W, S, NodeSets,

                  Node2Idx->at(W) < Node2Idx->at(V) ? true : HasBackedge))

        F = true;

    }

  }


  if (F)

    unblock(V);

  else {

    for (auto W : AdjK[V]) {

      if (W < S)

        continue;

      B[W].insert(SV);

    }

  }

  Stack.pop_back();

  return F;

}


/// Unblock a node in the circuit finding algorithm.

void SwingSchedulerDAG::Circuits::unblock(int U) {

  Blocked.reset(U);

  SmallPtrSet<SUnit *, 4> &BU = B[U];

  while (!BU.empty()) {

    SmallPtrSet<SUnit *, 4>::iterator SI = BU.begin();

    assert(SI != BU.end() && "Invalid B set.");

    SUnit *W = *SI;

    BU.erase(W);

    if (Blocked.test(W->NodeNum))

      unblock(W->NodeNum);

  }

}


/// Identify all the elementary circuits in the dependence graph using
/// Johnson's circuit algorithm, and append them to NodeSets.
void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
  // Reverse the anti dependences for the duration of the search. The graph is
  // then no longer a DAG, which is what makes the circuits visible.
  swapAntiDependences(SUnits);

  Circuits Finder(SUnits, Topo);
  Finder.createAdjacencyStructure(this);

  // Start one search from every node, with the finder state reset in between.
  const int NumNodes = SUnits.size();
  for (int Start = 0; Start != NumNodes; ++Start) {
    Finder.reset();
    Finder.circuit(Start, Start, NodeSets);
  }

  // Reverse the anti dependences again so that the graph is a DAG once more.
  swapAntiDependences(SUnits);
}


// Create artificial dependencies between the source of COPY/REG_SEQUENCE that

// is loop-carried to the USE in next iteration. This will help pipeliner avoid

// additional copies that are needed across iterations. An artificial dependence

// edge is added from USE to SOURCE of COPY/REG_SEQUENCE.


// PHI-------Anti-Dep-----> COPY/REG_SEQUENCE (loop-carried)

// SRCOfCopy------True-Dep---> COPY/REG_SEQUENCE

// PHI-------True-Dep------> USEOfPhi


// The mutation creates

// USEOfPHI -------Artificial-Dep---> SRCOfCopy


// This overall will ensure, the USEOfPHI is scheduled before SRCOfCopy

// (since USE is a predecessor), implies, the COPY/ REG_SEQUENCE is scheduled

// late  to avoid additional copies across iterations. The possible scheduling

// order would be

// USEOfPHI --- SRCOfCopy---  COPY/REG_SEQUENCE.


void SwingSchedulerDAG::CopyToPhiMutation::apply(ScheduleDAGInstrs *DAG) {

  for (SUnit &SU : DAG->SUnits) {

    // Find the COPY/REG_SEQUENCE instruction.

    if (!SU.getInstr()->isCopy() && !SU.getInstr()->isRegSequence())

      continue;


    // Record the loop carried PHIs.

    SmallVector<SUnit *, 4> PHISUs;

    // Record the SrcSUs that feed the COPY/REG_SEQUENCE instructions.

    SmallVector<SUnit *, 4> SrcSUs;


    for (auto &Dep : SU.Preds) {

      SUnit *TmpSU = Dep.getSUnit();

      MachineInstr *TmpMI = TmpSU->getInstr();

      SDep::Kind DepKind = Dep.getKind();

      // Save the loop carried PHI.

      if (DepKind == SDep::Anti && TmpMI->isPHI())

        PHISUs.push_back(TmpSU);

      // Save the source of COPY/REG_SEQUENCE.

      // If the source has no pre-decessors, we will end up creating cycles.

      else if (DepKind == SDep::Data && !TmpMI->isPHI() && TmpSU->NumPreds > 0)

        SrcSUs.push_back(TmpSU);

    }


    if (PHISUs.size() == 0 || SrcSUs.size() == 0)

      continue;


    // Find the USEs of PHI. If the use is a PHI or REG_SEQUENCE, push back this

    // SUnit to the container.

    SmallVector<SUnit *, 8> UseSUs;

    // Do not use iterator based loop here as we are updating the container.

    for (size_t Index = 0; Index < PHISUs.size(); ++Index) {

      for (auto &Dep : PHISUs[Index]->Succs) {

        if (Dep.getKind() != SDep::Data)

          continue;


        SUnit *TmpSU = Dep.getSUnit();

        MachineInstr *TmpMI = TmpSU->getInstr();

        if (TmpMI->isPHI() || TmpMI->isRegSequence()) {

          PHISUs.push_back(TmpSU);

          continue;

        }

        UseSUs.push_back(TmpSU);

      }

    }


    if (UseSUs.size() == 0)

      continue;


    SwingSchedulerDAG *SDAG = cast<SwingSchedulerDAG>(DAG);

    // Add the artificial dependencies if it does not form a cycle.

    for (auto *I : UseSUs) {

      for (auto *Src : SrcSUs) {

        if (!SDAG->Topo.IsReachable(I, Src) && Src != I) {

          Src->addPred(SDep(I, SDep::Artificial));

          SDAG->Topo.AddPred(Src, I);

        }

      }

    }

  }

}


/// Return true for dependences that are ignored when computing the cost
/// functions: artificial edges, edges whose other end is a boundary node, and
/// anti dependences when \p isPred is true. The latter are the back-edge
/// recurrences, which are skipped to avoid unbounded recursion in the
/// calculation of the ASAP, ALAP, etc functions.
static bool ignoreDependence(const SDep &D, bool isPred) {
  const bool IsBackEdge = isPred && D.getKind() == SDep::Anti;
  return D.isArtificial() || D.getSUnit()->isBoundaryNode() || IsBackEdge;
}


/// Compute several functions needed to order the nodes for scheduling.
///  ASAP - Earliest time to schedule a node.
///  ALAP - Latest time to schedule a node.
///  MOV - Mobility function, difference between ALAP and ASAP.
///  D - Depth of each node.
///  H - Height of each node.
///
/// This function stores ASAP, ALAP, ZeroLatencyDepth and ZeroLatencyHeight in
/// ScheduleInfo (indexed by SUnit index). MOV, D and H are not written here;
/// they are only read through their getters in the debug dump at the end.
/// Finally, computeNodeSetInfo() is invoked on every node set in \p NodeSets.
void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
  ScheduleInfo.resize(SUnits.size());

  // Debug only: dump every node in topological order.
  LLVM_DEBUG({
    for (int I : Topo) {
      const SUnit &SU = SUnits[I];
      dumpNode(SU);
    }
  });

  int maxASAP = 0;
  // Compute ASAP and ZeroLatencyDepth.
  // Nodes are visited in topological order, so the values of a predecessor
  // are read through getASAP()/getZeroLatencyDepth() after it was processed.
  for (int I : Topo) {
    int asap = 0;
    int zeroLatencyDepth = 0;
    SUnit *SU = &SUnits[I];
    for (const SDep &P : SU->Preds) {
      SUnit *pred = P.getSUnit();
      // The zero-latency depth is updated before the ignoreDependence()
      // filter, so zero-latency edges that are ignored for ASAP still count.
      if (P.getLatency() == 0)
        zeroLatencyDepth =
            std::max(zeroLatencyDepth, getZeroLatencyDepth(pred) + 1);
      if (ignoreDependence(P, true))
        continue;
      // Earliest start implied by this edge: predecessor's ASAP plus the edge
      // latency, minus MII for every iteration of dependence distance.
      asap = std::max(asap, (int)(getASAP(pred) + P.getLatency() -
                                  getDistance(pred, SU, P) * MII));
    }
    maxASAP = std::max(maxASAP, asap);
    ScheduleInfo[I].ASAP = asap;
    ScheduleInfo[I].ZeroLatencyDepth = zeroLatencyDepth;
  }

  // Compute ALAP and ZeroLatencyHeight. (MOV is derived from ALAP and ASAP
  // and is not stored by this loop.)
  // Nodes are visited in reverse topological order; every node starts from
  // the largest ASAP found above and is pulled earlier by its successors.
  for (int I : llvm::reverse(Topo)) {
    int alap = maxASAP;
    int zeroLatencyHeight = 0;
    SUnit *SU = &SUnits[I];
    for (const SDep &S : SU->Succs) {
      SUnit *succ = S.getSUnit();
      // Boundary successors contribute to neither value.
      if (succ->isBoundaryNode())
        continue;
      if (S.getLatency() == 0)
        zeroLatencyHeight =
            std::max(zeroLatencyHeight, getZeroLatencyHeight(succ) + 1);
      // NOTE(review): isPred is passed as true for a successor edge here, so
      // anti dependences are skipped in this loop as well - confirm that this
      // is intended.
      if (ignoreDependence(S, true))
        continue;
      // Latest start implied by this edge: successor's ALAP minus the edge
      // latency, plus MII for every iteration of dependence distance.
      alap = std::min(alap, (int)(getALAP(succ) - S.getLatency() +
                                  getDistance(SU, succ, S) * MII));
    }

    ScheduleInfo[I].ALAP = alap;
    ScheduleInfo[I].ZeroLatencyHeight = zeroLatencyHeight;
  }

  // After computing the node functions, compute the summary for each node set.
  for (NodeSet &I : NodeSets)
    I.computeNodeSetInfo(this);

  // Debug only: print the computed functions for every node.
  LLVM_DEBUG({
    for (unsigned i = 0; i < SUnits.size(); i++) {
      dbgs() << "\tNode " << i << ":\n";
      dbgs() << "\t   ASAP = " << getASAP(&SUnits[i]) << "\n";
      dbgs() << "\t   ALAP = " << getALAP(&SUnits[i]) << "\n";
      dbgs() << "\t   MOV  = " << getMOV(&SUnits[i]) << "\n";
      dbgs() << "\t   D    = " << getDepth(&SUnits[i]) << "\n";
      dbgs() << "\t   H    = " << getHeight(&SUnits[i]) << "\n";
      dbgs() << "\t   ZLD  = " << getZeroLatencyDepth(&SUnits[i]) << "\n";
      dbgs() << "\t   ZLH  = " << getZeroLatencyHeight(&SUnits[i]) << "\n";
    }
  });
}


/// Compute the Pred_L(O) set, as defined in the paper: the predecessors of
/// the elements of \p NodeOrder that are not themselves in \p NodeOrder.
/// When \p S is non-null, only nodes contained in \p S are considered.
/// \p Preds is cleared first and receives the result; the return value is
/// true when the resulting set is not empty.
static bool pred_L(SetVector<SUnit *> &NodeOrder,
                   SmallSetVector<SUnit *, 8> &Preds,
                   const NodeSet *S = nullptr) {
  Preds.clear();
  for (const SUnit *SU : NodeOrder) {
    // Regular predecessors, minus the dependences that are ignored.
    for (const SDep &Pred : SU->Preds) {
      SUnit *PredSU = Pred.getSUnit();
      const bool InScope = !S || S->count(PredSU) != 0;
      if (InScope && !ignoreDependence(Pred, true) &&
          NodeOrder.count(PredSU) == 0)
        Preds.insert(PredSU);
    }
    // Back-edges are predecessors with an anti-dependence, so they are found
    // among the successor edges.
    for (const SDep &Succ : SU->Succs) {
      SUnit *SuccSU = Succ.getSUnit();
      const bool IsBackEdge = Succ.getKind() == SDep::Anti;
      const bool InScope = !S || S->count(SuccSU) != 0;
      if (IsBackEdge && InScope && NodeOrder.count(SuccSU) == 0)
        Preds.insert(SuccSU);
    }
  }
  return Preds.size() != 0;
}


/// Compute the Succ_L(O) set, as defined in the paper: the successors of the
/// elements of \p NodeOrder that are not themselves in \p NodeOrder.
/// When \p S is non-null, only nodes contained in \p S are considered.
/// \p Succs is cleared first and receives the result; the return value is
/// true when the resulting set is not empty.
static bool succ_L(SetVector<SUnit *> &NodeOrder,
                   SmallSetVector<SUnit *, 8> &Succs,
                   const NodeSet *S = nullptr) {
  Succs.clear();
  for (const SUnit *SU : NodeOrder) {
    // Regular successors, minus the dependences that are ignored.
    for (const SDep &Succ : SU->Succs) {
      SUnit *SuccSU = Succ.getSUnit();
      const bool InScope = !S || S->count(SuccSU) != 0;
      if (InScope && !ignoreDependence(Succ, false) &&
          NodeOrder.count(SuccSU) == 0)
        Succs.insert(SuccSU);
    }
    // Anti-dependence predecessors are treated as successors too.
    for (const SDep &Pred : SU->Preds) {
      SUnit *PredSU = Pred.getSUnit();
      const bool IsAnti = Pred.getKind() == SDep::Anti;
      const bool InScope = !S || S->count(PredSU) != 0;
      if (IsAnti && InScope && NodeOrder.count(PredSU) == 0)
        Succs.insert(PredSU);
    }
  }
  return Succs.size() != 0;
}


/// Return true if there is a path from \p Cur to any of the nodes in
/// \p DestNodes that does not pass through a boundary node or a node in
/// \p Exclude. Every node found to lie on such a path is added to \p Path.
/// \p Visited records the nodes already expanded; a node reached a second
/// time reports whether it has already been added to \p Path.
static bool computePath(SUnit *Cur, SetVector<SUnit *> &Path,
                        SetVector<SUnit *> &DestNodes,
                        SetVector<SUnit *> &Exclude,
                        SmallPtrSet<SUnit *, 8> &Visited) {
  if (Cur->isBoundaryNode() || Exclude.contains(Cur))
    return false;
  if (DestNodes.contains(Cur))
    return true;
  const bool FirstVisit = Visited.insert(Cur).second;
  if (!FirstVisit)
    return Path.contains(Cur);

  bool Reaches = false;
  // Follow the successor edges that are not ignored. Every edge is explored
  // even after a path has been found, so that all path nodes are collected.
  for (auto &Out : Cur->Succs) {
    if (ignoreDependence(Out, false))
      continue;
    if (computePath(Out.getSUnit(), Path, DestNodes, Exclude, Visited))
      Reaches = true;
  }
  // Anti-dependence predecessors are followed as well.
  for (auto &In : Cur->Preds) {
    if (In.getKind() != SDep::Anti)
      continue;
    if (computePath(In.getSUnit(), Path, DestNodes, Exclude, Visited))
      Reaches = true;
  }
  if (Reaches)
    Path.insert(Cur);
  return Reaches;
}


/// Compute the live-out registers for the instructions in node-set \p NS and
/// add them to \p RPTracker. The live-out registers are those that are defined
/// (by a non-dead def) in the node-set but not used in it. Use operands of
/// Phis are not counted as uses. Virtual registers are tracked directly;
/// allocatable physical registers are tracked per register unit.
static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker,
                            NodeSet &NS) {
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // First pass: collect everything that is used by a non-Phi instruction.
  SmallSet<unsigned, 4> Uses;
  for (SUnit *SU : NS) {
    const MachineInstr *MI = SU->getInstr();
    if (MI->isPHI())
      continue;
    for (const MachineOperand &MO : MI->all_uses()) {
      Register Reg = MO.getReg();
      if (Reg.isVirtual()) {
        Uses.insert(Reg);
        continue;
      }
      if (!MRI.isAllocatable(Reg))
        continue;
      for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg()))
        Uses.insert(Unit);
    }
  }

  // Second pass: every live def that was not seen as a use is a live-out.
  SmallVector<RegisterMaskPair, 8> LiveOutRegs;
  for (SUnit *SU : NS) {
    for (const MachineOperand &MO : SU->getInstr()->all_defs()) {
      if (MO.isDead())
        continue;
      Register Reg = MO.getReg();
      if (Reg.isVirtual()) {
        if (!Uses.count(Reg))
          LiveOutRegs.push_back(
              RegisterMaskPair(Reg, LaneBitmask::getNone()));
        continue;
      }
      if (!MRI.isAllocatable(Reg))
        continue;
      for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg()))
        if (!Uses.count(Unit))
          LiveOutRegs.push_back(
              RegisterMaskPair(Unit, LaneBitmask::getNone()));
    }
  }
  RPTracker.addLiveRegs(LiveOutRegs);
}


/// A heuristic to filter nodes in recurrent node-sets if the register
/// pressure of a set is too high.
///
/// For every node-set with more than two nodes, a register pressure tracker
/// is seeded with the live-outs of the set and then walked over the set's
/// instructions from the highest node number to the lowest. The first
/// instruction that reports a valid excess pressure delta is recorded on the
/// node-set through setExceedPressure(), and the walk of that set stops.
void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
  for (auto &NS : NodeSets) {
    // Skip small node-sets since they won't cause register pressure problems.
    if (NS.size() <= 2)
      continue;
    IntervalPressure RecRegPressure;
    RegPressureTracker RecRPTracker(RecRegPressure);
    // The tracker starts at the end of the block; the live-outs of the
    // node-set are added before the bottom of the region is closed.
    RecRPTracker.init(&MF, &RegClassInfo, &LIS, BB, BB->end(), false, true);
    computeLiveOuts(MF, RecRPTracker, NS);
    RecRPTracker.closeBottom();

    // Visit the nodes in descending NodeNum order.
    // Note: this local vector shadows the SUnits member inside this scope.
    std::vector<SUnit *> SUnits(NS.begin(), NS.end());
    llvm::sort(SUnits, [](const SUnit *A, const SUnit *B) {
      return A->NodeNum > B->NodeNum;
    });

    for (auto &SU : SUnits) {
      // Since we're computing the register pressure for a subset of the
      // instructions in a block, we need to set the tracker for each
      // instruction in the node-set. The tracker is set to the instruction
      // just after the one we're interested in.
      MachineBasicBlock::const_iterator CurInstI = SU->getInstr();
      RecRPTracker.setPos(std::next(CurInstI));

      RegPressureDelta RPDelta;
      // Left empty: no critical pressure sets are supplied to the query.
      ArrayRef<PressureChange> CriticalPSets;
      RecRPTracker.getMaxUpwardPressureDelta(SU->getInstr(), nullptr, RPDelta,
                                             CriticalPSets,
                                             RecRegPressure.MaxSetPressure);
      if (RPDelta.Excess.isValid()) {
        LLVM_DEBUG(
            dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") "
                   << TRI->getRegPressureSetName(RPDelta.Excess.getPSet())
                   << ":" << RPDelta.Excess.getUnitInc() << "\n");
        // Remember the offending node and stop looking at this node-set.
        NS.setExceedPressure(SU);
        break;
      }
      // No excess at this instruction: move the tracker upward past it.
      RecRPTracker.recede();
    }
  }
}


/// A heuristic to colocate node sets that have the same set of successors.
/// Each non-empty node set with at least one successor is paired with the
/// first later node set that compares equal on RecMII and has exactly the same
/// successor set; both are then tagged with the same, freshly incremented
/// colocate id.
void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
  unsigned Colocate = 0;
  const int NumSets = NodeSets.size();
  for (int First = 0; First < NumSets; ++First) {
    NodeSet &N1 = NodeSets[First];
    SmallSetVector<SUnit *, 8> S1;
    if (N1.empty() || !succ_L(N1, S1))
      continue;
    for (int Second = First + 1; Second < NumSets; ++Second) {
      NodeSet &N2 = NodeSets[Second];
      if (N1.compareRecMII(N2) != 0)
        continue;
      SmallSetVector<SUnit *, 8> S2;
      if (N2.empty() || !succ_L(N2, S2))
        continue;
      // Equal size plus inclusion means the two successor sets are identical.
      const bool SameSuccessors =
          S1.size() == S2.size() && llvm::set_is_subset(S1, S2);
      if (!SameSuccessors)
        continue;
      ++Colocate;
      N1.setColocate(Colocate);
      N2.setColocate(Colocate);
      break;
    }
  }
}


/// Check if the existing node-sets are profitable. If not, then ignore the
/// recurrent node-sets, and attempt to schedule all nodes together. This is
/// a heuristic: when the MII is large (at least 17) and every recurrent
/// node-set is small (RecMII of at most 2 and a maximum depth that does not
/// exceed the MII), \p NodeSets is cleared so that all instructions are
/// scheduled together instead of starting with the recurrent node-sets.
void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
  // Only loops with a large MII are affected.
  const bool HasLargeMII = !(MII < 17);
  if (!HasLargeMII)
    return;
  // Keep the node-sets as soon as one of them is more than a simple add
  // recurrence.
  for (auto &NS : NodeSets)
    if (NS.getRecMII() > 2 || NS.getMaxDepth() > MII)
      return;
  NodeSets.clear();
  LLVM_DEBUG(dbgs() << "Clear recurrence node-sets\n");
}


/// Add the nodes that do not belong to a recurrence set into groups
/// based upon connected components.
///
/// The existing node sets are first grown with the nodes lying on paths
/// between them. New node sets are then appended to \p NodeSets for the nodes
/// connected to successors of the added nodes, for the nodes connected to
/// their predecessors, and finally for every node that is still unassigned.
void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
  // All nodes that have been assigned to some node set so far.
  SetVector<SUnit *> NodesAdded;
  SmallPtrSet<SUnit *, 8> Visited;
  // Add the nodes that are on a path between the previous node sets and
  // the current node set.
  for (NodeSet &I : NodeSets) {
    SmallSetVector<SUnit *, 8> N;
    // Add the nodes from the current node set to the previous node set.
    // The search starts at the successors of the current set, targets the
    // nodes added so far, and excludes the current set itself.
    if (succ_L(I, N)) {
      SetVector<SUnit *> Path;
      for (SUnit *NI : N) {
        // Each start node gets a fresh visited set.
        Visited.clear();
        computePath(NI, Path, NodesAdded, I, Visited);
      }
      if (!Path.empty())
        I.insert(Path.begin(), Path.end());
    }
    // Add the nodes from the previous node set to the current node set.
    // The search starts at the successors of the nodes added so far, targets
    // the current set, and excludes the nodes added so far.
    N.clear();
    if (succ_L(NodesAdded, N)) {
      SetVector<SUnit *> Path;
      for (SUnit *NI : N) {
        Visited.clear();
        computePath(NI, Path, I, NodesAdded, Visited);
      }
      if (!Path.empty())
        I.insert(Path.begin(), Path.end());
    }
    NodesAdded.insert(I.begin(), I.end());
  }

  // Create a new node set with the connected nodes of any successor of a node
  // in a recurrent set.
  NodeSet NewSet;
  SmallSetVector<SUnit *, 8> N;
  if (succ_L(NodesAdded, N))
    for (SUnit *I : N)
      addConnectedNodes(I, NewSet, NodesAdded);
  if (!NewSet.empty())
    NodeSets.push_back(NewSet);

  // Create a new node set with the connected nodes of any predecessor of a node
  // in a recurrent set.
  NewSet.clear();
  if (pred_L(NodesAdded, N))
    for (SUnit *I : N)
      addConnectedNodes(I, NewSet, NodesAdded);
  if (!NewSet.empty())
    NodeSets.push_back(NewSet);

  // Create new node sets from the connected nodes of every remaining node
  // that has not been added to any node set yet.
  for (SUnit &SU : SUnits) {
    if (NodesAdded.count(&SU) == 0) {
      NewSet.clear();
      addConnectedNodes(&SU, NewSet, NodesAdded);
      if (!NewSet.empty())
        NodeSets.push_back(NewSet);
    }
  }
}


/// Add \p SU to \p NewSet and \p NodesAdded, then recursively do the same for
/// every node connected to it through a non-artificial dependence that is not
/// yet in \p NodesAdded. Boundary successors are not followed.
void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
                                          SetVector<SUnit *> &NodesAdded) {
  NewSet.insert(SU);
  NodesAdded.insert(SU);

  // Walk downwards through the successors first.
  for (auto &Out : SU->Succs) {
    if (Out.isArtificial())
      continue;
    SUnit *Successor = Out.getSUnit();
    if (Successor->isBoundaryNode() || NodesAdded.count(Successor) != 0)
      continue;
    addConnectedNodes(Successor, NewSet, NodesAdded);
  }

  // Then walk upwards through the predecessors.
  for (auto &In : SU->Preds) {
    if (In.isArtificial())
      continue;
    SUnit *Predecessor = In.getSUnit();
    if (NodesAdded.count(Predecessor) != 0)
      continue;
    addConnectedNodes(Predecessor, NewSet, NodesAdded);
  }
}


/// Return true if \p Set1 and \p Set2 have at least one element in common.
/// \p Result is cleared and then filled with the common elements, in the
/// order in which they appear in \p Set1.
static bool isIntersect(SmallSetVector<SUnit *, 8> &Set1, const NodeSet &Set2,
                        SmallSetVector<SUnit *, 8> &Result) {
  Result.clear();
  for (SUnit *Candidate : Set1) {
    if (Set2.count(Candidate) == 0)
      continue;
    Result.insert(Candidate);
  }
  return Result.size() != 0;
}


/// Merge the recurrence node sets that have the same initial node.
///
/// For every node set, each later node set whose first node has the same
/// NodeNum is folded into it: the larger RecMII of the two is kept, the nodes
/// are inserted into the earlier set, and the later set is erased from
/// \p NodeSets.
void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {
  for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
       ++I) {
    NodeSet &NI = *I;
    for (NodeSetType::iterator J = I + 1; J != E;) {
      NodeSet &NJ = *J;
      if (NI.getNode(0)->NodeNum != NJ.getNode(0)->NodeNum) {
        ++J;
        continue;
      }
      if (NJ.compareRecMII(NI) > 0)
        NI.setRecMII(NJ.getRecMII());
      for (SUnit *SU : NJ)
        NI.insert(SU);
      // Continue from the iterator returned by erase() instead of reusing the
      // erased one, and refresh the end iterator since the container shrank.
      J = NodeSets.erase(J);
      E = NodeSets.end();
    }
  }
}


/// Remove nodes that have been scheduled in previous NodeSets.
///
/// Every node that occurs in an earlier node set is removed from all later
/// node sets; a later node set that becomes empty is erased from \p NodeSets.
void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
  for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
       ++I)
    for (NodeSetType::iterator J = I + 1; J != E;) {
      J->remove_if([&](SUnit *SUJ) { return I->count(SUJ); });

      if (J->empty()) {
        // Continue from the iterator returned by erase() instead of reusing
        // the erased one, and refresh the end iterator since the container
        // shrank.
        J = NodeSets.erase(J);
        E = NodeSets.end();
      } else {
        ++J;
      }
    }
}


/// Compute an ordered list of the dependence graph nodes, which
/// indicates the order that the nodes will be scheduled.  This is a
/// two-level algorithm. First, a partial order is created, which
/// consists of a list of sets ordered from highest to lowest priority.
///
/// The node sets in \p NodeSets are processed in the given order. For each
/// set, a work list R and an initial direction (top-down or bottom-up) are
/// chosen; the nodes of the set are then appended to NodeOrder by repeatedly
/// sweeping in one direction until R is empty and then switching direction.
void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
  // Work list of candidate nodes. It is declared once but is always empty
  // when a new node set starts, because the sweep loop below only exits once
  // R is empty.
  SmallSetVector<SUnit *, 8> R;
  NodeOrder.clear();

  for (auto &Nodes : NodeSets) {
    LLVM_DEBUG(dbgs() << "NodeSet size " << Nodes.size() << "\n");
    OrderKind Order;
    SmallSetVector<SUnit *, 8> N;
    // Pick the start nodes and the initial direction for this node set.
    if (pred_L(NodeOrder, N) && llvm::set_is_subset(N, Nodes)) {
      // All unordered predecessors of the ordered nodes are in this set.
      R.insert(N.begin(), N.end());
      Order = BottomUp;
      LLVM_DEBUG(dbgs() << "  Bottom up (preds) ");
    } else if (succ_L(NodeOrder, N) && llvm::set_is_subset(N, Nodes)) {
      // All unordered successors of the ordered nodes are in this set.
      R.insert(N.begin(), N.end());
      Order = TopDown;
      LLVM_DEBUG(dbgs() << "  Top down (succs) ");
    } else if (isIntersect(N, Nodes, R)) {
      // If some of the successors are in the existing node-set, then use the
      // top-down ordering.
      // (N holds the result of the succ_L call above; isIntersect stores the
      // common nodes in R.)
      Order = TopDown;
      LLVM_DEBUG(dbgs() << "  Top down (intersect) ");
    } else if (NodeSets.size() == 1) {
      // A single node set: start from every node without successor edges.
      for (const auto &N : Nodes)
        if (N->Succs.size() == 0)
          R.insert(N);
      Order = BottomUp;
      LLVM_DEBUG(dbgs() << "  Bottom up (all) ");
    } else {
      // Find the node with the highest ASAP.
      // Ties are broken in favor of the larger NodeNum.
      SUnit *maxASAP = nullptr;
      for (SUnit *SU : Nodes) {
        if (maxASAP == nullptr || getASAP(SU) > getASAP(maxASAP) ||
            (getASAP(SU) == getASAP(maxASAP) && SU->NodeNum > maxASAP->NodeNum))
          maxASAP = SU;
      }
      R.insert(maxASAP);
      Order = BottomUp;
      LLVM_DEBUG(dbgs() << "  Bottom up (default) ");
    }

    // Alternate between top-down and bottom-up sweeps until no candidate is
    // left for this node set.
    while (!R.empty()) {
      if (Order == TopDown) {
        // Choose the node with the maximum height.  If more than one, choose
        // the node with the maximum ZeroLatencyHeight. If still more than one,
        // choose the node with the lowest MOV.
        while (!R.empty()) {
          SUnit *maxHeight = nullptr;
          for (SUnit *I : R) {
            if (maxHeight == nullptr || getHeight(I) > getHeight(maxHeight))
              maxHeight = I;
            else if (getHeight(I) == getHeight(maxHeight) &&
                     getZeroLatencyHeight(I) > getZeroLatencyHeight(maxHeight))
              maxHeight = I;
            else if (getHeight(I) == getHeight(maxHeight) &&
                     getZeroLatencyHeight(I) ==
                         getZeroLatencyHeight(maxHeight) &&
                     getMOV(I) < getMOV(maxHeight))
              maxHeight = I;
          }
          NodeOrder.insert(maxHeight);
          LLVM_DEBUG(dbgs() << maxHeight->NodeNum << " ");
          R.remove(maxHeight);
          // Queue the successors that belong to this node set, are not
          // ordered yet, and are not reached through an ignored dependence.
          for (const auto &I : maxHeight->Succs) {
            if (Nodes.count(I.getSUnit()) == 0)
              continue;
            if (NodeOrder.contains(I.getSUnit()))
              continue;
            if (ignoreDependence(I, false))
              continue;
            R.insert(I.getSUnit());
          }
          // Back-edges are predecessors with an anti-dependence.
          for (const auto &I : maxHeight->Preds) {
            if (I.getKind() != SDep::Anti)
              continue;
            if (Nodes.count(I.getSUnit()) == 0)
              continue;
            if (NodeOrder.contains(I.getSUnit()))
              continue;
            R.insert(I.getSUnit());
          }
        }
        Order = BottomUp;
        LLVM_DEBUG(dbgs() << "\n   Switching order to bottom up ");
        // Seed the bottom-up sweep with the unordered predecessors of the
        // ordered nodes, restricted to this node set.
        SmallSetVector<SUnit *, 8> N;
        if (pred_L(NodeOrder, N, &Nodes))
          R.insert(N.begin(), N.end());
      } else {
        // Choose the node with the maximum depth.  If more than one, choose
        // the node with the maximum ZeroLatencyDepth. If still more than one,
        // choose the node with the lowest MOV.
        while (!R.empty()) {
          SUnit *maxDepth = nullptr;
          for (SUnit *I : R) {
            if (maxDepth == nullptr || getDepth(I) > getDepth(maxDepth))
              maxDepth = I;
            else if (getDepth(I) == getDepth(maxDepth) &&
                     getZeroLatencyDepth(I) > getZeroLatencyDepth(maxDepth))
              maxDepth = I;
            else if (getDepth(I) == getDepth(maxDepth) &&
                     getZeroLatencyDepth(I) == getZeroLatencyDepth(maxDepth) &&
                     getMOV(I) < getMOV(maxDepth))
              maxDepth = I;
          }
          NodeOrder.insert(maxDepth);
          LLVM_DEBUG(dbgs() << maxDepth->NodeNum << " ");
          R.remove(maxDepth);
          // If the chosen node is the one recorded as exceeding the register
          // pressure of this node set, abandon the bottom-up sweep and
          // restart from the first node of the set.
          if (Nodes.isExceedSU(maxDepth)) {
            Order = TopDown;
            R.clear();
            R.insert(Nodes.getNode(0));
            break;
          }
          // Queue the predecessors that belong to this node set and are not
          // ordered yet. Unlike the top-down sweep, no ignoreDependence()
          // filter is applied here.
          for (const auto &I : maxDepth->Preds) {
            if (Nodes.count(I.getSUnit()) == 0)
              continue;
            if (NodeOrder.contains(I.getSUnit()))
              continue;
            R.insert(I.getSUnit());
          }
          // Back-edges are predecessors with an anti-dependence.
          for (const auto &I : maxDepth->Succs) {
            if (I.getKind() != SDep::Anti)
              continue;
            if (Nodes.count(I.getSUnit()) == 0)
              continue;
            if (NodeOrder.contains(I.getSUnit()))
              continue;
            R.insert(I.getSUnit());
          }
        }
        Order = TopDown;
        LLVM_DEBUG(dbgs() << "\n   Switching order to top down ");
        // Seed the top-down sweep with the unordered successors of the
        // ordered nodes, restricted to this node set.
        SmallSetVector<SUnit *, 8> N;
        if (succ_L(NodeOrder, N, &Nodes))
          R.insert(N.begin(), N.end());
      }
    }
    LLVM_DEBUG(dbgs() << "\nDone with Nodeset\n");
  }

  // Debug only: print the final node order.
  LLVM_DEBUG({
    dbgs() << "Node order: ";
    for (SUnit *I : NodeOrder)
      dbgs() << " " << I->NodeNum << " ";
    dbgs() << "\n";
  });
}


/// Process the nodes in the computed order and create the pipelined schedule
/// of the instructions, if possible. Return true if a schedule is found.
///
/// Starting at MII, each initiation interval up to MAX_II is tried until all
/// nodes of NodeOrder can be inserted into \p Schedule and the result passes
/// the follow-up checks. A schedule whose maximum stage count is zero is
/// reported as not found. When no schedule is accepted, \p Schedule is reset.
bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {

  // The do-while loop below dereferences NodeOrder.begin() unconditionally,
  // so an empty order must be rejected up front.
  if (NodeOrder.empty()){
    LLVM_DEBUG(dbgs() << "NodeOrder is empty! abort scheduling\n" );
    return false;
  }

  bool scheduleFound = false;
  // The register pressure detector is only created when the
  // LimitRegPressure option is set.
  std::unique_ptr<HighRegisterPressureDetector> HRPDetector;
  if (LimitRegPressure) {
    HRPDetector =
        std::make_unique<HighRegisterPressureDetector>(Loop.getHeader(), MF);
    HRPDetector->init(RegClassInfo);
  }
  // Keep increasing II until a valid schedule is found.
  for (unsigned II = MII; II <= MAX_II && !scheduleFound; ++II) {
    Schedule.reset();
    Schedule.setInitiationInterval(II);
    LLVM_DEBUG(dbgs() << "Try to schedule with " << II << "\n");

    SetVector<SUnit *>::iterator NI = NodeOrder.begin();
    SetVector<SUnit *>::iterator NE = NodeOrder.end();
    do {
      SUnit *SU = *NI;

      // Compute the schedule time for the instruction, which is based
      // upon the scheduled time for any predecessors/successors.
      // INT_MIN / INT_MAX act as "no bound" markers for the tests below.
      int EarlyStart = INT_MIN;
      int LateStart = INT_MAX;
      // These values are set when the size of the schedule window is limited
      // due to chain dependences.
      int SchedEnd = INT_MAX;
      int SchedStart = INT_MIN;
      Schedule.computeStart(SU, &EarlyStart, &LateStart, &SchedEnd, &SchedStart,
                            II, this);
      LLVM_DEBUG({
        dbgs() << "\n";
        dbgs() << "Inst (" << SU->NodeNum << ") ";
        SU->getInstr()->dump();
        dbgs() << "\n";
      });
      LLVM_DEBUG({
        dbgs() << format("\tes: %8x ls: %8x me: %8x ms: %8x\n", EarlyStart,
                         LateStart, SchedEnd, SchedStart);
      });

      // The window is empty: this node cannot be placed at the current II.
      if (EarlyStart > LateStart || SchedEnd < EarlyStart ||
          SchedStart > LateStart)
        scheduleFound = false;
      // Only an early bound is known: try the cycles from EarlyStart up to
      // at most II - 1 cycles later.
      else if (EarlyStart != INT_MIN && LateStart == INT_MAX) {
        SchedEnd = std::min(SchedEnd, EarlyStart + (int)II - 1);
        scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
      // Only a late bound is known: try the cycles from LateStart down to
      // at most II - 1 cycles earlier.
      } else if (EarlyStart == INT_MIN && LateStart != INT_MAX) {
        SchedStart = std::max(SchedStart, LateStart - (int)II + 1);
        scheduleFound = Schedule.insert(SU, LateStart, SchedStart, II);
      // Both bounds are known: the window is clamped by both of them.
      } else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
        SchedEnd =
            std::min(SchedEnd, std::min(LateStart, EarlyStart + (int)II - 1));
        // When scheduling a Phi it is better to start at the late cycle and go
        // backwards. The default order may insert the Phi too far away from
        // its first dependence.
        if (SU->getInstr()->isPHI())
          scheduleFound = Schedule.insert(SU, SchedEnd, EarlyStart, II);
        else
          scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
      // No bound at all: place the node relative to the first cycle of the
      // schedule, offset by its ASAP value.
      } else {
        int FirstCycle = Schedule.getFirstCycle();
        scheduleFound = Schedule.insert(SU, FirstCycle + getASAP(SU),
                                        FirstCycle + getASAP(SU) + II - 1, II);
      }
      // Even if we find a schedule, make sure the schedule doesn't exceed the
      // allowable number of stages. We keep trying if this happens.
      // (A negative SwpMaxStages disables this check.)
      if (scheduleFound)
        if (SwpMaxStages > -1 &&
            Schedule.getMaxStageCount() > (unsigned)SwpMaxStages)
          scheduleFound = false;

      LLVM_DEBUG({
        if (!scheduleFound)
          dbgs() << "\tCan't schedule\n";
      });
    } while (++NI != NE && scheduleFound);

    // If a schedule is found, ensure non-pipelined instructions are in stage 0
    if (scheduleFound)
      scheduleFound =
          Schedule.normalizeNonPipelinedInstructions(this, LoopPipelinerInfo);

    // If a schedule is found, check if it is a valid schedule too.
    if (scheduleFound)
      scheduleFound = Schedule.isValidSchedule(this);

    // If a schedule was found and the option is enabled, check if the schedule
    // might generate additional register spills/fills.
    if (scheduleFound && LimitRegPressure)
      scheduleFound =
          !HRPDetector->detect(this, Schedule, Schedule.getMaxStageCount());
  }

  LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound
                    << " (II=" << Schedule.getInitiationInterval()
                    << ")\n");

  // Give the target the final say on the schedule that was found.
  if (scheduleFound) {
    scheduleFound = LoopPipelinerInfo->shouldUseSchedule(*this, Schedule);
    if (!scheduleFound)
      LLVM_DEBUG(dbgs() << "Target rejected schedule\n");
  }

  // Finalize and report an accepted schedule; otherwise discard whatever was
  // built.
  if (scheduleFound) {
    Schedule.finalizeSchedule(this);
    Pass.ORE->emit([&]() {
      return MachineOptimizationRemarkAnalysis(
                 DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
             << "Schedule found with Initiation Interval: "
             << ore::NV("II", Schedule.getInitiationInterval())
             << ", MaxStageCount: "
             << ore::NV("MaxStageCount", Schedule.getMaxStageCount());
    });
  } else
    Schedule.reset();

  // A schedule with a maximum stage count of zero is reported as not found.
  return scheduleFound && Schedule.getMaxStageCount() > 0;
}


/// Return true if we can compute the amount the instruction changes
/// during each iteration. Set \p Delta to the amount of the change.
///
/// The base register of the memory operand of \p MI is looked up; when it is
/// defined by a Phi, the definition of the Phi's loop value is used instead.
/// The increment reported by the target for that definition becomes the delta.
bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) {
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  const MachineOperand *BaseOp;
  int64_t Offset;
  bool OffsetIsScalable;
  const bool HasMemOperand =
      TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI);
  if (!HasMemOperand)
    return false;

  // FIXME: This algorithm assumes instructions have fixed-size offsets.
  // Scalable offsets and non-register bases are both rejected.
  if (OffsetIsScalable || !BaseOp->isReg())
    return false;

  MachineRegisterInfo &MRI = MF.getRegInfo();
  Register BaseReg = BaseOp->getReg();
  MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
  // Check if there is a Phi. If so, get the definition in the loop.
  if (BaseDef && BaseDef->isPHI()) {
    BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
    BaseDef = MRI.getVRegDef(BaseReg);
  }
  if (!BaseDef)
    return false;

  int D = 0;
  const bool HasIncrement = TII->getIncrementValue(*BaseDef, D);
  // NOTE(review): a failed query only bails out while D is non-negative, and
  // a negative D is converted to unsigned below - confirm this is intended.
  if (!HasIncrement && D >= 0)
    return false;

  Delta = D;
  return true;
}


/// Check if we can change the instruction to use an offset value from the
/// previous iteration. If so, return true and set the base and offset values
/// so that we can rewrite the load, if necessary.
///   v1 = Phi(v0, v3)
///   v2 = load v1, 0
///   v3 = post_store v1, 4, x
/// This function enables the load to be rewritten as v2 = load v3, 4.
///
/// On success \p BasePos and \p OffsetPos receive the base and offset operand
/// positions of \p MI, \p NewBase receives the register defined in the loop
/// that feeds the Phi, and \p Offset receives the offset immediate of the
/// post-increment instruction. The output parameters are left untouched when
/// false is returned.
bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI,
                                              unsigned &BasePos,
                                              unsigned &OffsetPos,
                                              unsigned &NewBase,
                                              int64_t &Offset) {
  // Get the load instruction.
  // A post-increment instruction is not a candidate itself.
  if (TII->isPostIncrement(*MI))
    return false;
  unsigned BasePosLd, OffsetPosLd;
  if (!TII->getBaseAndOffsetPosition(*MI, BasePosLd, OffsetPosLd))
    return false;
  Register BaseReg = MI->getOperand(BasePosLd).getReg();

  // Look for the Phi instruction.
  MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
  MachineInstr *Phi = MRI.getVRegDef(BaseReg);
  if (!Phi || !Phi->isPHI())
    return false;
  // Get the register defined in the loop block.
  unsigned PrevReg = getLoopPhiReg(*Phi, MI->getParent());
  if (!PrevReg)
    return false;

  // Check for the post-increment load/store instruction.
  MachineInstr *PrevDef = MRI.getVRegDef(PrevReg);
  if (!PrevDef || PrevDef == MI)
    return false;

  if (!TII->isPostIncrement(*PrevDef))
    return false;

  unsigned BasePos1 = 0, OffsetPos1 = 0;
  if (!TII->getBaseAndOffsetPosition(*PrevDef, BasePos1, OffsetPos1))
    return false;

  // Make sure that the instructions do not access the same memory location in
  // the next iteration.
  // A temporary clone of MI with the combined offset is used for the query
  // and is deleted again right afterwards.
  // NOTE(review): both offset operands are read with getImm() without an
  // isImm() check - presumably guaranteed by the target hooks; confirm.
  int64_t LoadOffset = MI->getOperand(OffsetPosLd).getImm();
  int64_t StoreOffset = PrevDef->getOperand(OffsetPos1).getImm();
  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
  NewMI->getOperand(OffsetPosLd).setImm(LoadOffset + StoreOffset);
  bool Disjoint = TII->areMemAccessesTriviallyDisjoint(*NewMI, *PrevDef);
  MF.deleteMachineInstr(NewMI);
  if (!Disjoint)
    return false;

  // Set the return value once we determine that we return true.
  BasePos = BasePosLd;
  OffsetPos = OffsetPosLd;
  NewBase = PrevReg;
  Offset = StoreOffset;
  return true;
}


/// Apply changes to the instruction if needed. The changes are needed
/// to improve the scheduling and depend upon the final schedule.
///
/// Nothing happens unless the instruction's SUnit has an entry in
/// InstrChanges and the target can report its base/offset operand positions.
/// When the instruction is scheduled in an earlier stage than the in-loop
/// definition of its base register, a clone with an adjusted offset (and
/// possibly a new base register) replaces it in the SUnit.
void SwingSchedulerDAG::applyInstrChange(MachineInstr *MI,
                                         SMSchedule &Schedule) {
  SUnit *SU = getSUnit(MI);
  auto ChangeIt = InstrChanges.find(SU);
  if (ChangeIt == InstrChanges.end())
    return;

  const std::pair<unsigned, int64_t> RegAndOffset = ChangeIt->second;
  unsigned BasePos, OffsetPos;
  if (!TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
    return;

  // Locate where the base register's in-loop definition and this instruction
  // were placed in the schedule.
  Register BaseReg = MI->getOperand(BasePos).getReg();
  MachineInstr *LoopDef = findDefInLoop(BaseReg);
  const int DefStageNum = Schedule.stageScheduled(getSUnit(LoopDef));
  const int DefCycleNum = Schedule.cycleScheduled(getSUnit(LoopDef));
  const int BaseStageNum = Schedule.stageScheduled(SU);
  const int BaseCycleNum = Schedule.cycleScheduled(SU);
  if (BaseStageNum >= DefStageNum)
    return;

  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
  int OffsetDiff = DefStageNum - BaseStageNum;
  if (DefCycleNum < BaseCycleNum) {
    // The definition comes first within the cycle order: switch to the
    // recorded register and account for one fewer stage of offset.
    NewMI->getOperand(BasePos).setReg(RegAndOffset.first);
    if (OffsetDiff > 0)
      --OffsetDiff;
  }
  const int64_t NewOffset =
      MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
  NewMI->getOperand(OffsetPos).setImm(NewOffset);

  // Make the clone the instruction of record for this SUnit.
  SU->setInstr(NewMI);
  MISUnitMap[NewMI] = SU;
  NewMIs[MI] = NewMI;
}


/// Return the instruction in the loop that defines the register.
/// If the definition is a Phi, then follow the Phi operand to
/// the instruction in the loop.
///
/// Phis are followed repeatedly; the walk stops at the first non-Phi
/// definition or as soon as a Phi is reached a second time.
MachineInstr *SwingSchedulerDAG::findDefInLoop(Register Reg) {
  SmallPtrSet<MachineInstr *, 8> SeenPhis;
  MachineInstr *Cur = MRI.getVRegDef(Reg);
  while (Cur->isPHI() && SeenPhis.insert(Cur).second) {
    // Phi operands come in (value, block) pairs starting at index 1; take the
    // value paired with the loop block, if there is one.
    const unsigned NumOps = Cur->getNumOperands();
    for (unsigned ValIdx = 1; ValIdx < NumOps; ValIdx += 2) {
      if (Cur->getOperand(ValIdx + 1).getMBB() != BB)
        continue;
      Cur = MRI.getVRegDef(Cur->getOperand(ValIdx).getReg());
      break;
    }
  }
  return Cur;
}


/// Return true for an order or output dependence that is loop carried
/// potentially. A dependence is loop carried if the destination defines a value
/// that may be used or defined by the source in a subsequent iteration.
///
/// The function is conservative: every query it cannot answer makes it return
/// true. It returns false only for dependences that are not Order/Output, are
/// artificial, end at a boundary node, involve a non-memory instruction, or
/// pass the final offset comparison at the bottom.
///
/// \param Source  The node the dependence \p Dep is attached to.
/// \param Dep     The dependence edge being classified.
/// \param isSucc  When false, the two instructions are swapped before the
///                analysis.
bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
                                         bool isSucc) {
  // Only real Order/Output edges between non-boundary nodes are candidates.
  if ((Dep.getKind() != SDep::Order && Dep.getKind() != SDep::Output) ||
      Dep.isArtificial() || Dep.getSUnit()->isBoundaryNode())
    return false;

  // With pruning disabled, every candidate is treated as loop carried.
  if (!SwpPruneLoopCarried)
    return true;

  // Output dependences are never pruned.
  if (Dep.getKind() == SDep::Output)
    return true;

  MachineInstr *SI = Source->getInstr();
  MachineInstr *DI = Dep.getSUnit()->getInstr();
  if (!isSucc)
    std::swap(SI, DI);
  assert(SI != nullptr && DI != nullptr && "Expecting SUnit with an MI.");

  // Assume ordered loads and stores may have a loop carried dependence.
  if (SI->hasUnmodeledSideEffects() || DI->hasUnmodeledSideEffects() ||
      SI->mayRaiseFPException() || DI->mayRaiseFPException() ||
      SI->hasOrderedMemoryRef() || DI->hasOrderedMemoryRef())
    return true;

  // From here on both instructions must be memory operations.
  if (!DI->mayLoadOrStore() || !SI->mayLoadOrStore())
    return false;

  // The conservative assumption is that a dependence between memory operations
  // may be loop carried. The following code checks when it can be proved that
  // there is no loop carried dependence.
  unsigned DeltaS, DeltaD;
  if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
    return true;

  const MachineOperand *BaseOpS, *BaseOpD;
  int64_t OffsetS, OffsetD;
  bool OffsetSIsScalable, OffsetDIsScalable;
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  if (!TII->getMemOperandWithOffset(*SI, BaseOpS, OffsetS, OffsetSIsScalable,
                                    TRI) ||
      !TII->getMemOperandWithOffset(*DI, BaseOpD, OffsetD, OffsetDIsScalable,
                                    TRI))
    return true;

  assert(!OffsetSIsScalable && !OffsetDIsScalable &&
         "Expected offsets to be byte offsets");

  // Both base registers must be defined by Phis.
  MachineInstr *DefS = MRI.getVRegDef(BaseOpS->getReg());
  MachineInstr *DefD = MRI.getVRegDef(BaseOpD->getReg());
  if (!DefS || !DefD || !DefS->isPHI() || !DefD->isPHI())
    return true;

  unsigned InitValS = 0;
  unsigned LoopValS = 0;
  unsigned InitValD = 0;
  unsigned LoopValD = 0;
  getPhiRegs(*DefS, BB, InitValS, LoopValS);
  getPhiRegs(*DefD, BB, InitValD, LoopValD);
  MachineInstr *InitDefS = MRI.getVRegDef(InitValS);
  MachineInstr *InitDefD = MRI.getVRegDef(InitValD);

  // The two Phis must start from identical initial-value definitions.
  // NOTE(review): InitDefS/InitDefD are dereferenced without a null check.
  if (!InitDefS->isIdenticalTo(*InitDefD))
    return true;

  // Check that the base register is incremented by a constant value for each
  // iteration.
  // Only the success of getIncrementValue is used; D itself is not read again,
  // and only the source-side loop value is queried.
  MachineInstr *LoopDefS = MRI.getVRegDef(LoopValS);
  int D = 0;
  if (!LoopDefS || !TII->getIncrementValue(*LoopDefS, D))
    return true;

  // Access sizes come from the first memory operand of each instruction.
  LocationSize AccessSizeS = (*SI->memoperands_begin())->getSize();
  LocationSize AccessSizeD = (*DI->memoperands_begin())->getSize();

  // This is the main test, which checks the offset values and the loop
  // increment value to determine if the accesses may be loop carried.
  if (!AccessSizeS.hasValue() || !AccessSizeD.hasValue())
    return true;

  // The deltas must be equal and each at least as large as its access size.
  if (DeltaS != DeltaD || DeltaS < AccessSizeS.getValue() ||
      DeltaD < AccessSizeD.getValue())
    return true;

  // Loop carried only if the end of the S access lies before the end of the D
  // access.
  return (OffsetS + (int64_t)AccessSizeS.getValue() <
          OffsetD + (int64_t)AccessSizeD.getValue());
}


void SwingSchedulerDAG::postProcessDAG() {

  for (auto &M : Mutations)

    M->apply(this);

}


/// Try to schedule the node at the specified StartCycle and continue
/// until the node is scheduled or the EndCycle is reached.  This function
/// returns true if the node is scheduled.  This routine may search either
/// forward or backward for a place to insert the instruction based upon
/// the relative values of StartCycle and EndCycle.
bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
  LLVM_DEBUG({
    dbgs() << "Trying to insert node between " << StartCycle << " and "
           << EndCycle << " II: " << II << "\n";
  });

  // Walk from StartCycle to EndCycle inclusive, in whichever direction gets
  // there; StopCycle is the first cycle past the far end.
  const int Step = (StartCycle > EndCycle) ? -1 : 1;
  const int StopCycle = EndCycle + Step;
  for (int Cycle = StartCycle; Cycle != StopCycle; Cycle += Step) {
    // Zero-cost instructions are placed without consulting or reserving
    // resources.
    const bool ZeroCost =
        ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode());
    if (!ZeroCost && !ProcItinResources.canReserveResources(*SU, Cycle)) {
      LLVM_DEBUG({
        dbgs() << "\tfailed to insert at cycle " << Cycle << " ";
        SU->getInstr()->dump();
      });
      continue;
    }

    LLVM_DEBUG({
      dbgs() << "\tinsert at cycle " << Cycle << " ";
      SU->getInstr()->dump();
    });

    if (!ZeroCost)
      ProcItinResources.reserveResources(*SU, Cycle);
    ScheduledInstrs[Cycle].push_back(SU);
    InstrToCycle.insert(std::make_pair(SU, Cycle));

    // Widen the recorded schedule bounds if this cycle lies outside them.
    if (Cycle > LastCycle)
      LastCycle = Cycle;
    if (Cycle < FirstCycle)
      FirstCycle = Cycle;
    return true;
  }
  return false;
}


// Return the cycle of the earliest scheduled instruction in the chain.

int SMSchedule::earliestCycleInChain(const SDep &Dep) {

  SmallPtrSet<SUnit *, 8> Visited;

  SmallVector<SDep, 8> Worklist;

  Worklist.push_back(Dep);

  int EarlyCycle = INT_MAX;

  while (!Worklist.empty()) {

    const SDep &Cur = Worklist.pop_back_val();

    SUnit *PrevSU = Cur.getSUnit();

    if (Visited.count(PrevSU))

      continue;

    std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU);

    if (it == InstrToCycle.end())

      continue;

    EarlyCycle = std::min(EarlyCycle, it->second);

    for (const auto &PI : PrevSU->Preds)

      if (PI.getKind() == SDep::Order || PI.getKind() == SDep::Output)

        Worklist.push_back(PI);

    Visited.insert(PrevSU);

  }

  return EarlyCycle;

}


// Return the cycle of the latest scheduled instruction in the chain.

int SMSchedule::latestCycleInChain(const SDep &Dep) {

  SmallPtrSet<SUnit *, 8> Visited;

  SmallVector<SDep, 8> Worklist;

  Worklist.push_back(Dep);

  int LateCycle = INT_MIN;

  while (!Worklist.empty()) {

    const SDep &Cur = Worklist.pop_back_val();

    SUnit *SuccSU = Cur.getSUnit();

    if (Visited.count(SuccSU) || SuccSU->isBoundaryNode())

      continue;

    std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);

    if (it == InstrToCycle.end())

      continue;

    LateCycle = std::max(LateCycle, it->second);

    for (const auto &SI : SuccSU->Succs)

      if (SI.getKind() == SDep::Order || SI.getKind() == SDep::Output)

        Worklist.push_back(SI);

    Visited.insert(SuccSU);

  }

  return LateCycle;

}


/// If an instruction has a use that spans multiple iterations, return the
/// Phi node responsible; otherwise return nullptr. Such instructions are
/// characterized by having a back-edge to a Phi, which in turn has a data
/// successor that is another Phi.
static SUnit *multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG) {
  for (auto &PredEdge : SU->Preds) {
    if (!DAG->isBackedge(SU, PredEdge))
      continue;
    SUnit *PhiSU = PredEdge.getSUnit();
    if (!PhiSU->getInstr()->isPHI())
      continue;
    for (auto &PhiSucc : PhiSU->Succs) {
      if (PhiSucc.getKind() != SDep::Data)
        continue;
      if (PhiSucc.getSUnit()->getInstr()->isPHI())
        return PhiSU;
    }
  }
  return nullptr;
}


/// Compute the scheduling start slot for the instruction.  The start slot
/// depends on any predecessor or successor nodes scheduled already.
///
/// \param SU             The node about to be scheduled.
/// \param MaxEarlyStart  In/out: raised to the latest lower bound found.
/// \param MinLateStart   In/out: lowered to the earliest upper bound found.
/// \param MinEnd         In/out: lowered for loop-carried predecessor chains.
/// \param MaxStart       In/out: raised for loop-carried successor chains.
/// \param II             The initiation interval being tried.
/// \param DAG            The DAG that owns \p SU.
void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
                              int *MinEnd, int *MaxStart, int II,
                              SwingSchedulerDAG *DAG) {
  // Iterate over each instruction that has been scheduled already.  The start
  // slot computation depends on whether the previously scheduled instruction
  // is a predecessor or successor of the specified instruction.
  for (int cycle = getFirstCycle(); cycle <= LastCycle; ++cycle) {

    // Iterate over each instruction in the current cycle.
    for (SUnit *I : getInstructions(cycle)) {
      // The multiple-iteration Phi of I does not depend on which predecessor
      // of SU is being examined, so look it up once per scheduled instruction
      // instead of once per predecessor. It is only needed when SU has
      // predecessors at all.
      SUnit *BE = SU->Preds.empty() ? nullptr : multipleIterations(I, DAG);

      // Because we're processing a DAG for the dependences, we recognize
      // the back-edge in recurrences by anti dependences.
      for (const SDep &Dep : SU->Preds) {
        if (Dep.getSUnit() == I) {
          if (!DAG->isBackedge(SU, Dep)) {
            int EarlyStart = cycle + Dep.getLatency() -
                             DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
            *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
            if (DAG->isLoopCarriedDep(SU, Dep, false)) {
              int End = earliestCycleInChain(Dep) + (II - 1);
              *MinEnd = std::min(*MinEnd, End);
            }
          } else {
            int LateStart = cycle - Dep.getLatency() +
                            DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
            *MinLateStart = std::min(*MinLateStart, LateStart);
          }
        }
        // For instruction that requires multiple iterations, make sure that
        // the dependent instruction is not scheduled past the definition.
        if (BE && Dep.getSUnit() == BE && !SU->getInstr()->isPHI() &&
            !SU->isPred(I))
          *MinLateStart = std::min(*MinLateStart, cycle);
      }

      // Mirror image of the predecessor case for edges leaving SU.
      for (const SDep &Dep : SU->Succs) {
        if (Dep.getSUnit() != I)
          continue;
        if (!DAG->isBackedge(SU, Dep)) {
          int LateStart = cycle - Dep.getLatency() +
                          DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
          *MinLateStart = std::min(*MinLateStart, LateStart);
          if (DAG->isLoopCarriedDep(SU, Dep)) {
            int Start = latestCycleInChain(Dep) + 1 - II;
            *MaxStart = std::max(*MaxStart, Start);
          }
        } else {
          int EarlyStart = cycle + Dep.getLatency() -
                           DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
          *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
        }
      }
    }
  }
}


/// Order the instructions within a cycle so that the definitions occur
/// before the uses. \p SU is inserted into \p Insts either at the front or
/// at the back; the function itself returns nothing.
///
/// Every entry already in \p Insts is compared against \p SU, first through
/// the virtual registers \p SU reads or writes and then through \p SU's
/// Order/Anti dependence edges. When \p SU must go both before one entry and
/// after another, those two entries are removed and all three are re-inserted
/// recursively.
///
/// \param SSD    The DAG, used for the replacement base register lookup and
///               the loop-carried checks.
/// \param SU     The node to place.
/// \param Insts  The partially ordered instruction list for one cycle.
void SMSchedule::orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU,
                                 std::deque<SUnit *> &Insts) const {
  MachineInstr *MI = SU->getInstr();
  // Placement constraints found so far, and the list positions that caused
  // them.
  bool OrderBeforeUse = false;
  bool OrderAfterDef = false;
  bool OrderBeforeDef = false;
  unsigned MoveDef = 0;
  unsigned MoveUse = 0;
  int StageInst1 = stageScheduled(SU);

  unsigned Pos = 0;
  for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E;
       ++I, ++Pos) {
    for (MachineOperand &MO : MI->operands()) {
      if (!MO.isReg() || !MO.getReg().isVirtual())
        continue;

      Register Reg = MO.getReg();
      // If this operand is the base register and the DAG reports a
      // replacement base register for SU, compare against that one instead.
      unsigned BasePos, OffsetPos;
      if (ST.getInstrInfo()->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
        if (MI->getOperand(BasePos).getReg() == Reg)
          if (unsigned NewReg = SSD->getInstrBaseReg(SU))
            Reg = NewReg;
      bool Reads, Writes;
      std::tie(Reads, Writes) =
          (*I)->getInstr()->readsWritesVirtualRegister(Reg);
      if (MO.isDef() && Reads && stageScheduled(*I) <= StageInst1) {
        OrderBeforeUse = true;
        if (MoveUse == 0)
          MoveUse = Pos;
      } else if (MO.isDef() && Reads && stageScheduled(*I) > StageInst1) {
        // Add the instruction after the scheduled instruction.
        OrderAfterDef = true;
        MoveDef = Pos;
      } else if (MO.isUse() && Writes && stageScheduled(*I) == StageInst1) {
        if (cycleScheduled(*I) == cycleScheduled(SU) && !(*I)->isSucc(SU)) {
          OrderBeforeUse = true;
          if (MoveUse == 0)
            MoveUse = Pos;
        } else {
          OrderAfterDef = true;
          MoveDef = Pos;
        }
      } else if (MO.isUse() && Writes && stageScheduled(*I) > StageInst1) {
        OrderBeforeUse = true;
        if (MoveUse == 0)
          MoveUse = Pos;
        if (MoveUse != 0) {
          OrderAfterDef = true;
          MoveDef = Pos - 1;
        }
      } else if (MO.isUse() && Writes && stageScheduled(*I) < StageInst1) {
        // Add the instruction before the scheduled instruction.
        OrderBeforeUse = true;
        if (MoveUse == 0)
          MoveUse = Pos;
      } else if (MO.isUse() && stageScheduled(*I) == StageInst1 &&
                 isLoopCarriedDefOfUse(SSD, (*I)->getInstr(), MO)) {
        if (MoveUse == 0) {
          OrderBeforeDef = true;
          MoveUse = Pos;
        }
      }
    }
    // Check for order dependences between instructions. Make sure the source
    // is ordered before the destination.
    for (auto &S : SU->Succs) {
      if (S.getSUnit() != *I)
        continue;
      if (S.getKind() == SDep::Order && stageScheduled(*I) == StageInst1) {
        OrderBeforeUse = true;
        if (Pos < MoveUse)
          MoveUse = Pos;
      }
      // We did not handle HW dependences in previous for loop,
      // and we normally set Latency = 0 for Anti deps,
      // so may have nodes in same cycle with Anti dependent on HW regs.
      else if (S.getKind() == SDep::Anti && stageScheduled(*I) == StageInst1) {
        OrderBeforeUse = true;
        if ((MoveUse == 0) || (Pos < MoveUse))
          MoveUse = Pos;
      }
    }
    for (auto &P : SU->Preds) {
      if (P.getSUnit() != *I)
        continue;
      if (P.getKind() == SDep::Order && stageScheduled(*I) == StageInst1) {
        OrderAfterDef = true;
        MoveDef = Pos;
      }
    }
  }

  // A circular dependence.
  if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef)
    OrderBeforeUse = false;

  // OrderAfterDef takes precedences over OrderBeforeDef. The latter is due
  // to a loop-carried dependence.
  if (OrderBeforeDef)
    OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef);

  // The uncommon case when the instruction order needs to be updated because
  // there is both a use and def.
  if (OrderBeforeUse && OrderAfterDef) {
    SUnit *UseSU = Insts.at(MoveUse);
    SUnit *DefSU = Insts.at(MoveDef);
    // Erase the higher position first so the lower index stays valid.
    if (MoveUse > MoveDef) {
      Insts.erase(Insts.begin() + MoveUse);
      Insts.erase(Insts.begin() + MoveDef);
    } else {
      Insts.erase(Insts.begin() + MoveDef);
      Insts.erase(Insts.begin() + MoveUse);
    }
    // Re-insert the three nodes one at a time against the remaining list.
    orderDependence(SSD, UseSU, Insts);
    orderDependence(SSD, SU, Insts);
    orderDependence(SSD, DefSU, Insts);
    return;
  }
  // Put the new instruction first if there is a use in the list. Otherwise,
  // put it at the end of the list.
  if (OrderBeforeUse)
    Insts.push_front(SU);
  else
    Insts.push_back(SU);
}


/// Return true if the scheduled Phi has a loop carried operand.
///
/// A non-Phi instruction is never loop carried. For a Phi, the definition of
/// its loop value is looked up: if that definition has no SUnit or is itself
/// a Phi the answer is true; otherwise the answer depends on where the Phi
/// and that definition were scheduled.
///
/// \param SSD  The DAG used to map instructions to SUnits.
/// \param Phi  The instruction to test.
bool SMSchedule::isLoopCarried(const SwingSchedulerDAG *SSD,
                               MachineInstr &Phi) const {
  // This guard already guarantees a Phi below, so no further assertion on
  // Phi.isPHI() is needed.
  if (!Phi.isPHI())
    return false;
  SUnit *DefSU = SSD->getSUnit(&Phi);
  unsigned DefCycle = cycleScheduled(DefSU);
  int DefStage = stageScheduled(DefSU);

  unsigned InitVal = 0;
  unsigned LoopVal = 0;
  getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
  SUnit *UseSU = SSD->getSUnit(MRI.getVRegDef(LoopVal));
  if (!UseSU)
    return true;
  if (UseSU->getInstr()->isPHI())
    return true;
  unsigned LoopCycle = cycleScheduled(UseSU);
  int LoopStage = stageScheduled(UseSU);
  // Carried when the loop value is produced in a later cycle than the Phi,
  // or in a stage that is not after the Phi's stage.
  return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
}


/// Return true if the instruction is a definition that is loop carried

/// and defines the use on the next iteration.

///        v1 = phi(v2, v3)

///  (Def) v3 = op v1

///  (MO)   = v1

/// If MO appears before Def, then v1 and v3 may get assigned to the same

/// register.

bool SMSchedule::isLoopCarriedDefOfUse(const SwingSchedulerDAG *SSD,

                                       MachineInstr *Def,

                                       MachineOperand &MO) const {

  if (!MO.isReg())

    return false;

  if (Def->isPHI())

    return false;

  MachineInstr *Phi = MRI.getVRegDef(MO.getReg());

  if (!Phi || !Phi->isPHI() || Phi->getParent() != Def->getParent())

    return false;

  if (!isLoopCarried(SSD, *Phi))

    return false;

  unsigned LoopReg = getLoopPhiReg(*Phi, Phi->getParent());

  for (MachineOperand &DMO : Def->all_defs()) {

    if (DMO.getReg() == LoopReg)

      return true;

  }

  return false;

}


/// Determine transitive dependences of unpipelineable instructions.
///
/// The seed set is every instruction node the loop info asks to ignore for
/// pipelining. From each node in the set, all predecessors are added, and for
/// Phi nodes the targets of their Anti successor edges are added as well.
SmallSet<SUnit *, 8> SMSchedule::computeUnpipelineableNodes(
    SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI) {
  SmallSet<SUnit *, 8> Pinned;
  SmallVector<SUnit *, 8> Pending;

  for (SUnit &Node : SSD->SUnits) {
    if (!Node.isInstr())
      continue;
    if (PLI->shouldIgnoreForPipelining(Node.getInstr()))
      Pending.push_back(&Node);
  }

  while (!Pending.empty()) {
    SUnit *Cur = Pending.pop_back_val();
    // Nodes already in the set were expanded before.
    if (!Pinned.insert(Cur).second)
      continue;
    LLVM_DEBUG(dbgs() << "Do not pipeline SU(" << Cur->NodeNum << ")\n");
    for (const SDep &PredEdge : Cur->Preds)
      Pending.push_back(PredEdge.getSUnit());
    if (!Cur->getInstr()->isPHI())
      continue;
    for (const SDep &SuccEdge : Cur->Succs) {
      if (SuccEdge.getKind() == SDep::Anti)
        Pending.push_back(SuccEdge.getSUnit());
    }
  }
  return Pinned;
}


// Determine all instructions upon which any unpipelineable instruction depends
// and move those that are not already in stage 0 to the earliest cycle their
// predecessors allow. LastCycle is recomputed from the resulting placement.
// As written, this function always returns true.
bool SMSchedule::normalizeNonPipelinedInstructions(
    SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI) {
  SmallSet<SUnit *, 8> DNP = computeUnpipelineableNodes(SSD, PLI);

  int HighestCycle = INT_MIN;
  for (SUnit &SU : SSD->SUnits) {
    if (!SU.isInstr())
      continue;

    // Nodes that may be pipelined, or that already sit in stage 0, keep
    // their cycle.
    const bool NeedsMove = DNP.contains(&SU) && stageScheduled(&SU) != 0;
    if (!NeedsMove) {
      HighestCycle = std::max(HighestCycle, InstrToCycle[&SU]);
      continue;
    }

    // Put the non-pipelined instruction as early as possible in the schedule:
    // the first cycle, or the latest cycle of any predecessor if that is
    // later.
    int TargetCycle = getFirstCycle();
    for (auto &PredEdge : SU.Preds)
      TargetCycle = std::max(InstrToCycle[PredEdge.getSUnit()], TargetCycle);

    const int CurrentCycle = InstrToCycle[&SU];
    if (CurrentCycle != TargetCycle) {
      InstrToCycle[&SU] = TargetCycle;
      // Remove from the old cycle's list before touching the new one.
      llvm::erase(getInstructions(CurrentCycle), &SU);
      getInstructions(TargetCycle).emplace_back(&SU);
      LLVM_DEBUG(dbgs() << "SU(" << SU.NodeNum
                        << ") is not pipelined; moving from cycle "
                        << CurrentCycle << " to " << TargetCycle
                        << " Instr:" << *SU.getInstr());
    }
    HighestCycle = std::max(HighestCycle, TargetCycle);
  }
  LastCycle = HighestCycle;
  return true;
}


// Check if the generated schedule is valid. This function checks if
// an instruction that uses a physical register is scheduled in a
// different stage than the definition. The pipeliner does not handle
// physical register values that may cross a basic block boundary.
// Furthermore, if a physical def/use pair is assigned to the same
// cycle, orderDependence does not guarantee def/use ordering, so that
// case should be considered invalid.  (The test checks for both
// earlier and same-cycle use to be more robust.)
bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) {
  for (SUnit &DefSU : SSD->SUnits) {
    if (!DefSU.hasPhysRegDefs)
      continue;
    const int DefStage = stageScheduled(&DefSU);
    const int DefCycle = InstrToCycle[&DefSU];
    assert(DefStage != -1 && "Instruction should have been scheduled.");
    for (auto &Edge : DefSU.Succs) {
      // Only register dependences on physical registers that end at a real
      // node are of interest.
      if (!Edge.isAssignedRegDep() || Edge.getSUnit()->isBoundaryNode())
        continue;
      if (!Register::isPhysicalRegister(Edge.getReg()))
        continue;
      // The user must share the definition's stage and come strictly later.
      SUnit *UseSU = Edge.getSUnit();
      if (stageScheduled(UseSU) != DefStage)
        return false;
      if (InstrToCycle[UseSU] <= DefCycle)
        return false;
    }
  }
  return true;
}


/// A property of the node order in swing-modulo-scheduling is

/// that for nodes outside circuits the following holds:

/// none of them is scheduled after both a successor and a

/// predecessor.

/// The method below checks whether the property is met.

/// If not, debug information is printed and statistics information updated.

/// Note that we do not use an assert statement.

/// The reason is that although an invalid node oder may prevent

/// the pipeliner from finding a pipelined schedule for arbitrary II,

/// it does not lead to the generation of incorrect code.

void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const {


  // a sorted vector that maps each SUnit to its index in the NodeOrder

  typedef std::pair<SUnit *, unsigned> UnitIndex;

  std::vector<UnitIndex> Indices(NodeOrder.size(), std::make_pair(nullptr, 0));


  for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i)

    Indices.push_back(std::make_pair(NodeOrder[i], i));


  auto CompareKey = [](UnitIndex i1, UnitIndex i2) {

    return std::get<0>(i1) < std::get<0>(i2);

  };


  // sort, so that we can perform a binary search

  llvm::sort(Indices, CompareKey);


  bool Valid = true;

  (void)Valid;

  // for each SUnit in the NodeOrder, check whether

  // it appears after both a successor and a predecessor

  // of the SUnit. If this is the case, and the SUnit

  // is not part of circuit, then the NodeOrder is not

  // valid.

  for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i) {

    SUnit *SU = NodeOrder[i];

    unsigned Index = i;


    bool PredBefore = false;

    bool SuccBefore = false;


    SUnit *Succ;

    SUnit *Pred;

    (void)Succ;

    (void)Pred;


    for (SDep &PredEdge : SU->Preds) {

      SUnit *PredSU = PredEdge.getSUnit();

      unsigned PredIndex = std::get<1>(

          *llvm::lower_bound(Indices, std::make_pair(PredSU, 0), CompareKey));

      if (!PredSU->getInstr()->isPHI() && PredIndex < Index) {

        PredBefore = true;

        Pred = PredSU;

        break;

      }

    }


    for (SDep &SuccEdge : SU->Succs) {

      SUnit *SuccSU = SuccEdge.getSUnit();

      // Do not process a boundary node, it was not included in NodeOrder,

      // hence not in Indices either, call to std::lower_bound() below will

      // return Indices.end().

      if (SuccSU->isBoundaryNode())

        continue;

      unsigned SuccIndex = std::get<1>(

          *llvm::lower_bound(Indices, std::make_pair(SuccSU, 0), CompareKey));

      if (!SuccSU->getInstr()->isPHI() && SuccIndex < Index) {

        SuccBefore = true;

        Succ = SuccSU;

        break;

      }

    }


    if (PredBefore && SuccBefore && !SU->getInstr()->isPHI()) {

      // instructions in circuits are allowed to be scheduled

      // after both a successor and predecessor.

      bool InCircuit = llvm::any_of(

          Circuits, [SU](const NodeSet &Circuit) { return Circuit.count(SU); });

      if (InCircuit)

        LLVM_DEBUG(dbgs() << "In a circuit, predecessor ";);

      else {

        Valid = false;

        NumNodeOrderIssues++;

        LLVM_DEBUG(dbgs() << "Predecessor ";);

      }

      LLVM_DEBUG(dbgs() << Pred->NodeNum << " and successor " << Succ->NodeNum

                        << " are scheduled before node " << SU->NodeNum

                        << "\n";);

    }

  }


  LLVM_DEBUG({

    if (!Valid)

      dbgs() << "Invalid node order found!\n";

  });

}


/// Attempt to fix the degenerate cases when the instruction serialization
/// causes the register lifetimes to overlap. For example,
///   p' = store_pi(p, b)
///      = load p, offset
/// In this case p and p' overlap, which means that two registers are needed.
/// Instead, this function changes the load to use p' and updates the offset.
///
/// \param Instrs  The serialized instructions of one cycle, scanned in order.
void SwingSchedulerDAG::fixupRegisterOverlaps(std::deque<SUnit *> &Instrs) {
  // Both stay 0 until an instruction with a tied def/use pair has been seen.
  unsigned OverlapReg = 0;
  unsigned NewBaseReg = 0;
  for (SUnit *SU : Instrs) {
    MachineInstr *MI = SU->getInstr();
    for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
      const MachineOperand &MO = MI->getOperand(i);
      // Look for an instruction that uses p. The instruction occurs in the
      // same cycle but occurs later in the serialized order.
      // Require a pending overlap register: without the OverlapReg test a
      // use operand holding register 0 would compare equal to the initial
      // value and be treated as a use of p.
      if (OverlapReg && MO.isReg() && MO.isUse() &&
          MO.getReg() == OverlapReg) {
        // Check that the instruction appears in the InstrChanges structure,
        // which contains instructions that can have the offset updated.
        DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It =
            InstrChanges.find(SU);
        if (It != InstrChanges.end()) {
          unsigned BasePos, OffsetPos;
          // Update the base register and adjust the offset.
          if (TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos)) {
            MachineInstr *NewMI = MF.CloneMachineInstr(MI);
            NewMI->getOperand(BasePos).setReg(NewBaseReg);
            int64_t NewOffset =
                MI->getOperand(OffsetPos).getImm() - It->second.second;
            NewMI->getOperand(OffsetPos).setImm(NewOffset);
            SU->setInstr(NewMI);
            MISUnitMap[NewMI] = SU;
            NewMIs[MI] = NewMI;
          }
        }
        // The pending overlap is consumed by the first use that is found,
        // whether or not the instruction could be rewritten.
        OverlapReg = 0;
        NewBaseReg = 0;
        break;
      }
      // Look for an instruction of the form p' = op(p), which uses and defines
      // two virtual registers that get allocated to the same physical register.
      unsigned TiedUseIdx = 0;
      if (MI->isRegTiedToUseOperand(i, &TiedUseIdx)) {
        // OverlapReg is p in the example above.
        OverlapReg = MI->getOperand(TiedUseIdx).getReg();
        // NewBaseReg is p' in the example above.
        NewBaseReg = MI->getOperand(i).getReg();
        break;
      }
    }
  }
}


std::deque<SUnit *>

SMSchedule::reorderInstructions(const SwingSchedulerDAG *SSD,

                                const std::deque<SUnit *> &Instrs) const {

  std::deque<SUnit *> NewOrderPhi;

  for (SUnit *SU : Instrs) {

    if (SU->getInstr()->isPHI())

      NewOrderPhi.push_back(SU);

  }

  std::deque<SUnit *> NewOrderI;

  for (SUnit *SU : Instrs) {

    if (!SU->getInstr()->isPHI())

      orderDependence(SSD, SU, NewOrderI);

  }

  llvm::append_range(NewOrderPhi, NewOrderI);

  return NewOrderPhi;

}


/// After the schedule has been formed, call this function to combine
/// the instructions from the different stages/cycles.  That is, this
/// function creates a schedule that represents a single iteration.
///
/// \param SSD  The DAG whose nodes were scheduled; it is asked to apply the
///             pending instruction changes and to fix register overlaps.
void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) {
  // Move all instructions to the first stage from later stages.
  for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
    // Create the first-stage slot before reading any later-stage slot.
    // Inserting a new key while a reference to another entry is held could
    // rehash the map and leave that reference dangling, so the later stages
    // are read with find(), which never inserts.
    std::deque<SUnit *> &firstStageInstrs = ScheduledInstrs[cycle];
    for (int stage = 1, lastStage = getMaxStageCount(); stage <= lastStage;
         ++stage) {
      auto laterIt = ScheduledInstrs.find(cycle + (stage * InitiationInterval));
      if (laterIt == ScheduledInstrs.end())
        continue;
      for (SUnit *SU : llvm::reverse(laterIt->second))
        firstStageInstrs.push_front(SU);
    }
  }

  // Erase all the elements in the later stages. Only one iteration should
  // remain in the scheduled list, and it contains all the instructions.
  for (int cycle = getFinalCycle() + 1; cycle <= LastCycle; ++cycle)
    ScheduledInstrs.erase(cycle);

  // Change the registers in instruction as specified in the InstrChanges
  // map. We need to use the new registers to create the correct order.
  for (const SUnit &SU : SSD->SUnits)
    SSD->applyInstrChange(SU.getInstr(), *this);

  // Reorder the instructions in each cycle to fix and improve the
  // generated code.
  for (int Cycle = getFirstCycle(), E = getFinalCycle(); Cycle <= E; ++Cycle) {
    std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[Cycle];
    cycleInstrs = reorderInstructions(SSD, cycleInstrs);
    SSD->fixupRegisterOverlaps(cycleInstrs);
  }

  LLVM_DEBUG(dump(););
}


/// Write a one-line summary of the node set followed by one line per member
/// node and a terminating blank line.
void NodeSet::print(raw_ostream &os) const {
  os << "Num nodes " << size();
  os << " rec " << RecMII;
  os << " mov " << MaxMOV;
  os << " depth " << MaxDepth;
  os << " col " << Colocate << "\n";
  for (const auto &Node : Nodes) {
    os << "   SU(" << Node->NodeNum << ") ";
    os << *(Node->getInstr());
  }
  os << "\n";
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

/// Print the schedule information to the given output.
///
/// One line is written per scheduled instruction, visiting cycles from the
/// first to the final cycle. Cycles that have no entry in the schedule are
/// skipped.
void SMSchedule::print(raw_ostream &os) const {
  // Iterate over each cycle.
  for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
    // Iterate over each instruction in the cycle.
    const_sched_iterator cycleInstrs = ScheduledInstrs.find(cycle);
    // A cycle without an entry yields end(); do not dereference it.
    if (cycleInstrs == ScheduledInstrs.end())
      continue;
    for (SUnit *CI : cycleInstrs->second) {
      os << "cycle " << cycle << " (" << stageScheduled(CI) << ") ";
      os << "(" << CI->NodeNum << ") ";
      CI->getInstr()->print(os);
      os << "\n";
    }
  }
}


/// Utility function used for debugging to print the schedule.
LLVM_DUMP_METHOD void SMSchedule::dump() const {
  raw_ostream &Out = dbgs();
  print(Out);
}

/// Utility function used for debugging to print the node set.
LLVM_DUMP_METHOD void NodeSet::dump() const {
  raw_ostream &Out = dbgs();
  print(Out);
}


void ResourceManager::dumpMRT() const {

  LLVM_DEBUG({

    if (UseDFA)

      return;

    std::stringstream SS;

    SS << "MRT:\n";

    SS << std::setw(4) << "Slot";

    for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I)

      SS << std::setw(3) << I;

    SS << std::setw(7) << "#Mops"

       << "\n";

    for (int Slot = 0; Slot < InitiationInterval; ++Slot) {

      SS << std::setw(4) << Slot;

      for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I)

        SS << std::setw(3) << MRT[Slot][I];

      SS << std::setw(7) << NumScheduledMops[Slot] << "\n";

    }

    dbgs() << SS.str();

  });

}

#endif


/// Assign a bitmask to every processor resource kind of \p SM and store it in
/// \p Masks, indexed by resource kind.
///
/// \p Masks is resized to SM.getNumProcResourceKinds(); index 0 is never
/// written by this function. A resource without sub-units receives a single
/// unique bit. A resource with sub-units (a group) receives a unique bit of
/// its own OR-ed with the masks of all of its sub-units. When SwpShowResMask
/// is set, the resulting table is printed to the debug stream.
void ResourceManager::initProcResourceVectors(
    const MCSchedModel &SM, SmallVectorImpl<uint64_t> &Masks) {
  unsigned ProcResourceID = 0;

  // We currently limit the resource kinds to 64 and below so that we can use
  // uint64_t for Masks
  assert(SM.getNumProcResourceKinds() < 64 &&
         "Too many kinds of resources, unsupported");
  // Create a unique bitmask for every processor resource unit.
  // Skip resource at index 0, since it always references 'InvalidUnit'.
  Masks.resize(SM.getNumProcResourceKinds());
  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
    const MCProcResourceDesc &Desc = *SM.getProcResource(I);
    if (Desc.SubUnitsIdxBegin)
      continue;
    Masks[I] = 1ULL << ProcResourceID;
    ProcResourceID++;
  }
  // Create a unique bitmask for every processor resource group.
  // This runs as a second pass so that the masks of all plain units are
  // already assigned when they are OR-ed into a group's mask.
  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
    const MCProcResourceDesc &Desc = *SM.getProcResource(I);
    if (!Desc.SubUnitsIdxBegin)
      continue;
    Masks[I] = 1ULL << ProcResourceID;
    for (unsigned U = 0; U < Desc.NumUnits; ++U)
      Masks[I] |= Masks[Desc.SubUnitsIdxBegin[U]];
    ProcResourceID++;
  }
  LLVM_DEBUG({
    if (SwpShowResMask) {
      dbgs() << "ProcResourceDesc:\n";
      for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
        const MCProcResourceDesc *ProcResource = SM.getProcResource(I);
        // The mask is 64 bits wide, so it needs a 64-bit conversion; "%x"
        // expects a 32-bit unsigned argument. The index and unit count are
        // unsigned, hence "%u".
        dbgs() << format(" %16s(%2u): Mask: 0x%08llx, NumUnits:%2u\n",
                         ProcResource->Name, I,
                         static_cast<unsigned long long>(Masks[I]),
                         ProcResource->NumUnits);
      }
      dbgs() << " -----------------\n";
    }
  });
}


/// Report whether the resources used by \p SU can be reserved at \p Cycle.
///
/// With UseDFA set, the question is forwarded to the DFA selected by
/// positiveModulo(Cycle, InitiationInterval). Otherwise the instruction's
/// scheduling class is booked tentatively, the table is checked with
/// isOverbooked(), and the booking is undone again. An instruction without a
/// valid scheduling class is always reported as reservable.
bool ResourceManager::canReserveResources(SUnit &SU, int Cycle) {
  LLVM_DEBUG({
    if (SwpDebugResource)
      dbgs() << "canReserveResources:\n";
  });

  if (UseDFA) {
    const auto Slot = positiveModulo(Cycle, InitiationInterval);
    return DFAResources[Slot]->canReserveResources(&SU.getInstr()->getDesc());
  }

  const MCSchedClassDesc *SchedClass = DAG->getSchedClass(&SU);
  if (!SchedClass->isValid()) {
    LLVM_DEBUG({
      dbgs() << "No valid Schedule Class Desc for schedClass!\n"
             << "isPseudo:" << SU.getInstr()->isPseudo() << "\n";
    });
    return true;
  }

  // Book, test, and roll back so the tables are left unchanged.
  reserveResources(SchedClass, Cycle);
  const bool Fits = !isOverbooked();
  unreserveResources(SchedClass, Cycle);

  LLVM_DEBUG(if (SwpDebugResource) dbgs() << "return " << Fits << "\n\n";);
  return Fits;
}


/// Reserve the resources used by \p SU at \p Cycle.
///
/// With UseDFA set, the reservation is forwarded to the DFA selected by
/// positiveModulo(Cycle, InitiationInterval). Otherwise the instruction's
/// scheduling class is booked via the MCSchedClassDesc overload; nothing is
/// booked when the scheduling class is not valid.
void ResourceManager::reserveResources(SUnit &SU, int Cycle) {
  LLVM_DEBUG({
    if (SwpDebugResource)
      dbgs() << "reserveResources:\n";
  });

  if (UseDFA) {
    const auto Slot = positiveModulo(Cycle, InitiationInterval);
    DFAResources[Slot]->reserveResources(&SU.getInstr()->getDesc());
    return;
  }

  const MCSchedClassDesc *SchedClass = DAG->getSchedClass(&SU);
  if (!SchedClass->isValid()) {
    LLVM_DEBUG({
      dbgs() << "No valid Schedule Class Desc for schedClass!\n"
             << "isPseudo:" << SU.getInstr()->isPseudo() << "\n";
    });
    return;
  }

  reserveResources(SchedClass, Cycle);

  LLVM_DEBUG({
    if (SwpDebugResource) {
      dumpMRT();
      dbgs() << "reserveResources: done!\n\n";
    }
  });
}


void ResourceManager::reserveResources(const MCSchedClassDesc *SCDesc,

                                       int Cycle) {

  assert(!UseDFA);

  for (const MCWriteProcResEntry &PRE : make_range(

           STI->getWriteProcResBegin(SCDesc), STI->getWriteProcResEnd(SCDesc)))

    for (int C = Cycle; C < Cycle + PRE.ReleaseAtCycle; ++C)

      ++MRT[positiveModulo(C, InitiationInterval)][PRE.ProcResourceIdx];


  for (int C = Cycle; C < Cycle + SCDesc->NumMicroOps; ++C)

    ++NumScheduledMops[positiveModulo(C, InitiationInterval)];

}


void ResourceManager::unreserveResources(const MCSchedClassDesc *SCDesc,

                                         int Cycle) {

  assert(!UseDFA);

  for (const MCWriteProcResEntry &PRE : make_range(

           STI->getWriteProcResBegin(SCDesc), STI->getWriteProcResEnd(SCDesc)))

    for (int C = Cycle; C < Cycle + PRE.ReleaseAtCycle; ++C)

      --MRT[positiveModulo(C, InitiationInterval)][PRE.ProcResourceIdx];


  for (int C = Cycle; C < Cycle + SCDesc->NumMicroOps; ++C)

    --NumScheduledMops[positiveModulo(C, InitiationInterval)];

}


/// Return true if any slot of the initiation interval is over capacity.
///
/// A slot is over capacity when the MRT entry of some resource (index 1 and
/// up) exceeds that resource's NumUnits, or when the slot's scheduled
/// micro-op count exceeds IssueWidth. Must not be called when UseDFA is set.
bool ResourceManager::isOverbooked() const {
  assert(!UseDFA);

  const unsigned NumKinds = SM.getNumProcResourceKinds();
  for (int Slot = 0; Slot < InitiationInterval; ++Slot) {
    for (unsigned Kind = 1; Kind < NumKinds; ++Kind)
      if (MRT[Slot][Kind] > SM.getProcResource(Kind)->NumUnits)
        return true;

    if (NumScheduledMops[Slot] > IssueWidth)
      return true;
  }

  return false;
}


int ResourceManager::calculateResMIIDFA() const {

  assert(UseDFA);


  // Sort the instructions by the number of available choices for scheduling,

  // least to most. Use the number of critical resources as the tie breaker.

  FuncUnitSorter FUS = FuncUnitSorter(*ST);

  for (SUnit &SU : DAG->SUnits)

    FUS.calcCriticalResources(*SU.getInstr());

  PriorityQueue<MachineInstr *, std::vector<MachineInstr *>, FuncUnitSorter>

      FuncUnitOrder(FUS);


  for (SUnit &SU : DAG->SUnits)

    FuncUnitOrder.push(SU.getInstr());


  SmallVector<std::unique_ptr<DFAPacketizer>, 8> Resources;

  Resources.push_back(

      std::unique_ptr<DFAPacketizer>(TII->CreateTargetScheduleState(*ST)));


  while (!FuncUnitOrder.empty()) {

    MachineInstr *MI = FuncUnitOrder.top();

    FuncUnitOrder.pop();

    if (TII->isZeroCost(MI->getOpcode()))

      continue;


    // Attempt to reserve the instruction in an existing DFA. At least one

    // DFA is needed for each cycle.

    unsigned NumCycles = DAG->getSUnit(MI)->Latency;

    unsigned ReservedCycles = 0;

    auto *RI = Resources.begin();

    auto *RE = Resources.end();

    LLVM_DEBUG({

      dbgs() << "Trying to reserve resource for " << NumCycles

             << " cycles for \n";

      MI->dump();

    });

    for (unsigned C = 0; C < NumCycles; ++C)

      while (RI != RE) {

        if ((*RI)->canReserveResources(*MI)) {

          (*RI)->reserveResources(*MI);

          ++ReservedCycles;

          break;

        }

        RI++;

      }

    LLVM_DEBUG(dbgs() << "ReservedCycles:" << ReservedCycles

                      << ", NumCycles:" << NumCycles << "\n");

    // Add new DFAs, if needed, to reserve resources.

    for (unsigned C = ReservedCycles; C < NumCycles; ++C) {

      LLVM_DEBUG(if (SwpDebugResource) dbgs()

                 << "NewResource created to reserve resources"

                 << "\n");

      auto *NewResource = TII->CreateTargetScheduleState(*ST);

      assert(NewResource->canReserveResources(*MI) && "Reserve error.");

      NewResource->reserveResources(*MI);

      Resources.push_back(std::unique_ptr<DFAPacketizer>(NewResource));

    }

  }


  int Resmii = Resources.size();

  LLVM_DEBUG(dbgs() << "Return Res MII:" << Resmii << "\n");

  return Resmii;

}


int ResourceManager::calculateResMII() const {

  if (UseDFA)

    return calculateResMIIDFA();


  // Count each resource consumption and divide it by the number of units.

  // ResMII is the max value among them.


  int NumMops = 0;

  SmallVector<uint64_t> ResourceCount(SM.getNumProcResourceKinds());

  for (SUnit &SU : DAG->SUnits) {

    if (TII->isZeroCost(SU.getInstr()->getOpcode()))

      continue;


    const MCSchedClassDesc *SCDesc = DAG->getSchedClass(&SU);

    if (!SCDesc->isValid())

      continue;


    LLVM_DEBUG({

      if (SwpDebugResource) {

        DAG->dumpNode(SU);

        dbgs() << "  #Mops: " << SCDesc->NumMicroOps << "\n"

               << "  WriteProcRes: ";

      }

    });

    NumMops += SCDesc->NumMicroOps;

    for (const MCWriteProcResEntry &PRE :

         make_range(STI->getWriteProcResBegin(SCDesc),

                    STI->getWriteProcResEnd(SCDesc))) {

      LLVM_DEBUG({

        if (SwpDebugResource) {

          const MCProcResourceDesc *Desc =

              SM.getProcResource(PRE.ProcResourceIdx);

          dbgs() << Desc->Name << ": " << PRE.ReleaseAtCycle << ", ";

        }

      });

      ResourceCount[PRE.ProcResourceIdx] += PRE.ReleaseAtCycle;

    }

    LLVM_DEBUG(if (SwpDebugResource) dbgs() << "\n");

  }


  int Result = (NumMops + IssueWidth - 1) / IssueWidth;

  LLVM_DEBUG({

    if (SwpDebugResource)

      dbgs() << "#Mops: " << NumMops << ", "

             << "IssueWidth: " << IssueWidth << ", "

             << "Cycles: " << Result << "\n";

  });


  LLVM_DEBUG({

    if (SwpDebugResource) {

      std::stringstream SS;

      SS << std::setw(2) << "ID" << std::setw(16) << "Name" << std::setw(10)

         << "Units" << std::setw(10) << "Consumed" << std::setw(10) << "Cycles"

         << "\n";

      dbgs() << SS.str();

    }

  });

  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {

    const MCProcResourceDesc *Desc = SM.getProcResource(I);

    int Cycles = (ResourceCount[I] + Desc->NumUnits - 1) / Desc->NumUnits;

    LLVM_DEBUG({

      if (SwpDebugResource) {

        std::stringstream SS;

        SS << std::setw(2) << I << std::setw(16) << Desc->Name << std::setw(10)

           << Desc->NumUnits << std::setw(10) << ResourceCount[I]

           << std::setw(10) << Cycles << "\n";

        dbgs() << SS.str();

      }

    });

    if (Cycles > Result)

      Result = Cycles;

  }

  return Result;

}


/// Reset the resource manager for initiation interval \p II.
///
/// Records \p II, then rebuilds all per-slot state with \p II entries each: a
/// freshly created target schedule state per slot in DFAResources, a zeroed
/// MRT row (one counter per processor resource kind) per slot, and a zeroed
/// micro-op counter per slot.
void ResourceManager::init(int II) {
  InitiationInterval = II;

  // One new DFA state per slot.
  DFAResources.clear();
  DFAResources.resize(II);
  auto *InstrInfo = ST->getInstrInfo();
  for (auto &SlotDFA : DFAResources)
    SlotDFA.reset(InstrInfo->CreateTargetScheduleState(*ST));

  // One zeroed row of resource counters per slot.
  const unsigned NumKinds = SM.getNumProcResourceKinds();
  MRT.clear();
  MRT.resize(II, SmallVector<uint64_t>(NumKinds));

  // One zeroed micro-op counter per slot.
  NumScheduledMops.clear();
  NumScheduledMops.resize(II);
}

MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105

UseMI
MachineInstrBuilder & UseMI
Definition: AArch64ExpandPseudoInsts.cpp:110

DefMI
MachineInstrBuilder MachineInstrBuilder & DefMI
Definition: AArch64ExpandPseudoInsts.cpp:111

MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:72

DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:74

S1
static const LLT S1
Definition: AMDGPULegalizerInfo.cpp:282

AliasAnalysis.h

ArrayRef.h

Attributes.h
This file contains the simple types necessary to represent the attributes associated with functions a...

BitVector.h
This file implements the BitVector class.

B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

A
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")

CommandLine.h

Compiler.h

LLVM_DUMP_METHOD
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:529

CycleAnalysis.h
This file declares an analysis pass that computes CycleInfo for LLVM IR, specialized from GenericCycl...

DFAPacketizer.h

Debug.h

LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101

DenseMap.h
This file defines the DenseMap class.

End
bool End
Definition: ELF_riscv.cpp:480

Writes
SmallVector< uint32_t, 0 > Writes
Definition: ELF_riscv.cpp:497

Function.h

Uses
Rewrite Partial Register Uses
Definition: GCNRewritePartialRegUses.cpp:500

DEBUG_TYPE
#define DEBUG_TYPE
Definition: GenericCycleImpl.h:30

TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:125

pred
hexagon gen pred
Definition: HexagonGenPredicate.cpp:134

MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:113

LaneBitmask.h
A common definition of LaneBitmask for use in TableGen and CodeGen.

LiveIntervals.h

MCInstrDesc.h

MCInstrItineraries.h

MCRegisterInfo.h

F
#define F(x, y, z)
Definition: MD5.cpp:55

I
#define I(x, y, z)
Definition: MD5.cpp:58

MachineBasicBlock.h

MachineDominators.h

MachineFunctionPass.h

MachineFunction.h

MachineInstrBuilder.h

MachineInstr.h

MachineLoopInfo.h

MachineMemOperand.h

MachineOperand.h

SwpForceII
static cl::opt< int > SwpForceII("pipeliner-force-ii", cl::desc("Force pipeliner to use specified II."), cl::Hidden, cl::init(-1))
A command line argument to force pipeliner to use specified initiation interval.

ExperimentalCodeGen
static cl::opt< bool > ExperimentalCodeGen("pipeliner-experimental-cg", cl::Hidden, cl::init(false), cl::desc("Use the experimental peeling code generator for software pipelining"))

pred_L
static bool pred_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Preds, const NodeSet *S=nullptr)
Compute the Pred_L(O) set, as defined in the paper.
Definition: MachinePipeliner.cpp:1888

RegPressureMargin
static cl::opt< int > RegPressureMargin("pipeliner-register-pressure-margin", cl::Hidden, cl::init(5), cl::desc("Margin representing the unused percentage of " "the register pressure limit"))

SwpDebugResource
static cl::opt< bool > SwpDebugResource("pipeliner-dbg-res", cl::Hidden, cl::init(false))

computeLiveOuts
static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, NodeSet &NS)
Compute the live-out registers for the instructions in a node-set.
Definition: MachinePipeliner.cpp:1973

computeScheduledInsts
static void computeScheduledInsts(const SwingSchedulerDAG *SSD, SMSchedule &Schedule, std::vector< MachineInstr * > &OrderedInsts, DenseMap< MachineInstr *, unsigned > &Stages)
Create an instruction stream that represents a single iteration and stage of each instruction.
Definition: MachinePipeliner.cpp:1023

EmitTestAnnotations
static cl::opt< bool > EmitTestAnnotations("pipeliner-annotate-for-testing", cl::Hidden, cl::init(false), cl::desc("Instead of emitting the pipelined code, annotate instructions " "with the generated schedule for feeding into the " "-modulo-schedule-test pass"))

isIntersect
static bool isIntersect(SmallSetVector< SUnit *, 8 > &Set1, const NodeSet &Set2, SmallSetVector< SUnit *, 8 > &Result)
Return true if Set1 contains elements in Set2.
Definition: MachinePipeliner.cpp:2183

SwpIgnoreRecMII
static cl::opt< bool > SwpIgnoreRecMII("pipeliner-ignore-recmii", cl::ReallyHidden, cl::desc("Ignore RecMII"))

SwpLoopLimit
static cl::opt< int > SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1))

succ_L
static bool succ_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Succs, const NodeSet *S=nullptr)
Compute the Succ_L(O) set, as defined in the paper.
Definition: MachinePipeliner.cpp:1917

Pipelining
Modulo Software Pipelining
Definition: MachinePipeliner.cpp:225

SwpPruneLoopCarried
static cl::opt< bool > SwpPruneLoopCarried("pipeliner-prune-loop-carried", cl::desc("Prune loop carried order dependences."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of loop carried order dependences.

isDependenceBarrier
static bool isDependenceBarrier(MachineInstr &MI)
Return true if the instruction causes a chain between memory references before and after it.
Definition: MachinePipeliner.cpp:748

SwpMaxMii
static cl::opt< int > SwpMaxMii("pipeliner-max-mii", cl::desc("Size limit for the MII."), cl::Hidden, cl::init(27))
A command line argument to limit the minimum initiation interval for pipelining.

getLoopPhiReg
static unsigned getLoopPhiReg(const MachineInstr &Phi, const MachineBasicBlock *LoopBB)
Return the Phi register value that comes from the loop block.
Definition: MachinePipeliner.cpp:716

swapAntiDependences
static void swapAntiDependences(std::vector< SUnit > &SUnits)
Swap all the anti dependences in the DAG.
Definition: MachinePipeliner.cpp:1570

isSuccOrder
static bool isSuccOrder(SUnit *SUa, SUnit *SUb)
Return true if SUb can be reached from SUa following the chain edges.
Definition: MachinePipeliner.cpp:725

SwpMaxStages
static cl::opt< int > SwpMaxStages("pipeliner-max-stages", cl::desc("Maximum stages allowed in the generated scheduled."), cl::Hidden, cl::init(3))
A command line argument to limit the number of stages in the pipeline.

EnableSWPOptSize
static cl::opt< bool > EnableSWPOptSize("enable-pipeliner-opt-size", cl::desc("Enable SWP at Os."), cl::Hidden, cl::init(false))
A command line option to enable SWP at -Os.

SwpShowResMask
static cl::opt< bool > SwpShowResMask("pipeliner-show-mask", cl::Hidden, cl::init(false))

SwpIISearchRange
static cl::opt< int > SwpIISearchRange("pipeliner-ii-search-range", cl::desc("Range to search for II"), cl::Hidden, cl::init(10))

getPhiRegs
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)
Return the register values for the operands of a Phi instruction.
Definition: MachinePipeliner.cpp:700

LimitRegPressure
static cl::opt< bool > LimitRegPressure("pipeliner-register-pressure", cl::Hidden, cl::init(false), cl::desc("Limit register pressure of scheduled loop"))

DEBUG_TYPE
#define DEBUG_TYPE
Definition: MachinePipeliner.cpp:103

EnableSWP
static cl::opt< bool > EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), cl::desc("Enable Software Pipelining"))
A command line option to turn software pipelining on or off.

ignoreDependence
static bool ignoreDependence(const SDep &D, bool isPred)
Return true for DAG nodes that we ignore when computing the cost functions.
Definition: MachinePipeliner.cpp:1802

SwpPruneDeps
static cl::opt< bool > SwpPruneDeps("pipeliner-prune-deps", cl::desc("Prune dependences between unrelated Phi nodes."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of chain dependences due to an unrelated Phi.

multipleIterations
static SUnit * multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG)
If an instruction has a use that spans multiple iterations, then return true.
Definition: MachinePipeliner.cpp:2850

computePath
static bool computePath(SUnit *Cur, SetVector< SUnit * > &Path, SetVector< SUnit * > &DestNodes, SetVector< SUnit * > &Exclude, SmallPtrSet< SUnit *, 8 > &Visited)
Return true if there is a path from the specified node to any of the nodes in DestNodes.
Definition: MachinePipeliner.cpp:1944

MachinePipeliner.h

MachineRegisterInfo.h

TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1875

Reg
unsigned Reg
Definition: MachineSink.cpp:1874

MapVector.h
This file implements a map that provides insertion order iteration.

MathExtras.h

MemoryLocation.h
This file provides utility analysis objects describing memory locations.

ModuloSchedule.h

OptimizationRemarkEmitter.h

P
#define P(N)

INITIALIZE_PASS_DEPENDENCY
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55

INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59

INITIALIZE_PASS_BEGIN
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52

Pass.h

PriorityQueue.h
This file defines the PriorityQueue class.

RegisterClassInfo.h

RegisterPressure.h

Register.h

assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

STLExtras.h
This file contains some templates that are useful if you are working with the STL at all.

ScheduleDAGMutation.h

ScheduleDAG.h

SetOperations.h
This file defines generic set operations that may be used on sets of different types,...

SetVector.h
This file implements a set that has insertion order iteration characteristics.

SmallPtrSet.h
This file defines the SmallPtrSet class.

SmallSet.h
This file defines the SmallSet class.

SmallVector.h
This file defines the SmallVector class.

Statistic.h
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

STATISTIC
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167

TargetInstrInfo.h

TargetOpcodes.h

TargetRegisterInfo.h

TargetSubtargetInfo.h

ValueTracking.h

getSize
static unsigned getSize(unsigned Kind)
Definition: XtensaAsmBackend.cpp:135

llvm::AAResultsWrapperPass
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Definition: AliasAnalysis.h:960

llvm::AAResults
Definition: AliasAnalysis.h:307

llvm::AAResults::isNoAlias
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A trivial helper function to check to see if the specified pointers are no-alias.
Definition: AliasAnalysis.h:361

llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47

llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75

llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98

llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41

llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:60

llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:221

llvm::BitVector
Definition: BitVector.h:82

llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33

llvm::DenseMapBase::lookup
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:202

llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155

llvm::DenseMapBase::erase
bool erase(const KeyT &Val)
Definition: DenseMap.h:329

llvm::DenseMapBase::empty
bool empty() const
Definition: DenseMap.h:98

llvm::DenseMapBase::end
iterator end()
Definition: DenseMap.h:84

llvm::DenseMapIterator
Definition: DenseMap.h:1221

llvm::DenseMap
Definition: DenseMap.h:742

llvm::DenseSet
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271

llvm::Function::getContext
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:356

llvm::GenericCycle
A possibly irreducible generalization of a Loop.
Definition: GenericCycleInfo.h:44

llvm::InstrItineraryData
Itinerary data supplied by a subtarget to be used by a target.
Definition: MCInstrItineraries.h:110

llvm::InstrItineraryData::beginStage
const InstrStage * beginStage(unsigned ItinClassIndx) const
Return the first stage of the itinerary.
Definition: MCInstrItineraries.h:136

llvm::InstrItineraryData::endStage
const InstrStage * endStage(unsigned ItinClassIndx) const
Return the last+1 stage of the itinerary.
Definition: MCInstrItineraries.h:142

llvm::InstrItineraryData::isEmpty
bool isEmpty() const
Returns true if there are no itineraries.
Definition: MCInstrItineraries.h:127

llvm::Instruction
Definition: Instruction.h:49

llvm::Instruction::getMetadata
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:359

llvm::LiveIntervals
Definition: LiveIntervals.h:53

llvm::LocationSize
Definition: MemoryLocation.h:69

llvm::LocationSize::hasValue
bool hasValue() const
Definition: MemoryLocation.h:166

llvm::LocationSize::getValue
TypeSize getValue() const
Definition: MemoryLocation.h:171

llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: GenericLoopInfo.h:90

llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44

llvm::Loop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:631

llvm::MCInstrDesc::getSchedClass
unsigned getSchedClass() const
Return the scheduling class for this instruction.
Definition: MCInstrDesc.h:600

llvm::MCInstrInfo::get
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63

llvm::MCSubtargetInfo
Generic base class for all target subtargets.
Definition: MCSubtargetInfo.h:76

llvm::MCSubtargetInfo::getWriteProcResEnd
const MCWriteProcResEntry * getWriteProcResEnd(const MCSchedClassDesc *SC) const
Definition: MCSubtargetInfo.h:171

llvm::MCSubtargetInfo::getWriteProcResBegin
const MCWriteProcResEntry * getWriteProcResBegin(const MCSchedClassDesc *SC) const
Return an iterator at the first process resource consumed by the given scheduling class.
Definition: MCSubtargetInfo.h:167

llvm::MCSubtargetInfo::getSchedModel
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
Definition: MCSubtargetInfo.h:163

llvm::MDNode
Metadata node.
Definition: Metadata.h:1067

llvm::MDNode::getOperand
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1428

llvm::MDNode::operands
ArrayRef< MDOperand > operands() const
Definition: Metadata.h:1426

llvm::MDNode::getNumOperands
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1434

llvm::MDOperand
Tracking metadata reference owned by Metadata.
Definition: Metadata.h:889

llvm::MDString
A single uniqued string.
Definition: Metadata.h:720

llvm::MDString::getString
StringRef getString() const
Definition: Metadata.cpp:610

llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:102

llvm::MachineBasicBlock::getBasicBlock
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
Definition: MachineBasicBlock.h:233

llvm::MachineBasicBlock::getFirstTerminator
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Definition: MachineBasicBlock.cpp:242

llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:329

llvm::MachineBasicBlock::findDebugLoc
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
Definition: MachineBasicBlock.cpp:1500

llvm::MachineBasicBlock::instr_end
instr_iterator instr_end()
Definition: MachineBasicBlock.h:315

llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:331

llvm::MachineBasicBlock::size
unsigned size() const
Definition: MachineBasicBlock.h:299

llvm::MachineDominatorTree
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
Definition: MachineDominators.h:51

llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:168

llvm::MachineFunction
Definition: MachineFunction.h:259

llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:718

llvm::MachineFunction::deleteMachineInstr
void deleteMachineInstr(MachineInstr *MI)
DeleteMachineInstr - Delete the given MachineInstr.
Definition: MachineFunction.cpp:441

llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:728

llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:684

llvm::MachineFunction::CloneMachineInstr
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
Definition: MachineFunction.cpp:406

llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:98

llvm::MachineInstrBundleIterator< MachineInstr >

llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:69

llvm::MachineInstr::mayRaiseFPException
bool mayRaiseFPException() const
Return true if this instruction could possibly raise a floating-point exception.
Definition: MachineInstr.h:1137

llvm::MachineInstr::getOpcode
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:546

llvm::MachineInstr::mayLoadOrStore
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
Definition: MachineInstr.h:1127

llvm::MachineInstr::isCopy
bool isCopy() const
Definition: MachineInstr.h:1394

llvm::MachineInstr::getParent
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:329

llvm::MachineInstr::getDesc
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:543

llvm::MachineInstr::hasUnmodeledSideEffects
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
Definition: MachineInstr.cpp:1508

llvm::MachineInstr::isRegSequence
bool isRegSequence() const
Definition: MachineInstr.h:1386

llvm::MachineInstr::memoperands_begin
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:777

llvm::MachineInstr::isIdenticalTo
bool isIdenticalTo(const MachineInstr &Other, MICheckType Check=CheckDefs) const
Return true if this instruction is identical to Other.
Definition: MachineInstr.cpp:625

llvm::MachineInstr::hasOrderedMemoryRef
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
Definition: MachineInstr.cpp:1434

llvm::MachineInstr::print
void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
Definition: MachineInstr.cpp:1636

llvm::MachineInstr::isPseudo
bool isPseudo(QueryType Type=IgnoreBundle) const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
Definition: MachineInstr.h:898

llvm::MachineInstr::dump
void dump() const
Definition: MachineInstr.cpp:1599

llvm::MachineInstr::isPHI
bool isPHI() const
Definition: MachineInstr.h:1360

llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:556

llvm::MachineInstr::all_defs
iterator_range< filtered_mop_iterator > all_defs()
Returns an iterator range over all operands that are (explicit or implicit) register defs.
Definition: MachineInstr.h:733

llvm::MachineLoopInfo
Definition: MachineLoopInfo.h:104

llvm::MachineLoop
Definition: MachineLoopInfo.h:45

llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:129

llvm::MachineMemOperand::getAAInfo
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
Definition: MachineMemOperand.h:265

llvm::MachineMemOperand::getValue
const Value * getValue() const
Return the base address of the memory access.
Definition: MachineMemOperand.h:212

llvm::MachineMemOperand::getOffset
int64_t getOffset() const
For normal values, this is a byte offset added to the base address.
Definition: MachineMemOperand.h:230

llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:48

llvm::MachineOperand::setSubReg
void setSubReg(unsigned subReg)
Definition: MachineOperand.h:490

llvm::MachineOperand::getSubReg
unsigned getSubReg() const
Definition: MachineOperand.h:374

llvm::MachineOperand::setImm
void setImm(int64_t immVal)
Definition: MachineOperand.h:684

llvm::MachineOperand::getImm
int64_t getImm() const
Definition: MachineOperand.h:556

llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition: MachineOperand.h:329

llvm::MachineOperand::setReg
void setReg(Register Reg)
Change the register this operand corresponds to.
Definition: MachineOperand.cpp:61

llvm::MachineOperand::isUse
bool isUse() const
Definition: MachineOperand.h:379

llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:369

llvm::MachineOperand::isIdenticalTo
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
Definition: MachineOperand.cpp:319

llvm::MachineOptimizationRemarkAnalysis
Diagnostic information for optimization analysis remarks.
Definition: MachineOptimizationRemarkEmitter.h:110

llvm::MachineOptimizationRemarkEmitterPass
The analysis pass.
Definition: MachineOptimizationRemarkEmitter.h:220

llvm::MachineOptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Emit an optimization remark.
Definition: MachineOptimizationRemarkEmitter.cpp:49

llvm::MachineOptimizationRemarkMissed
Diagnostic information for missed-optimization remarks.
Definition: MachineOptimizationRemarkEmitter.h:84

llvm::MachineOptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: MachineOptimizationRemarkEmitter.h:58

llvm::MachinePipeliner
The main class in the implementation of the target independent software pipeliner pass.
Definition: MachinePipeliner.h:66

llvm::MachinePipeliner::TII
const TargetInstrInfo * TII
Definition: MachinePipeliner.h:73

llvm::MachinePipeliner::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachinePipeliner.cpp:480

llvm::MachinePipeliner::MF
MachineFunction * MF
Definition: MachinePipeliner.h:68

llvm::MachinePipeliner::LI
LoopInfo LI
Definition: MachinePipeliner.h:92

llvm::MachinePipeliner::NumTries
static int NumTries
Definition: MachinePipeliner.h:79

llvm::MachinePipeliner::disabledByPragma
bool disabledByPragma
Definition: MachinePipeliner.h:75

llvm::MachinePipeliner::II_setByPragma
unsigned II_setByPragma
Definition: MachinePipeliner.h:76

llvm::MachinePipeliner::ORE
MachineOptimizationRemarkEmitter * ORE
Definition: MachinePipeliner.h:69

llvm::MachinePipeliner::RegClassInfo
RegisterClassInfo RegClassInfo
Definition: MachinePipeliner.h:74

llvm::MachinePipeliner::ID
static char ID
Definition: MachinePipeliner.h:94

llvm::MachineRegisterInfo::defusechain_instr_iterator
defusechain_iterator - This class provides iterator support for machine operands in the function that...
Definition: MachineRegisterInfo.h:1145

llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:51

llvm::MachineRegisterInfo::getVRegDef
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Definition: MachineRegisterInfo.cpp:407

llvm::MachineRegisterInfo::use_instr_begin
use_instr_iterator use_instr_begin(Register RegNo) const
Definition: MachineRegisterInfo.h:491

llvm::MachineRegisterInfo::use_instr_end
static use_instr_iterator use_instr_end()
Definition: MachineRegisterInfo.h:494

llvm::MachineRegisterInfo::getUniqueVRegDef
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
Definition: MachineRegisterInfo.cpp:418

llvm::MapVector
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36

llvm::MapVector::end
iterator end()
Definition: MapVector.h:71

llvm::MapVector::find
iterator find(const KeyT &Key)
Definition: MapVector.h:167

llvm::MapVector::clear
void clear()
Definition: MapVector.h:88

llvm::MemoryLocation::getAfter
static MemoryLocation getAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location after Ptr, while remaining within the underlying objec...
Definition: MemoryLocation.h:288

llvm::ModuloScheduleExpander
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
Definition: ModuloSchedule.h:161

llvm::ModuloScheduleExpander::cleanup
void cleanup()
Performs final cleanup after expansion.
Definition: ModuloSchedule.cpp:181

llvm::ModuloScheduleExpander::expand
void expand()
Performs the actual expansion.
Definition: ModuloSchedule.cpp:67

llvm::ModuloScheduleTestAnnotater
Expander that simply annotates each scheduled instruction with a post-instr symbol that can be consum...
Definition: ModuloSchedule.h:379

llvm::ModuloScheduleTestAnnotater::annotate
void annotate()
Performs the annotation.
Definition: ModuloSchedule.cpp:2200

llvm::ModuloSchedule
Represents a schedule for a single-block loop.
Definition: ModuloSchedule.h:80

llvm::NodeSet
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
Definition: MachinePipeliner.h:323

llvm::NodeSet::getNode
SUnit * getNode(unsigned i) const
Definition: MachinePipeliner.h:373

llvm::NodeSet::print
void print(raw_ostream &os) const
Definition: MachinePipeliner.cpp:3389

llvm::NodeSet::setRecMII
void setRecMII(unsigned mii)
Definition: MachinePipeliner.h:375

llvm::NodeSet::count
unsigned count(SUnit *SU) const
Definition: MachinePipeliner.h:365

llvm::NodeSet::setColocate
void setColocate(unsigned c)
Definition: MachinePipeliner.h:377

llvm::NodeSet::getRecMII
int getRecMII()
Definition: MachinePipeliner.h:385

llvm::NodeSet::compareRecMII
int compareRecMII(NodeSet &RHS)
Definition: MachinePipeliner.h:383

llvm::NodeSet::size
unsigned size() const
Definition: MachinePipeliner.h:369

llvm::NodeSet::insert
bool insert(SUnit *SU)
Definition: MachinePipeliner.h:357

llvm::NodeSet::dump
LLVM_DUMP_METHOD void dump() const
Definition: MachinePipeliner.cpp:3415

llvm::NodeSet::empty
bool empty() const
Definition: MachinePipeliner.h:371

llvm::NodeSet::clear
void clear()
Definition: MachinePipeliner.h:399

llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:94

llvm::Pass::getAnalysis
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
Definition: PassAnalysisSupport.h:230

llvm::Pass::dump
void dump() const
Definition: Pass.cpp:136

llvm::PeelingModuloScheduleExpander
A reimplementation of ModuloScheduleExpander.
Definition: ModuloSchedule.h:283

llvm::PeelingModuloScheduleExpander::expand
void expand()
Definition: ModuloSchedule.cpp:2001

llvm::PressureChange::getUnitInc
int getUnitInc() const
Definition: RegisterPressure.h:124

llvm::PressureChange::isValid
bool isValid() const
Definition: RegisterPressure.h:112

llvm::PressureChange::getPSet
unsigned getPSet() const
Definition: RegisterPressure.h:114

llvm::PriorityQueue
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Definition: PriorityQueue.h:28

llvm::RegPressureTracker
Track the current register pressure at some position in the instruction stream, and remember the high...
Definition: RegisterPressure.h:359

llvm::RegPressureTracker::addLiveRegs
void addLiveRegs(ArrayRef< RegisterMaskPair > Regs)
Force liveness of virtual registers or physical register units.
Definition: RegisterPressure.cpp:698

llvm::RegisterClassInfo
Definition: RegisterClassInfo.h:29

llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19

llvm::Register::isPhysicalRegister
static constexpr bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:65

llvm::ResourceManager
Definition: MachinePipeliner.h:446

llvm::ResourceManager::calculateResMII
int calculateResMII() const
Definition: MachinePipeliner.cpp:3636

llvm::ResourceManager::initProcResourceVectors
void initProcResourceVectors(const MCSchedModel &SM, SmallVectorImpl< uint64_t > &Masks)
Definition: MachinePipeliner.cpp:3439

llvm::ResourceManager::init
void init(int II)
Initialize resources with the initiation interval II.
Definition: MachinePipeliner.cpp:3711

llvm::ResourceManager::canReserveResources
bool canReserveResources(SUnit &SU, int Cycle)
Check if the resources occupied by a machine instruction are available in the current state.
Definition: MachinePipeliner.cpp:3481

llvm::SDep
Scheduling dependency.
Definition: ScheduleDAG.h:49

llvm::SDep::getSUnit
SUnit * getSUnit() const
Definition: ScheduleDAG.h:480

llvm::SDep::getKind
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:486

llvm::SDep::Kind
Kind
These are the different kinds of scheduling dependencies.
Definition: ScheduleDAG.h:52

llvm::SDep::Output
@ Output
A register output-dependence (aka WAW).
Definition: ScheduleDAG.h:55

llvm::SDep::Order
@ Order
Any other ordering dependency.
Definition: ScheduleDAG.h:56

llvm::SDep::Anti
@ Anti
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:54

llvm::SDep::Data
@ Data
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:53

llvm::SDep::setLatency
void setLatency(unsigned Lat)
Sets the latency for this edge.
Definition: ScheduleDAG.h:147

llvm::SDep::Barrier
@ Barrier
An unknown scheduling barrier.
Definition: ScheduleDAG.h:69

llvm::SDep::Artificial
@ Artificial
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:72

llvm::SDep::getLatency
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
Definition: ScheduleDAG.h:142

llvm::SDep::isArtificial
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
Definition: ScheduleDAG.h:200

llvm::SMSchedule
This class represents the scheduled code.
Definition: MachinePipeliner.h:534

llvm::SMSchedule::reorderInstructions
std::deque< SUnit * > reorderInstructions(const SwingSchedulerDAG *SSD, const std::deque< SUnit * > &Instrs) const
Definition: MachinePipeliner.cpp:3337

llvm::SMSchedule::earliestCycleInChain
int earliestCycleInChain(const SDep &Dep)
Return the cycle of the earliest scheduled instruction in the dependence chain.
Definition: MachinePipeliner.cpp:2802

llvm::SMSchedule::setInitiationInterval
void setInitiationInterval(int ii)
Set the initiation interval for this schedule.
Definition: MachinePipeliner.h:574

llvm::SMSchedule::computeUnpipelineableNodes
SmallSet< SUnit *, 8 > computeUnpipelineableNodes(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)
Determine transitive dependences of unpipelineable instructions.
Definition: MachinePipeliner.cpp:3100

llvm::SMSchedule::dump
void dump() const
Utility function used for debugging to print the schedule.
Definition: MachinePipeliner.cpp:3414

llvm::SMSchedule::insert
bool insert(SUnit *SU, int StartCycle, int EndCycle, int II)
Try to schedule the node at the specified StartCycle and continue until the node is scheduled or the E...
Definition: MachinePipeliner.cpp:2762

llvm::SMSchedule::getMaxStageCount
unsigned getMaxStageCount()
Return the maximum stage count needed for this schedule.
Definition: MachinePipeliner.h:629

llvm::SMSchedule::print
void print(raw_ostream &os) const
Print the schedule information to the given output.
Definition: MachinePipeliner.cpp:3399

llvm::SMSchedule::reset
void reset()
Definition: MachinePipeliner.h:565

llvm::SMSchedule::latestCycleInChain
int latestCycleInChain(const SDep &Dep)
Return the cycle of the latest scheduled instruction in the dependence chain.
Definition: MachinePipeliner.cpp:2825

llvm::SMSchedule::stageScheduled
int stageScheduled(SUnit *SU) const
Return the stage for a scheduled instruction.
Definition: MachinePipeliner.h:613

llvm::SMSchedule::orderDependence
void orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU, std::deque< SUnit * > &Insts) const
Order the instructions within a cycle so that the definitions occur before the uses.
Definition: MachinePipeliner.cpp:2922

llvm::SMSchedule::computeStart
void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG)
Compute the scheduling start slot for the instruction.
Definition: MachinePipeliner.cpp:2861

llvm::SMSchedule::isValidSchedule
bool isValidSchedule(SwingSchedulerDAG *SSD)
Definition: MachinePipeliner.cpp:3169

llvm::SMSchedule::getInitiationInterval
int getInitiationInterval() const
Return the initiation interval for this schedule.
Definition: MachinePipeliner.h:580

llvm::SMSchedule::getInstructions
std::deque< SUnit * > & getInstructions(int cycle)
Return the instructions that are scheduled at the specified cycle.
Definition: MachinePipeliner.h:634

llvm::SMSchedule::getFirstCycle
int getFirstCycle() const
Return the first cycle in the completed schedule.
Definition: MachinePipeliner.h:584

llvm::SMSchedule::isLoopCarriedDefOfUse
bool isLoopCarriedDefOfUse(const SwingSchedulerDAG *SSD, MachineInstr *Def, MachineOperand &MO) const
Return true if the instruction is a definition that is loop carried and defines the use on the next i...
Definition: MachinePipeliner.cpp:3079

llvm::SMSchedule::cycleScheduled
unsigned cycleScheduled(SUnit *SU) const
Return the cycle for a scheduled instruction.
Definition: MachinePipeliner.h:622

llvm::SMSchedule::normalizeNonPipelinedInstructions
bool normalizeNonPipelinedInstructions(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)
Definition: MachinePipeliner.cpp:3127

llvm::SMSchedule::isLoopCarried
bool isLoopCarried(const SwingSchedulerDAG *SSD, MachineInstr &Phi) const
Return true if the scheduled Phi has a loop carried operand.
Definition: MachinePipeliner.cpp:3050

llvm::SMSchedule::getFinalCycle
int getFinalCycle() const
Return the last cycle in the finalized schedule.
Definition: MachinePipeliner.h:587

llvm::SMSchedule::finalizeSchedule
void finalizeSchedule(SwingSchedulerDAG *SSD)
After the schedule has been formed, call this function to combine the instructions from the different...
Definition: MachinePipeliner.cpp:3356

llvm::SUnit
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242

llvm::SUnit::NumPreds
unsigned NumPreds
Definition: ScheduleDAG.h:266

llvm::SUnit::isInstr
bool isInstr() const
Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.
Definition: ScheduleDAG.h:362

llvm::SUnit::NodeNum
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:264

llvm::SUnit::setInstr
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
Definition: ScheduleDAG.h:366

llvm::SUnit::removePred
void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
Definition: ScheduleDAG.cpp:174

llvm::SUnit::isPred
bool isPred(const SUnit *N) const
Tests if node N is a predecessor of this node.
Definition: ScheduleDAG.h:431

llvm::SUnit::Latency
unsigned short Latency
Node latency.
Definition: ScheduleDAG.h:273

llvm::SUnit::isBoundaryNode
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
Definition: ScheduleDAG.h:344

llvm::SUnit::hasPhysRegDefs
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:280

llvm::SUnit::Succs
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:257

llvm::SUnit::Preds
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:256

llvm::SUnit::addPred
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
Definition: ScheduleDAG.cpp:106

llvm::SUnit::getInstr
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:373

llvm::ScheduleDAGInstrs
A ScheduleDAG for scheduling lists of MachineInstr.
Definition: ScheduleDAGInstrs.h:121

llvm::ScheduleDAGInstrs::MISUnitMap
DenseMap< MachineInstr *, SUnit * > MISUnitMap
After calling BuildSchedGraph, each machine instruction in the current scheduling region is mapped to...
Definition: ScheduleDAGInstrs.h:160

llvm::ScheduleDAGInstrs::finishBlock
virtual void finishBlock()
Cleans up after scheduling in the given block.
Definition: ScheduleDAGInstrs.cpp:184

llvm::ScheduleDAGInstrs::BB
MachineBasicBlock * BB
The block in which to insert instructions.
Definition: ScheduleDAGInstrs.h:147

llvm::ScheduleDAGInstrs::getSchedClass
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and caches a resolved scheduling class for an SUnit.
Definition: ScheduleDAGInstrs.h:279

llvm::ScheduleDAGInstrs::dumpNode
void dumpNode(const SUnit &SU) const override
Definition: ScheduleDAGInstrs.cpp:1178

llvm::ScheduleDAGInstrs::UnknownValue
UndefValue * UnknownValue
For an unanalyzable memory access, this Value is used in maps.
Definition: ScheduleDAGInstrs.h:250

llvm::ScheduleDAGInstrs::buildSchedGraph
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
Definition: ScheduleDAGInstrs.cpp:739

llvm::ScheduleDAGInstrs::getSUnit
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
Definition: ScheduleDAGInstrs.h:404

llvm::ScheduleDAGInstrs::SchedModel
TargetSchedModel SchedModel
TargetSchedModel provides an interface to the machine model.
Definition: ScheduleDAGInstrs.h:127

llvm::ScheduleDAGInstrs::dump
void dump() const override
Definition: ScheduleDAGInstrs.cpp:1189

llvm::ScheduleDAGTopologicalSort::RemovePred
void RemovePred(SUnit *M, SUnit *N)
Updates the topological ordering to accommodate an edge to be removed from the specified node N from ...
Definition: ScheduleDAG.cpp:567

llvm::ScheduleDAGTopologicalSort::InitDAGTopologicalSorting
void InitDAGTopologicalSorting()
Creates the initial topological ordering from the DAG to be scheduled.
Definition: ScheduleDAG.cpp:439

llvm::ScheduleDAGTopologicalSort::AddPred
void AddPred(SUnit *Y, SUnit *X)
Updates the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y.
Definition: ScheduleDAG.cpp:549

llvm::ScheduleDAGTopologicalSort::IsReachable
bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
Definition: ScheduleDAG.cpp:724

llvm::ScheduleDAG::MRI
MachineRegisterInfo & MRI
Virtual/real register map.
Definition: ScheduleDAG.h:560

llvm::ScheduleDAG::TII
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:557

llvm::ScheduleDAG::SUnits
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:561

llvm::ScheduleDAG::TRI
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:558

llvm::ScheduleDAG::MF
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:559

llvm::SetVector
A vector that has set insertion semantics.
Definition: SetVector.h:57

llvm::SetVector::size
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98

llvm::SetVector::iterator
typename vector_type::const_iterator iterator
Definition: SetVector.h:69

llvm::SetVector::clear
void clear()
Completely clear the SetVector.
Definition: SetVector.h:273

llvm::SetVector::count
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:264

llvm::SetVector::empty
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93

llvm::SetVector::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162

llvm::SetVector::contains
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
Definition: SetVector.h:254

llvm::SlotIndexes
SlotIndexes pass.
Definition: SlotIndexes.h:300

llvm::SlotIndexes::insertMachineInstrInMaps
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Definition: SlotIndexes.h:523

llvm::SmallDenseSet
Implements a dense probed hash-table based set with some number of buckets stored inline.
Definition: DenseSet.h:290

llvm::SmallPtrSetImplBase::clear
void clear()
Definition: SmallPtrSet.h:96

llvm::SmallPtrSetImplBase::empty
bool empty() const
Definition: SmallPtrSet.h:93

llvm::SmallPtrSetImpl::erase
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:356

llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360

llvm::SmallPtrSetImpl::end
iterator end() const
Definition: SmallPtrSet.h:385

llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342

llvm::SmallPtrSetImpl::begin
iterator begin() const
Definition: SmallPtrSet.h:380

llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
Definition: SmallPtrSet.h:427

llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370

llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135

llvm::SmallSet::count
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166

llvm::SmallSet::contains
bool contains(const T &V) const
Check if the SmallSet contains the given element.
Definition: SmallSet.h:236

llvm::SmallSet::insert
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179

llvm::SmallVectorBase::empty
bool empty() const
Definition: SmallVector.h:94

llvm::SmallVectorBase::size
size_t size() const
Definition: SmallVector.h:91

llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586

llvm::SmallVectorImpl::pop_back_val
T pop_back_val()
Definition: SmallVector.h:686

llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:623

llvm::SmallVectorImpl::resize
void resize(size_type N)
Definition: SmallVector.h:651

llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition: SmallVector.h:426

llvm::SmallVectorTemplateCommon::end
iterator end()
Definition: SmallVector.h:282

llvm::SmallVectorTemplateCommon::begin
iterator begin()
Definition: SmallVector.h:280

llvm::SmallVectorTemplateCommon::iterator
T * iterator
Definition: SmallVector.h:264

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209

llvm::SwingSchedulerDAG
This class builds the dependence graph for the instructions in a loop, and attempts to schedule the i...
Definition: MachinePipeliner.h:114

llvm::SwingSchedulerDAG::getInstrBaseReg
unsigned getInstrBaseReg(SUnit *SU) const
Return the new base register that was stored away for the changed instruction.
Definition: MachinePipeliner.h:276

llvm::SwingSchedulerDAG::getDepth
unsigned getDepth(SUnit *Node)
The depth, in the dependence graph, for a node.
Definition: MachinePipeliner.h:232

llvm::SwingSchedulerDAG::getASAP
int getASAP(SUnit *Node)
Return the earliest time an instruction may be scheduled.
Definition: MachinePipeliner.h:222

llvm::SwingSchedulerDAG::applyInstrChange
void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule)
Apply changes to the instruction if needed.
Definition: MachinePipeliner.cpp:2610

llvm::SwingSchedulerDAG::finishBlock
void finishBlock() override
Clean up after the software pipeliner runs.
Definition: MachinePipeliner.cpp:689

llvm::SwingSchedulerDAG::fixupRegisterOverlaps
void fixupRegisterOverlaps(std::deque< SUnit * > &Instrs)
Attempt to fix the degenerate cases when the instruction serialization causes the register lifetimes ...
Definition: MachinePipeliner.cpp:3290

llvm::SwingSchedulerDAG::getZeroLatencyDepth
int getZeroLatencyDepth(SUnit *Node)
The maximum unweighted length of a path from an arbitrary node to the given node in which each edge h...
Definition: MachinePipeliner.h:236

llvm::SwingSchedulerDAG::isLoopCarriedDep
bool isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc=true)
Return true for an order or output dependence that is loop carried potentially.
Definition: MachinePipeliner.cpp:2665

llvm::SwingSchedulerDAG::getDistance
unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep)
The distance function, which indicates that operation V of iteration I depends on operations U of ite...
Definition: MachinePipeliner.h:262

llvm::SwingSchedulerDAG::schedule
void schedule() override
We override the schedule function in ScheduleDAGInstrs to implement the scheduling part of the Swing ...
Definition: MachinePipeliner.cpp:510

llvm::SwingSchedulerDAG::getMOV
int getMOV(SUnit *Node)
The mobility function, which is the number of slots in which an instruction may be scheduled.
Definition: MachinePipeliner.h:229

llvm::SwingSchedulerDAG::getZeroLatencyHeight
int getZeroLatencyHeight(SUnit *Node)
The maximum unweighted length of a path from the given node to an arbitrary node in which each edge h...
Definition: MachinePipeliner.h:245

llvm::SwingSchedulerDAG::isBackedge
bool isBackedge(SUnit *Source, const SDep &Dep)
Return true if the dependence is a back-edge in the data dependence graph.
Definition: MachinePipeliner.h:252

llvm::SwingSchedulerDAG::getHeight
unsigned getHeight(SUnit *Node)
The height, in the dependence graph, for a node.
Definition: MachinePipeliner.h:241

llvm::SwingSchedulerDAG::getALAP
int getALAP(SUnit *Node)
Return the latest time an instruction may be scheduled.
Definition: MachinePipeliner.h:225

llvm::TargetInstrInfo::PipelinerLoopInfo
Object returned by analyzeLoopForPipelining.
Definition: TargetInstrInfo.h:740

llvm::TargetInstrInfo::PipelinerLoopInfo::shouldIgnoreForPipelining
virtual bool shouldIgnoreForPipelining(const MachineInstr *MI) const =0
Return true if the given instruction should not be pipelined and should be ignored.

llvm::TargetInstrInfo::PipelinerLoopInfo::shouldUseSchedule
virtual bool shouldUseSchedule(SwingSchedulerDAG &SSD, SMSchedule &SMS)
Return true if the proposed schedule should be used.
Definition: TargetInstrInfo.h:751

llvm::TargetInstrInfo::analyzeLoopForPipelining
virtual std::unique_ptr< PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
Definition: TargetInstrInfo.h:788

llvm::TargetInstrInfo::isZeroCost
bool isZeroCost(unsigned Opcode) const
Return true for pseudo instructions that don't consume any machine resources in their current form.
Definition: TargetInstrInfo.h:1741

llvm::TargetInstrInfo::analyzeBranch
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
Definition: TargetInstrInfo.h:650

llvm::TargetInstrInfo::CreateTargetScheduleState
virtual DFAPacketizer * CreateTargetScheduleState(const TargetSubtargetInfo &) const
Create machine specific model for scheduling.
Definition: TargetInstrInfo.h:1924

llvm::TargetInstrInfo::isPostIncrement
virtual bool isPostIncrement(const MachineInstr &MI) const
Return true for post-incremented instructions.
Definition: TargetInstrInfo.h:1566

llvm::TargetInstrInfo::getBaseAndOffsetPosition
virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const
Return true if the instruction contains a base register and offset.
Definition: TargetInstrInfo.h:1469

llvm::TargetInstrInfo::areMemAccessesTriviallyDisjoint
virtual bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const
Sometimes, it is possible for the target to tell, even without aliasing information,...
Definition: TargetInstrInfo.h:1940

llvm::TargetInstrInfo::getIncrementValue
virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const
If the instruction is an increment of a constant value, return the amount.
Definition: TargetInstrInfo.h:1517

llvm::TargetInstrInfo::getMemOperandWithOffset
bool getMemOperandWithOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const
Get the base operand and byte offset of an instruction that reads/writes memory.
Definition: TargetInstrInfo.cpp:1424

llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:45

llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:238

llvm::TargetRegisterInfo::getRegPressureSetName
virtual const char * getRegPressureSetName(unsigned Idx) const =0
Get the name of this register unit pressure set.

llvm::TargetSubtargetInfo
TargetSubtargetInfo - Generic base class for all target subtargets.
Definition: TargetSubtargetInfo.h:63

llvm::TargetSubtargetInfo::getRegisterInfo
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Definition: TargetSubtargetInfo.h:128

llvm::Type::getVoidTy
static Type * getVoidTy(LLVMContext &C)

llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1808

llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43

llvm::Value
LLVM Value Representation.
Definition: Value.h:74

llvm::cl::opt
Definition: CommandLine.h:1430

llvm::detail::DenseSetImpl::insert
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206

llvm::detail::DenseSetImpl::contains
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
Definition: DenseSet.h:185

llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52

uint16_t

uint64_t

unsigned

iterator_range.h
This provides a very simple, boring adaptor for a begin and end iterator into a range type.

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143

false
Definition: StackSlotColoring.cpp:184

llvm::AArch64CC::NE
@ NE
Definition: AArch64BaseInfo.h:257

llvm::AArch64::RM
@ RM
Definition: AArch64ISelLowering.h:518

llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73

llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34

llvm::M68k::MemAddrModeKind::j
@ j

llvm::M68k::MemAddrModeKind::U
@ U

llvm::M68k::MemAddrModeKind::V
@ V

llvm::M68k::MemAddrModeKind::L
@ L

llvm::RISCVFenceField::W
@ W
Definition: RISCVBaseInfo.h:315

llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:314

llvm::SIEncodingFamily::SI
@ SI
Definition: SIDefines.h:36

llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32

llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:614

llvm::ZeroCallUsedRegs::ZeroCallUsedRegsKind::Used
@ Used

llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:138

llvm::cl::ReallyHidden
@ ReallyHidden
Definition: CommandLine.h:139

llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450

llvm::cl::Prefix
@ Prefix
Definition: CommandLine.h:159

llvm::codeview::FrameCookieKind::Copy
@ Copy

llvm::dwarf::Index
Index
Definition: Dwarf.h:558

llvm::logicalview::LVComparePass::Added
@ Added

llvm::logicalview::LVAttributeKind::Inserted
@ Inserted

llvm::ms_demangle::IntrinsicFunctionKind::New
@ New

llvm::numbers::e
constexpr double e
Definition: MathExtras.h:31

llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136

llvm::pdb::PDB_MemoryType::Stack
@ Stack

llvm::pdb::PDB_LocType::Slot
@ Slot

llvm::pdb::PDB_ColorItem::Path
@ Path

llvm::rdf::Phi
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390

llvm::rdf::Def
NodeAddr< DefNode * > Def
Definition: RDFGraph.h:384

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18

llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329

llvm::Offset
@ Offset
Definition: DWP.cpp:456

llvm::stable_sort
void stable_sort(R &&Range)
Definition: STLExtras.h:1995

llvm::popcount
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:385

llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1680

llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:363

llvm::NodeOrder
@ NodeOrder
Definition: SIMachineScheduler.h:37

llvm::Successor
@ Successor
Definition: SIMachineScheduler.h:35

llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:76

llvm::set_is_subset
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A is a subset of B, i.e. every element of A is also in B.
Definition: SetOperations.h:104

llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2073

llvm::erase
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container.
Definition: STLExtras.h:2059

llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729

llvm::reverse
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419

llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1647

llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163

llvm::format
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:125

llvm::getUnderlyingObjects
void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to getUnderlyingObject except that it can look through phi and select instructions and return multiple objects.
Definition: ValueTracking.cpp:6338

llvm::getRegState
unsigned getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.
Definition: MachineInstrBuilder.h:557

llvm::lower_bound
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1954

llvm::MachinePipelinerID
char & MachinePipelinerID
This pass performs software pipelining on machine instructions.
Definition: MachinePipeliner.cpp:216

llvm::Cycle
CycleInfo::CycleT Cycle
Definition: CycleInfo.h:24

llvm::SwpEnableCopyToPhi
cl::opt< bool > SwpEnableCopyToPhi

llvm::isIdentifiedObject
bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
Definition: AliasAnalysis.cpp:851

llvm::printReg
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Definition: TargetRegisterInfo.cpp:108

llvm::SwpForceIssueWidth
cl::opt< int > SwpForceIssueWidth
A command line argument to force pipeliner to use specified issue width.

std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860

raw_ostream.h

N
#define N

llvm::DWARFExpression::Operation::Description
Description of the encoding of one expression Op.
Definition: DWARFExpression.h:66

llvm::InstrStage
These values represent a non-pipelined step in the execution of an instruction.
Definition: MCInstrItineraries.h:59

llvm::IntervalPressure
RegisterPressure computed within a region of instructions delimited by TopIdx and BottomIdx.
Definition: RegisterPressure.h:67

llvm::LaneBitmask::getNone
static constexpr LaneBitmask getNone()
Definition: LaneBitmask.h:81

llvm::MCProcResourceDesc
Define a kind of processor resource that will be modeled by the scheduler.
Definition: MCSchedule.h:31

llvm::MCProcResourceDesc::Name
const char * Name
Definition: MCSchedule.h:32

llvm::MCProcResourceDesc::NumUnits
unsigned NumUnits
Definition: MCSchedule.h:33

llvm::MCSchedClassDesc
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition: MCSchedule.h:118

llvm::MCSchedClassDesc::isValid
bool isValid() const
Definition: MCSchedule.h:136

llvm::MCSchedClassDesc::NumMicroOps
uint16_t NumMicroOps
Definition: MCSchedule.h:125

llvm::MCSchedModel
Machine model for scheduling, bundling, and heuristics.
Definition: MCSchedule.h:253

llvm::MCSchedModel::getSchedClassDesc
const MCSchedClassDesc * getSchedClassDesc(unsigned SchedClassIdx) const
Definition: MCSchedule.h:360

llvm::MCSchedModel::getNumProcResourceKinds
unsigned getNumProcResourceKinds() const
Definition: MCSchedule.h:349

llvm::MCSchedModel::hasInstrSchedModel
bool hasInstrSchedModel() const
Does this machine model include instruction-level scheduling.
Definition: MCSchedule.h:334

llvm::MCSchedModel::getProcResource
const MCProcResourceDesc * getProcResource(unsigned ProcResourceIdx) const
Definition: MCSchedule.h:353

llvm::MCWriteProcResEntry
Identify one of the processor resource kinds consumed by a particular scheduling class for the specified number of cycles.
Definition: MCSchedule.h:63

llvm::MachinePipeliner::LoopInfo::FBB
MachineBasicBlock * FBB
Definition: MachinePipeliner.h:85

llvm::MachinePipeliner::LoopInfo::LoopInductionVar
MachineInstr * LoopInductionVar
Definition: MachinePipeliner.h:87

llvm::MachinePipeliner::LoopInfo::BrCond
SmallVector< MachineOperand, 4 > BrCond
Definition: MachinePipeliner.h:86

llvm::MachinePipeliner::LoopInfo::TBB
MachineBasicBlock * TBB
Definition: MachinePipeliner.h:84

llvm::MachinePipeliner::LoopInfo::LoopCompare
MachineInstr * LoopCompare
Definition: MachinePipeliner.h:88

llvm::MachinePipeliner::LoopInfo::LoopPipelinerInfo
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > LoopPipelinerInfo
Definition: MachinePipeliner.h:89

llvm::RegPressureDelta
Store the effects of a change in pressure on things that MI scheduler cares about.
Definition: RegisterPressure.h:240

llvm::RegPressureDelta::Excess
PressureChange Excess
Definition: RegisterPressure.h:241

llvm::RegisterMaskPair
Definition: RegisterPressure.h:38

llvm::RegisterPressure::MaxSetPressure
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
Definition: RegisterPressure.h:49

llvm::cl::desc
Definition: CommandLine.h:416