//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// MachineScheduler schedules machine instructions after phi elimination. It
// preserves LiveIntervals so it can be invoked before register allocation.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePassRegistry.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/CodeGen/ScheduleDFS.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <limits>
#include <memory>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "machine-scheduler"

namespace llvm {

cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
                           cl::desc("Force top-down list scheduling"));
cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
                            cl::desc("Force bottom-up list scheduling"));
cl::opt<bool>
DumpCriticalPathLength("misched-dcpl", cl::Hidden,
                       cl::desc("Print critical path length to stdout"));

} // end namespace llvm

#ifndef NDEBUG
static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
  cl::desc("Pop up a window to show MISched dags after they are processed"));

/// In some situations a few uninteresting nodes depend on nearly all other
/// nodes in the graph; provide a cutoff to hide them.
static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
  cl::desc("Hide nodes with more predecessors/successors than the cutoff"));

static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));

static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
  cl::desc("Only schedule this function"));
static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
                                        cl::desc("Only schedule this MBB#"));
#else
static bool ViewMISchedDAGs = false;
#endif // NDEBUG

/// Avoid quadratic complexity in unusually large basic blocks by limiting the
/// size of the ready lists.
static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
  cl::desc("Limit ready list to N instructions"), cl::init(256));

static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
  cl::desc("Enable register pressure scheduling."), cl::init(true));

static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
  cl::desc("Enable cyclic critical path analysis."), cl::init(true));

static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
                                        cl::desc("Enable memop clustering."),
                                        cl::init(true));

static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
  cl::desc("Verify machine instrs before and after machine scheduling"));
     124             : 
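// For reference, a few illustrative llc invocations exercising the options
// above (these command lines are examples, not taken from this file; the
// flag names come from the cl::opt declarations):
//
//   llc -misched-topdown test.ll        # force top-down list scheduling
//   llc -misched-limit=64 test.ll       # cap the ready lists at 64 instrs
//   llc -misched-cluster=false test.ll  # disable memop clustering
//   llc -verify-misched test.ll         # verify MI before/after scheduling
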
// DAG subtrees must have at least this many nodes.
static const unsigned MinSubtreeSize = 8;

// Pin the vtables to this file.
void MachineSchedStrategy::anchor() {}

void ScheduleDAGMutation::anchor() {}

//===----------------------------------------------------------------------===//
// Machine Instruction Scheduling Pass and Registry
//===----------------------------------------------------------------------===//

MachineSchedContext::MachineSchedContext() {
  RegClassInfo = new RegisterClassInfo();
}

MachineSchedContext::~MachineSchedContext() {
  delete RegClassInfo;
}

namespace {

/// Base class for a machine scheduler class that can run at any point.
class MachineSchedulerBase : public MachineSchedContext,
                             public MachineFunctionPass {
public:
  MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}

  void print(raw_ostream &O, const Module* = nullptr) const override;

protected:
  void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
};

/// MachineScheduler runs after coalescing and before register allocation.
class MachineScheduler : public MachineSchedulerBase {
public:
  MachineScheduler();

  void getAnalysisUsage(AnalysisUsage &AU) const override;

  bool runOnMachineFunction(MachineFunction&) override;

  static char ID; // Class identification, replacement for typeinfo

protected:
  ScheduleDAGInstrs *createMachineScheduler();
};

/// PostMachineScheduler runs shortly before code emission.
class PostMachineScheduler : public MachineSchedulerBase {
public:
  PostMachineScheduler();

  void getAnalysisUsage(AnalysisUsage &AU) const override;

  bool runOnMachineFunction(MachineFunction&) override;

  static char ID; // Class identification, replacement for typeinfo

protected:
  ScheduleDAGInstrs *createPostMachineScheduler();
};

} // end anonymous namespace

char MachineScheduler::ID = 0;

char &llvm::MachineSchedulerID = MachineScheduler::ID;

INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,
                      "Machine Instruction Scheduler", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,
                    "Machine Instruction Scheduler", false, false)

MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
  initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
}

void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addRequiredID(MachineDominatorsID);
  AU.addRequired<MachineLoopInfo>();
  AU.addRequired<AAResultsWrapperPass>();
  AU.addRequired<TargetPassConfig>();
  AU.addRequired<SlotIndexes>();
  AU.addPreserved<SlotIndexes>();
  AU.addRequired<LiveIntervals>();
  AU.addPreserved<LiveIntervals>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

char PostMachineScheduler::ID = 0;

char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;

INITIALIZE_PASS(PostMachineScheduler, "postmisched",
                "PostRA Machine Instruction Scheduler", false, false)

PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
  initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
}

void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addRequiredID(MachineDominatorsID);
  AU.addRequired<MachineLoopInfo>();
  AU.addRequired<TargetPassConfig>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

MachinePassRegistry MachineSchedRegistry::Registry;

/// A dummy default scheduler factory indicates whether the scheduler
/// is overridden on the command line.
static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
  return nullptr;
}

/// MachineSchedOpt allows command line selection of the scheduler.
static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
               RegisterPassParser<MachineSchedRegistry>>
MachineSchedOpt("misched",
                cl::init(&useDefaultMachineSched), cl::Hidden,
                cl::desc("Machine instruction scheduler to use"));

static MachineSchedRegistry
DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
                     useDefaultMachineSched);
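
// A minimal sketch of how a custom scheduler would hook into this registry
// and become selectable with -misched=<name>. The factory and strategy below
// are hypothetical; only the registration pattern (as used by
// DefaultSchedRegistry above) is real:
//
//   static ScheduleDAGInstrs *createMySched(MachineSchedContext *C) {
//     return new ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C));
//   }
//   static MachineSchedRegistry
//   MySchedRegistry("my-sched", "My custom scheduler.", createMySched);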

static cl::opt<bool> EnableMachineSched(
    "enable-misched",
    cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
    cl::Hidden);

static cl::opt<bool> EnablePostRAMachineSched(
    "enable-post-misched",
    cl::desc("Enable the post-ra machine instruction scheduling pass."),
    cl::init(true), cl::Hidden);

/// Decrement this iterator until reaching the top or a non-debug instr.
static MachineBasicBlock::const_iterator
priorNonDebug(MachineBasicBlock::const_iterator I,
              MachineBasicBlock::const_iterator Beg) {
  assert(I != Beg && "reached the top of the region, cannot decrement");
  while (--I != Beg) {
    if (!I->isDebugInstr())
      break;
  }
  return I;
}

/// Non-const version.
static MachineBasicBlock::iterator
priorNonDebug(MachineBasicBlock::iterator I,
              MachineBasicBlock::const_iterator Beg) {
  return priorNonDebug(MachineBasicBlock::const_iterator(I), Beg)
      .getNonConstIterator();
}

/// If this iterator is a debug value, increment until reaching the End or a
/// non-debug instruction.
static MachineBasicBlock::const_iterator
nextIfDebug(MachineBasicBlock::const_iterator I,
            MachineBasicBlock::const_iterator End) {
  for(; I != End; ++I) {
    if (!I->isDebugInstr())
      break;
  }
  return I;
}

/// Non-const version.
static MachineBasicBlock::iterator
nextIfDebug(MachineBasicBlock::iterator I,
            MachineBasicBlock::const_iterator End) {
  return nextIfDebug(MachineBasicBlock::const_iterator(I), End)
      .getNonConstIterator();
}

/// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
  // Select the scheduler, or set the default.
  MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
  if (Ctor != useDefaultMachineSched)
    return Ctor(this);

  // Get the default scheduler set by the target for this function.
  ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
  if (Scheduler)
    return Scheduler;

  // Default to GenericScheduler.
  return createGenericSchedLive(this);
}

/// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
/// the caller. We don't have a command line option to override the postRA
/// scheduler. The Target must configure it.
ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
  // Get the postRA scheduler set by the target for this function.
  ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
  if (Scheduler)
    return Scheduler;

  // Default to GenericScheduler.
  return createGenericSchedPostRA(this);
}

/// Top-level MachineScheduler pass driver.
///
/// Visit blocks in function order. Divide each block into scheduling regions
/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
/// consistent with the DAG builder, which traverses the interior of the
/// scheduling regions bottom-up.
///
/// This design avoids exposing scheduling boundaries to the DAG builder,
/// simplifying the DAG builder's support for "special" target instructions.
/// At the same time the design allows target schedulers to operate across
/// scheduling boundaries, for example to bundle the boundary instructions
/// without reordering them. This creates complexity, because the target
/// scheduler must update the RegionBegin and RegionEnd positions cached by
/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
/// design would be to split blocks at scheduling boundaries, but LLVM has a
/// general bias against block splitting purely for implementation simplicity.
bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
  if (skipFunction(mf.getFunction()))
    return false;

  if (EnableMachineSched.getNumOccurrences()) {
    if (!EnableMachineSched)
      return false;
  } else if (!mf.getSubtarget().enableMachineScheduler())
    return false;

  LLVM_DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));

  // Initialize the context of the pass.
  MF = &mf;
  MLI = &getAnalysis<MachineLoopInfo>();
  MDT = &getAnalysis<MachineDominatorTree>();
  PassConfig = &getAnalysis<TargetPassConfig>();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

  LIS = &getAnalysis<LiveIntervals>();

  if (VerifyScheduling) {
    LLVM_DEBUG(LIS->dump());
    MF->verify(this, "Before machine scheduling.");
  }
  RegClassInfo->runOnMachineFunction(*MF);

  // Instantiate the selected scheduler for this target, function, and
  // optimization level.
  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
  scheduleRegions(*Scheduler, false);

  LLVM_DEBUG(LIS->dump());
  if (VerifyScheduling)
    MF->verify(this, "After machine scheduling.");
  return true;
}

bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
  if (skipFunction(mf.getFunction()))
    return false;

  if (EnablePostRAMachineSched.getNumOccurrences()) {
    if (!EnablePostRAMachineSched)
      return false;
  } else if (!mf.getSubtarget().enablePostRAScheduler()) {
    LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
    return false;
  }
  LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));

  // Initialize the context of the pass.
  MF = &mf;
  MLI = &getAnalysis<MachineLoopInfo>();
  PassConfig = &getAnalysis<TargetPassConfig>();

  if (VerifyScheduling)
    MF->verify(this, "Before post machine scheduling.");

  // Instantiate the selected scheduler for this target, function, and
  // optimization level.
  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
  scheduleRegions(*Scheduler, true);

  if (VerifyScheduling)
    MF->verify(this, "After post machine scheduling.");
  return true;
}

/// Return true if the given instruction should not be included in a scheduling
/// region.
///
/// MachineScheduler does not currently support scheduling across calls. To
/// handle calls, the DAG builder needs to be modified to create register
/// anti/output dependencies on the registers clobbered by the call's regmask
/// operand. In PreRA scheduling, the stack pointer adjustment already prevents
/// scheduling across calls. In PostRA scheduling, we need the isCall check to
/// enforce the boundary, but there would be no benefit to postRA scheduling
/// across calls this late anyway.
static bool isSchedBoundary(MachineBasicBlock::iterator MI,
                            MachineBasicBlock *MBB,
                            MachineFunction *MF,
                            const TargetInstrInfo *TII) {
  return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
}

/// A region of an MBB for scheduling.
namespace {
struct SchedRegion {
  /// RegionBegin is the first instruction in the scheduling region, and
  /// RegionEnd is either MBB->end() or the scheduling boundary after the
  /// last instruction in the scheduling region. These iterators cannot refer
  /// to instructions outside of the identified scheduling region because
  /// those may be reordered before scheduling this region.
  MachineBasicBlock::iterator RegionBegin;
  MachineBasicBlock::iterator RegionEnd;
  unsigned NumRegionInstrs;

  SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E,
              unsigned N) :
    RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
};
} // end anonymous namespace

using MBBRegionsVector = SmallVector<SchedRegion, 16>;

static void
getSchedRegions(MachineBasicBlock *MBB,
                MBBRegionsVector &Regions,
                bool RegionsTopDown) {
  MachineFunction *MF = MBB->getParent();
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();

  MachineBasicBlock::iterator I = nullptr;
  for(MachineBasicBlock::iterator RegionEnd = MBB->end();
      RegionEnd != MBB->begin(); RegionEnd = I) {

    // Avoid decrementing RegionEnd for blocks with no terminator.
    if (RegionEnd != MBB->end() ||
        isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
      --RegionEnd;
    }

    // The next region starts above the previous region. Look backward in the
    // instruction stream until we find the nearest boundary.
    unsigned NumRegionInstrs = 0;
    I = RegionEnd;
    for (;I != MBB->begin(); --I) {
      MachineInstr &MI = *std::prev(I);
      if (isSchedBoundary(&MI, &*MBB, MF, TII))
        break;
      if (!MI.isDebugInstr())
        // MBB::size() uses instr_iterator to count. Here we need a bundle to
        // count as a single instruction.
        ++NumRegionInstrs;
    }

    Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
  }

  if (RegionsTopDown)
    std::reverse(Regions.begin(), Regions.end());
}
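
// For illustration (a hypothetical block, not from this file): given
//   %a = ...        <- region 1
//   %b = use %a     <- region 1
//   CALL @f         <- boundary; becomes region 1's RegionEnd
//   %c = ...        <- region 2
// the loop above discovers [%c, end) first and then [%a, CALL); passing
// RegionsTopDown = true reverses that back to source order.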

/// Main driver for both MachineScheduler and PostMachineScheduler.
void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
                                           bool FixKillFlags) {
  // Visit all machine basic blocks.
  //
  // TODO: Visit blocks in global postorder or postorder within the bottom-up
  // loop tree. Then we can optionally compute global RegPressure.
  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
       MBB != MBBEnd; ++MBB) {

    Scheduler.startBlock(&*MBB);

#ifndef NDEBUG
    if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
      continue;
    if (SchedOnlyBlock.getNumOccurrences()
        && (int)SchedOnlyBlock != MBB->getNumber())
      continue;
#endif

    // Break the block into scheduling regions [I, RegionEnd). RegionEnd
    // points to the scheduling boundary at the bottom of the region. The DAG
    // does not include RegionEnd, but the region does (i.e. the next
    // RegionEnd is above the previous RegionBegin). If the current block has
    // no terminator then RegionEnd == MBB->end() for the bottom region.
    //
    // All the regions of MBB are first found and stored in MBBRegions, which
    // are then processed top-down within the MBB if
    // Scheduler.doMBBSchedRegionsTopDown() returns true.
    //
    // The Scheduler may insert instructions during either schedule() or
    // exitRegion(), even for empty regions. So the local iterators 'I' and
    // 'RegionEnd' are invalid across these calls. Instructions must not be
    // added to other regions than the current one without updating MBBRegions.

    MBBRegionsVector MBBRegions;
    getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
    for (MBBRegionsVector::iterator R = MBBRegions.begin();
         R != MBBRegions.end(); ++R) {
      MachineBasicBlock::iterator I = R->RegionBegin;
      MachineBasicBlock::iterator RegionEnd = R->RegionEnd;
      unsigned NumRegionInstrs = R->NumRegionInstrs;

      // Notify the scheduler of the region, even if we may skip scheduling
      // it. Perhaps it still needs to be bundled.
      Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);

      // Skip empty scheduling regions (0 or 1 schedulable instructions).
      if (I == RegionEnd || I == std::prev(RegionEnd)) {
        // Close the current region. Bundle the terminator if needed.
        // This invalidates 'RegionEnd' and 'I'.
        Scheduler.exitRegion();
        continue;
      }
      LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
      LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)
                        << " " << MBB->getName() << "\n  From: " << *I
                        << "    To: ";
                 if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
                 else dbgs() << "End";
                 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
      if (DumpCriticalPathLength) {
        errs() << MF->getName();
        errs() << ":%bb. " << MBB->getNumber();
        errs() << " " << MBB->getName() << " \n";
      }

      // Schedule a region: possibly reorder instructions.
      // This invalidates the original region iterators.
      Scheduler.schedule();

      // Close the current region.
      Scheduler.exitRegion();
    }
    Scheduler.finishBlock();
    // FIXME: Ideally, no further passes should rely on kill flags. However,
    // thumb2 size reduction is currently an exception, so the PostMIScheduler
    // needs to do this.
    if (FixKillFlags)
      Scheduler.fixupKills(*MBB);
  }
  Scheduler.finalizeSchedule();
}

void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
  // unimplemented
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void ReadyQueue::dump() const {
  dbgs() << "Queue " << Name << ": ";
  for (const SUnit *SU : Queue)
    dbgs() << SU->NodeNum << " ";
  dbgs() << "\n";
}
#endif

//===----------------------------------------------------------------------===//
// ScheduleDAGMI - Basic machine instruction scheduling. This is
// independent of PreRA/PostRA scheduling and involves no extra book-keeping for
// virtual registers.
//===----------------------------------------------------------------------===//

// Provide a vtable anchor.
ScheduleDAGMI::~ScheduleDAGMI() = default;

bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
  return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
}

bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
  if (SuccSU != &ExitSU) {
    // Do not use WillCreateCycle, it assumes SD scheduling.
    // If Pred is reachable from Succ, then the edge creates a cycle.
    if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
      return false;
    Topo.AddPred(SuccSU, PredDep.getSUnit());
  }
  SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
  // Return true regardless of whether a new edge needed to be inserted.
  return true;
}

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
/// NumPredsLeft reaches zero, release the successor node.
///
/// FIXME: Adjust SuccSU height based on MinLatency.
void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

  if (SuccEdge->isWeak()) {
    --SuccSU->WeakPredsLeft;
    if (SuccEdge->isCluster())
      NextClusterSucc = SuccSU;
    return;
  }
#ifndef NDEBUG
  if (SuccSU->NumPredsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(nullptr);
  }
#endif
  // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
  // CurrCycle may have advanced since then.
  if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
    SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();

  --SuccSU->NumPredsLeft;
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
    SchedImpl->releaseTopNode(SuccSU);
}

/// releaseSuccessors - Call releaseSucc on each of SU's successors.
void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
  for (SDep &Succ : SU->Succs)
    releaseSucc(SU, &Succ);
}

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
/// NumSuccsLeft reaches zero, release the predecessor node.
///
/// FIXME: Adjust PredSU height based on MinLatency.
void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

  if (PredEdge->isWeak()) {
    --PredSU->WeakSuccsLeft;
    if (PredEdge->isCluster())
      NextClusterPred = PredSU;
    return;
  }
#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    PredSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(nullptr);
  }
#endif
  // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
  // CurrCycle may have advanced since then.
  if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
    PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();

  --PredSU->NumSuccsLeft;
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
    SchedImpl->releaseBottomNode(PredSU);
}

/// releasePredecessors - Call releasePred on each of SU's predecessors.
void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
  for (SDep &Pred : SU->Preds)
    releasePred(SU, &Pred);
}

void ScheduleDAGMI::startBlock(MachineBasicBlock *bb) {
  ScheduleDAGInstrs::startBlock(bb);
  SchedImpl->enterMBB(bb);
}

void ScheduleDAGMI::finishBlock() {
  SchedImpl->leaveMBB();
  ScheduleDAGInstrs::finishBlock();
}

/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
/// crossing a scheduling boundary. [begin, end) includes all instructions in
/// the region, including the boundary itself and single-instruction regions
/// that don't get scheduled.
void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
                                     MachineBasicBlock::iterator begin,
                                     MachineBasicBlock::iterator end,
                                     unsigned regioninstrs)
{
  ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);

  SchedImpl->initPolicy(begin, end, regioninstrs);
}

/// This is normally called from the main scheduler loop but may also be invoked
/// by the scheduling strategy to perform additional code motion.
void ScheduleDAGMI::moveInstruction(
  MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
  // Advance RegionBegin if the first instruction moves down.
  if (&*RegionBegin == MI)
    ++RegionBegin;

  // Update the instruction stream.
  BB->splice(InsertPos, BB, MI);

  // Update LiveIntervals
  if (LIS)
    LIS->handleMove(*MI, /*UpdateFlags=*/true);

  // Recede RegionBegin if an instruction moves above the first.
  if (RegionBegin == InsertPos)
    RegionBegin = MI;
}

bool ScheduleDAGMI::checkSchedLimit() {
#ifndef NDEBUG
  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
    CurrentTop = CurrentBottom;
    return false;
  }
  ++NumInstrsScheduled;
#endif
  return true;
}

/// Per-region scheduling driver, called back from
/// MachineScheduler::runOnMachineFunction. This is a simplified driver that
/// does not consider liveness or register pressure. It is useful for PostRA
/// scheduling and potentially other custom schedulers.
void ScheduleDAGMI::schedule() {
  LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
  LLVM_DEBUG(SchedImpl->dumpPolicy());

  // Build the DAG.
  buildSchedGraph(AA);

  Topo.InitDAGTopologicalSorting();

  postprocessDAG();

  SmallVector<SUnit*, 8> TopRoots, BotRoots;
  findRootsAndBiasEdges(TopRoots, BotRoots);

  LLVM_DEBUG(if (EntrySU.getInstr() != nullptr) EntrySU.dumpAll(this);
             for (const SUnit &SU
                  : SUnits) SU.dumpAll(this);
             if (ExitSU.getInstr() != nullptr) ExitSU.dumpAll(this););
  if (ViewMISchedDAGs) viewGraph();

  // Initialize the strategy before modifying the DAG.
  // This may initialize a DFSResult to be used for queue priority.
  SchedImpl->initialize(this);

  // Initialize ready queues now that the DAG and priority data are finalized.
  initQueues(TopRoots, BotRoots);

  bool IsTopNode = false;
  while (true) {
    LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
    SUnit *SU = SchedImpl->pickNode(IsTopNode);
    if (!SU) break;

    assert(!SU->isScheduled && "Node already scheduled");
    if (!checkSchedLimit())
      break;

    MachineInstr *MI = SU->getInstr();
    if (IsTopNode) {
      assert(SU->isTopReady() && "node still has unscheduled dependencies");
      if (&*CurrentTop == MI)
        CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
      else
        moveInstruction(MI, CurrentTop);
    } else {
      assert(SU->isBottomReady() && "node still has unscheduled dependencies");
      MachineBasicBlock::iterator priorII =
        priorNonDebug(CurrentBottom, CurrentTop);
      if (&*priorII == MI)
        CurrentBottom = priorII;
      else {
        if (&*CurrentTop == MI)
          CurrentTop = nextIfDebug(++CurrentTop, priorII);
        moveInstruction(MI, CurrentBottom);
        CurrentBottom = MI;
      }
    }
    // Notify the scheduling strategy before updating the DAG.
    // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
    // runs, it can then use the accurate ReadyCycle time to determine whether
    // newly released nodes can move to the readyQ.
    SchedImpl->schedNode(SU, IsTopNode);

    updateQueues(SU, IsTopNode);
  }
  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");

  placeDebugValues();

  LLVM_DEBUG({
    dbgs() << "*** Final schedule for "
           << printMBBReference(*begin()->getParent()) << " ***\n";
    dumpSchedule();
    dbgs() << '\n';
  });
}

/// Apply each ScheduleDAGMutation step in order.
void ScheduleDAGMI::postprocessDAG() {
  for (auto &m : Mutations)
    m->apply(this);
}
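
// A minimal sketch (hypothetical mutation, not part of this file) of the
// ScheduleDAGMutation hook applied above. Target mutations typically downcast
// to ScheduleDAGMI and add artificial edges before scheduling begins:
//
//   struct MyMutation : ScheduleDAGMutation {
//     void apply(ScheduleDAGInstrs *DAGInstrs) override {
//       ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI *>(DAGInstrs);
//       // e.g. order two nodes without a real dependence:
//       // DAG->addEdge(SuccSU, SDep(PredSU, SDep::Artificial));
//     }
//   };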
     835             : 
void ScheduleDAGMI::
findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
                      SmallVectorImpl<SUnit*> &BotRoots) {
  for (SUnit &SU : SUnits) {
    assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");

    // Order predecessors so DFSResult follows the critical path.
    SU.biasCriticalPath();

    // A SUnit is ready to top schedule if it has no predecessors.
    if (!SU.NumPredsLeft)
      TopRoots.push_back(&SU);
    // A SUnit is ready to bottom schedule if it has no successors.
    if (!SU.NumSuccsLeft)
      BotRoots.push_back(&SU);
  }
  ExitSU.biasCriticalPath();
}
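
// Illustrative note (not from this file): in a straight dependence chain
// SU(A) -> SU(B) -> SU(C), only A has no predecessors and only C has no
// successors, so TopRoots = {A} and BotRoots = {C}.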
     855             : /// Identify DAG roots and setup scheduler queues.
/// Identify DAG roots and set up scheduler queues.
     857             :                                ArrayRef<SUnit*> BotRoots) {
     858      391241 :   NextClusterSucc = nullptr;
     859      391241 :   NextClusterPred = nullptr;
     860             : 
     861             :   // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
     862             :   //
     863             :   // Nodes with unreleased weak edges can still be roots.
     864             :   // Release top roots in forward order.
     865     2449451 :   for (SUnit *SU : TopRoots)
     866     1029105 :     SchedImpl->releaseTopNode(SU);
     867             : 
     868             :   // Release bottom roots in reverse order so the higher priority nodes appear
     869             :   // first. This is more natural and slightly more efficient.
     870             :   for (SmallVectorImpl<SUnit*>::const_reverse_iterator
     871      703795 :          I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
     872      312554 :     SchedImpl->releaseBottomNode(*I);
     873             :   }
     874             : 
     875      391241 :   releaseSuccessors(&EntrySU);
     876      391241 :   releasePredecessors(&ExitSU);
     877             : 
     878      391241 :   SchedImpl->registerRoots();
     879             : 
     880             :   // Advance past initial DebugValues.
     881      391241 :   CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
     882      391241 :   CurrentBottom = RegionEnd;
     883      391241 : }
     884             : 
     885             : /// Update scheduler queues after scheduling an instruction.
     886     2432556 : void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
     887             :   // Release dependent instructions for scheduling.
     888     2432556 :   if (IsTopNode)
     889      172905 :     releaseSuccessors(SU);
     890             :   else
     891     2259651 :     releasePredecessors(SU);
     892             : 
     893     2432556 :   SU->isScheduled = true;
     894     2432556 : }
     895             : 
     896             : /// Reinsert any remaining debug_values, just like the PostRA scheduler.
     897      391260 : void ScheduleDAGMI::placeDebugValues() {
     898             :   // If first instruction was a DBG_VALUE then put it back.
     899      391260 :   if (FirstDbgValue) {
     900        5390 :     BB->splice(RegionBegin, BB, FirstDbgValue);
     901        2695 :     RegionBegin = FirstDbgValue;
     902             :   }
     903             : 
     904             :   for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
     905      421410 :          DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
     906       30150 :     std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
     907             :     MachineInstr *DbgValue = P.first;
     908             :     MachineBasicBlock::iterator OrigPrevMI = P.second;
     909       30150 :     if (&*RegionBegin == DbgValue)
     910             :       ++RegionBegin;
     911       60300 :     BB->splice(++OrigPrevMI, BB, DbgValue);
     912       30150 :     if (OrigPrevMI == std::prev(RegionEnd))
     913         983 :       RegionEnd = DbgValue;
     914             :   }
     915             :   DbgValues.clear();
     916      391260 :   FirstDbgValue = nullptr;
     917      391260 : }
     918             : 
     919             : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
     920             : LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
     921             :   for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
     922             :     if (SUnit *SU = getSUnit(&(*MI)))
     923             :       SU->dump(this);
     924             :     else
     925             :       dbgs() << "Missing SUnit\n";
     926             :   }
     927             : }
     928             : #endif
     929             : 
     930             : //===----------------------------------------------------------------------===//
     931             : // ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
     932             : // preservation.
     933             : //===----------------------------------------------------------------------===//
     934             : 
     935     1059312 : ScheduleDAGMILive::~ScheduleDAGMILive() {
     936      154324 :   delete DFSResult;
     937      287692 : }
     938             : 
     939     1423163 : void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
     940     1423163 :   const MachineInstr &MI = *SU.getInstr();
     941    14774751 :   for (const MachineOperand &MO : MI.operands()) {
     942     6675794 :     if (!MO.isReg())
     943     2349967 :       continue;
     944     1167391 :     if (!MO.readsReg())
     945     1167391 :       continue;
     946     3271058 :     if (TrackLaneMasks && !MO.isUse())
     947      112622 :       continue;
     948             : 
     949     3045814 :     unsigned Reg = MO.getReg();
     950     4553409 :     if (!TargetRegisterInfo::isVirtualRegister(Reg))
     951     1507595 :       continue;
     952             : 
     953             :     // Ignore re-defs.
     954     1538219 :     if (TrackLaneMasks) {
     955             :       bool FoundDef = false;
     956     3627733 :       for (const MachineOperand &MO2 : MI.operands()) {
     957     2749173 :         if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {
     958             :           FoundDef = true;
     959             :           break;
     960             :         }
     961             :       }
     962      322113 :       if (FoundDef)
     963        4793 :         continue;
     964             :     }
     965             : 
     966             :     // Record this local VReg use.
     967     1533426 :     VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
     968             :     for (; UI != VRegUses.end(); ++UI) {
     969    14260667 :       if (UI->SU == &SU)
     970             :         break;
     971             :     }
     972             :     if (UI == VRegUses.end())
     973     1488295 :       VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU));
     974             :   }
     975     1423163 : }
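
// --- Editorial sketch (not part of MachineScheduler.cpp): the de-duplicated
// insert at the end of collectVRegUses above, restated with a std::multimap
// standing in for the sparse VReg2SUnitMultiMap. A (Reg, SU) pair is recorded
// only the first time this SU is seen reading Reg; all values are made up. ---
#include <cassert>
#include <map>

int main() {
  std::multimap<unsigned, int> VRegUses; // vreg number -> scheduling-unit id
  auto Record = [&](unsigned Reg, int SU) {
    auto Range = VRegUses.equal_range(Reg);
    for (auto I = Range.first; I != Range.second; ++I)
      if (I->second == SU)
        return;                         // this SU already recorded: ignore
    VRegUses.insert({Reg, SU});
  };
  Record(5, 0);
  Record(5, 0);                         // duplicate operand on the same SU
  Record(5, 1);
  assert(VRegUses.count(5) == 2);
}
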
     976             : 
     977             : /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
     978             : /// crossing a scheduling boundary. [begin, end) includes all instructions in
     979             : /// the region, including the boundary itself and single-instruction regions
     980             : /// that don't get scheduled.
     981      882570 : void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
     982             :                                 MachineBasicBlock::iterator begin,
     983             :                                 MachineBasicBlock::iterator end,
     984             :                                 unsigned regioninstrs)
     985             : {
     986             :   // ScheduleDAGMI initializes SchedImpl's per-region policy.
     987      882570 :   ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
     988             : 
     989             :   // For convenience remember the end of the liveness region.
     990     1747698 :   LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
     991             : 
     992             :   SUPressureDiffs.clear();
     993             : 
     994      882569 :   ShouldTrackPressure = SchedImpl->shouldTrackPressure();
     995      882569 :   ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
     996             : 
     997             :   assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&
     998             :          "ShouldTrackLaneMasks requires ShouldTrackPressure");
     999      882569 : }
    1000             : 
     1001             : // Set up the register pressure trackers for the top-scheduled and
     1002             : // bottom-scheduled regions.
    1003      113668 : void ScheduleDAGMILive::initRegPressure() {
    1004             :   VRegUses.clear();
    1005      227336 :   VRegUses.setUniverse(MRI.getNumVirtRegs());
    1006     1536831 :   for (SUnit &SU : SUnits)
    1007     1423163 :     collectVRegUses(SU);
    1008             : 
    1009      341004 :   TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,
    1010      113668 :                     ShouldTrackLaneMasks, false);
    1011      341004 :   BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
    1012      113668 :                     ShouldTrackLaneMasks, false);
    1013             : 
    1014             :   // Close the RPTracker to finalize live ins.
    1015      113668 :   RPTracker.closeRegion();
    1016             : 
    1017             :   LLVM_DEBUG(RPTracker.dump());
    1018             : 
    1019             :   // Initialize the live ins and live outs.
    1020      227336 :   TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
    1021      227336 :   BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
    1022             : 
    1023             :   // Close one end of the tracker so we can call
    1024             :   // getMaxUpward/DownwardPressureDelta before advancing across any
    1025             :   // instructions. This converts currently live regs into live ins/outs.
    1026      113668 :   TopRPTracker.closeTop();
    1027      113668 :   BotRPTracker.closeBottom();
    1028             : 
    1029      113668 :   BotRPTracker.initLiveThru(RPTracker);
    1030      113668 :   if (!BotRPTracker.getLiveThru().empty()) {
    1031             :     TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
    1032             :     LLVM_DEBUG(dbgs() << "Live Thru: ";
    1033             :                dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
     1034             :   }
    1035             : 
    1036             :   // For each live out vreg reduce the pressure change associated with other
    1037             :   // uses of the same vreg below the live-out reaching def.
    1038      227336 :   updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);
    1039             : 
    1040             :   // Account for liveness generated by the region boundary.
    1041      113668 :   if (LiveRegionEnd != RegionEnd) {
    1042             :     SmallVector<RegisterMaskPair, 8> LiveUses;
    1043      107579 :     BotRPTracker.recede(&LiveUses);
    1044      107579 :     updatePressureDiffs(LiveUses);
    1045             :   }
    1046             : 
    1047             :   LLVM_DEBUG(dbgs() << "Top Pressure:\n";
    1048             :              dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
    1049             :              dbgs() << "Bottom Pressure:\n";
    1050             :              dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI););
    1051             : 
    1052             :   assert((BotRPTracker.getPos() == RegionEnd ||
    1053             :           (RegionEnd->isDebugInstr() &&
    1054             :            BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) &&
    1055             :          "Can't find the region bottom");
    1056             : 
    1057             :   // Cache the list of excess pressure sets in this region. This will also track
    1058             :   // the max pressure in the scheduled code for these sets.
    1059      113668 :   RegionCriticalPSets.clear();
    1060             :   const std::vector<unsigned> &RegionPressure =
    1061      113668 :     RPTracker.getPressure().MaxSetPressure;
    1062     2889772 :   for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
    1063     2662436 :     unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
    1064     5324872 :     if (RegionPressure[i] > Limit) {
    1065             :       LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit
    1066             :                         << " Actual " << RegionPressure[i] << "\n");
    1067        4493 :       RegionCriticalPSets.push_back(PressureChange(i));
    1068             :     }
    1069             :   }
    1070             :   LLVM_DEBUG(dbgs() << "Excess PSets: ";
    1071             :              for (const PressureChange &RCPS
    1072             :                   : RegionCriticalPSets) dbgs()
    1073             :              << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
    1074             :              dbgs() << "\n");
    1075      113668 : }
    1076             : 
    1077     1423163 : void ScheduleDAGMILive::
    1078             : updateScheduledPressure(const SUnit *SU,
    1079             :                         const std::vector<unsigned> &NewMaxPressure) {
    1080     1423163 :   const PressureDiff &PDiff = getPressureDiff(SU);
    1081     2846326 :   unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
    1082     7387571 :   for (const PressureChange &PC : PDiff) {
    1083     4363928 :     if (!PC.isValid())
    1084             :       break;
    1085             :     unsigned ID = PC.getPSet();
    1086     3784018 :     while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
    1087       48172 :       ++CritIdx;
    1088     3591330 :     if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
    1089      260534 :       if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
    1090      130267 :           && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
    1091             :         RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
    1092             :     }
    1093     2982204 :     unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
    1094             :     if (NewMaxPressure[ID] >= Limit - 2) {
    1095             :       LLVM_DEBUG(dbgs() << "  " << TRI->getRegPressureSetName(ID) << ": "
    1096             :                         << NewMaxPressure[ID]
    1097             :                         << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")
    1098             :                         << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]
    1099             :                         << " livethru)\n");
    1100             :     }
    1101             :   }
    1102     1423163 : }
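
// --- Editorial sketch (not part of MachineScheduler.cpp): the function above
// advances a cursor (CritIdx) through RegionCriticalPSets in lockstep with
// the PSet IDs of the PressureDiff, both sorted by ID. The same two-pointer
// max-merge over hypothetical (PSet, Max) records: ---
#include <cassert>
#include <vector>

struct Change { unsigned PSet; int Max; };

// For each ID present in both sorted lists, keep the larger Max in Crit.
static void mergeMax(std::vector<Change> &Crit,
                     const std::vector<Change> &New) {
  unsigned CritIdx = 0, CritEnd = Crit.size();
  for (const Change &C : New) {
    while (CritIdx != CritEnd && Crit[CritIdx].PSet < C.PSet)
      ++CritIdx;
    if (CritIdx != CritEnd && Crit[CritIdx].PSet == C.PSet &&
        C.Max > Crit[CritIdx].Max)
      Crit[CritIdx].Max = C.Max;
  }
}

int main() {
  std::vector<Change> Crit = {{1, 4}, {5, 7}};
  mergeMax(Crit, {{1, 9}, {3, 2}, {5, 6}});
  assert(Crit[0].Max == 9 && Crit[1].Max == 7);
}
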
    1103             : 
    1104             : /// Update the PressureDiff array for liveness after scheduling this
    1105             : /// instruction.
    1106     1572679 : void ScheduleDAGMILive::updatePressureDiffs(
    1107             :     ArrayRef<RegisterMaskPair> LiveUses) {
    1108     4423339 :   for (const RegisterMaskPair &P : LiveUses) {
    1109     1425330 :     unsigned Reg = P.RegUnit;
     1110             :     // FIXME: Currently assuming single-use physregs.
    1111     1425330 :     if (!TRI->isVirtualRegister(Reg))
    1112             :       continue;
    1113             : 
    1114     1128888 :     if (ShouldTrackLaneMasks) {
    1115             :       // If the register has just become live then other uses won't change
    1116             :       // this fact anymore => decrement pressure.
    1117             :       // If the register has just become dead then other uses make it come
    1118             :       // back to life => increment pressure.
    1119             :       bool Decrement = P.LaneMask.any();
    1120             : 
    1121             :       for (const VReg2SUnit &V2SU
    1122      328886 :            : make_range(VRegUses.find(Reg), VRegUses.end())) {
    1123      514580 :         SUnit &SU = *V2SU.SU;
    1124      514580 :         if (SU.isScheduled || &SU == &ExitSU)
    1125      223002 :           continue;
    1126             : 
    1127      291578 :         PressureDiff &PDiff = getPressureDiff(&SU);
    1128      291578 :         PDiff.addPressureChange(Reg, Decrement, &MRI);
    1129             :         LLVM_DEBUG(dbgs() << "  UpdateRegP: SU(" << SU.NodeNum << ") "
    1130             :                           << printReg(Reg, TRI) << ':'
    1131             :                           << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();
    1132             :                    dbgs() << "              to "; PDiff.dump(*TRI););
    1133             :       }
    1134             :     } else {
    1135             :       assert(P.LaneMask.any());
    1136             :       LLVM_DEBUG(dbgs() << "  LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
    1137             :       // This may be called before CurrentBottom has been initialized. However,
    1138             :       // BotRPTracker must have a valid position. We want the value live into the
    1139             :       // instruction or live out of the block, so ask for the previous
    1140             :       // instruction's live-out.
    1141      800002 :       const LiveInterval &LI = LIS->getInterval(Reg);
    1142             :       VNInfo *VNI;
    1143             :       MachineBasicBlock::const_iterator I =
    1144     1600004 :         nextIfDebug(BotRPTracker.getPos(), BB->end());
    1145     1600004 :       if (I == BB->end())
    1146       80696 :         VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
    1147             :       else {
    1148     1519308 :         LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
    1149      759654 :         VNI = LRQ.valueIn();
    1150             :       }
    1151             :       // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
    1152             :       assert(VNI && "No live value at use.");
    1153             :       for (const VReg2SUnit &V2SU
    1154      800002 :            : make_range(VRegUses.find(Reg), VRegUses.end())) {
    1155     1788557 :         SUnit *SU = V2SU.SU;
    1156             :         // If this use comes before the reaching def, it cannot be a last use,
    1157             :         // so decrease its pressure change.
    1158     1788557 :         if (!SU->isScheduled && SU != &ExitSU) {
    1159             :           LiveQueryResult LRQ =
    1160     2965196 :               LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
    1161     1482598 :           if (LRQ.valueIn() == VNI) {
    1162     1170137 :             PressureDiff &PDiff = getPressureDiff(SU);
    1163     1170137 :             PDiff.addPressureChange(Reg, true, &MRI);
    1164             :             LLVM_DEBUG(dbgs() << "  UpdateRegP: SU(" << SU->NodeNum << ") "
    1165             :                               << *SU->getInstr();
    1166             :                        dbgs() << "              to "; PDiff.dump(*TRI););
    1167             :           }
    1168             :         }
    1169             :       }
    1170             :     }
    1171             :   }
    1172     1572679 : }
    1173             : 
    1174             : /// schedule - Called back from MachineScheduler::runOnMachineFunction
    1175             : /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
    1176             : /// only includes instructions that have DAG nodes, not scheduling boundaries.
    1177             : ///
    1178             : /// This is a skeletal driver, with all the functionality pushed into helpers,
    1179             : /// so that it can be easily extended by experimental schedulers. Generally,
    1180             : /// implementing MachineSchedStrategy should be sufficient to implement a new
    1181             : /// scheduling algorithm. However, if a scheduler further subclasses
    1182             : /// ScheduleDAGMILive then it will want to override this virtual method in order
    1183             : /// to update any specialized state.
    1184      367711 : void ScheduleDAGMILive::schedule() {
    1185             :   LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
    1186             :   LLVM_DEBUG(SchedImpl->dumpPolicy());
    1187      367711 :   buildDAGWithRegPressure();
    1188             : 
    1189      367711 :   Topo.InitDAGTopologicalSorting();
    1190             : 
    1191      367711 :   postprocessDAG();
    1192             : 
    1193             :   SmallVector<SUnit*, 8> TopRoots, BotRoots;
    1194      367711 :   findRootsAndBiasEdges(TopRoots, BotRoots);
    1195             : 
    1196             :   // Initialize the strategy before modifying the DAG.
    1197             :   // This may initialize a DFSResult to be used for queue priority.
    1198      367711 :   SchedImpl->initialize(this);
    1199             : 
    1200             :   LLVM_DEBUG(if (EntrySU.getInstr() != nullptr) EntrySU.dumpAll(this);
    1201             :              for (const SUnit &SU
    1202             :                   : SUnits) {
    1203             :                SU.dumpAll(this);
    1204             :                if (ShouldTrackPressure) {
    1205             :                  dbgs() << "  Pressure Diff      : ";
    1206             :                  getPressureDiff(&SU).dump(*TRI);
    1207             :                }
    1208             :                dbgs() << "  Single Issue       : ";
    1209             :                if (SchedModel.mustBeginGroup(SU.getInstr()) &&
    1210             :                    SchedModel.mustEndGroup(SU.getInstr()))
    1211             :                  dbgs() << "true;";
    1212             :                else
    1213             :                  dbgs() << "false;";
    1214             :                dbgs() << '\n';
    1215             :              } if (ExitSU.getInstr() != nullptr) ExitSU.dumpAll(this););
    1216      367711 :   if (ViewMISchedDAGs) viewGraph();
    1217             : 
    1218             :   // Initialize ready queues now that the DAG and priority data are finalized.
    1219      367711 :   initQueues(TopRoots, BotRoots);
    1220             : 
    1221      367711 :   bool IsTopNode = false;
    1222             :   while (true) {
    1223             :     LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
    1224     2683065 :     SUnit *SU = SchedImpl->pickNode(IsTopNode);
    1225     2683065 :     if (!SU) break;
    1226             : 
    1227             :     assert(!SU->isScheduled && "Node already scheduled");
    1228     2315354 :     if (!checkSchedLimit())
    1229             :       break;
    1230             : 
    1231     2315354 :     scheduleMI(SU, IsTopNode);
    1232             : 
    1233     2315354 :     if (DFSResult) {
    1234             :       unsigned SubtreeID = DFSResult->getSubtreeID(SU);
    1235         176 :       if (!ScheduledTrees.test(SubtreeID)) {
    1236             :         ScheduledTrees.set(SubtreeID);
    1237          38 :         DFSResult->scheduleTree(SubtreeID);
    1238          38 :         SchedImpl->scheduleTree(SubtreeID);
    1239             :       }
    1240             :     }
    1241             : 
    1242             :     // Notify the scheduling strategy after updating the DAG.
    1243     2315354 :     SchedImpl->schedNode(SU, IsTopNode);
    1244             : 
    1245     2315354 :     updateQueues(SU, IsTopNode);
    1246     2315354 :   }
    1247             :   assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
    1248             : 
    1249      367711 :   placeDebugValues();
    1250             : 
    1251             :   LLVM_DEBUG({
    1252             :     dbgs() << "*** Final schedule for "
    1253             :            << printMBBReference(*begin()->getParent()) << " ***\n";
    1254             :     dumpSchedule();
    1255             :     dbgs() << '\n';
    1256             :   });
    1257      367711 : }
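
// --- Editorial sketch (not part of MachineScheduler.cpp): the shape of the
// driver loop above, reduced to its essentials. "Strategy" is a hypothetical
// stand-in for MachineSchedStrategy: the driver repeatedly asks it to pick a
// node, moves the instruction, then notifies it so it can update its state. ---
#include <cstdio>

struct Node { bool Scheduled = false; };

struct Strategy {
  Node *Nodes;
  int N, Next = 0;
  Node *pickNode(bool &IsTopNode) {     // returning nullptr ends scheduling
    IsTopNode = true;
    return Next < N ? &Nodes[Next++] : nullptr;
  }
  void schedNode(Node *, bool) {}       // strategy bookkeeping hook
};

int main() {
  Node Nodes[3];
  Strategy Impl{Nodes, 3};
  bool IsTopNode = false;
  while (Node *SU = Impl.pickNode(IsTopNode)) {
    SU->Scheduled = true;               // stands in for scheduleMI()
    Impl.schedNode(SU, IsTopNode);      // notify after updating the DAG
  }
  std::printf("all nodes scheduled\n");
}
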
    1258             : 
     1259             : /// Build the DAG and set up the three register pressure trackers.
    1260      372738 : void ScheduleDAGMILive::buildDAGWithRegPressure() {
    1261      372738 :   if (!ShouldTrackPressure) {
    1262      259070 :     RPTracker.reset();
    1263             :     RegionCriticalPSets.clear();
    1264      259070 :     buildSchedGraph(AA);
    1265      259070 :     return;
    1266             :   }
    1267             : 
    1268             :   // Initialize the register pressure tracker used by buildSchedGraph.
    1269      341004 :   RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
    1270      113668 :                  ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
    1271             : 
     1272             :   // Account for liveness generated by the region boundary.
    1273      113668 :   if (LiveRegionEnd != RegionEnd)
    1274      107579 :     RPTracker.recede();
    1275             : 
    1276             :   // Build the DAG, and compute current register pressure.
    1277      113668 :   buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);
    1278             : 
    1279             :   // Initialize top/bottom trackers after computing region pressure.
    1280      113668 :   initRegPressure();
    1281             : }
    1282             : 
    1283          10 : void ScheduleDAGMILive::computeDFSResult() {
    1284          10 :   if (!DFSResult)
    1285          16 :     DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
    1286          10 :   DFSResult->clear();
    1287             :   ScheduledTrees.clear();
    1288          10 :   DFSResult->resize(SUnits.size());
    1289          20 :   DFSResult->compute(SUnits);
    1290          20 :   ScheduledTrees.resize(DFSResult->getNumSubtrees());
    1291          10 : }
    1292             : 
    1293             : /// Compute the max cyclic critical path through the DAG. The scheduling DAG
    1294             : /// only provides the critical path for single block loops. To handle loops that
    1295             : /// span blocks, we could use the vreg path latencies provided by
    1296             : /// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
    1297             : /// available for use in the scheduler.
    1298             : ///
    1299             : /// The cyclic path estimation identifies a def-use pair that crosses the back
    1300             : /// edge and considers the depth and height of the nodes. For example, consider
    1301             : /// the following instruction sequence where each instruction has unit latency
     1302             : /// and defines an eponymous virtual register:
    1303             : ///
    1304             : /// a->b(a,c)->c(b)->d(c)->exit
    1305             : ///
     1306             : /// The cyclic critical path is two cycles: b->c->b
    1307             : /// The acyclic critical path is four cycles: a->b->c->d->exit
    1308             : /// LiveOutHeight = height(c) = len(c->d->exit) = 2
    1309             : /// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
    1310             : /// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
    1311             : /// LiveInDepth = depth(b) = len(a->b) = 1
    1312             : ///
    1313             : /// LiveOutDepth - LiveInDepth = 3 - 1 = 2
    1314             : /// LiveInHeight - LiveOutHeight = 4 - 2 = 2
    1315             : /// CyclicCriticalPath = min(2, 2) = 2
    1316             : ///
    1317             : /// This could be relevant to PostRA scheduling, but is currently implemented
    1318             : /// assuming LiveIntervals.
    1319      333156 : unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
     1320             :   // This only applies to single-block loops.
    1321      333156 :   if (!BB->isSuccessor(BB))
    1322             :     return 0;
    1323             : 
    1324             :   unsigned MaxCyclicLatency = 0;
    1325             :   // Visit each live out vreg def to find def/use pairs that cross iterations.
    1326       11742 :   for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
    1327        3588 :     unsigned Reg = P.RegUnit;
    1328        3588 :     if (!TRI->isVirtualRegister(Reg))
    1329           7 :         continue;
    1330        3581 :     const LiveInterval &LI = LIS->getInterval(Reg);
    1331        7162 :     const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
    1332        3581 :     if (!DefVNI)
    1333         110 :       continue;
    1334             : 
    1335             :     MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
    1336             :     const SUnit *DefSU = getSUnit(DefMI);
    1337        1436 :     if (!DefSU)
    1338        2035 :       continue;
    1339             : 
    1340             :     unsigned LiveOutHeight = DefSU->getHeight();
    1341        1436 :     unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
    1342             :     // Visit all local users of the vreg def.
    1343             :     for (const VReg2SUnit &V2SU
    1344        1436 :          : make_range(VRegUses.find(Reg), VRegUses.end())) {
    1345        4855 :       SUnit *SU = V2SU.SU;
    1346        4855 :       if (SU == &ExitSU)
    1347         646 :         continue;
    1348             : 
    1349             :       // Only consider uses of the phi.
    1350        9710 :       LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
    1351        9710 :       if (!LRQ.valueIn()->isPHIDef())
    1352         646 :         continue;
    1353             : 
    1354             :       // Assume that a path spanning two iterations is a cycle, which could
    1355             :       // overestimate in strange cases. This allows cyclic latency to be
    1356             :       // estimated as the minimum slack of the vreg's depth or height.
    1357             :       unsigned CyclicLatency = 0;
    1358        4209 :       if (LiveOutDepth > SU->getDepth())
    1359        4207 :         CyclicLatency = LiveOutDepth - SU->getDepth();
    1360             : 
    1361        4209 :       unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
    1362        4209 :       if (LiveInHeight > LiveOutHeight) {
    1363        4208 :         if (LiveInHeight - LiveOutHeight < CyclicLatency)
    1364             :           CyclicLatency = LiveInHeight - LiveOutHeight;
    1365             :       } else
    1366             :         CyclicLatency = 0;
    1367             : 
    1368             :       LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
    1369             :                         << SU->NodeNum << ") = " << CyclicLatency << "c\n");
    1370        4209 :       if (CyclicLatency > MaxCyclicLatency)
    1371             :         MaxCyclicLatency = CyclicLatency;
    1372             :     }
    1373             :   }
    1374             :   LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
    1375             :   return MaxCyclicLatency;
    1376             : }
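
// --- Editorial sketch (not part of MachineScheduler.cpp): the slack
// computation from the doc comment above, with the example's unit-latency
// numbers plugged in. Clamping each slack at zero matches the guarded
// subtractions in the loop above. ---
#include <algorithm>
#include <cassert>

static unsigned cyclicLatency(unsigned LiveOutDepth, unsigned LiveOutHeight,
                              unsigned UseDepth, unsigned UseHeight,
                              unsigned DefLatency) {
  unsigned DepthSlack = LiveOutDepth > UseDepth ? LiveOutDepth - UseDepth : 0;
  unsigned LiveInHeight = UseHeight + DefLatency;
  unsigned HeightSlack =
      LiveInHeight > LiveOutHeight ? LiveInHeight - LiveOutHeight : 0;
  // A path spanning two iterations is assumed to be a cycle, so the cyclic
  // latency is the minimum of the depth and height slacks.
  return std::min(DepthSlack, HeightSlack);
}

int main() {
  // From the comment: LiveOutDepth = 3, LiveOutHeight = 2, depth(b) = 1,
  // height(b) = 3, unit latency => min(3 - 1, 4 - 2) = 2 cycles.
  assert(cyclicLatency(3, 2, 1, 3, 1) == 2);
}
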
    1377             : 
     1378             : /// Release ExitSU predecessors and set up the scheduler queues. Reposition
    1379             : /// the Top RP tracker in case the region beginning has changed.
    1380      372738 : void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots,
    1381             :                                    ArrayRef<SUnit*> BotRoots) {
    1382      372738 :   ScheduleDAGMI::initQueues(TopRoots, BotRoots);
    1383      372738 :   if (ShouldTrackPressure) {
    1384             :     assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
    1385             :     TopRPTracker.setPos(CurrentTop);
    1386             :   }
    1387      372738 : }
    1388             : 
    1389             : /// Move an instruction and update register pressure.
    1390     2345003 : void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
    1391             :   // Move the instruction to its new location in the instruction stream.
    1392     2345003 :   MachineInstr *MI = SU->getInstr();
    1393             : 
    1394     2345003 :   if (IsTopNode) {
    1395             :     assert(SU->isTopReady() && "node still has unscheduled dependencies");
    1396       85352 :     if (&*CurrentTop == MI)
    1397       71835 :       CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
    1398             :     else {
    1399       13517 :       moveInstruction(MI, CurrentTop);
    1400             :       TopRPTracker.setPos(MI);
    1401             :     }
    1402             : 
    1403       85352 :     if (ShouldTrackPressure) {
    1404             :       // Update top scheduled pressure.
    1405       71731 :       RegisterOperands RegOpers;
    1406       71731 :       RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
    1407       71731 :       if (ShouldTrackLaneMasks) {
    1408             :         // Adjust liveness and add missing dead+read-undef flags.
    1409      114184 :         SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
    1410       57092 :         RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
    1411             :       } else {
    1412             :         // Adjust for missing dead-def flags.
    1413       14639 :         RegOpers.detectDeadDefs(*MI, *LIS);
    1414             :       }
    1415             : 
    1416       71731 :       TopRPTracker.advance(RegOpers);
    1417             :       assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
    1418             :       LLVM_DEBUG(dbgs() << "Top Pressure:\n"; dumpRegSetPressure(
    1419             :                      TopRPTracker.getRegSetPressureAtPos(), TRI););
    1420             : 
    1421       71731 :       updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
    1422             :     }
    1423             :   } else {
    1424             :     assert(SU->isBottomReady() && "node still has unscheduled dependencies");
    1425             :     MachineBasicBlock::iterator priorII =
    1426             :       priorNonDebug(CurrentBottom, CurrentTop);
    1427     2259651 :     if (&*priorII == MI)
    1428     2095006 :       CurrentBottom = priorII;
    1429             :     else {
    1430      164645 :       if (&*CurrentTop == MI) {
    1431       18594 :         CurrentTop = nextIfDebug(++CurrentTop, priorII);
    1432             :         TopRPTracker.setPos(CurrentTop);
    1433             :       }
    1434      164645 :       moveInstruction(MI, CurrentBottom);
    1435      164645 :       CurrentBottom = MI;
    1436             :       BotRPTracker.setPos(CurrentBottom);
    1437             :     }
    1438     2259651 :     if (ShouldTrackPressure) {
    1439     1351432 :       RegisterOperands RegOpers;
    1440     1351432 :       RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
    1441     1351432 :       if (ShouldTrackLaneMasks) {
    1442             :         // Adjust liveness and add missing dead+read-undef flags.
    1443      501904 :         SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
    1444      250952 :         RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
    1445             :       } else {
    1446             :         // Adjust for missing dead-def flags.
    1447     1100480 :         RegOpers.detectDeadDefs(*MI, *LIS);
    1448             :       }
    1449             : 
    1450     1351432 :       if (BotRPTracker.getPos() != CurrentBottom)
    1451     1207101 :         BotRPTracker.recedeSkipDebugValues();
    1452             :       SmallVector<RegisterMaskPair, 8> LiveUses;
    1453     1351432 :       BotRPTracker.recede(RegOpers, &LiveUses);
    1454             :       assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
    1455             :       LLVM_DEBUG(dbgs() << "Bottom Pressure:\n"; dumpRegSetPressure(
    1456             :                      BotRPTracker.getRegSetPressureAtPos(), TRI););
    1457             : 
    1458     1351432 :       updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
    1459     1351432 :       updatePressureDiffs(LiveUses);
    1460             :     }
    1461             :   }
    1462     2345003 : }
    1463             : 
    1464             : //===----------------------------------------------------------------------===//
    1465             : // BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
    1466             : //===----------------------------------------------------------------------===//
    1467             : 
    1468             : namespace {
    1469             : 
    1470             : /// Post-process the DAG to create cluster edges between neighboring
    1471             : /// loads or between neighboring stores.
    1472             : class BaseMemOpClusterMutation : public ScheduleDAGMutation {
    1473             :   struct MemOpInfo {
    1474             :     SUnit *SU;
    1475             :     unsigned BaseReg;
    1476             :     int64_t Offset;
    1477             : 
    1478             :     MemOpInfo(SUnit *su, unsigned reg, int64_t ofs)
    1479       54696 :         : SU(su), BaseReg(reg), Offset(ofs) {}
    1480             : 
     1481             :     bool operator<(const MemOpInfo &RHS) const {
    1482      103667 :       return std::tie(BaseReg, Offset, SU->NodeNum) <
    1483      106840 :              std::tie(RHS.BaseReg, RHS.Offset, RHS.SU->NodeNum);
    1484             :     }
    1485             :   };
    1486             : 
    1487             :   const TargetInstrInfo *TII;
    1488             :   const TargetRegisterInfo *TRI;
    1489             :   bool IsLoad;
    1490             : 
    1491             : public:
    1492             :   BaseMemOpClusterMutation(const TargetInstrInfo *tii,
    1493             :                            const TargetRegisterInfo *tri, bool IsLoad)
    1494       61280 :       : TII(tii), TRI(tri), IsLoad(IsLoad) {}
    1495             : 
    1496             :   void apply(ScheduleDAGInstrs *DAGInstrs) override;
    1497             : 
    1498             : protected:
    1499             :   void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG);
    1500             : };
    1501             : 
    1502       30640 : class StoreClusterMutation : public BaseMemOpClusterMutation {
    1503             : public:
    1504             :   StoreClusterMutation(const TargetInstrInfo *tii,
    1505             :                        const TargetRegisterInfo *tri)
    1506       30640 :       : BaseMemOpClusterMutation(tii, tri, false) {}
    1507             : };
    1508             : 
    1509       30640 : class LoadClusterMutation : public BaseMemOpClusterMutation {
    1510             : public:
    1511             :   LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
    1512       30640 :       : BaseMemOpClusterMutation(tii, tri, true) {}
    1513             : };
    1514             : 
    1515             : } // end anonymous namespace
    1516             : 
    1517             : namespace llvm {
    1518             : 
    1519             : std::unique_ptr<ScheduleDAGMutation>
    1520       30640 : createLoadClusterDAGMutation(const TargetInstrInfo *TII,
    1521             :                              const TargetRegisterInfo *TRI) {
    1522       30640 :   return EnableMemOpCluster ? llvm::make_unique<LoadClusterMutation>(TII, TRI)
    1523       30640 :                             : nullptr;
    1524             : }
    1525             : 
    1526             : std::unique_ptr<ScheduleDAGMutation>
    1527       30640 : createStoreClusterDAGMutation(const TargetInstrInfo *TII,
    1528             :                               const TargetRegisterInfo *TRI) {
    1529       30640 :   return EnableMemOpCluster ? llvm::make_unique<StoreClusterMutation>(TII, TRI)
    1530       30640 :                             : nullptr;
    1531             : }
    1532             : 
    1533             : } // end namespace llvm
    1534             : 
    1535       54861 : void BaseMemOpClusterMutation::clusterNeighboringMemOps(
    1536             :     ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) {
    1537             :   SmallVector<MemOpInfo, 32> MemOpRecords;
    1538      242683 :   for (SUnit *SU : MemOps) {
    1539             :     unsigned BaseReg;
    1540             :     int64_t Offset;
    1541       93911 :     if (TII->getMemOpBaseRegImmOfs(*SU->getInstr(), BaseReg, Offset, TRI))
    1542      109392 :       MemOpRecords.push_back(MemOpInfo(SU, BaseReg, Offset));
    1543             :   }
    1544       54861 :   if (MemOpRecords.size() < 2)
    1545             :     return;
    1546             : 
    1547             :   llvm::sort(MemOpRecords.begin(), MemOpRecords.end());
    1548             :   unsigned ClusterLength = 1;
    1549       12508 :   for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
    1550       45516 :     SUnit *SUa = MemOpRecords[Idx].SU;
    1551       45516 :     SUnit *SUb = MemOpRecords[Idx+1].SU;
    1552       68274 :     if (TII->shouldClusterMemOps(*SUa->getInstr(), MemOpRecords[Idx].BaseReg,
    1553       22758 :                                  *SUb->getInstr(), MemOpRecords[Idx+1].BaseReg,
    1554       57468 :                                  ClusterLength) &&
    1555       34710 :         DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
    1556             :       LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
    1557             :                         << SUb->NodeNum << ")\n");
    1558             :       // Copy successor edges from SUa to SUb. Interleaving computation
    1559             :       // dependent on SUa can prevent load combining due to register reuse.
    1560             :       // Predecessor edges do not need to be copied from SUb to SUa since nearby
    1561             :       // loads should have effectively the same inputs.
    1562      120138 :       for (const SDep &Succ : SUa->Succs) {
    1563       54098 :         if (Succ.getSUnit() == SUb)
    1564             :           continue;
    1565             :         LLVM_DEBUG(dbgs() << "  Copy Succ SU(" << Succ.getSUnit()->NodeNum
    1566             :                           << ")\n");
    1567       41670 :         DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
    1568             :       }
    1569       11942 :       ++ClusterLength;
    1570             :     } else
    1571             :       ClusterLength = 1;
    1572             :   }
    1573             : }
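
// --- Editorial sketch (not part of MachineScheduler.cpp): the clustering
// idea above. Records are sorted by (BaseReg, Offset) so accesses off the
// same base register become neighbors, and adjacent pairs are then chained.
// Plain hypothetical structs stand in for SUnits and cluster edges. ---
#include <algorithm>
#include <cassert>
#include <tuple>
#include <utility>
#include <vector>

struct MemOp { unsigned Node, BaseReg; long Offset; };

static std::vector<std::pair<unsigned, unsigned>>
clusterAdjacent(std::vector<MemOp> Ops) {
  std::sort(Ops.begin(), Ops.end(), [](const MemOp &A, const MemOp &B) {
    return std::tie(A.BaseReg, A.Offset, A.Node) <
           std::tie(B.BaseReg, B.Offset, B.Node);
  });
  std::vector<std::pair<unsigned, unsigned>> Edges;
  for (unsigned I = 0; I + 1 < Ops.size(); ++I)
    if (Ops[I].BaseReg == Ops[I + 1].BaseReg) // same base: keep them together
      Edges.push_back({Ops[I].Node, Ops[I + 1].Node});
  return Edges;
}

int main() {
  // Nodes 2 and 0 access r7+0 and r7+8; node 1 uses a different base.
  auto E = clusterAdjacent({{0, 7, 8}, {1, 9, 0}, {2, 7, 0}});
  assert(E.size() == 1 && E[0] == std::make_pair(2u, 0u));
}
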
    1574             : 
     1575             : /// Callback from DAG postProcessing to create cluster edges for loads or stores.
    1576       67012 : void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
    1577             :   ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
    1578             : 
    1579             :   // Map DAG NodeNum to store chain ID.
    1580             :   DenseMap<unsigned, unsigned> StoreChainIDs;
    1581             :   // Map each store chain to a set of dependent MemOps.
    1582       67012 :   SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
    1583      827558 :   for (SUnit &SU : DAG->SUnits) {
    1584     1860702 :     if ((IsLoad && !SU.getInstr()->mayLoad()) ||
    1585      813794 :         (!IsLoad && !SU.getInstr()->mayStore()))
    1586      666635 :       continue;
    1587             : 
    1588      187822 :     unsigned ChainPredID = DAG->SUnits.size();
    1589      423743 :     for (const SDep &Pred : SU.Preds) {
    1590      198511 :       if (Pred.isCtrl()) {
    1591       33595 :         ChainPredID = Pred.getSUnit()->NodeNum;
    1592       33595 :         break;
    1593             :       }
    1594             :     }
    1595             :     // Check if this chain-like pred has been seen
    1596             :     // before. ChainPredID==MaxNodeID at the top of the schedule.
    1597       93911 :     unsigned NumChains = StoreChainDependents.size();
    1598             :     std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
    1599      187822 :       StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
    1600       93911 :     if (Result.second)
    1601       54861 :       StoreChainDependents.resize(NumChains + 1);
    1602      187822 :     StoreChainDependents[Result.first->second].push_back(&SU);
    1603             :   }
    1604             : 
    1605             :   // Iterate over the store chains.
    1606      176734 :   for (auto &SCD : StoreChainDependents)
    1607       54861 :     clusterNeighboringMemOps(SCD, DAG);
    1608       67012 : }
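
// --- Editorial sketch (not part of MachineScheduler.cpp): the chain
// bucketing in apply() above, restated with an unordered_map. Each mem op is
// keyed by its first control-dependence predecessor (a sentinel when it has
// none), and ops sharing a key land in the same bucket; values are made up. ---
#include <cassert>
#include <unordered_map>
#include <utility>
#include <vector>

int main() {
  const unsigned Sentinel = ~0u; // stands in for "no ctrl pred" (MaxNodeID)
  // (node, chain pred) pairs.
  std::vector<std::pair<unsigned, unsigned>> Ops = {
      {0, Sentinel}, {1, 4}, {2, 4}, {3, Sentinel}};

  std::unordered_map<unsigned, unsigned> ChainIDs; // chain pred -> bucket
  std::vector<std::vector<unsigned>> Buckets;
  for (auto [Node, Pred] : Ops) {
    auto [It, Inserted] = ChainIDs.insert({Pred, (unsigned)Buckets.size()});
    if (Inserted)
      Buckets.emplace_back();
    Buckets[It->second].push_back(Node);
  }
  assert(Buckets.size() == 2);       // one bucket per distinct chain pred
  assert(Buckets[0].size() == 2 && Buckets[1].size() == 2);
}
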
    1609             : 
    1610             : //===----------------------------------------------------------------------===//
    1611             : // CopyConstrain - DAG post-processing to encourage copy elimination.
    1612             : //===----------------------------------------------------------------------===//
    1613             : 
    1614             : namespace {
    1615             : 
    1616             : /// Post-process the DAG to create weak edges from all uses of a copy to
    1617             : /// the one use that defines the copy's source vreg, most likely an induction
    1618             : /// variable increment.
    1619      134395 : class CopyConstrain : public ScheduleDAGMutation {
    1620             :   // Transient state.
    1621             :   SlotIndex RegionBeginIdx;
    1622             : 
    1623             :   // RegionEndIdx is the slot index of the last non-debug instruction in the
    1624             :   // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
    1625             :   SlotIndex RegionEndIdx;
    1626             : 
    1627             : public:
    1628      134395 :   CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
    1629             : 
    1630             :   void apply(ScheduleDAGInstrs *DAGInstrs) override;
    1631             : 
    1632             : protected:
    1633             :   void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
    1634             : };
    1635             : 
    1636             : } // end anonymous namespace
    1637             : 
    1638             : namespace llvm {
    1639             : 
    1640             : std::unique_ptr<ScheduleDAGMutation>
    1641      134395 : createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
    1642             :                                const TargetRegisterInfo *TRI) {
    1643      134395 :   return llvm::make_unique<CopyConstrain>(TII, TRI);
    1644             : }
    1645             : 
    1646             : } // end namespace llvm
    1647             : 
    1648             : /// constrainLocalCopy handles two possibilities:
    1649             : /// 1) Local src:
    1650             : /// I0:     = dst
    1651             : /// I1: src = ...
    1652             : /// I2:     = dst
    1653             : /// I3: dst = src (copy)
    1654             : /// (create pred->succ edges I0->I1, I2->I1)
    1655             : ///
    1656             : /// 2) Local copy:
    1657             : /// I0: dst = src (copy)
    1658             : /// I1:     = dst
    1659             : /// I2: src = ...
    1660             : /// I3:     = dst
    1661             : /// (create pred->succ edges I1->I2, I3->I2)
    1662             : ///
    1663             : /// Although the MachineScheduler is currently constrained to single blocks,
    1664             : /// this algorithm should handle extended blocks. An EBB is a set of
    1665             : /// contiguously numbered blocks such that the previous block in the EBB is
    1666             : /// always the single predecessor.
    1667      616761 : void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
    1668      616761 :   LiveIntervals *LIS = DAG->getLIS();
    1669      616761 :   MachineInstr *Copy = CopySU->getInstr();
    1670             : 
    1671             :   // Check for pure vreg copies.
    1672      616761 :   const MachineOperand &SrcOp = Copy->getOperand(1);
    1673      616761 :   unsigned SrcReg = SrcOp.getReg();
    1674      616761 :   if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
    1675      603774 :     return;
    1676             : 
    1677             :   const MachineOperand &DstOp = Copy->getOperand(0);
    1678      376469 :   unsigned DstReg = DstOp.getReg();
    1679      412162 :   if (!TargetRegisterInfo::isVirtualRegister(DstReg) || DstOp.isDead())
    1680             :     return;
    1681             : 
    1682             :   // Check if either the dest or source is local. If it's live across a back
    1683             :   // edge, it's not local. Note that if both vregs are live across the back
     1684             :   // edge, we cannot successfully constrain the copy without cyclic scheduling.
    1685             :   // If both the copy's source and dest are local live intervals, then we
    1686             :   // should treat the dest as the global for the purpose of adding
    1687             :   // constraints. This adds edges from source's other uses to the copy.
    1688             :   unsigned LocalReg = SrcReg;
    1689             :   unsigned GlobalReg = DstReg;
    1690       35667 :   LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
    1691       35667 :   if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
    1692             :     LocalReg = DstReg;
    1693             :     GlobalReg = SrcReg;
    1694       11404 :     LocalLI = &LIS->getInterval(LocalReg);
    1695       11404 :     if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
    1696             :       return;
    1697             :   }
    1698       30651 :   LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
    1699             : 
    1700             :   // Find the global segment after the start of the local LI.
    1701       61302 :   LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
    1702             :   // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
    1703             :   // local live range. We could create edges from other global uses to the local
    1704             :   // start, but the coalescer should have already eliminated these cases, so
    1705             :   // don't bother dealing with it.
    1706       30651 :   if (GlobalSegment == GlobalLI->end())
    1707             :     return;
    1708             : 
    1709             :   // If GlobalSegment is killed at the LocalLI->start, the call to find()
    1710             :   // returned the next global segment. But if GlobalSegment overlaps with
    1711             :   // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
    1712             :   // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
    1713       30635 :   if (GlobalSegment->contains(LocalLI->beginIndex()))
    1714        6741 :     ++GlobalSegment;
    1715             : 
    1716       30635 :   if (GlobalSegment == GlobalLI->end())
    1717             :     return;
    1718             : 
    1719             :   // Check if GlobalLI contains a hole in the vicinity of LocalLI.
    1720       26852 :   if (GlobalSegment != GlobalLI->begin()) {
    1721             :     // Two address defs have no hole.
    1722        3060 :     if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
    1723             :                                GlobalSegment->start)) {
    1724             :       return;
    1725             :     }
    1726             :     // If the prior global segment may be defined by the same two-address
    1727             :     // instruction that also defines LocalLI, then can't make a hole here.
    1728        1708 :     if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
    1729             :                                LocalLI->beginIndex())) {
    1730             :       return;
    1731             :     }
    1732             :     // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
    1733             :     // it would be a disconnected component in the live range.
    1734             :     assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
    1735             :            "Disconnected LRG within the scheduling region.");
    1736             :   }
    1737             :   MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
    1738       25500 :   if (!GlobalDef)
    1739             :     return;
    1740             : 
    1741             :   SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
    1742       24373 :   if (!GlobalSU)
    1743             :     return;
    1744             : 
    1745             :   // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
    1746             :   // constraining the uses of the last local def to precede GlobalDef.
    1747             :   SmallVector<SUnit*,8> LocalUses;
    1748       48746 :   const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
    1749             :   MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
    1750             :   SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
    1751       69701 :   for (const SDep &Succ : LastLocalSU->Succs) {
    1752       35814 :     if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
    1753             :       continue;
    1754       45347 :     if (Succ.getSUnit() == GlobalSU)
    1755             :       continue;
    1756       18913 :     if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
    1757             :       return;
    1758        7605 :     LocalUses.push_back(Succ.getSUnit());
    1759             :   }
    1760             :   // Open the top of the GlobalLI hole by constraining any earlier global uses
    1761             :   // to precede the start of LocalLI.
    1762             :   SmallVector<SUnit*,8> GlobalUses;
    1763             :   MachineInstr *FirstLocalDef =
    1764             :     LIS->getInstructionFromIndex(LocalLI->beginIndex());
    1765             :   SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
    1766       40929 :   for (const SDep &Pred : GlobalSU->Preds) {
    1767       27042 :     if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
    1768             :       continue;
    1769        1371 :     if (Pred.getSUnit() == FirstLocalSU)
    1770             :       continue;
    1771         585 :     if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
    1772             :       return;
    1773         507 :     GlobalUses.push_back(Pred.getSUnit());
    1774             :   }
    1775             :   LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
    1776             :   // Add the weak edges.
    1777        1882 :   for (SmallVectorImpl<SUnit*>::const_iterator
    1778       14869 :          I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
    1779             :     LLVM_DEBUG(dbgs() << "  Local use SU(" << (*I)->NodeNum << ") -> SU("
    1780             :                       << GlobalSU->NodeNum << ")\n");
    1781        3764 :     DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
    1782             :   }
    1783         491 :   for (SmallVectorImpl<SUnit*>::const_iterator
    1784       13478 :          I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
    1785             :     LLVM_DEBUG(dbgs() << "  Global use SU(" << (*I)->NodeNum << ") -> SU("
    1786             :                       << FirstLocalSU->NodeNum << ")\n");
    1787         982 :     DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
    1788             :   }
    1789             : }
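
// --- Editorial sketch (not part of MachineScheduler.cpp): a simplified
// stand-in for the LiveInterval::isLocal test that gates the transformation
// above. A vreg participates as the "local" side only if its live range is
// wholly contained in the scheduling region; a range extending past the
// region (e.g. across the back edge) is treated as global. Hypothetical
// unsigned positions stand in for SlotIndexes. ---
#include <cassert>

struct Interval { unsigned Begin, End; }; // half-open [Begin, End)

static bool isLocal(Interval LI, unsigned RegionBegin, unsigned RegionEnd) {
  return RegionBegin <= LI.Begin && LI.End <= RegionEnd;
}

int main() {
  assert(isLocal({12, 20}, 10, 30));  // contained in the region: local
  assert(!isLocal({12, 40}, 10, 30)); // live out past the region: global
}
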
    1790             : 
    1791             : /// Callback from DAG postProcessing to create weak edges to encourage
    1792             : /// copy elimination.
    1793      351129 : void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
    1794             :   ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
    1795             :   assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
    1796             : 
    1797             :   MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
    1798      351129 :   if (FirstPos == DAG->end())
    1799             :     return;
    1800      701820 :   RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
    1801      701820 :   RegionEndIdx = DAG->getLIS()->getInstructionIndex(
    1802      350910 :       *priorNonDebug(DAG->end(), DAG->begin()));
    1803             : 
    1804     2334261 :   for (SUnit &SU : DAG->SUnits) {
    1805     3966702 :     if (!SU.getInstr()->isCopy())
    1806     1366590 :       continue;
    1807             : 
    1808      616761 :     constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
    1809             :   }
    1810             : }
    1811             : 
    1812             : //===----------------------------------------------------------------------===//
    1813             : // MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
    1814             : // and possibly other custom schedulers.
    1815             : //===----------------------------------------------------------------------===//
    1816             : 
    1817             : static const unsigned InvalidCycle = ~0U;
    1818             : 
    1819      631982 : SchedBoundary::~SchedBoundary() { delete HazardRec; }
    1820             : 
    1821             : /// Given a Count of resource usage and a Latency value, return true if a
    1822             : /// SchedBoundary becomes resource limited.
    1823             : static bool checkResourceLimit(unsigned LFactor, unsigned Count,
    1824             :                                unsigned Latency) {
    1825     3457214 :   return (int)(Count - (Latency * LFactor)) > (int)LFactor;
    1826             : }
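
// --- Editorial sketch (not part of MachineScheduler.cpp): checkResourceLimit
// above with concrete, made-up numbers. Counts are pre-scaled so that one
// cycle of full issue equals LFactor units; the zone counts as resource
// limited once the executed count exceeds what the critical latency alone
// would occupy by more than one cycle's worth of units. ---
#include <cassert>

static bool resourceLimited(unsigned LFactor, unsigned Count,
                            unsigned Latency) {
  return (int)(Count - (Latency * LFactor)) > (int)LFactor;
}

int main() {
  // LFactor = 2 units/cycle. 10 units against 3 cycles of latency:
  // 10 - 6 = 4 > 2, so resources are the bottleneck.
  assert(resourceLimited(2, 10, 3));
  // 5 units against the same latency: 5 - 6 = -1 <= 2, latency limited.
  assert(!resourceLimited(2, 5, 3));
}
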
    1827             : 
    1828     1063682 : void SchedBoundary::reset() {
    1829             :   // A new HazardRec is created for each DAG and owned by SchedBoundary.
    1830             :   // Destroying and reconstructing it is very expensive though. So keep
    1831             :   // invalid, placeholder HazardRecs.
    1832     1063682 :   if (HazardRec && HazardRec->isEnabled()) {
    1833        6182 :     delete HazardRec;
    1834        6182 :     HazardRec = nullptr;
    1835             :   }
    1836             :   Available.clear();
    1837             :   Pending.clear();
    1838     1063682 :   CheckPending = false;
    1839     1063682 :   CurrCycle = 0;
    1840     1063682 :   CurrMOps = 0;
    1841     1063682 :   MinReadyCycle = std::numeric_limits<unsigned>::max();
    1842     1063682 :   ExpectedLatency = 0;
    1843     1063682 :   DependentLatency = 0;
    1844     1063682 :   RetiredMOps = 0;
    1845     1063682 :   MaxExecutedResCount = 0;
    1846     1063682 :   ZoneCritResIdx = 0;
    1847     1063682 :   IsResourceLimited = false;
    1848             :   ReservedCycles.clear();
    1849             : #ifndef NDEBUG
    1850             :   // Track the maximum number of stall cycles that could arise either from the
    1851             :   // latency of a DAG edge or the number of cycles that a processor resource is
    1852             :   // reserved (SchedBoundary::ReservedCycles).
    1853             :   MaxObservedStall = 0;
    1854             : #endif
    1855             :   // Reserve a zero-count for invalid CritResIdx.
    1856     1063682 :   ExecutedResCounts.resize(1);
    1857             :   assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
    1858     1063682 : }
    1859             : 
    1860      382272 : void SchedRemainder::
    1861             : init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
    1862             :   reset();
    1863      382272 :   if (!SchedModel->hasInstrSchedModel())
    1864             :     return;
    1865      277490 :   RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
    1866     1056396 :   for (SUnit &SU : DAG->SUnits) {
    1867      917651 :     const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
    1868     1835302 :     RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
    1869      917651 :       * SchedModel->getMicroOpFactor();
    1870     1629947 :     for (TargetSchedModel::ProcResIter
    1871      917651 :            PI = SchedModel->getWriteProcResBegin(SC),
    1872     2547598 :            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
    1873     1629947 :       unsigned PIdx = PI->ProcResourceIdx;
    1874             :       unsigned Factor = SchedModel->getResourceFactor(PIdx);
    1875     1629947 :       RemainingCounts[PIdx] += (Factor * PI->Cycles);
    1876             :     }
    1877             :   }
    1878             : }
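
// --- Editorial sketch (not part of MachineScheduler.cpp): the remaining-work
// accounting in SchedRemainder::init above, with hypothetical per-resource
// factors. Scaling each use by its resource's factor puts all resources in a
// common unit so they can be compared later. ---
#include <cassert>
#include <vector>

struct ProcResUse { unsigned PIdx, Cycles; };

int main() {
  std::vector<unsigned> Factors = {0, 3, 2};  // index 0 reserved as invalid
  std::vector<unsigned> RemainingCounts(Factors.size(), 0);
  std::vector<ProcResUse> Uses = {{1, 2}, {2, 1}, {1, 1}};
  for (const ProcResUse &U : Uses)
    RemainingCounts[U.PIdx] += Factors[U.PIdx] * U.Cycles;
  assert(RemainingCounts[1] == 9 && RemainingCounts[2] == 2);
}
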
    1879             : 
    1880      747691 : void SchedBoundary::
    1881             : init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
    1882      747691 :   reset();
    1883      747691 :   DAG = dag;
    1884      747691 :   SchedModel = smodel;
    1885      747691 :   Rem = rem;
    1886      747691 :   if (SchedModel->hasInstrSchedModel()) {
    1887      550554 :     ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
    1888      550554 :     ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle);
    1889             :   }
    1890      747691 : }
    1891             : 
    1892             : /// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
    1893             : /// these "soft stalls" differently than the hard stall cycles based on CPU
    1894             : /// resources and computed by checkHazard(). A fully in-order model
    1895             : /// (MicroOpBufferSize==0) will not make use of this since instructions are not
    1896             : /// available for scheduling until they are ready. However, a weaker in-order
    1897             : /// model may use this for heuristics. For example, if a processor has in-order
    1898             : /// behavior when reading certain resources, this may come into play.
    1899    21953414 : unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
    1900    21953414 :   if (!SU->isUnbuffered)
    1901             :     return 0;
    1902             : 
    1903     2459575 :   unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
    1904     2459575 :   if (ReadyCycle > CurrCycle)
    1905       62682 :     return ReadyCycle - CurrCycle;
    1906             :   return 0;
    1907             : }
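                     :
                     : // [Illustrative sketch, not from the original source file: a standalone
                     : // model of the soft-stall rule above. The cycle values are hypothetical;
                     : // the real code reads SUnit::TopReadyCycle or SUnit::BotReadyCycle.]
                     : static unsigned softStallModel(bool IsUnbuffered, unsigned ReadyCycle,
                     :                                unsigned CurrCycle) {
                     :   if (!IsUnbuffered)
                     :     return 0; // Buffered ops never report soft stalls.
                     :   return ReadyCycle > CurrCycle ? ReadyCycle - CurrCycle : 0;
                     : }
                     : // e.g. softStallModel(true, /*ReadyCycle=*/7, /*CurrCycle=*/5) == 2 cycles.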
    1908             : 
    1909             : /// Compute the next cycle at which the given processor resource can be
    1910             : /// scheduled.
    1911     1632829 : unsigned SchedBoundary::
    1912             : getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
    1913     3265658 :   unsigned NextUnreserved = ReservedCycles[PIdx];
    1914             :   // If this resource has never been used, always return cycle zero.
    1915     1632829 :   if (NextUnreserved == InvalidCycle)
    1916             :     return 0;
    1917             :   // For bottom-up scheduling add the cycles needed for the current operation.
    1918        2232 :   if (!isTop())
    1919         944 :     NextUnreserved += Cycles;
    1920             :   return NextUnreserved;
    1921             : }
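                     :
                     : // [Illustrative note with hypothetical numbers: if ReservedCycles[PIdx] ==
                     : // 12, a top-down query returns 12, while a bottom-up query for an op that
                     : // holds the resource for 3 cycles returns 12 + 3 = 15. A resource never
                     : // used so far (InvalidCycle) is always reported available at cycle 0.]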
    1922             : 
    1923             : /// Does this SU have a hazard within the current instruction group.
    1924             : ///
    1925             : /// The scheduler supports two modes of hazard recognition. The first is the
    1926             : /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
    1927             : /// supports highly complicated in-order reservation tables
    1928             : /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
    1929             : ///
    1930             : /// The second is a streamlined mechanism that checks for hazards based on
    1931             : /// simple counters that the scheduler itself maintains. It explicitly checks
    1932             : /// for instruction dispatch limitations, including the number of micro-ops that
    1933             : /// can dispatch per cycle.
    1934             : ///
    1935             : /// TODO: Also check whether the SU must start a new group.
    1936    10679126 : bool SchedBoundary::checkHazard(SUnit *SU) {
    1937    10679126 :   if (HazardRec->isEnabled()
    1938    10679126 :       && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
    1939             :     return true;
    1940             :   }
    1941             : 
    1942    10657982 :   unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
    1943    10657982 :   if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
    1944             :     LLVM_DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="
    1945             :                       << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
    1946             :     return true;
    1947             :   }
    1948             : 
    1949    18648241 :   if (CurrMOps > 0 &&
    1950     8076692 :       ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
    1951     7973754 :        (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
    1952             :     LLVM_DEBUG(dbgs() << "  hazard: SU(" << SU->NodeNum << ") must "
    1953             :                       << (isTop() ? "begin" : "end") << " group\n");
    1954             :     return true;
    1955             :   }
    1956             : 
    1957    10622710 :   if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
    1958        1891 :     const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
    1959        1927 :     for (const MCWriteProcResEntry &PE :
    1960             :           make_range(SchedModel->getWriteProcResBegin(SC),
    1961        5709 :                      SchedModel->getWriteProcResEnd(SC))) {
    1962        2521 :       unsigned ResIdx = PE.ProcResourceIdx;
    1963        2521 :       unsigned Cycles = PE.Cycles;
    1964        2521 :       unsigned NRCycle = getNextResourceCycle(ResIdx, Cycles);
    1965        2521 :       if (NRCycle > CurrCycle) {
    1966             : #ifndef NDEBUG
    1967             :         MaxObservedStall = std::max(Cycles, MaxObservedStall);
    1968             : #endif
    1969             :         LLVM_DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") "
    1970             :                           << SchedModel->getResourceName(ResIdx) << "="
    1971             :                           << NRCycle << "c\n");
    1972             :         return true;
    1973             :       }
    1974             :     }
    1975             :   }
    1976             :   return false;
    1977             : }
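                     :
                     : // [Illustrative sketch, not from the original source file: a standalone
                     : // model of the dispatch-group check above, with hypothetical widths. The
                     : // real checkHazard also consults the ScheduleHazardRecognizer, group
                     : // begin/end constraints, and reserved resources.]
                     : static bool issueGroupHazard(unsigned CurrMOps, unsigned UOps,
                     :                              unsigned IssueWidth) {
                     :   // A partially filled group overflows if this op's micro-ops do not fit.
                     :   return CurrMOps > 0 && CurrMOps + UOps > IssueWidth;
                     : }
                     : // e.g. with IssueWidth = 4: issueGroupHazard(3, 2, 4) is true (3 + 2 > 4),
                     : // but issueGroupHazard(0, 6, 4) is false -- an empty group always accepts.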
    1978             : 
    1979             : // Find the unscheduled node in ReadySUs with the highest latency.
    1980     1195630 : unsigned SchedBoundary::
    1981             : findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
    1982             :   SUnit *LateSU = nullptr;
    1983             :   unsigned RemLatency = 0;
    1984    12287648 :   for (SUnit *SU : ReadySUs) {
    1985     5546009 :     unsigned L = getUnscheduledLatency(SU);
    1986     5546009 :     if (L > RemLatency) {
    1987             :       RemLatency = L;
    1988             :       LateSU = SU;
    1989             :     }
    1990             :   }
    1991             :   if (LateSU) {
    1992             :     LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("
    1993             :                       << LateSU->NodeNum << ") " << RemLatency << "c\n");
    1994             :   }
    1995     1195630 :   return RemLatency;
    1996             : }
    1997             : 
    1998             : // Count resources in this zone and the remaining unscheduled
    1999             : // instructions. Return the max count, scaled. Set OtherCritIdx to the critical
    2000             : // resource index, or zero if the zone is issue limited.
    2001      571976 : unsigned SchedBoundary::
    2002             : getOtherResourceCount(unsigned &OtherCritIdx) {
    2003      571976 :   OtherCritIdx = 0;
    2004      571976 :   if (!SchedModel->hasInstrSchedModel())
    2005             :     return 0;
    2006             : 
    2007      392218 :   unsigned OtherCritCount = Rem->RemIssueCount
    2008      392218 :     + (RetiredMOps * SchedModel->getMicroOpFactor());
    2009             :   LLVM_DEBUG(dbgs() << "  " << Available.getName() << " + Remain MOps: "
    2010             :                     << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
    2011     2521672 :   for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
    2012     2913890 :        PIdx != PEnd; ++PIdx) {
    2013     2521672 :     unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
    2014     2521672 :     if (OtherCount > OtherCritCount) {
    2015             :       OtherCritCount = OtherCount;
    2016       16194 :       OtherCritIdx = PIdx;
    2017             :     }
    2018             :   }
    2019             :   if (OtherCritIdx) {
    2020             :     LLVM_DEBUG(
    2021             :         dbgs() << "  " << Available.getName() << " + Remain CritRes: "
    2022             :                << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
    2023             :                << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
    2024             :   }
    2025      392218 :   return OtherCritCount;
    2026             : }
    2027             : 
    2028     3329175 : void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
    2029             :   assert(SU->getInstr() && "Scheduled SUnit must have instr");
    2030             : 
    2031             : #ifndef NDEBUG
    2032             :   // ReadyCycle was bumped up to CurrCycle when this node was
    2033             :   // scheduled, but CurrCycle may have been eagerly advanced immediately after
    2034             :   // scheduling, so may now be greater than ReadyCycle.
    2035             :   if (ReadyCycle > CurrCycle)
    2036             :     MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
    2037             : #endif
    2038             : 
    2039     3329175 :   if (ReadyCycle < MinReadyCycle)
    2040      754970 :     MinReadyCycle = ReadyCycle;
    2041             : 
    2042             :   // Check for interlocks first. For the purpose of other heuristics, an
    2043             :   // instruction that cannot issue appears as if it's not in the ReadyQueue.
    2044     3329175 :   bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
    2045     6537848 :   if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU) ||
    2046             :       Available.size() >= ReadyListLimit)
    2047             :     Pending.push(SU);
    2048             :   else
    2049             :     Available.push(SU);
    2050     3329175 : }
    2051             : 
    2052             : /// Move the boundary of scheduled code by one cycle.
    2053      744657 : void SchedBoundary::bumpCycle(unsigned NextCycle) {
    2054      744657 :   if (SchedModel->getMicroOpBufferSize() == 0) {
    2055             :     assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
    2056             :            "MinReadyCycle uninitialized");
    2057      248569 :     if (MinReadyCycle > NextCycle)
    2058             :       NextCycle = MinReadyCycle;
    2059             :   }
    2060             :   // Update the current micro-ops, which will issue in the next cycle.
    2061      744657 :   unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
    2062      744657 :   CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
    2063             : 
    2064             :   // Decrement DependentLatency based on the next cycle.
    2065      744657 :   if ((NextCycle - CurrCycle) > DependentLatency)
    2066       96587 :     DependentLatency = 0;
    2067             :   else
    2068      648070 :     DependentLatency -= (NextCycle - CurrCycle);
    2069             : 
    2070      744657 :   if (!HazardRec->isEnabled()) {
    2071             :     // Bypass HazardRec virtual calls.
    2072      687078 :     CurrCycle = NextCycle;
    2073             :   } else {
    2074             :     // Bypass getHazardType calls in case of long latency.
    2075      218043 :     for (; CurrCycle != NextCycle; ++CurrCycle) {
    2076       80232 :       if (isTop())
    2077       41591 :         HazardRec->AdvanceCycle();
    2078             :       else
    2079       38641 :         HazardRec->RecedeCycle();
    2080             :     }
    2081             :   }
    2082      744657 :   CheckPending = true;
    2083      744657 :   IsResourceLimited =
    2084      744657 :       checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
    2085             :                          getScheduledLatency());
    2086             : 
    2087             :   LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()
    2088             :                     << '\n');
    2089      744657 : }
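                     :
                     : // [Illustrative worked example with hypothetical numbers: advancing from
                     : // CurrCycle = 10 to NextCycle = 12 on a 4-wide machine retires
                     : // DecMOps = 4 * (12 - 10) = 8 micro-ops, so CurrMOps = 5 clamps to 0, and
                     : // DependentLatency = 7 is reduced by the 2 elapsed cycles to 5.]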
    2090             : 
    2091     1629947 : void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
    2092     3259894 :   ExecutedResCounts[PIdx] += Count;
    2093     1629947 :   if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
    2094      528754 :     MaxExecutedResCount = ExecutedResCounts[PIdx];
    2095     1629947 : }
    2096             : 
    2097             : /// Add the given processor resource to this scheduled zone.
    2098             : ///
    2099             : /// \param Cycles indicates the number of consecutive (non-pipelined) cycles
    2100             : /// during which this resource is consumed.
    2101             : ///
    2102             : /// \return the next cycle at which the instruction may execute without
    2103             : /// oversubscribing resources.
    2104     1629947 : unsigned SchedBoundary::
    2105             : countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
    2106     1629947 :   unsigned Factor = SchedModel->getResourceFactor(PIdx);
    2107     1629947 :   unsigned Count = Factor * Cycles;
    2108             :   LLVM_DEBUG(dbgs() << "  " << SchedModel->getResourceName(PIdx) << " +"
    2109             :                     << Cycles << "x" << Factor << "u\n");
    2110             : 
    2111             :   // Update Executed resources counts.
    2112     1629947 :   incExecutedResources(PIdx, Count);
    2113             :   assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
    2114     3259894 :   Rem->RemainingCounts[PIdx] -= Count;
    2115             : 
    2116             :   // Check if this resource exceeds the current critical resource. If so, it
    2117             :   // becomes the critical resource.
    2118     3106031 :   if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
    2119      185402 :     ZoneCritResIdx = PIdx;
    2120             :     LLVM_DEBUG(dbgs() << "  *** Critical resource "
    2121             :                       << SchedModel->getResourceName(PIdx) << ": "
    2122             :                       << getResourceCount(PIdx) / SchedModel->getLatencyFactor()
    2123             :                       << "c\n");
    2124             :   }
    2125             :   // For reserved resources, record the highest cycle using the resource.
    2126     1629947 :   unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles);
    2127             :   if (NextAvailable > CurrCycle) {
    2128             :     LLVM_DEBUG(dbgs() << "  Resource conflict: "
    2129             :                       << SchedModel->getProcResource(PIdx)->Name
    2130             :                       << " reserved until @" << NextAvailable << "\n");
    2131             :   }
    2132     1629947 :   return NextAvailable;
    2133             : }
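                     :
                     : // [Illustrative note: Factor puts every resource on a common scale
                     : // (resources with more units get a proportionally smaller factor), so the
                     : // scaled counts of different resources, and of micro-op issue itself,
                     : // compare directly when choosing ZoneCritResIdx. Hypothetical example:
                     : // with a factor of 2 for a single-unit FPU, an op holding the FPU for 3
                     : // cycles adds Count = 2 * 3 = 6 scaled units, and the FPU becomes the
                     : // critical resource once its running total exceeds every other scaled
                     : // count.]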
    2134             : 
    2135             : /// Move the boundary of scheduled code by one SUnit.
    2136     2340520 : void SchedBoundary::bumpNode(SUnit *SU) {
    2137             :   // Update the reservation table.
    2138     2340520 :   if (HazardRec->isEnabled()) {
    2139       75518 :     if (!isTop() && SU->isCall) {
    2140             :       // Calls are scheduled with their preceding instructions. For bottom-up
    2141             :       // scheduling, clear the pipeline state before emitting.
    2142           0 :       HazardRec->Reset();
    2143             :     }
    2144       75518 :     HazardRec->EmitInstruction(SU);
    2145             :   }
    2146             :   // checkHazard should prevent scheduling multiple instructions per cycle that
    2147             :   // exceed the issue width.
    2148     2340520 :   const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
    2149     2340520 :   unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
    2150             :   assert(
    2151             :       (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
    2152             :       "Cannot schedule this instruction's MicroOps in the current cycle.");
    2153             : 
    2154     2340520 :   unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
    2155             :   LLVM_DEBUG(dbgs() << "  Ready @" << ReadyCycle << "c\n");
    2156             : 
    2157     2340520 :   unsigned NextCycle = CurrCycle;
    2158     2340520 :   switch (SchedModel->getMicroOpBufferSize()) {
    2159             :   case 0:
    2160             :     assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
    2161             :     break;
    2162      236583 :   case 1:
    2163      236583 :     if (ReadyCycle > NextCycle) {
    2164             :       NextCycle = ReadyCycle;
    2165             :       LLVM_DEBUG(dbgs() << "  *** Stall until: " << ReadyCycle << "\n");
    2166             :     }
    2167             :     break;
    2168     1804949 :   default:
    2169             :     // We don't currently model the OOO reorder buffer, so consider all
    2170             :     // scheduled MOps to be "retired". We do loosely model in-order resource
    2171             :     // latency. If this instruction uses an in-order resource, account for any
    2172             :     // likely stall cycles.
    2173     1804949 :     if (SU->isUnbuffered && ReadyCycle > NextCycle)
    2174             :       NextCycle = ReadyCycle;
    2175             :     break;
    2176             :   }
    2177     2340520 :   RetiredMOps += IncMOps;
    2178             : 
    2179             :   // Update resource counts and critical resource.
    2180     2340520 :   if (SchedModel->hasInstrSchedModel()) {
    2181      917651 :     unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
    2182             :     assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
    2183      917651 :     Rem->RemIssueCount -= DecRemIssue;
    2184      917651 :     if (ZoneCritResIdx) {
    2185             :       // Scale scheduled micro-ops for comparing with the critical resource.
    2186             :       unsigned ScaledMOps =
    2187      397392 :         RetiredMOps * SchedModel->getMicroOpFactor();
    2188             : 
    2189             :       // If scaled micro-ops are now more than the previous critical resource by
    2190             :       // a full cycle, then micro-op issue becomes critical.
    2191      794784 :       if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
    2192      397392 :           >= (int)SchedModel->getLatencyFactor()) {
    2193        5141 :         ZoneCritResIdx = 0;
    2194             :         LLVM_DEBUG(dbgs() << "  *** Critical resource NumMicroOps: "
    2195             :                           << ScaledMOps / SchedModel->getLatencyFactor()
    2196             :                           << "c\n");
    2197             :       }
    2198             :     }
    2199     1629947 :     for (TargetSchedModel::ProcResIter
    2200      917651 :            PI = SchedModel->getWriteProcResBegin(SC),
    2201     2547598 :            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
    2202             :       unsigned RCycle =
    2203     1629947 :         countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
    2204     1629947 :       if (RCycle > NextCycle)
    2205             :         NextCycle = RCycle;
    2206             :     }
    2207      917651 :     if (SU->hasReservedResource) {
    2208             :       // For reserved resources, record the highest cycle using the resource.
    2209             :       // For top-down scheduling, this is the cycle in which we schedule this
    2210             :       // instruction plus the number of cycles the operation reserves the
    2211             :       // resource. For bottom-up, it is simply the instruction's cycle.
    2212         809 :       for (TargetSchedModel::ProcResIter
    2213         661 :              PI = SchedModel->getWriteProcResBegin(SC),
    2214        1470 :              PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
    2215         809 :         unsigned PIdx = PI->ProcResourceIdx;
    2216        1618 :         if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
    2217         661 :           if (isTop()) {
    2218         361 :             ReservedCycles[PIdx] =
    2219        1083 :               std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles);
    2220             :           }
    2221             :           else
    2222         300 :             ReservedCycles[PIdx] = NextCycle;
    2223             :         }
    2224             :       }
    2225             :     }
    2226             :   }
    2227             :   // Update ExpectedLatency and DependentLatency.
    2228     2340520 :   unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
    2229     2340520 :   unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
    2230     2340520 :   if (SU->getDepth() > TopLatency) {
    2231      469622 :     TopLatency = SU->getDepth();
    2232             :     LLVM_DEBUG(dbgs() << "  " << Available.getName() << " TopLatency SU("
    2233             :                       << SU->NodeNum << ") " << TopLatency << "c\n");
    2234             :   }
    2235     2340520 :   if (SU->getHeight() > BotLatency) {
    2236      843920 :     BotLatency = SU->getHeight();
    2237             :     LLVM_DEBUG(dbgs() << "  " << Available.getName() << " BotLatency SU("
    2238             :                       << SU->NodeNum << ") " << BotLatency << "c\n");
    2239             :   }
    2240             :   // If we stall for any reason, bump the cycle.
    2241     2340520 :   if (NextCycle > CurrCycle)
    2242       22671 :     bumpCycle(NextCycle);
    2243             :   else
    2244             :     // After updating ZoneCritResIdx and ExpectedLatency, check if we're
    2245             :     // resource limited. If a stall occurred, bumpCycle does this.
    2246     2317849 :     IsResourceLimited =
    2247     2317849 :         checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
    2248             :                            getScheduledLatency());
    2249             : 
    2250             :   // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
    2251             :   // resets CurrMOps. Loop to handle instructions with more MOps than can issue
    2252             :   // in one cycle. Since we commonly reach the max MOps here, opportunistically
    2253             :   // bump the cycle to avoid uselessly checking everything in the readyQ.
    2254     2340520 :   CurrMOps += IncMOps;
    2255             : 
    2256             :   // Bump the cycle count for issue group constraints.
    2257             :   // This must be done after NextCycle has been adjusted for all other stalls.
    2258             :   // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
    2259             :   // CurrCycle to X.
    2260     4681040 :   if ((isTop() &&  SchedModel->mustEndGroup(SU->getInstr())) ||
    2261     2181405 :       (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
    2262             :     LLVM_DEBUG(dbgs() << "  Bump cycle to " << (isTop() ? "end" : "begin")
    2263             :                       << " group\n");
    2264         331 :     bumpCycle(++NextCycle);
    2265             :   }
    2266             : 
    2267     3554050 :   while (CurrMOps >= SchedModel->getIssueWidth()) {
    2268             :     LLVM_DEBUG(dbgs() << "  *** Max MOps " << CurrMOps << " at cycle "
    2269             :                       << CurrCycle << '\n');
    2270      606765 :     bumpCycle(++NextCycle);
    2271             :   }
    2272             :   LLVM_DEBUG(dumpScheduledState());
    2273     2340520 : }
    2274             : 
    2275             : /// Release pending ready nodes into the available queue. This makes them
    2276             : /// visible to heuristics.
    2277      650058 : void SchedBoundary::releasePending() {
    2278             :   // If the available queue is empty, it is safe to reset MinReadyCycle.
    2279      650058 :   if (Available.empty())
    2280      131427 :     MinReadyCycle = std::numeric_limits<unsigned>::max();
    2281             : 
    2282             :   // Check to see if any of the pending instructions are ready to issue.  If
    2283             :   // so, add them to the available queue.
    2284      650058 :   bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
    2285     1154356 :   for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
    2286      255363 :     SUnit *SU = *(Pending.begin()+i);
    2287      255363 :     unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
    2288             : 
    2289      255363 :     if (ReadyCycle < MinReadyCycle)
    2290      139949 :       MinReadyCycle = ReadyCycle;
    2291             : 
    2292      255363 :     if (!IsBuffered && ReadyCycle > CurrCycle)
    2293       76092 :       continue;
    2294             : 
    2295      179271 :     if (checkHazard(SU))
    2296        5988 :       continue;
    2297             : 
    2298      173283 :     if (Available.size() >= ReadyListLimit)
    2299             :       break;
    2300             : 
    2301             :     Available.push(SU);
    2302             :     Pending.remove(Pending.begin()+i);
    2303      170069 :     --i; --e;
    2304             :   }
    2305      650058 :   CheckPending = false;
    2306      650058 : }
    2307             : 
    2308             : /// Remove SU from the ready set for this boundary.
    2309     3329165 : void SchedBoundary::removeReady(SUnit *SU) {
    2310     6658330 :   if (Available.isInQueue(SU))
    2311             :     Available.remove(Available.find(SU));
    2312             :   else {
    2313             :     assert(Pending.isInQueue(SU) && "bad ready count");
    2314             :     Pending.remove(Pending.find(SU));
    2315             :   }
    2316     3329165 : }
    2317             : 
    2318             : /// If this queue only has one ready candidate, return it. As a side effect,
    2319             : /// defer any nodes that now hit a hazard, and advance the cycle until at least
    2320             : /// one node is ready. If multiple instructions are ready, return nullptr.
    2321     2635628 : SUnit *SchedBoundary::pickOnlyChoice() {
    2322     2635628 :   if (CheckPending)
    2323      535168 :     releasePending();
    2324             : 
    2325     2635628 :   if (CurrMOps > 0) {
    2326             :     // Defer any ready instrs that now have a hazard.
    2327     8305444 :     for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
    2328     7317202 :       if (checkHazard(*I)) {
    2329       38521 :         Pending.push(*I);
    2330             :         I = Available.remove(I);
    2331       38521 :         continue;
    2332             :       }
    2333             :       ++I;
    2334             :     }
    2335             :   }
    2336     2865408 :   for (unsigned i = 0; Available.empty(); ++i) {
    2337             : //  FIXME: Re-enable assert once PR20057 is resolved.
    2338             : //    assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
    2339             : //           "permanent hazard");
    2340             :     (void)i;
    2341      114890 :     bumpCycle(CurrCycle + 1);
    2342      114890 :     releasePending();
    2343             :   }
    2344             : 
    2345             :   LLVM_DEBUG(Pending.dump());
    2346             :   LLVM_DEBUG(Available.dump());
    2347             : 
    2348     2635628 :   if (Available.size() == 1)
    2349      973345 :     return *Available.begin();
    2350             :   return nullptr;
    2351             : }
    2352             : 
    2353             : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    2354             : // This is useful information to dump after bumpNode.
    2355             : // Note that the Queue contents are more useful before pickNodeFromQueue.
    2356             : LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {
    2357             :   unsigned ResFactor;
    2358             :   unsigned ResCount;
    2359             :   if (ZoneCritResIdx) {
    2360             :     ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
    2361             :     ResCount = getResourceCount(ZoneCritResIdx);
    2362             :   } else {
    2363             :     ResFactor = SchedModel->getMicroOpFactor();
    2364             :     ResCount = RetiredMOps * ResFactor;
    2365             :   }
    2366             :   unsigned LFactor = SchedModel->getLatencyFactor();
    2367             :   dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
    2368             :          << "  Retired: " << RetiredMOps;
    2369             :   dbgs() << "\n  Executed: " << getExecutedCount() / LFactor << "c";
    2370             :   dbgs() << "\n  Critical: " << ResCount / LFactor << "c, "
    2371             :          << ResCount / ResFactor << " "
    2372             :          << SchedModel->getResourceName(ZoneCritResIdx)
    2373             :          << "\n  ExpectedLatency: " << ExpectedLatency << "c\n"
    2374             :          << (IsResourceLimited ? "  - Resource" : "  - Latency")
    2375             :          << " limited.\n";
    2376             : }
    2377             : #endif
    2378             : 
    2379             : //===----------------------------------------------------------------------===//
    2380             : // GenericScheduler - Generic implementation of MachineSchedStrategy.
    2381             : //===----------------------------------------------------------------------===//
    2382             : 
    2383    14890158 : void GenericSchedulerBase::SchedCandidate::
    2384             : initResourceDelta(const ScheduleDAGMI *DAG,
    2385             :                   const TargetSchedModel *SchedModel) {
    2386    14890158 :   if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
    2387             :     return;
    2388             : 
    2389       16813 :   const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
    2390       41161 :   for (TargetSchedModel::ProcResIter
    2391       16813 :          PI = SchedModel->getWriteProcResBegin(SC),
    2392       57974 :          PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
    2393       41161 :     if (PI->ProcResourceIdx == Policy.ReduceResIdx)
    2394        4053 :       ResDelta.CritResources += PI->Cycles;
    2395       41161 :     if (PI->ProcResourceIdx == Policy.DemandResIdx)
    2396        6370 :       ResDelta.DemandedResources += PI->Cycles;
    2397             :   }
    2398             : }
    2399             : 
    2400             : /// Set the CandPolicy given a scheduling zone given the current resources and
    2401             : /// latencies inside and outside the zone.
    2402      597815 : void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
    2403             :                                      SchedBoundary &CurrZone,
    2404             :                                      SchedBoundary *OtherZone) {
    2405             :   // Apply preemptive heuristics based on the total latency and resources
    2406             :   // inside and outside this zone. Potential stalls should be considered before
    2407             :   // following this policy.
    2408             : 
    2409             :   // Compute remaining latency. We need this both to determine whether the
    2410             :   // overall schedule has become latency-limited and whether the instructions
    2411             :   // outside this zone are resource or latency limited.
    2412             :   //
    2413             :   // The "dependent" latency is updated incrementally during scheduling as the
    2414             :   // max height/depth of scheduled nodes minus the cycles since it was
    2415             :   // scheduled:
    2416             :   //   DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
    2417             :   //
    2418             :   // The "independent" latency is the max ready queue depth:
    2419             :   //   ILat = max N.depth for N in Available|Pending
    2420             :   //
    2421             :   // RemainingLatency is the greater of independent and dependent latency.
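                     :   //
                     :   // [Illustrative worked example with hypothetical numbers: if the deepest
                     :   // scheduled node has depth 9 and became ready 3 cycles ago,
                     :   // DLat = 9 - 3 = 6; if the deepest node still in Available or Pending
                     :   // has depth 8, ILat = 8; so RemainingLatency = max(6, 8) = 8 cycles.]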
    2422      597815 :   unsigned RemLatency = CurrZone.getDependentLatency();
    2423      597815 :   RemLatency = std::max(RemLatency,
    2424     1195630 :                         CurrZone.findMaxLatency(CurrZone.Available.elements()));
    2425      597815 :   RemLatency = std::max(RemLatency,
    2426     1195630 :                         CurrZone.findMaxLatency(CurrZone.Pending.elements()));
    2427             : 
    2428             :   // Compute the critical resource outside the zone.
    2429      597815 :   unsigned OtherCritIdx = 0;
    2430             :   unsigned OtherCount =
    2431      597815 :     OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
    2432             : 
    2433             :   bool OtherResLimited = false;
    2434      597815 :   if (SchedModel->hasInstrSchedModel())
    2435      394708 :     OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
    2436             :                                          OtherCount, RemLatency);
    2437             : 
    2438             :   // Schedule aggressively for latency in PostRA mode. We don't check for
    2439             :   // acyclic latency during PostRA, and highly out-of-order processors will
    2440             :   // skip PostRA scheduling.
    2441      394708 :   if (!OtherResLimited) {
    2442      407169 :     if (IsPostRA || (RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath)) {
    2443      170849 :       Policy.ReduceLatency |= true;
    2444             :       LLVM_DEBUG(dbgs() << "  " << CurrZone.Available.getName()
    2445             :                         << " RemainingLatency " << RemLatency << " + "
    2446             :                         << CurrZone.getCurrCycle() << "c > CritPath "
    2447             :                         << Rem.CriticalPath << "\n");
    2448             :     }
    2449             :   }
    2450             :   // If the same resource is limiting inside and outside the zone, do nothing.
    2451      597815 :   if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
    2452      585540 :     return;
    2453             : 
    2454             :   LLVM_DEBUG(if (CurrZone.isResourceLimited()) {
    2455             :     dbgs() << "  " << CurrZone.Available.getName() << " ResourceLimited: "
    2456             :            << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
    2457             :   } if (OtherResLimited) dbgs()
    2458             :                  << "  RemainingLimit: "
    2459             :                  << SchedModel->getResourceName(OtherCritIdx) << "\n";
    2460             :              if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()
    2461             :              << "  Latency limited both directions.\n");
    2462             : 
    2463       12275 :   if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
    2464        1512 :     Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
    2465             : 
    2466       12275 :   if (OtherResLimited)
    2467        1413 :     Policy.DemandResIdx = OtherCritIdx;
    2468             : }
    2469             : 
    2470             : #ifndef NDEBUG
    2471             : const char *GenericSchedulerBase::getReasonStr(
    2472             :   GenericSchedulerBase::CandReason Reason) {
    2473             :   switch (Reason) {
    2474             :   case NoCand:         return "NOCAND    ";
    2475             :   case Only1:          return "ONLY1     ";
    2476             :   case PhysRegCopy:    return "PREG-COPY ";
    2477             :   case RegExcess:      return "REG-EXCESS";
    2478             :   case RegCritical:    return "REG-CRIT  ";
    2479             :   case Stall:          return "STALL     ";
    2480             :   case Cluster:        return "CLUSTER   ";
    2481             :   case Weak:           return "WEAK      ";
    2482             :   case RegMax:         return "REG-MAX   ";
    2483             :   case ResourceReduce: return "RES-REDUCE";
    2484             :   case ResourceDemand: return "RES-DEMAND";
    2485             :   case TopDepthReduce: return "TOP-DEPTH ";
    2486             :   case TopPathReduce:  return "TOP-PATH  ";
    2487             :   case BotHeightReduce:return "BOT-HEIGHT";
    2488             :   case BotPathReduce:  return "BOT-PATH  ";
    2489             :   case NextDefUse:     return "DEF-USE   ";
    2490             :   case NodeOrder:      return "ORDER     ";
    2491             :   }
    2492             :   llvm_unreachable("Unknown reason!");
    2493             : }
    2494             : 
    2495             : void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
    2496             :   PressureChange P;
    2497             :   unsigned ResIdx = 0;
    2498             :   unsigned Latency = 0;
    2499             :   switch (Cand.Reason) {
    2500             :   default:
    2501             :     break;
    2502             :   case RegExcess:
    2503             :     P = Cand.RPDelta.Excess;
    2504             :     break;
    2505             :   case RegCritical:
    2506             :     P = Cand.RPDelta.CriticalMax;
    2507             :     break;
    2508             :   case RegMax:
    2509             :     P = Cand.RPDelta.CurrentMax;
    2510             :     break;
    2511             :   case ResourceReduce:
    2512             :     ResIdx = Cand.Policy.ReduceResIdx;
    2513             :     break;
    2514             :   case ResourceDemand:
    2515             :     ResIdx = Cand.Policy.DemandResIdx;
    2516             :     break;
    2517             :   case TopDepthReduce:
    2518             :     Latency = Cand.SU->getDepth();
    2519             :     break;
    2520             :   case TopPathReduce:
    2521             :     Latency = Cand.SU->getHeight();
    2522             :     break;
    2523             :   case BotHeightReduce:
    2524             :     Latency = Cand.SU->getHeight();
    2525             :     break;
    2526             :   case BotPathReduce:
    2527             :     Latency = Cand.SU->getDepth();
    2528             :     break;
    2529             :   }
    2530             :   dbgs() << "  Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
    2531             :   if (P.isValid())
    2532             :     dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
    2533             :            << ":" << P.getUnitInc() << " ";
    2534             :   else
    2535             :     dbgs() << "      ";
    2536             :   if (ResIdx)
    2537             :     dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
    2538             :   else
    2539             :     dbgs() << "         ";
    2540             :   if (Latency)
    2541             :     dbgs() << " " << Latency << " cycles ";
    2542             :   else
    2543             :     dbgs() << "          ";
    2544             :   dbgs() << '\n';
    2545             : }
    2546             : #endif
    2547             : 
    2548             : namespace llvm {
    2549             : /// Return true if this heuristic determines order.
    2550    64227800 : bool tryLess(int TryVal, int CandVal,
    2551             :              GenericSchedulerBase::SchedCandidate &TryCand,
    2552             :              GenericSchedulerBase::SchedCandidate &Cand,
    2553             :              GenericSchedulerBase::CandReason Reason) {
    2554    64227800 :   if (TryVal < CandVal) {
    2555       48681 :     TryCand.Reason = Reason;
    2556       48681 :     return true;
    2557             :   }
    2558    64179119 :   if (TryVal > CandVal) {
    2559      216385 :     if (Cand.Reason > Reason)
    2560       53610 :       Cand.Reason = Reason;
    2561             :     return true;
    2562             :   }
    2563             :   return false;
    2564             : }
    2565             : 
    2566    67191631 : bool tryGreater(int TryVal, int CandVal,
    2567             :                 GenericSchedulerBase::SchedCandidate &TryCand,
    2568             :                 GenericSchedulerBase::SchedCandidate &Cand,
    2569             :                 GenericSchedulerBase::CandReason Reason) {
    2570    67191631 :   if (TryVal > CandVal) {
    2571      317789 :     TryCand.Reason = Reason;
    2572      317789 :     return true;
    2573             :   }
    2574    66873842 :   if (TryVal < CandVal) {
    2575      910768 :     if (Cand.Reason > Reason)
    2576      318305 :       Cand.Reason = Reason;
    2577             :     return true;
    2578             :   }
    2579             :   return false;
    2580             : }
    2581             : 
    2582      900950 : bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
    2583             :                 GenericSchedulerBase::SchedCandidate &Cand,
    2584             :                 SchedBoundary &Zone) {
    2585      900950 :   if (Zone.isTop()) {
    2586      475044 :     if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
    2587       14556 :       if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
    2588             :                   TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
    2589             :         return true;
    2590             :     }
    2591      712272 :     if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
    2592             :                    TryCand, Cand, GenericSchedulerBase::TopPathReduce))
    2593             :       return true;
    2594             :   } else {
    2595     1326856 :     if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
    2596       20298 :       if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
    2597             :                   TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
    2598             :         return true;
    2599             :     }
    2600     1983273 :     if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
    2601             :                    TryCand, Cand, GenericSchedulerBase::BotPathReduce))
    2602             :       return true;
    2603             :   }
    2604             :   return false;
    2605             : }
    2606             : } // end namespace llvm
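                     :
                     : // [Illustrative sketch, not from the original source file: a standalone toy
                     : // mirroring the tryLess contract above without the SchedCandidate plumbing.
                     : // The helpers are three-way: they return true whenever the values differ
                     : // (the comparison decides the order, one way or the other) and false on a
                     : // tie, so callers fall through to the next heuristic.]
                     : enum class CmpOutcome { TryWins, CandWins, Tie };
                     : static CmpOutcome tryLessModel(int TryVal, int CandVal) {
                     :   if (TryVal < CandVal) return CmpOutcome::TryWins;  // lower is better
                     :   if (TryVal > CandVal) return CmpOutcome::CandWins; // keep current best
                     :   return CmpOutcome::Tie; // undecided: evaluate the next criterion
                     : }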
    2607             : 
    2608             : static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
    2609             :   LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
    2610             :                     << GenericSchedulerBase::getReasonStr(Reason) << '\n');
    2611             : }
    2612             : 
    2613             : static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
    2614             :   tracePick(Cand.Reason, Cand.AtTop);
    2615             : }
    2616             : 
    2617      365419 : void GenericScheduler::initialize(ScheduleDAGMI *dag) {
    2618             :   assert(dag->hasVRegLiveness() &&
    2619             :          "(PreRA)GenericScheduler needs vreg liveness");
    2620      365419 :   DAG = static_cast<ScheduleDAGMILive*>(dag);
    2621      365419 :   SchedModel = DAG->getSchedModel();
    2622      365419 :   TRI = DAG->TRI;
    2623             : 
    2624      365419 :   Rem.init(DAG, SchedModel);
    2625      365419 :   Top.init(DAG, SchedModel, &Rem);
    2626      365419 :   Bot.init(DAG, SchedModel, &Rem);
    2627             : 
    2628             :   // Initialize resource counts.
    2629             : 
    2630             :   // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
    2631             :   // are disabled, then these HazardRecs will be disabled.
    2632      365419 :   const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
    2633      365419 :   if (!Top.HazardRec) {
    2634      144238 :     Top.HazardRec =
    2635      288476 :         DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
    2636      144238 :             Itin, DAG);
    2637             :   }
    2638      365419 :   if (!Bot.HazardRec) {
    2639      144238 :     Bot.HazardRec =
    2640      288476 :         DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
    2641      144238 :             Itin, DAG);
    2642             :   }
    2643      365419 :   TopCand.SU = nullptr;
    2644      365419 :   BotCand.SU = nullptr;
    2645      365419 : }
    2646             : 
    2647             : /// Initialize the per-region scheduling policy.
    2648      872941 : void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
    2649             :                                   MachineBasicBlock::iterator End,
    2650             :                                   unsigned NumRegionInstrs) {
    2651             :   const MachineFunction &MF = *Begin->getMF();
    2652      872941 :   const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
    2653             : 
    2654             :   // Avoid setting up the register pressure tracker for small regions to save
    2655             :   // compile time. As a rough heuristic, only track pressure when the number of
    2656             :   // schedulable instructions exceeds half the integer register file.
    2657      872941 :   RegionPolicy.ShouldTrackPressure = true;
    2658     6110585 :   for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
    2659     2618823 :     MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
    2660     2618823 :     if (TLI->isTypeLegal(LegalIntVT)) {
    2661     7370864 :       unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
    2662     4913910 :         TLI->getRegClassFor(LegalIntVT));
    2663     2456954 :       RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
    2664             :     }
    2665             :   }
    2666             : 
    2667             :   // For generic targets, we default to bottom-up: it's simpler, and more of
    2668             :   // the compile-time optimizations have been implemented in that direction.
    2669      872940 :   RegionPolicy.OnlyBottomUp = true;
    2670             : 
    2671             :   // Allow the subtarget to override default policy.
    2672      872940 :   MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
    2673             : 
    2674             :   // After subtarget overrides, apply command line options.
    2675      872940 :   if (!EnableRegPressure)
    2676           0 :     RegionPolicy.ShouldTrackPressure = false;
    2677             : 
    2678             :   // Check -misched-topdown/bottomup can force or unforce scheduling direction.
    2679             :   // e.g. -misched-bottomup=false allows scheduling in both directions.
    2680             :   assert((!ForceTopDown || !ForceBottomUp) &&
    2681             :          "-misched-topdown incompatible with -misched-bottomup");
    2682      872940 :   if (ForceBottomUp.getNumOccurrences() > 0) {
    2683           0 :     RegionPolicy.OnlyBottomUp = ForceBottomUp;
    2684           0 :     if (RegionPolicy.OnlyBottomUp)
    2685           0 :       RegionPolicy.OnlyTopDown = false;
    2686             :   }
    2687      872940 :   if (ForceTopDown.getNumOccurrences() > 0) {
    2688           4 :     RegionPolicy.OnlyTopDown = ForceTopDown;
    2689           4 :     if (RegionPolicy.OnlyTopDown)
    2690           4 :       RegionPolicy.OnlyBottomUp = false;
    2691             :   }
    2692      872940 : }
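                     :
                     : // [Illustrative worked example of the half-register-file heuristic above,
                     : // with hypothetical numbers: with 16 allocatable registers in the tracked
                     : // integer class, pressure tracking is enabled only for regions with
                     : // NumRegionInstrs > 16 / 2 = 8 schedulable instructions.]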
    2693             : 
    2694           0 : void GenericScheduler::dumpPolicy() const {
    2695             :   // Cannot completely remove virtual function even in release mode.
    2696             : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    2697             :   dbgs() << "GenericScheduler RegionPolicy: "
    2698             :          << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
    2699             :          << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
    2700             :          << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
    2701             :          << "\n";
    2702             : #endif
    2703           0 : }
    2704             : 
    2705             : /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
    2706             : /// critical path by more cycles than it takes to drain the instruction buffer.
    2707             : /// We estimate an upper bound on in-flight instructions as:
    2708             : ///
    2709             : /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
    2710             : /// InFlightIterations = AcyclicPath / CyclesPerIteration
    2711             : /// InFlightResources = InFlightIterations * LoopResources
    2712             : ///
    2713             : /// TODO: Check execution resources in addition to IssueCount.
    2714      333156 : void GenericScheduler::checkAcyclicLatency() {
    2715      333156 :   if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
    2716             :     return;
    2717             : 
    2718             :   // Scaled number of cycles per loop iteration.
    2719             :   unsigned IterCount =
    2720         728 :     std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
    2721         728 :              Rem.RemIssueCount);
    2722             :   // Scaled acyclic critical path.
    2723         364 :   unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
    2724             :   // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
    2725         364 :   unsigned InFlightCount =
    2726         364 :     (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
    2727             :   unsigned BufferLimit =
    2728         364 :     SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
    2729             : 
    2730         364 :   Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
    2731             : 
    2732             :   LLVM_DEBUG(
    2733             :       dbgs() << "IssueCycles="
    2734             :              << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
    2735             :              << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
    2736             :              << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount
    2737             :              << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
    2738             :              << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
    2739             :       if (Rem.IsAcyclicLatencyLimited) dbgs() << "  ACYCLIC LATENCY LIMIT\n");
    2740             : }
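                     :
                     : // [Illustrative worked example, assuming latency and micro-op factors of 1:
                     : // with CyclicCritPath = 4c and RemIssueCount = 10 micro-ops per iteration,
                     : // IterCount = max(4, 10) = 10. An acyclic critical path of 40c then gives
                     : // InFlightCount = (40 * 10 + 9) / 10 = 40 micro-ops, which exceeds a
                     : // 32-entry micro-op buffer (BufferLimit = 32), so IsAcyclicLatencyLimited
                     : // is set.]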
    2741             : 
    2742      365419 : void GenericScheduler::registerRoots() {
    2743      730838 :   Rem.CriticalPath = DAG->ExitSU.getDepth();
    2744             : 
    2745             :   // Some roots may not feed into ExitSU. Check all of them in case.
    2746     1095984 :   for (const SUnit *SU : Bot.Available) {
    2747      730565 :     if (SU->getDepth() > Rem.CriticalPath)
    2748       53616 :       Rem.CriticalPath = SU->getDepth();
    2749             :   }
    2750             :   LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
    2751      365419 :   if (DumpCriticalPathLength) {
    2752           0 :     errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
    2753             :   }
    2754             : 
    2755      365419 :   if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
    2756      333156 :     Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
    2757      333156 :     checkAcyclicLatency();
    2758             :   }
    2759      365419 : }
    2760             : 
    2761             : namespace llvm {
    2762    32375024 : bool tryPressure(const PressureChange &TryP,
    2763             :                  const PressureChange &CandP,
    2764             :                  GenericSchedulerBase::SchedCandidate &TryCand,
    2765             :                  GenericSchedulerBase::SchedCandidate &Cand,
    2766             :                  GenericSchedulerBase::CandReason Reason,
    2767             :                  const TargetRegisterInfo *TRI,
    2768             :                  const MachineFunction &MF) {
    2769             :   // If one candidate decreases and the other increases, go with it.
    2770             :   // Invalid candidates have UnitInc==0.
    2771    32375024 :   if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
    2772             :                  Reason)) {
    2773             :     return true;
    2774             :   }
    2775             :   // Do not compare the magnitude of pressure changes between top and bottom
    2776             :   // boundary.
    2777    32358475 :   if (Cand.AtTop != TryCand.AtTop)
    2778             :     return false;
    2779             : 
    2780             :   // If both candidates affect the same set in the same boundary, go with the
    2781             :   // smallest increase.
    2782    32029595 :   unsigned TryPSet = TryP.getPSetOrMax();
    2783    32029595 :   unsigned CandPSet = CandP.getPSetOrMax();
    2784    32029595 :   if (TryPSet == CandPSet) {
    2785    63453480 :     return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
    2786    31726740 :                    Reason);
    2787             :   }
    2788             : 
    2789      302855 :   int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
    2790             :                                  std::numeric_limits<int>::max();
    2791             : 
    2792      302855 :   int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
    2793             :                                    std::numeric_limits<int>::max();
    2794             : 
    2795             :   // If the candidates are decreasing pressure, reverse priority.
    2796      302855 :   if (TryP.getUnitInc() < 0)
    2797             :     std::swap(TryRank, CandRank);
    2798      302855 :   return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
    2799             : }
    2800             : 
    2801    21870274 : unsigned getWeakLeft(const SUnit *SU, bool isTop) {
    2802    21870274 :   return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
    2803             : }
    2804             : 
    2805             : /// Minimize physical register live ranges. Regalloc wants them adjacent to
    2806             : /// their physreg def/use.
    2807             : ///
    2808             : /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
    2809             : /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
    2810             : /// with the operation that produces or consumes the physreg. We'll do this when
    2811             : /// regalloc has support for parallel copies.
    2812    23712338 : int biasPhysRegCopy(const SUnit *SU, bool isTop) {
    2813    23712338 :   const MachineInstr *MI = SU->getInstr();
    2814    23712338 :   if (!MI->isCopy())
    2815             :     return 0;
    2816             : 
    2817     2812320 :   unsigned ScheduledOper = isTop ? 1 : 0;
    2818     2812320 :   unsigned UnscheduledOper = isTop ? 0 : 1;
    2819             :   // If we have already scheduled the physreg produce/consumer, immediately
    2820             :   // schedule the copy.
    2821     5624640 :   if (TargetRegisterInfo::isPhysicalRegister(
    2822     2812320 :         MI->getOperand(ScheduledOper).getReg()))
    2823             :     return 1;
    2824             :   // If the physreg is at the boundary, defer it. Otherwise schedule it
    2825             :   // immediately to free the dependent. We can hoist the copy later.
    2826     1865944 :   bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
    2827     3731888 :   if (TargetRegisterInfo::isPhysicalRegister(
    2828             :         MI->getOperand(UnscheduledOper).getReg()))
    2829      752894 :     return AtBoundary ? -1 : 1;
    2830             :   return 0;
    2831             : }
    2832             : } // end namespace llvm
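                     :
                     : // [Illustrative note -- the bias above for bottom-up scheduling
                     : // (isTop == false): a COPY whose source operand is a physreg returns +1
                     : // (schedule it immediately to free the dependent) while unscheduled
                     : // predecessors remain, but -1 (defer) once it sits at the region boundary
                     : // with NumPredsLeft == 0; a COPY with no physreg operands gets no bias.]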
    2833             : 
    2834    10048750 : void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
    2835             :                                      bool AtTop,
    2836             :                                      const RegPressureTracker &RPTracker,
    2837             :                                      RegPressureTracker &TempTracker) {
    2838    10048750 :   Cand.SU = SU;
    2839    10048750 :   Cand.AtTop = AtTop;
    2840    10048750 :   if (DAG->isTrackingPressure()) {
    2841     9095816 :     if (AtTop) {
    2842      124052 :       TempTracker.getMaxDownwardPressureDelta(
    2843       62026 :         Cand.SU->getInstr(),
    2844             :         Cand.RPDelta,
    2845             :         DAG->getRegionCriticalPSets(),
    2846             :         DAG->getRegPressure().MaxSetPressure);
    2847             :     } else {
    2848     9033790 :       if (VerifyScheduling) {
    2849          38 :         TempTracker.getMaxUpwardPressureDelta(
    2850          19 :           Cand.SU->getInstr(),
    2851          19 :           &DAG->getPressureDiff(Cand.SU),
    2852             :           Cand.RPDelta,
    2853             :           DAG->getRegionCriticalPSets(),
    2854             :           DAG->getRegPressure().MaxSetPressure);
    2855             :       } else {
    2856    18067542 :         RPTracker.getUpwardPressureDelta(
    2857     9033771 :           Cand.SU->getInstr(),
    2858             :           DAG->getPressureDiff(Cand.SU),
    2859             :           Cand.RPDelta,
    2860             :           DAG->getRegionCriticalPSets(),
    2861             :           DAG->getRegPressure().MaxSetPressure);
    2862             :       }
    2863             :     }
    2864             :   }
    2865             :   LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs()
    2866             :              << "  Try  SU(" << Cand.SU->NodeNum << ") "
    2867             :              << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":"
    2868             :              << Cand.RPDelta.Excess.getUnitInc() << "\n");
    2869    10048750 : }
    2870             : 
    2871             : /// Apply a set of heuristics to a new candidate. Heuristics are currently
    2872             : /// hierarchical. This may be more efficient than a graduated cost model because
    2873             : /// we don't need to evaluate all aspects of the model for each node in the
    2874             : /// queue. But it's really done to make the heuristics easier to debug and
    2875             : /// statistically analyze.
    2876             : ///
    2877             : /// \param Cand provides the policy and current best candidate.
    2878             : /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
    2879             : /// \param Zone describes the scheduled zone that we are extending, or nullptr
    2880             : ///             if Cand is from a different zone than TryCand.
    2881    13254144 : void GenericScheduler::tryCandidate(SchedCandidate &Cand,
    2882             :                                     SchedCandidate &TryCand,
    2883             :                                     SchedBoundary *Zone) const {
    2884             :   // Initialize the candidate if needed.
    2885    13254144 :   if (!Cand.isValid()) {
    2886     1397975 :     TryCand.Reason = NodeOrder;
    2887     1397975 :     return;
    2888             :   }
    2889             : 
    2890    11856169 :   if (tryGreater(biasPhysRegCopy(TryCand.SU, TryCand.AtTop),
    2891    11856169 :                  biasPhysRegCopy(Cand.SU, Cand.AtTop),
    2892             :                  TryCand, Cand, PhysRegCopy))
    2893             :     return;
    2894             : 
    2895             :   // Avoid exceeding the target's limit.
    2896    22317065 :   if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
    2897             :                                                Cand.RPDelta.Excess,
    2898    10970222 :                                                TryCand, Cand, RegExcess, TRI,
    2899    10970222 :                                                DAG->MF))
    2900             :     return;
    2901             : 
    2902             :   // Avoid increasing the max critical pressure in the scheduled region.
    2903    22225901 :   if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
    2904             :                                                Cand.RPDelta.CriticalMax,
    2905    10924640 :                                                TryCand, Cand, RegCritical, TRI,
    2906    10924640 :                                                DAG->MF))
    2907             :     return;
    2908             : 
    2909             :   // We only compare a subset of features when comparing nodes between
    2910             :   // Top and Bottom boundary. Some properties are simply incomparable; in many
    2911             :   // other instances we should only override the other boundary if something
    2912             :   // is a clear good pick on one boundary. Skip heuristics that are more
    2913             :   // "tie-breaking" in nature.
    2914             :   bool SameBoundary = Zone != nullptr;
    2915    11004556 :   if (SameBoundary) {
    2916             :     // For loops that are acyclic path limited, aggressively schedule for
    2917             :     // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
    2918             :     // heuristics to take precedence.
    2919    10889186 :     if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
    2920         787 :         tryLatency(TryCand, Cand, *Zone))
    2921             :       return;
    2922             : 
    2923             :     // Prioritize instructions that read unbuffered resources by stall cycles.
    2924    10887831 :     if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
    2925    10887831 :                 Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
    2926             :       return;
    2927             :   }
    2928             : 
    2929             :   // Keep clustered nodes together to encourage downstream peephole
    2930             :   // optimizations which may reduce resource requirements.
    2931             :   //
    2932             :   // This is a best effort to set things up for a post-RA pass. Optimizations
    2933             :   // like generating loads of multiple registers should ideally be done within
    2934             :   // the scheduler pass by combining the loads during DAG postprocessing.
    2935             :   const SUnit *CandNextClusterSU =
    2936    10975200 :     Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
    2937             :   const SUnit *TryCandNextClusterSU =
    2938    10975200 :     TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
    2939    10975200 :   if (tryGreater(TryCand.SU == TryCandNextClusterSU,
    2940    10975200 :                  Cand.SU == CandNextClusterSU,
    2941             :                  TryCand, Cand, Cluster))
    2942             :     return;
    2943             : 
    2944    10933056 :   if (SameBoundary) {
    2945             :     // Weak edges are for clustering and other constraints.
    2946    10817260 :     if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
    2947    10817260 :                 getWeakLeft(Cand.SU, Cand.AtTop),
    2948             :                 TryCand, Cand, Weak))
    2949             :       return;
    2950             :   }
    2951             : 
    2952             :   // Avoid increasing the max pressure of the entire region.
    2953    21335177 :   if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
    2954             :                                                Cand.RPDelta.CurrentMax,
    2955    10480162 :                                                TryCand, Cand, RegMax, TRI,
    2956    10480162 :                                                DAG->MF))
    2957             :     return;
    2958             : 
    2959    10722579 :   if (SameBoundary) {
    2960             :     // Avoid critical resource consumption and balance the schedule.
    2961    10606783 :     TryCand.initResourceDelta(DAG, SchedModel);
    2962    10606783 :     if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
    2963             :                 TryCand, Cand, ResourceReduce))
    2964             :       return;
    2965    10606707 :     if (tryGreater(TryCand.ResDelta.DemandedResources,
    2966    10606707 :                    Cand.ResDelta.DemandedResources,
    2967             :                    TryCand, Cand, ResourceDemand))
    2968             :       return;
    2969             : 
    2970             :     // Avoid serializing long latency dependence chains.
    2971             :     // For acyclic path limited loops, latency was already checked above.
    2972    22022962 :     if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
    2973    12230452 :         !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
    2974             :       return;
    2975             : 
    2976             :     // Fall through to original instruction order.
    2977      253895 :     if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
    2978    20452284 :         || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
    2979     2446415 :       TryCand.Reason = NodeOrder;
    2980             :     }
    2981             :   }
    2982             : }
    2983             : 
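The hierarchical pattern above is easier to see in miniature. This self-contained sketch uses invented fields and reasons, and simplifies the real tryLess/tryGreater (which also record the winning reason on Cand), but keeps the key property: each comparison either decides the pick and returns, or ties and falls through to a weaker heuristic, ending at source order.

    #include <iostream>

    namespace {
    enum Reason { NoCand, RegExcessR, StallR, NodeOrderR };

    struct Candidate {
      int Excess;   // pressure increase; smaller is better
      int Stalls;   // stall cycles; smaller is better
      int NodeNum;  // original order; the final tie-break
      Reason Why = NoCand;
    };

    // Decide on "smaller is better"; return true when the comparison is decisive.
    bool tryLessModel(int TryVal, int CandVal, Candidate &TryCand, Reason R) {
      if (TryVal < CandVal) { TryCand.Why = R; return true; }
      return TryVal > CandVal;  // decided in Cand's favor; stop comparing
    }

    void tryCandidateModel(Candidate &Cand, Candidate &TryCand) {
      if (Cand.Why == NoCand) { TryCand.Why = NodeOrderR; return; }
      if (tryLessModel(TryCand.Excess, Cand.Excess, TryCand, RegExcessR)) return;
      if (tryLessModel(TryCand.Stalls, Cand.Stalls, TryCand, StallR)) return;
      if (TryCand.NodeNum < Cand.NodeNum) TryCand.Why = NodeOrderR;
    }
    }  // namespace

    int main() {
      Candidate Cand{0, 2, 5, NodeOrderR}, TryCand{0, 1, 7};
      tryCandidateModel(Cand, TryCand);
      // Equal pressure, fewer stalls: TryCand wins with reason StallR (2).
      std::cout << TryCand.Why << '\n';
    }
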
    2984             : /// Pick the best candidate from the queue.
    2985             : ///
    2986             : /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
    2987             : /// DAG building. To adjust for the current scheduling location we need to
    2988             : /// maintain the number of vreg uses remaining to be top-scheduled.
    2989     1115040 : void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
    2990             :                                          const CandPolicy &ZonePolicy,
    2991             :                                          const RegPressureTracker &RPTracker,
    2992             :                                          SchedCandidate &Cand) {
    2993             :   // getMaxPressureDelta temporarily modifies the tracker.
    2994             :   RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
    2995             : 
    2996             :   ReadyQueue &Q = Zone.Available;
    2997    11163790 :   for (SUnit *SU : Q) {
    2998             : 
    2999             :     SchedCandidate TryCand(ZonePolicy);
    3000    10048750 :     initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
    3001             :     // Pass SchedBoundary only when comparing nodes from the same boundary.
    3002    10048750 :     SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
    3003    10048750 :     tryCandidate(Cand, TryCand, ZoneArg);
    3004    10048750 :     if (TryCand.Reason != NoCand) {
    3005             :       // Initialize resource delta if needed in case future heuristics query it.
    3006             :       if (TryCand.ResDelta == SchedResourceDelta())
    3007     3103759 :         TryCand.initResourceDelta(DAG, SchedModel);
    3008             :       Cand.setBest(TryCand);
    3009             :       LLVM_DEBUG(traceCandidate(Cand));
    3010             :     }
    3011             :   }
    3012     1115040 : }
    3013             : 
    3014             : /// Pick the best candidate node from either the top or bottom queue.
    3015      122996 : SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
    3016             :   // Schedule as far as possible in the direction of no choice. This is most
    3017             :   // efficient, but also provides the best heuristics for CriticalPSets.
    3018      122996 :   if (SUnit *SU = Bot.pickOnlyChoice()) {
    3019       77791 :     IsTopNode = false;
    3020             :     tracePick(Only1, false);
    3021       77791 :     return SU;
    3022             :   }
    3023       45205 :   if (SUnit *SU = Top.pickOnlyChoice()) {
    3024        2814 :     IsTopNode = true;
    3025             :     tracePick(Only1, true);
    3026        2814 :     return SU;
    3027             :   }
    3028             :   // Set the bottom-up policy based on the state of the current bottom zone and
    3029             :   // the instructions outside the zone, including the top zone.
    3030       42391 :   CandPolicy BotPolicy;
    3031       42391 :   setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
    3032             :   // Set the top-down policy based on the state of the current top zone and
    3033             :   // the instructions outside the zone, including the bottom zone.
    3034       42391 :   CandPolicy TopPolicy;
    3035       42391 :   setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
    3036             : 
    3037             :   // See if BotCand is still valid (because we previously scheduled from Top).
    3038             :   LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
    3039       42391 :   if (!BotCand.isValid() || BotCand.SU->isScheduled ||
    3040             :       BotCand.Policy != BotPolicy) {
    3041       31768 :     BotCand.reset(CandPolicy());
    3042       63536 :     pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
    3043             :     assert(BotCand.Reason != NoCand && "failed to find the first candidate");
    3044             :   } else {
    3045             :     LLVM_DEBUG(traceCandidate(BotCand));
    3046             : #ifndef NDEBUG
    3047             :     if (VerifyScheduling) {
    3048             :       SchedCandidate TCand;
    3049             :       TCand.reset(CandPolicy());
    3050             :       pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
    3051             :       assert(TCand.SU == BotCand.SU &&
    3052             :              "Last pick result should correspond to re-picking right now");
    3053             :     }
    3054             : #endif
    3055             :   }
    3056             : 
    3057             :   // Check if the top Q has a better candidate.
    3058             :   LLVM_DEBUG(dbgs() << "Picking from Top:\n");
    3059       42391 :   if (!TopCand.isValid() || TopCand.SU->isScheduled ||
    3060             :       TopCand.Policy != TopPolicy) {
    3061       27924 :     TopCand.reset(CandPolicy());
    3062       55848 :     pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
    3063             :     assert(TopCand.Reason != NoCand && "failed to find the first candidate");
    3064             :   } else {
    3065             :     LLVM_DEBUG(traceCandidate(TopCand));
    3066             : #ifndef NDEBUG
    3067             :     if (VerifyScheduling) {
    3068             :       SchedCandidate TCand;
    3069             :       TCand.reset(CandPolicy());
    3070             :       pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
    3071             :       assert(TCand.SU == TopCand.SU &&
    3072             :            "Last pick result should correspond to re-picking right now");
    3073             :     }
    3074             : #endif
    3075             :   }
    3076             : 
    3077             :   // Pick best from BotCand and TopCand.
    3078             :   assert(BotCand.isValid());
    3079             :   assert(TopCand.isValid());
    3080       42391 :   SchedCandidate Cand = BotCand;
    3081       42391 :   TopCand.Reason = NoCand;
    3082       42391 :   tryCandidate(Cand, TopCand, nullptr);
    3083       42391 :   if (TopCand.Reason != NoCand) {
    3084             :     Cand.setBest(TopCand);
    3085             :     LLVM_DEBUG(traceCandidate(Cand));
    3086             :   }
    3087             : 
    3088       42391 :   IsTopNode = Cand.AtTop;
    3089             :   tracePick(Cand);
    3090       42391 :   return Cand.SU;
    3091             : }
    3092             : 
    3093             : /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
    3094     2299822 : SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
    3095     2299822 :   if (DAG->top() == DAG->bottom()) {
    3096             :     assert(Top.Available.empty() && Top.Pending.empty() &&
    3097             :            Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    3098             :     return nullptr;
    3099             :   }
    3100             :   SUnit *SU;
    3101             :   do {
    3102     1953718 :     if (RegionPolicy.OnlyTopDown) {
    3103          54 :       SU = Top.pickOnlyChoice();
    3104          54 :       if (!SU) {
    3105          45 :         CandPolicy NoPolicy;
    3106          45 :         TopCand.reset(NoPolicy);
    3107          90 :         pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
    3108             :         assert(TopCand.Reason != NoCand && "failed to find a candidate");
    3109             :         tracePick(TopCand);
    3110          45 :         SU = TopCand.SU;
    3111             :       }
    3112          54 :       IsTopNode = true;
    3113     1953664 :     } else if (RegionPolicy.OnlyBottomUp) {
    3114     1830668 :       SU = Bot.pickOnlyChoice();
    3115     1830668 :       if (!SU) {
    3116     1055303 :         CandPolicy NoPolicy;
    3117     1055303 :         BotCand.reset(NoPolicy);
    3118     2110606 :         pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
    3119             :         assert(BotCand.Reason != NoCand && "failed to find a candidate");
    3120             :         tracePick(BotCand);
    3121     1055303 :         SU = BotCand.SU;
    3122             :       }
    3123     1830668 :       IsTopNode = false;
    3124             :     } else {
    3125      122996 :       SU = pickNodeBidirectional(IsTopNode);
    3126             :     }
    3127     1953718 :   } while (SU->isScheduled);
    3128             : 
    3129     1953718 :   if (SU->isTopReady())
    3130      911675 :     Top.removeReady(SU);
    3131     1953718 :   if (SU->isBottomReady())
    3132     1943096 :     Bot.removeReady(SU);
    3133             : 
    3134             :   LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
    3135             :                     << *SU->getInstr());
    3136             :   return SU;
    3137             : }
    3138             : 
    3139      166476 : void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
    3140      166476 :   MachineBasicBlock::iterator InsertPos = SU->getInstr();
    3141      166476 :   if (!isTop)
    3142             :     ++InsertPos;
    3143      166476 :   SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
    3144             : 
    3145             :   // Find already scheduled copies with a single physreg dependence and move
    3146             :   // them just above the scheduled instruction.
    3147      956560 :   for (SDep &Dep : Deps) {
    3148      524140 :     if (Dep.getKind() != SDep::Data || !TRI->isPhysicalRegister(Dep.getReg()))
    3149      273528 :       continue;
    3150             :     SUnit *DepSU = Dep.getSUnit();
    3151      243028 :     if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
    3152      111951 :       continue;
    3153        9563 :     MachineInstr *Copy = DepSU->getInstr();
    3154        9563 :     if (!Copy->isCopy())
    3155        5011 :       continue;
    3156             :     LLVM_DEBUG(dbgs() << "  Rescheduling physreg copy ";
    3157             :                Dep.getSUnit()->dump(DAG));
    3158        4552 :     DAG->moveInstruction(Copy, InsertPos);
    3159             :   }
    3160      166476 : }
    3161             : 
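The net effect of DAG->moveInstruction here is a splice within the basic block. A toy illustration (instruction strings invented) of the top-down case, isTop == true, where an already-placed copy defining a physreg is pulled down to sit immediately above its just-scheduled user:

    #include <iostream>
    #include <list>
    #include <string>

    int main() {
      // Before: the copy is separated from the instruction that consumes $edi.
      std::list<std::string> MBB = {"$edi = COPY %1", "other", "CALL uses $edi"};
      auto Copy = MBB.begin();                // the stray physreg copy
      auto InsertPos = std::prev(MBB.end());  // the just-scheduled user
      MBB.splice(InsertPos, MBB, Copy);       // mirrors moveInstruction()
      for (const std::string &MI : MBB)
        std::cout << MI << '\n';  // other / $edi = COPY %1 / CALL uses $edi
    }
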
    3162             : /// Update the scheduler's state after scheduling a node. This is the same node
    3163             : /// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
    3164             : /// update it's state based on the current cycle before MachineSchedStrategy
    3165             : /// update its state based on the current cycle before MachineSchedStrategy
    3166             : ///
    3167             : /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
    3168             : /// them here. See comments in biasPhysRegCopy.
    3169     2261762 : void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
    3170     2261762 :   if (IsTopNode) {
    3171      160714 :     SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
    3172       80357 :     Top.bumpNode(SU);
    3173       80357 :     if (SU->hasPhysRegUses)
    3174       52326 :       reschedulePhysRegCopies(SU, true);
    3175             :   } else {
    3176     4362810 :     SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
    3177     2181405 :     Bot.bumpNode(SU);
    3178     2181405 :     if (SU->hasPhysRegDefs)
    3179      114150 :       reschedulePhysRegCopies(SU, false);
    3180             :   }
    3181     2261762 : }
    3182             : 
    3183             : /// Create the standard converging machine scheduler. This will be used as the
    3184             : /// default scheduler if the target does not set a default.
    3185      131122 : ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
    3186             :   ScheduleDAGMILive *DAG =
    3187      262244 :       new ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C));
    3188             :   // Register DAG post-processors.
    3189             :   //
    3190             :   // FIXME: extend the mutation API to allow earlier mutations to instantiate
    3191             :   // data and pass it to later mutations. Have a single mutation that gathers
    3192             :   // the interesting nodes in one pass.
    3193      262244 :   DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
    3194      131122 :   return DAG;
    3195             : }
    3196             : 
    3197           0 : static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
    3198           0 :   return createGenericSchedLive(C);
    3199             : }
    3200             : 
    3201             : static MachineSchedRegistry
    3202       99743 : GenericSchedRegistry("converge", "Standard converging scheduler.",
    3203             :                      createConvergingSched);
    3204             : 
    3205             : //===----------------------------------------------------------------------===//
    3206             : // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
    3207             : //===----------------------------------------------------------------------===//
    3208             : 
    3209       16853 : void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
    3210       16853 :   DAG = Dag;
    3211       16853 :   SchedModel = DAG->getSchedModel();
    3212       16853 :   TRI = DAG->TRI;
    3213             : 
    3214       16853 :   Rem.init(DAG, SchedModel);
    3215       16853 :   Top.init(DAG, SchedModel, &Rem);
    3216             :   BotRoots.clear();
    3217             : 
    3218             :   // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
    3219             :   // or are disabled, then these HazardRecs will be disabled.
    3220       16853 :   const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
    3221       16853 :   if (!Top.HazardRec) {
    3222       15457 :     Top.HazardRec =
    3223       30914 :         DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
    3224       15457 :             Itin, DAG);
    3225             :   }
    3226       16853 : }
    3227             : 
    3228       16853 : void PostGenericScheduler::registerRoots() {
    3229       33706 :   Rem.CriticalPath = DAG->ExitSU.getDepth();
    3230             : 
    3231             :   // Some roots may not feed into ExitSU. Check all of them in case.
    3232       66653 :   for (const SUnit *SU : BotRoots) {
    3233       24900 :     if (SU->getDepth() > Rem.CriticalPath)
    3234        3898 :       Rem.CriticalPath = SU->getDepth();
    3235             :   }
    3236             :   LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
    3237       16853 :   if (DumpCriticalPathLength) {
    3238           0 :     errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
    3239             :   }
    3240       16853 : }
    3241             : 
    3242             : /// Apply a set of heuristics to a new candidate for PostRA scheduling.
    3243             : ///
    3244             : /// \param Cand provides the policy and current best candidate.
    3245             : /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
    3246      114715 : void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
    3247             :                                         SchedCandidate &TryCand) {
    3248             :   // Initialize the candidate if needed.
    3249      114715 :   if (!Cand.isValid()) {
    3250       25839 :     TryCand.Reason = NodeOrder;
    3251       25839 :     return;
    3252             :   }
    3253             : 
    3254             :   // Prioritize instructions that read unbuffered resources by stall cycles.
    3255       88876 :   if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
    3256       88876 :               Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
    3257             :     return;
    3258             : 
    3259             :   // Keep clustered nodes together.
    3260       88851 :   if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
    3261       88851 :                  Cand.SU == DAG->getNextClusterSucc(),
    3262             :                  TryCand, Cand, Cluster))
    3263             :     return;
    3264             : 
    3265             :   // Avoid critical resource consumption and balance the schedule.
    3266       88692 :   if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
    3267             :               TryCand, Cand, ResourceReduce))
    3268             :     return;
    3269       88310 :   if (tryGreater(TryCand.ResDelta.DemandedResources,
    3270       88310 :                  Cand.ResDelta.DemandedResources,
    3271             :                  TryCand, Cand, ResourceDemand))
    3272             :     return;
    3273             : 
    3274             :   // Avoid serializing long latency dependence chains.
    3275       88310 :   if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
    3276             :     return;
    3277             :   }
    3278             : 
    3279             :   // Fall through to original instruction order.
    3280       58439 :   if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
    3281       23188 :     TryCand.Reason = NodeOrder;
    3282             : }
    3283             : 
    3284       25839 : void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
    3285             :   ReadyQueue &Q = Top.Available;
    3286      140554 :   for (SUnit *SU : Q) {
    3287             :     SchedCandidate TryCand(Cand.Policy);
    3288      114715 :     TryCand.SU = SU;
    3289      114715 :     TryCand.AtTop = true;
    3290      114715 :     TryCand.initResourceDelta(DAG, SchedModel);
    3291      114715 :     tryCandidate(Cand, TryCand);
    3292      114715 :     if (TryCand.Reason != NoCand) {
    3293             :       Cand.setBest(TryCand);
    3294             :       LLVM_DEBUG(traceCandidate(Cand));
    3295             :     }
    3296             :   }
    3297       25839 : }
    3298             : 
    3299             : /// Pick the next node to schedule.
    3300       95611 : SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
    3301       95611 :   if (DAG->top() == DAG->bottom()) {
    3302             :     assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
    3303             :     return nullptr;
    3304             :   }
    3305             :   SUnit *SU;
    3306             :   do {
    3307       78758 :     SU = Top.pickOnlyChoice();
    3308       78758 :     if (SU) {
    3309             :       tracePick(Only1, true);
    3310             :     } else {
    3311             :       CandPolicy NoPolicy;
    3312             :       SchedCandidate TopCand(NoPolicy);
    3313             :       // Set the top-down policy based on the state of the current top zone and
    3314             :       // the instructions outside the zone, including the bottom zone.
    3315       25839 :       setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
    3316       25839 :       pickNodeFromQueue(TopCand);
    3317             :       assert(TopCand.Reason != NoCand && "failed to find a candidate");
    3318             :       tracePick(TopCand);
    3319       25839 :       SU = TopCand.SU;
    3320             :     }
    3321       78758 :   } while (SU->isScheduled);
    3322             : 
    3323       78758 :   IsTopNode = true;
    3324       78758 :   Top.removeReady(SU);
    3325             : 
    3326             :   LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
    3327             :                     << *SU->getInstr());
    3328       78758 :   return SU;
    3329             : }
    3330             : 
    3331             : /// Called after ScheduleDAGMI has scheduled an instruction and updated
    3332             : /// scheduled/remaining flags in the DAG nodes.
    3333       78758 : void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
    3334      157516 :   SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
    3335       78758 :   Top.bumpNode(SU);
    3336       78758 : }
    3337             : 
    3338       18391 : ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
    3339             :   return new ScheduleDAGMI(C, llvm::make_unique<PostGenericScheduler>(C),
    3340       36782 :                            /*RemoveKillFlags=*/true);
    3341             : }
    3342             : 
    3343             : //===----------------------------------------------------------------------===//
    3344             : // ILP Scheduler. Currently for experimental analysis of heuristics.
    3345             : //===----------------------------------------------------------------------===//
    3346             : 
    3347             : namespace {
    3348             : 
    3349             : /// Order nodes by the ILP metric.
    3350             : struct ILPOrder {
    3351             :   const SchedDFSResult *DFSResult = nullptr;
    3352             :   const BitVector *ScheduledTrees = nullptr;
    3353             :   bool MaximizeILP;
    3354             : 
    3355           8 :   ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
    3356             : 
    3357             :   /// Apply a less-than relation on node priority.
    3358             :   ///
    3359             :   /// (Return true if A comes after B in the Q.)
    3360         300 :   bool operator()(const SUnit *A, const SUnit *B) const {
    3361         300 :     unsigned SchedTreeA = DFSResult->getSubtreeID(A);
    3362             :     unsigned SchedTreeB = DFSResult->getSubtreeID(B);
    3363         300 :     if (SchedTreeA != SchedTreeB) {
    3364             :       // Unscheduled trees have lower priority.
    3365         318 :       if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
    3366             :         return ScheduledTrees->test(SchedTreeB);
    3367             : 
    3368             :       // Trees with shallower connections have lower priority.
    3369          58 :       if (DFSResult->getSubtreeLevel(SchedTreeA)
    3370             :           != DFSResult->getSubtreeLevel(SchedTreeB)) {
    3371             :         return DFSResult->getSubtreeLevel(SchedTreeA)
    3372           0 :           < DFSResult->getSubtreeLevel(SchedTreeB);
    3373             :       }
    3374             :     }
    3375         199 :     if (MaximizeILP)
    3376         232 :       return DFSResult->getILP(A) < DFSResult->getILP(B);
    3377             :     else
    3378         166 :       return DFSResult->getILP(A) > DFSResult->getILP(B);
    3379             :   }
    3380             : };
    3381             : 
    3382             : /// Schedule based on the ILP metric.
    3383          16 : class ILPScheduler : public MachineSchedStrategy {
    3384             :   ScheduleDAGMILive *DAG = nullptr;
    3385             :   ILPOrder Cmp;
    3386             : 
    3387             :   std::vector<SUnit*> ReadyQ;
    3388             : 
    3389             : public:
    3390           8 :   ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
    3391             : 
    3392          10 :   void initialize(ScheduleDAGMI *dag) override {
    3393             :     assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
    3394          10 :     DAG = static_cast<ScheduleDAGMILive*>(dag);
    3395          10 :     DAG->computeDFSResult();
    3396          10 :     Cmp.DFSResult = DAG->getDFSResult();
    3397          10 :     Cmp.ScheduledTrees = &DAG->getScheduledTrees();
    3398             :     ReadyQ.clear();
    3399          10 :   }
    3400             : 
    3401          10 :   void registerRoots() override {
    3402             :     // Restore the heap in ReadyQ with the updated DFS results.
    3403          10 :     std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
    3404          10 :   }
    3405             : 
    3406             :   /// Implement MachineSchedStrategy interface.
    3407             :   /// -----------------------------------------
    3408             : 
    3409             :   /// Callback to select the highest priority node from the ready Q.
    3410         186 :   SUnit *pickNode(bool &IsTopNode) override {
    3411         186 :     if (ReadyQ.empty()) return nullptr;
    3412         176 :     std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
    3413         176 :     SUnit *SU = ReadyQ.back();
    3414             :     ReadyQ.pop_back();
    3415         176 :     IsTopNode = false;
    3416             :     LLVM_DEBUG(dbgs() << "Pick node "
    3417             :                       << "SU(" << SU->NodeNum << ") "
    3418             :                       << " ILP: " << DAG->getDFSResult()->getILP(SU)
    3419             :                       << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)
    3420             :                       << " @"
    3421             :                       << DAG->getDFSResult()->getSubtreeLevel(
    3422             :                              DAG->getDFSResult()->getSubtreeID(SU))
    3423             :                       << '\n'
    3424             :                       << "Scheduling " << *SU->getInstr());
    3425         176 :     return SU;
    3426             :   }
    3427             : 
    3428             :   /// Scheduler callback to notify that a new subtree is scheduled.
    3429          38 :   void scheduleTree(unsigned SubtreeID) override {
    3430          38 :     std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
    3431          38 :   }
    3432             : 
    3433             :   /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
    3434             :   /// DFSResults, and resort the priority Q.
    3435         176 :   void schedNode(SUnit *SU, bool IsTopNode) override {
    3436             :     assert(!IsTopNode && "SchedDFSResult needs bottom-up");
    3437         176 :   }
    3438             : 
    3439          64 :   void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
    3440             : 
    3441         176 :   void releaseBottomNode(SUnit *SU) override {
    3442         176 :     ReadyQ.push_back(SU);
    3443         176 :     std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
    3444         176 :   }
    3445             : };
    3446             : 
    3447             : } // end anonymous namespace
    3448             : 
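The ReadyQ discipline above (make_heap whenever the DFS results change, pop_heap/push_heap per node) can be exercised in isolation. A minimal sketch with plain ints standing in for SUnits ranked by ILP (numbers invented): the less-than comparator surfaces the highest value first, matching MaximizeILP.

    #include <algorithm>
    #include <iostream>
    #include <vector>

    int main() {
      std::vector<int> ReadyQ = {3, 1, 4, 1, 5};      // stand-ins for ILP values
      auto Cmp = [](int A, int B) { return A < B; };  // MaximizeILP ordering
      std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
      while (!ReadyQ.empty()) {
        std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);  // best to the back
        std::cout << ReadyQ.back() << ' ';               // prints: 5 4 3 1 1
        ReadyQ.pop_back();
      }
      std::cout << '\n';
    }
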
    3449           6 : static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
    3450          24 :   return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(true));
    3451             : }
    3452           2 : static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
    3453           8 :   return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(false));
    3454             : }
    3455             : 
    3456       99743 : static MachineSchedRegistry ILPMaxRegistry(
    3457             :   "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
    3458       99743 : static MachineSchedRegistry ILPMinRegistry(
    3459             :   "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
    3460             : 
    3461             : //===----------------------------------------------------------------------===//
    3462             : // Machine Instruction Shuffler for Correctness Testing
    3463             : //===----------------------------------------------------------------------===//
    3464             : 
    3465             : #ifndef NDEBUG
    3466             : namespace {
    3467             : 
    3468             : /// Apply a less-than relation on the node order, which corresponds to the
    3469             : /// instruction order prior to scheduling. IsReverse implements greater-than.
    3470             : template<bool IsReverse>
    3471             : struct SUnitOrder {
    3472             :   bool operator()(SUnit *A, SUnit *B) const {
    3473             :     if (IsReverse)
    3474             :       return A->NodeNum > B->NodeNum;
    3475             :     else
    3476             :       return A->NodeNum < B->NodeNum;
    3477             :   }
    3478             : };
    3479             : 
    3480             : /// Reorder instructions as much as possible.
    3481             : class InstructionShuffler : public MachineSchedStrategy {
    3482             :   bool IsAlternating;
    3483             :   bool IsTopDown;
    3484             : 
    3485             :   // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
    3486             :   // gives nodes with a higher number higher priority, causing the latest
    3487             :   // instructions to be scheduled first.
    3488             :   PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
    3489             :     TopQ;
    3490             : 
    3491             :   // When scheduling bottom-up, use greater-than as the queue priority.
    3492             :   PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
    3493             :     BottomQ;
    3494             : 
    3495             : public:
    3496             :   InstructionShuffler(bool alternate, bool topdown)
    3497             :     : IsAlternating(alternate), IsTopDown(topdown) {}
    3498             : 
    3499             :   void initialize(ScheduleDAGMI*) override {
    3500             :     TopQ.clear();
    3501             :     BottomQ.clear();
    3502             :   }
    3503             : 
    3504             :   /// Implement MachineSchedStrategy interface.
    3505             :   /// -----------------------------------------
    3506             : 
    3507             :   SUnit *pickNode(bool &IsTopNode) override {
    3508             :     SUnit *SU;
    3509             :     if (IsTopDown) {
    3510             :       do {
    3511             :         if (TopQ.empty()) return nullptr;
    3512             :         SU = TopQ.top();
    3513             :         TopQ.pop();
    3514             :       } while (SU->isScheduled);
    3515             :       IsTopNode = true;
    3516             :     } else {
    3517             :       do {
    3518             :         if (BottomQ.empty()) return nullptr;
    3519             :         SU = BottomQ.top();
    3520             :         BottomQ.pop();
    3521             :       } while (SU->isScheduled);
    3522             :       IsTopNode = false;
    3523             :     }
    3524             :     if (IsAlternating)
    3525             :       IsTopDown = !IsTopDown;
    3526             :     return SU;
    3527             :   }
    3528             : 
    3529             :   void schedNode(SUnit *SU, bool IsTopNode) override {}
    3530             : 
    3531             :   void releaseTopNode(SUnit *SU) override {
    3532             :     TopQ.push(SU);
    3533             :   }
    3534             :   void releaseBottomNode(SUnit *SU) override {
    3535             :     BottomQ.push(SU);
    3536             :   }
    3537             : };
    3538             : 
    3539             : } // end anonymous namespace
    3540             : 
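The comparator inversion the shuffler relies on is worth seeing with concrete numbers. A small sketch (plain ints for SUnits): a less-than comparator in a priority queue yields the largest element first, which is exactly why SUnitOrder<false> makes top-down picks run latest-first.

    #include <iostream>
    #include <queue>
    #include <vector>

    int main() {
      // Mirrors SUnitOrder<false> on NodeNum: with a less-than comparator,
      // the highest NodeNum has the highest priority.
      std::priority_queue<int, std::vector<int>, std::less<int>> TopQ;
      for (int NodeNum = 0; NodeNum < 4; ++NodeNum)
        TopQ.push(NodeNum);
      while (!TopQ.empty()) {
        std::cout << TopQ.top() << ' ';  // prints: 3 2 1 0
        TopQ.pop();
      }
      std::cout << '\n';
    }
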
    3541             : static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
    3542             :   bool Alternate = !ForceTopDown && !ForceBottomUp;
    3543             :   bool TopDown = !ForceBottomUp;
    3544             :   assert((TopDown || !ForceTopDown) &&
    3545             :          "-misched-topdown incompatible with -misched-bottomup");
    3546             :   return new ScheduleDAGMILive(
    3547             :       C, llvm::make_unique<InstructionShuffler>(Alternate, TopDown));
    3548             : }
    3549             : 
    3550             : static MachineSchedRegistry ShufflerRegistry(
    3551             :   "shuffle", "Shuffle machine instructions alternating directions",
    3552             :   createInstructionShuffler);
    3553             : #endif // !NDEBUG
    3554             : 
    3555             : //===----------------------------------------------------------------------===//
    3556             : // GraphWriter support for ScheduleDAGMILive.
    3557             : //===----------------------------------------------------------------------===//
    3558             : 
    3559             : #ifndef NDEBUG
    3560             : namespace llvm {
    3561             : 
    3562             : template<> struct GraphTraits<
    3563             :   ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
    3564             : 
    3565             : template<>
    3566             : struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
    3567             :   DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
    3568             : 
    3569             :   static std::string getGraphName(const ScheduleDAG *G) {
    3570             :     return G->MF.getName();
    3571             :   }
    3572             : 
    3573             :   static bool renderGraphFromBottomUp() {
    3574             :     return true;
    3575             :   }
    3576             : 
    3577             :   static bool isNodeHidden(const SUnit *Node) {
    3578             :     if (ViewMISchedCutoff == 0)
    3579             :       return false;
    3580             :     return (Node->Preds.size() > ViewMISchedCutoff
    3581             :          || Node->Succs.size() > ViewMISchedCutoff);
    3582             :   }
    3583             : 
    3584             :   /// If you want to override the dot attributes printed for a particular
    3585             :   /// edge, override this method.
    3586             :   static std::string getEdgeAttributes(const SUnit *Node,
    3587             :                                        SUnitIterator EI,
    3588             :                                        const ScheduleDAG *Graph) {
    3589             :     if (EI.isArtificialDep())
    3590             :       return "color=cyan,style=dashed";
    3591             :     if (EI.isCtrlDep())
    3592             :       return "color=blue,style=dashed";
    3593             :     return "";
    3594             :   }
    3595             : 
    3596             :   static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
    3597             :     std::string Str;
    3598             :     raw_string_ostream SS(Str);
    3599             :     const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
    3600             :     const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
    3601             :       static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
    3602             :     SS << "SU:" << SU->NodeNum;
    3603             :     if (DFS)
    3604             :       SS << " I:" << DFS->getNumInstrs(SU);
    3605             :     return SS.str();
    3606             :   }
    3607             : 
    3608             :   static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
    3609             :     return G->getGraphNodeLabel(SU);
    3610             :   }
    3611             : 
    3612             :   static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
    3613             :     std::string Str("shape=Mrecord");
    3614             :     const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
    3615             :     const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
    3616             :       static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
    3617             :     if (DFS) {
    3618             :       Str += ",style=filled,fillcolor=\"#";
    3619             :       Str += DOT::getColorString(DFS->getSubtreeID(N));
    3620             :       Str += '"';
    3621             :     }
    3622             :     return Str;
    3623             :   }
    3624             : };
    3625             : 
    3626             : } // end namespace llvm
    3627             : #endif // NDEBUG
    3628             : 
    3629             : /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
    3630             : /// rendered using 'dot'.
    3631           0 : void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
    3632             : #ifndef NDEBUG
    3633             :   ViewGraph(this, Name, false, Title);
    3634             : #else
    3635           0 :   errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
    3636           0 :          << "systems with Graphviz or gv!\n";
    3637             : #endif  // NDEBUG
    3638           0 : }
    3639             : 
    3640             : /// Out-of-line implementation with no arguments is handy for gdb.
    3641           0 : void ScheduleDAGMI::viewGraph() {
    3642           0 :   viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
    3643      299229 : }

Generated by: LCOV version 1.13