//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// MachineScheduler schedules machine instructions after phi elimination. It
// preserves LiveIntervals so it can be invoked before register allocation.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePassRegistry.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/CodeGen/ScheduleDFS.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <limits>
#include <memory>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "machine-scheduler"

namespace llvm {

cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
                           cl::desc("Force top-down list scheduling"));
cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
                            cl::desc("Force bottom-up list scheduling"));
cl::opt<bool>
DumpCriticalPathLength("misched-dcpl", cl::Hidden,
                       cl::desc("Print critical path length to stdout"));

} // end namespace llvm

#ifndef NDEBUG
static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
  cl::desc("Pop up a window to show MISched dags after they are processed"));

/// In some situations a few uninteresting nodes depend on nearly all other
/// nodes in the graph; provide a cutoff to hide them.
static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
  cl::desc("Hide nodes with more predecessors/successors than cutoff"));

static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));

static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
  cl::desc("Only schedule this function"));
static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
                                        cl::desc("Only schedule this MBB#"));
#else
static bool ViewMISchedDAGs = false;
#endif // NDEBUG

/// Avoid quadratic complexity in unusually large basic blocks by limiting the
/// size of the ready lists.
static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
  cl::desc("Limit ready list to N instructions"), cl::init(256));

static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
  cl::desc("Enable register pressure scheduling."), cl::init(true));

static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
  cl::desc("Enable cyclic critical path analysis."), cl::init(true));

static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
                                        cl::desc("Enable memop clustering."),
                                        cl::init(true));

static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
  cl::desc("Verify machine instrs before and after machine scheduling"));
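
// The flags above are ordinary cl::opt options, so (illustrative examples,
// not commands taken from this file) they can be passed to llc directly,
// e.g. "llc -misched-limit=50 foo.ll" or "llc -misched-regpressure=false
// foo.ll", and reached from clang via its -mllvm pass-through, e.g.
// "clang -O2 -mllvm -misched-cluster=false foo.c".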

// DAG subtrees must have at least this many nodes.
static const unsigned MinSubtreeSize = 8;

// Pin the vtables to this file.
void MachineSchedStrategy::anchor() {}

void ScheduleDAGMutation::anchor() {}

//===----------------------------------------------------------------------===//
// Machine Instruction Scheduling Pass and Registry
//===----------------------------------------------------------------------===//

MachineSchedContext::MachineSchedContext() {
  RegClassInfo = new RegisterClassInfo();
}

MachineSchedContext::~MachineSchedContext() {
  delete RegClassInfo;
}

namespace {

/// Base class for a machine scheduler class that can run at any point.
class MachineSchedulerBase : public MachineSchedContext,
                             public MachineFunctionPass {
public:
  MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}

  void print(raw_ostream &O, const Module* = nullptr) const override;

protected:
  void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
};

/// MachineScheduler runs after coalescing and before register allocation.
class MachineScheduler : public MachineSchedulerBase {
public:
  MachineScheduler();

  void getAnalysisUsage(AnalysisUsage &AU) const override;

  bool runOnMachineFunction(MachineFunction&) override;

  static char ID; // Class identification, replacement for typeinfo

protected:
  ScheduleDAGInstrs *createMachineScheduler();
};

/// PostMachineScheduler runs shortly before code emission.
class PostMachineScheduler : public MachineSchedulerBase {
public:
  PostMachineScheduler();

  void getAnalysisUsage(AnalysisUsage &AU) const override;

  bool runOnMachineFunction(MachineFunction&) override;

  static char ID; // Class identification, replacement for typeinfo

protected:
  ScheduleDAGInstrs *createPostMachineScheduler();
};

} // end anonymous namespace

char MachineScheduler::ID = 0;

char &llvm::MachineSchedulerID = MachineScheduler::ID;

INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,
                      "Machine Instruction Scheduler", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,
                    "Machine Instruction Scheduler", false, false)

MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
  initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
}

void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addRequiredID(MachineDominatorsID);
  AU.addRequired<MachineLoopInfo>();
  AU.addRequired<AAResultsWrapperPass>();
  AU.addRequired<TargetPassConfig>();
  AU.addRequired<SlotIndexes>();
  AU.addPreserved<SlotIndexes>();
  AU.addRequired<LiveIntervals>();
  AU.addPreserved<LiveIntervals>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

char PostMachineScheduler::ID = 0;

char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;

INITIALIZE_PASS(PostMachineScheduler, "postmisched",
                "PostRA Machine Instruction Scheduler", false, false)

PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
  initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
}

void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addRequiredID(MachineDominatorsID);
  AU.addRequired<MachineLoopInfo>();
  AU.addRequired<TargetPassConfig>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

MachinePassRegistry MachineSchedRegistry::Registry;

/// A dummy default scheduler factory indicates whether the scheduler
/// is overridden on the command line.
static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
  return nullptr;
}

/// MachineSchedOpt allows command line selection of the scheduler.
static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
               RegisterPassParser<MachineSchedRegistry>>
MachineSchedOpt("misched",
                cl::init(&useDefaultMachineSched), cl::Hidden,
                cl::desc("Machine instruction scheduler to use"));

static MachineSchedRegistry
DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
                     useDefaultMachineSched);
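
// A minimal sketch of how a target or plugin can hook into this registry
// (hypothetical code, not part of this file; the factory and registry names
// are made up, but the pattern mirrors DefaultSchedRegistry above):
//
//   static ScheduleDAGInstrs *createMySched(MachineSchedContext *C) {
//     return new ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C));
//   }
//   static MachineSchedRegistry
//   MySchedRegistry("my-sched", "Run my custom scheduler.", createMySched);
//
// Once registered, the scheduler becomes selectable with -misched=my-sched.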

static cl::opt<bool> EnableMachineSched(
    "enable-misched",
    cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
    cl::Hidden);

static cl::opt<bool> EnablePostRAMachineSched(
    "enable-post-misched",
    cl::desc("Enable the post-ra machine instruction scheduling pass."),
    cl::init(true), cl::Hidden);

/// Decrement this iterator until reaching the top or a non-debug instr.
static MachineBasicBlock::const_iterator
priorNonDebug(MachineBasicBlock::const_iterator I,
              MachineBasicBlock::const_iterator Beg) {
  assert(I != Beg && "reached the top of the region, cannot decrement");
  while (--I != Beg) {
    if (!I->isDebugValue())
      break;
  }
  return I;
}

/// Non-const version.
static MachineBasicBlock::iterator
priorNonDebug(MachineBasicBlock::iterator I,
              MachineBasicBlock::const_iterator Beg) {
  return priorNonDebug(MachineBasicBlock::const_iterator(I), Beg)
      .getNonConstIterator();
}

/// If this iterator is a debug value, increment until reaching the End or a
/// non-debug instruction.
static MachineBasicBlock::const_iterator
nextIfDebug(MachineBasicBlock::const_iterator I,
            MachineBasicBlock::const_iterator End) {
  for (; I != End; ++I) {
    if (!I->isDebugValue())
      break;
  }
  return I;
}

/// Non-const version.
static MachineBasicBlock::iterator
nextIfDebug(MachineBasicBlock::iterator I,
            MachineBasicBlock::const_iterator End) {
  return nextIfDebug(MachineBasicBlock::const_iterator(I), End)
      .getNonConstIterator();
}

/// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
  // Select the scheduler, or set the default.
  MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
  if (Ctor != useDefaultMachineSched)
    return Ctor(this);

  // Get the default scheduler set by the target for this function.
  ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
  if (Scheduler)
    return Scheduler;

  // Default to GenericScheduler.
  return createGenericSchedLive(this);
}

/// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
/// the caller. We don't have a command line option to override the postRA
/// scheduler. The Target must configure it.
ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
  // Get the postRA scheduler set by the target for this function.
  ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
  if (Scheduler)
    return Scheduler;

  // Default to GenericScheduler.
  return createGenericSchedPostRA(this);
}

/// Top-level MachineScheduler pass driver.
///
/// Visit blocks in function order. Divide each block into scheduling regions
/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
/// consistent with the DAG builder, which traverses the interior of the
/// scheduling regions bottom-up.
///
/// This design avoids exposing scheduling boundaries to the DAG builder,
/// simplifying the DAG builder's support for "special" target instructions.
/// At the same time the design allows target schedulers to operate across
/// scheduling boundaries, for example to bundle the boundary instructions
/// without reordering them. This creates complexity, because the target
/// scheduler must update the RegionBegin and RegionEnd positions cached by
/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
/// design would be to split blocks at scheduling boundaries, but LLVM has a
/// general bias against block splitting purely for implementation simplicity.
bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
  if (skipFunction(mf.getFunction()))
    return false;

  if (EnableMachineSched.getNumOccurrences()) {
    if (!EnableMachineSched)
      return false;
  } else if (!mf.getSubtarget().enableMachineScheduler())
    return false;

  DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));

  // Initialize the context of the pass.
  MF = &mf;
  MLI = &getAnalysis<MachineLoopInfo>();
  MDT = &getAnalysis<MachineDominatorTree>();
  PassConfig = &getAnalysis<TargetPassConfig>();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

  LIS = &getAnalysis<LiveIntervals>();

  if (VerifyScheduling) {
    DEBUG(LIS->dump());
    MF->verify(this, "Before machine scheduling.");
  }
  RegClassInfo->runOnMachineFunction(*MF);

  // Instantiate the selected scheduler for this target, function, and
  // optimization level.
  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
  scheduleRegions(*Scheduler, false);

  DEBUG(LIS->dump());
  if (VerifyScheduling)
    MF->verify(this, "After machine scheduling.");
  return true;
}

bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
  if (skipFunction(mf.getFunction()))
    return false;

  if (EnablePostRAMachineSched.getNumOccurrences()) {
    if (!EnablePostRAMachineSched)
      return false;
  } else if (!mf.getSubtarget().enablePostRAScheduler()) {
    DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
    return false;
  }
  DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));

  // Initialize the context of the pass.
  MF = &mf;
  MLI = &getAnalysis<MachineLoopInfo>();
  PassConfig = &getAnalysis<TargetPassConfig>();

  if (VerifyScheduling)
    MF->verify(this, "Before post machine scheduling.");

  // Instantiate the selected scheduler for this target, function, and
  // optimization level.
  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
  scheduleRegions(*Scheduler, true);

  if (VerifyScheduling)
    MF->verify(this, "After post machine scheduling.");
  return true;
}

/// Return true if the given instruction should not be included in a scheduling
/// region.
///
/// MachineScheduler does not currently support scheduling across calls. To
/// handle calls, the DAG builder needs to be modified to create register
/// anti/output dependencies on the registers clobbered by the call's regmask
/// operand. In PreRA scheduling, the stack pointer adjustment already prevents
/// scheduling across calls. In PostRA scheduling, we need isCall to enforce
/// the boundary, but there would be no benefit to postRA scheduling across
/// calls this late anyway.
static bool isSchedBoundary(MachineBasicBlock::iterator MI,
                            MachineBasicBlock *MBB,
                            MachineFunction *MF,
                            const TargetInstrInfo *TII) {
  return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
}
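
// For illustration (hypothetical instructions, not taken from any target): in
// a block containing
//
//   %1 = ADD %0, %0
//   CALL @callee        <-- isCall(): scheduling boundary
//   %2 = SUB %1, %0
//
// the call splits the block into two regions, so the ADD and the SUB are
// never reordered relative to the call.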

/// A region of an MBB for scheduling.
namespace {
struct SchedRegion {
  /// RegionBegin is the first instruction in the scheduling region, and
  /// RegionEnd is either MBB->end() or the scheduling boundary after the
  /// last instruction in the scheduling region. These iterators cannot refer
  /// to instructions outside of the identified scheduling region because
  /// those may be reordered before scheduling this region.
  MachineBasicBlock::iterator RegionBegin;
  MachineBasicBlock::iterator RegionEnd;
  unsigned NumRegionInstrs;

  SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E,
              unsigned N) :
    RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
};
} // end anonymous namespace

using MBBRegionsVector = SmallVector<SchedRegion, 16>;

static void
getSchedRegions(MachineBasicBlock *MBB,
                MBBRegionsVector &Regions,
                bool RegionsTopDown) {
  MachineFunction *MF = MBB->getParent();
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();

  MachineBasicBlock::iterator I = nullptr;
  for (MachineBasicBlock::iterator RegionEnd = MBB->end();
       RegionEnd != MBB->begin(); RegionEnd = I) {

    // Avoid decrementing RegionEnd for blocks with no terminator.
    if (RegionEnd != MBB->end() ||
        isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
      --RegionEnd;
    }

    // The next region starts above the previous region. Look backward in the
    // instruction stream until we find the nearest boundary.
    unsigned NumRegionInstrs = 0;
    I = RegionEnd;
    for (; I != MBB->begin(); --I) {
      MachineInstr &MI = *std::prev(I);
      if (isSchedBoundary(&MI, &*MBB, MF, TII))
        break;
      if (!MI.isDebugValue())
        // MBB::size() uses instr_iterator to count. Here we need a bundle to
        // count as a single instruction.
        ++NumRegionInstrs;
    }

    Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
  }

  if (RegionsTopDown)
    std::reverse(Regions.begin(), Regions.end());
}
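
// To illustrate the bottom-up walk above with a hypothetical block
// "a; b; CALL; c; d; BR": the first outer iteration records the region
// [c, BR) and the second records [a, CALL), so Regions holds the bottom
// region first. Each boundary instruction becomes the RegionEnd of the region
// above it and is never reordered itself. RegionsTopDown merely reverses this
// order for schedulers that want to visit regions top-down.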

/// Main driver for both MachineScheduler and PostMachineScheduler.
void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
                                           bool FixKillFlags) {
  // Visit all machine basic blocks.
  //
  // TODO: Visit blocks in global postorder or postorder within the bottom-up
  // loop tree. Then we can optionally compute global RegPressure.
  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
       MBB != MBBEnd; ++MBB) {

    Scheduler.startBlock(&*MBB);

#ifndef NDEBUG
    if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
      continue;
    if (SchedOnlyBlock.getNumOccurrences()
        && (int)SchedOnlyBlock != MBB->getNumber())
      continue;
#endif

    // Break the block into scheduling regions [I, RegionEnd). RegionEnd
    // points to the scheduling boundary at the bottom of the region. The DAG
    // does not include RegionEnd, but the region does (i.e. the next
    // RegionEnd is above the previous RegionBegin). If the current block has
    // no terminator then RegionEnd == MBB->end() for the bottom region.
    //
    // All the regions of MBB are first found and stored in MBBRegions, which
    // will be processed (MBB) top-down if initialized with true.
    //
    // The Scheduler may insert instructions during either schedule() or
    // exitRegion(), even for empty regions. So the local iterators 'I' and
    // 'RegionEnd' are invalid across these calls. Instructions must not be
    // added to other regions than the current one without updating MBBRegions.

    MBBRegionsVector MBBRegions;
    getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
    for (MBBRegionsVector::iterator R = MBBRegions.begin();
         R != MBBRegions.end(); ++R) {
      MachineBasicBlock::iterator I = R->RegionBegin;
      MachineBasicBlock::iterator RegionEnd = R->RegionEnd;
      unsigned NumRegionInstrs = R->NumRegionInstrs;

      // Notify the scheduler of the region, even if we may skip scheduling
      // it. Perhaps it still needs to be bundled.
      Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);

      // Skip empty scheduling regions (0 or 1 schedulable instructions).
      if (I == RegionEnd || I == std::prev(RegionEnd)) {
        // Close the current region. Bundle the terminator if needed.
        // This invalidates 'RegionEnd' and 'I'.
        Scheduler.exitRegion();
        continue;
      }
      DEBUG(dbgs() << "********** MI Scheduling **********\n");
      DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB) << " "
                   << MBB->getName() << "\n  From: " << *I << "    To: ";
            if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
            else dbgs() << "End";
            dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
      if (DumpCriticalPathLength) {
        errs() << MF->getName();
        errs() << ":%bb. " << MBB->getNumber();
        errs() << " " << MBB->getName() << " \n";
      }

      // Schedule a region: possibly reorder instructions.
      // This invalidates the original region iterators.
      Scheduler.schedule();

      // Close the current region.
      Scheduler.exitRegion();
    }
    Scheduler.finishBlock();
    // FIXME: Ideally, no further passes should rely on kill flags. However,
    // thumb2 size reduction is currently an exception, so the PostMIScheduler
    // needs to do this.
    if (FixKillFlags)
      Scheduler.fixupKills(*MBB);
  }
  Scheduler.finalizeSchedule();
}

void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
  // unimplemented
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void ReadyQueue::dump() const {
  dbgs() << "Queue " << Name << ": ";
  for (const SUnit *SU : Queue)
    dbgs() << SU->NodeNum << " ";
  dbgs() << "\n";
}
#endif

//===----------------------------------------------------------------------===//
// ScheduleDAGMI - Basic machine instruction scheduling. This is
// independent of PreRA/PostRA scheduling and involves no extra book-keeping for
// virtual registers.
//===----------------------------------------------------------------------===//

// Provide a vtable anchor.
ScheduleDAGMI::~ScheduleDAGMI() = default;

bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
  return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
}

bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
  if (SuccSU != &ExitSU) {
    // Do not use WillCreateCycle, it assumes SD scheduling.
    // If Pred is reachable from Succ, then the edge creates a cycle.
    if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
      return false;
    Topo.AddPred(SuccSU, PredDep.getSUnit());
  }
  SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
  // Return true regardless of whether a new edge needed to be inserted.
  return true;
}
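
// A sketch of how a ScheduleDAGMutation might use the two helpers above to
// add an artificial edge only when it cannot create a cycle (hypothetical
// mutation; the heuristic choosing Pred/Succ is made up):
//
//   struct MyMutation : ScheduleDAGMutation {
//     void apply(ScheduleDAGInstrs *DAGInstrs) override {
//       auto *DAG = static_cast<ScheduleDAGMI *>(DAGInstrs);
//       SUnit *Pred = /* ... */, *Succ = /* ... */;
//       if (DAG->canAddEdge(Succ, Pred))
//         DAG->addEdge(Succ, SDep(Pred, SDep::Artificial));
//     }
//   };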

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
/// NumPredsLeft reaches zero, release the successor node.
///
/// FIXME: Adjust SuccSU height based on MinLatency.
void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

  if (SuccEdge->isWeak()) {
    --SuccSU->WeakPredsLeft;
    if (SuccEdge->isCluster())
      NextClusterSucc = SuccSU;
    return;
  }
#ifndef NDEBUG
  if (SuccSU->NumPredsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(nullptr);
  }
#endif
  // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
  // CurrCycle may have advanced since then.
  if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
    SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();

  --SuccSU->NumPredsLeft;
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
    SchedImpl->releaseTopNode(SuccSU);
}

/// releaseSuccessors - Call releaseSucc on each of SU's successors.
void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
  for (SDep &Succ : SU->Succs)
    releaseSucc(SU, &Succ);
}

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
/// NumSuccsLeft reaches zero, release the predecessor node.
///
/// FIXME: Adjust PredSU height based on MinLatency.
void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

  if (PredEdge->isWeak()) {
    --PredSU->WeakSuccsLeft;
    if (PredEdge->isCluster())
      NextClusterPred = PredSU;
    return;
  }
#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    PredSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(nullptr);
  }
#endif
  // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
  // CurrCycle may have advanced since then.
  if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
    PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();

  --PredSU->NumSuccsLeft;
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
    SchedImpl->releaseBottomNode(PredSU);
}

/// releasePredecessors - Call releasePred on each of SU's predecessors.
void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
  for (SDep &Pred : SU->Preds)
    releasePred(SU, &Pred);
}

void ScheduleDAGMI::startBlock(MachineBasicBlock *bb) {
  ScheduleDAGInstrs::startBlock(bb);
  SchedImpl->enterMBB(bb);
}

void ScheduleDAGMI::finishBlock() {
  SchedImpl->leaveMBB();
  ScheduleDAGInstrs::finishBlock();
}

/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
/// crossing a scheduling boundary. [begin, end) includes all instructions in
/// the region, including the boundary itself and single-instruction regions
/// that don't get scheduled.
void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
                                     MachineBasicBlock::iterator begin,
                                     MachineBasicBlock::iterator end,
                                     unsigned regioninstrs)
{
  ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);

  SchedImpl->initPolicy(begin, end, regioninstrs);
}

/// This is normally called from the main scheduler loop but may also be invoked
/// by the scheduling strategy to perform additional code motion.
void ScheduleDAGMI::moveInstruction(
  MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
  // Advance RegionBegin if the first instruction moves down.
  if (&*RegionBegin == MI)
    ++RegionBegin;

  // Update the instruction stream.
  BB->splice(InsertPos, BB, MI);

  // Update LiveIntervals
  if (LIS)
    LIS->handleMove(*MI, /*UpdateFlags=*/true);

  // Recede RegionBegin if an instruction moves above the first.
  if (RegionBegin == InsertPos)
    RegionBegin = MI;
}

bool ScheduleDAGMI::checkSchedLimit() {
#ifndef NDEBUG
  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
    CurrentTop = CurrentBottom;
    return false;
  }
  ++NumInstrsScheduled;
#endif
  return true;
}
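
// In debug builds, the -misched-cutoff=N flag defined above makes this return
// false after N instructions, which can help bisect a scheduler-induced
// failure (illustrative invocation: "llc -misched-cutoff=10 foo.ll"). In
// release (NDEBUG) builds the check is compiled out and this always returns
// true.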

/// Per-region scheduling driver, called back from
/// MachineScheduler::runOnMachineFunction. This is a simplified driver that
/// does not consider liveness or register pressure. It is useful for PostRA
/// scheduling and potentially other custom schedulers.
void ScheduleDAGMI::schedule() {
  DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
  DEBUG(SchedImpl->dumpPolicy());

  // Build the DAG.
  buildSchedGraph(AA);

  Topo.InitDAGTopologicalSorting();

  postprocessDAG();

  SmallVector<SUnit*, 8> TopRoots, BotRoots;
  findRootsAndBiasEdges(TopRoots, BotRoots);

  // Initialize the strategy before modifying the DAG.
  // This may initialize a DFSResult to be used for queue priority.
  SchedImpl->initialize(this);

  DEBUG(
    if (EntrySU.getInstr() != nullptr)
      EntrySU.dumpAll(this);
    for (const SUnit &SU : SUnits)
      SU.dumpAll(this);
    if (ExitSU.getInstr() != nullptr)
      ExitSU.dumpAll(this);
  );
  if (ViewMISchedDAGs) viewGraph();

  // Initialize ready queues now that the DAG and priority data are finalized.
  initQueues(TopRoots, BotRoots);

  bool IsTopNode = false;
  while (true) {
    DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
    SUnit *SU = SchedImpl->pickNode(IsTopNode);
    if (!SU) break;

    assert(!SU->isScheduled && "Node already scheduled");
    if (!checkSchedLimit())
      break;

    MachineInstr *MI = SU->getInstr();
    if (IsTopNode) {
      assert(SU->isTopReady() && "node still has unscheduled dependencies");
      if (&*CurrentTop == MI)
        CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
      else
        moveInstruction(MI, CurrentTop);
    } else {
      assert(SU->isBottomReady() && "node still has unscheduled dependencies");
      MachineBasicBlock::iterator priorII =
        priorNonDebug(CurrentBottom, CurrentTop);
      if (&*priorII == MI)
        CurrentBottom = priorII;
      else {
        if (&*CurrentTop == MI)
          CurrentTop = nextIfDebug(++CurrentTop, priorII);
        moveInstruction(MI, CurrentBottom);
        CurrentBottom = MI;
      }
    }
    // Notify the scheduling strategy before updating the DAG.
    // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
    // runs, it can then use the accurate ReadyCycle time to determine whether
    // newly released nodes can move to the readyQ.
    SchedImpl->schedNode(SU, IsTopNode);

    updateQueues(SU, IsTopNode);
  }
  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");

  placeDebugValues();

  DEBUG({
    dbgs() << "*** Final schedule for "
           << printMBBReference(*begin()->getParent()) << " ***\n";
    dumpSchedule();
    dbgs() << '\n';
  });
}

/// Apply each ScheduleDAGMutation step in order.
void ScheduleDAGMI::postprocessDAG() {
  for (auto &m : Mutations)
    m->apply(this);
}
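
// Mutations are typically appended when the target constructs the scheduler.
// A sketch (hypothetical target code; the factory shown is declared in
// MachineScheduler.h):
//
//   ScheduleDAGMILive *DAG = createGenericSchedLive(C);
//   DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));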

void ScheduleDAGMI::
findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
                      SmallVectorImpl<SUnit*> &BotRoots) {
  for (SUnit &SU : SUnits) {
    assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");

    // Order predecessors so DFSResult follows the critical path.
    SU.biasCriticalPath();

    // A SUnit is ready to top schedule if it has no predecessors.
    if (!SU.NumPredsLeft)
      TopRoots.push_back(&SU);
    // A SUnit is ready to bottom schedule if it has no successors.
    if (!SU.NumSuccsLeft)
      BotRoots.push_back(&SU);
  }
  ExitSU.biasCriticalPath();
}

/// Identify DAG roots and set up scheduler queues.
void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
                               ArrayRef<SUnit*> BotRoots) {
  NextClusterSucc = nullptr;
  NextClusterPred = nullptr;

  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
  //
  // Nodes with unreleased weak edges can still be roots.
  // Release top roots in forward order.
  for (SUnit *SU : TopRoots)
    SchedImpl->releaseTopNode(SU);

  // Release bottom roots in reverse order so the higher priority nodes appear
  // first. This is more natural and slightly more efficient.
  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
         I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
    SchedImpl->releaseBottomNode(*I);
  }

  releaseSuccessors(&EntrySU);
  releasePredecessors(&ExitSU);

  SchedImpl->registerRoots();

  // Advance past initial DebugValues.
  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
  CurrentBottom = RegionEnd;
}

/// Update scheduler queues after scheduling an instruction.
void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
  // Release dependent instructions for scheduling.
  if (IsTopNode)
    releaseSuccessors(SU);
  else
    releasePredecessors(SU);

  SU->isScheduled = true;
}

/// Reinsert any remaining debug_values, just like the PostRA scheduler.
void ScheduleDAGMI::placeDebugValues() {
  // If first instruction was a DBG_VALUE then put it back.
  if (FirstDbgValue) {
    BB->splice(RegionBegin, BB, FirstDbgValue);
    RegionBegin = FirstDbgValue;
  }

  for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
         DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
    std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
    MachineInstr *DbgValue = P.first;
    MachineBasicBlock::iterator OrigPrevMI = P.second;
    if (&*RegionBegin == DbgValue)
      ++RegionBegin;
    BB->splice(++OrigPrevMI, BB, DbgValue);
    if (OrigPrevMI == std::prev(RegionEnd))
      RegionEnd = DbgValue;
  }
  DbgValues.clear();
  FirstDbgValue = nullptr;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
  for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
    if (SUnit *SU = getSUnit(&(*MI)))
      SU->dump(this);
    else
      dbgs() << "Missing SUnit\n";
  }
}
#endif

//===----------------------------------------------------------------------===//
// ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
// preservation.
//===----------------------------------------------------------------------===//

ScheduleDAGMILive::~ScheduleDAGMILive() {
  delete DFSResult;
}

void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
  const MachineInstr &MI = *SU.getInstr();
  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg())
      continue;
    if (!MO.readsReg())
      continue;
    if (TrackLaneMasks && !MO.isUse())
      continue;

    unsigned Reg = MO.getReg();
    if (!TargetRegisterInfo::isVirtualRegister(Reg))
      continue;

    // Ignore re-defs.
    if (TrackLaneMasks) {
      bool FoundDef = false;
      for (const MachineOperand &MO2 : MI.operands()) {
        if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {
          FoundDef = true;
          break;
        }
      }
      if (FoundDef)
        continue;
    }

    // Record this local VReg use.
    VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
    for (; UI != VRegUses.end(); ++UI) {
      if (UI->SU == &SU)
        break;
    }
    if (UI == VRegUses.end())
      VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU));
  }
}

/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
/// crossing a scheduling boundary. [begin, end) includes all instructions in
/// the region, including the boundary itself and single-instruction regions
/// that don't get scheduled.
void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
                                MachineBasicBlock::iterator begin,
                                MachineBasicBlock::iterator end,
                                unsigned regioninstrs)
{
  // ScheduleDAGMI initializes SchedImpl's per-region policy.
  ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);

  // For convenience remember the end of the liveness region.
  LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);

  SUPressureDiffs.clear();

  ShouldTrackPressure = SchedImpl->shouldTrackPressure();
  ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();

  assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&
         "ShouldTrackLaneMasks requires ShouldTrackPressure");
}

     1003             : // Set up the register pressure trackers for the top scheduled and bottom
     1004             : // scheduled regions.
    1005      100474 : void ScheduleDAGMILive::initRegPressure() {
    1006             :   VRegUses.clear();
    1007      200948 :   VRegUses.setUniverse(MRI.getNumVirtRegs());
    1008     1397624 :   for (SUnit &SU : SUnits)
    1009     1297150 :     collectVRegUses(SU);
    1010             : 
    1011      301422 :   TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,
    1012      100474 :                     ShouldTrackLaneMasks, false);
    1013      301422 :   BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
    1014      100474 :                     ShouldTrackLaneMasks, false);
    1015             : 
    1016             :   // Close the RPTracker to finalize live ins.
    1017      100474 :   RPTracker.closeRegion();
    1018             : 
    1019             :   DEBUG(RPTracker.dump());
    1020             : 
    1021             :   // Initialize the live ins and live outs.
    1022      200948 :   TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
    1023      200948 :   BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
    1024             : 
    1025             :   // Close one end of the tracker so we can call
    1026             :   // getMaxUpward/DownwardPressureDelta before advancing across any
    1027             :   // instructions. This converts currently live regs into live ins/outs.
    1028      100474 :   TopRPTracker.closeTop();
    1029      100474 :   BotRPTracker.closeBottom();
    1030             : 
    1031      100474 :   BotRPTracker.initLiveThru(RPTracker);
    1032      100474 :   if (!BotRPTracker.getLiveThru().empty()) {
    1033             :     TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
    1034             :     DEBUG(dbgs() << "Live Thru: ";
    1035             :           dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
     1036             :   }
    1037             : 
    1038             :   // For each live out vreg reduce the pressure change associated with other
    1039             :   // uses of the same vreg below the live-out reaching def.
    1040      200948 :   updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);
    1041             : 
    1042             :   // Account for liveness generated by the region boundary.
    1043      100474 :   if (LiveRegionEnd != RegionEnd) {
    1044             :     SmallVector<RegisterMaskPair, 8> LiveUses;
    1045       94734 :     BotRPTracker.recede(&LiveUses);
    1046       94734 :     updatePressureDiffs(LiveUses);
    1047             :   }
    1048             : 
    1049             :   DEBUG(
    1050             :     dbgs() << "Top Pressure:\n";
    1051             :     dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
    1052             :     dbgs() << "Bottom Pressure:\n";
    1053             :     dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI);
    1054             :   );
    1055             : 
    1056             :   assert((BotRPTracker.getPos() == RegionEnd ||
    1057             :           (RegionEnd->isDebugValue() &&
    1058             :            BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) &&
    1059             :          "Can't find the region bottom");
    1060             : 
    1061             :   // Cache the list of excess pressure sets in this region. This will also track
    1062             :   // the max pressure in the scheduled code for these sets.
    1063      100474 :   RegionCriticalPSets.clear();
    1064             :   const std::vector<unsigned> &RegionPressure =
    1065      100474 :     RPTracker.getPressure().MaxSetPressure;
    1066     2975471 :   for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
    1067     2774523 :     unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
    1068     5549046 :     if (RegionPressure[i] > Limit) {
    1069             :       DEBUG(dbgs() << TRI->getRegPressureSetName(i)
    1070             :             << " Limit " << Limit
    1071             :             << " Actual " << RegionPressure[i] << "\n");
    1072        3342 :       RegionCriticalPSets.push_back(PressureChange(i));
    1073             :     }
    1074             :   }
    1075             :   DEBUG(dbgs() << "Excess PSets: ";
    1076             :         for (const PressureChange &RCPS : RegionCriticalPSets)
    1077             :           dbgs() << TRI->getRegPressureSetName(
    1078             :             RCPS.getPSet()) << " ";
    1079             :         dbgs() << "\n");
    1080      100474 : }
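// Hedged sketch of the excess-pressure-set test above, with plain vectors
// standing in for RPTracker.getPressure().MaxSetPressure and the per-set
// limits from RegClassInfo; findExcessPSets is a hypothetical name.
#include <vector>

std::vector<unsigned>
findExcessPSets(const std::vector<unsigned> &MaxSetPressure,
                const std::vector<unsigned> &Limits) {
  std::vector<unsigned> Critical;
  for (unsigned I = 0, E = MaxSetPressure.size(); I < E; ++I)
    if (MaxSetPressure[I] > Limits[I]) // region exceeds this set's limit
      Critical.push_back(I);           // record the pressure-set index
  return Critical;
}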
    1081             : 
    1082     1297150 : void ScheduleDAGMILive::
    1083             : updateScheduledPressure(const SUnit *SU,
    1084             :                         const std::vector<unsigned> &NewMaxPressure) {
    1085     1297150 :   const PressureDiff &PDiff = getPressureDiff(SU);
    1086     2594300 :   unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
    1087     6592994 :   for (const PressureChange &PC : PDiff) {
    1088     3910217 :     if (!PC.isValid())
    1089             :       break;
    1090             :     unsigned ID = PC.getPSet();
    1091     3329336 :     while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
    1092       39755 :       ++CritIdx;
    1093     3170316 :     if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
    1094      227276 :       if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
    1095      113638 :           && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
    1096             :         RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
    1097             :     }
    1098     2647922 :     unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
    1099             :     if (NewMaxPressure[ID] >= Limit - 2) {
    1100             :       DEBUG(dbgs() << "  " << TRI->getRegPressureSetName(ID) << ": "
    1101             :             << NewMaxPressure[ID]
    1102             :             << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ") << Limit
    1103             :             << "(+ " << BotRPTracker.getLiveThru()[ID] << " livethru)\n");
    1104             :     }
    1105             :   }
    1106     1297150 : }
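// Hedged sketch of the cursor walk above: both the SUnit's pressure changes
// and RegionCriticalPSets are ordered by pressure-set ID, so one forward-only
// cursor (CritIdx) pairs them up in a single pass. Illustrative types only;
// the real code updates the matched set's recorded max pressure, while this
// sketch merely collects the matches. Both inputs are assumed sorted.
#include <vector>

std::vector<unsigned>
matchCriticalSets(const std::vector<unsigned> &ChangeIDs,
                  const std::vector<unsigned> &CriticalIDs) {
  std::vector<unsigned> Matched;
  unsigned CritIdx = 0, CritEnd = CriticalIDs.size();
  for (unsigned ID : ChangeIDs) {
    while (CritIdx != CritEnd && CriticalIDs[CritIdx] < ID)
      ++CritIdx;                       // skip critical sets below this ID
    if (CritIdx != CritEnd && CriticalIDs[CritIdx] == ID)
      Matched.push_back(ID);           // this pressure set is region-critical
  }
  return Matched;
}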
    1107             : 
    1108             : /// Update the PressureDiff array for liveness after scheduling this
    1109             : /// instruction.
    1110     1424363 : void ScheduleDAGMILive::updatePressureDiffs(
    1111             :     ArrayRef<RegisterMaskPair> LiveUses) {
    1112     3855099 :   for (const RegisterMaskPair &P : LiveUses) {
    1113     1215368 :     unsigned Reg = P.RegUnit;
    1114             :     /// FIXME: Currently assuming single-use physregs.
    1115     1215368 :     if (!TRI->isVirtualRegister(Reg))
    1116             :       continue;
    1117             : 
    1118     1034989 :     if (ShouldTrackLaneMasks) {
    1119             :       // If the register has just become live then other uses won't change
    1120             :       // this fact anymore => decrement pressure.
    1121             :       // If the register has just become dead then other uses make it come
    1122             :       // back to life => increment pressure.
    1123             :       bool Decrement = P.LaneMask.any();
    1124             : 
    1125             :       for (const VReg2SUnit &V2SU
    1126      314493 :            : make_range(VRegUses.find(Reg), VRegUses.end())) {
    1127      458975 :         SUnit &SU = *V2SU.SU;
    1128      458975 :         if (SU.isScheduled || &SU == &ExitSU)
    1129      190718 :           continue;
    1130             : 
    1131      268257 :         PressureDiff &PDiff = getPressureDiff(&SU);
    1132      268257 :         PDiff.addPressureChange(Reg, Decrement, &MRI);
    1133             :         DEBUG(
    1134             :           dbgs() << "  UpdateRegP: SU(" << SU.NodeNum << ") "
    1135             :                  << printReg(Reg, TRI) << ':' << PrintLaneMask(P.LaneMask)
    1136             :                  << ' ' << *SU.getInstr();
    1137             :           dbgs() << "              to ";
    1138             :           PDiff.dump(*TRI);
    1139             :         );
    1140             :       }
    1141             :     } else {
    1142             :       assert(P.LaneMask.any());
    1143             :       DEBUG(dbgs() << "  LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
    1144             :       // This may be called before CurrentBottom has been initialized. However,
    1145             :       // BotRPTracker must have a valid position. We want the value live into the
    1146             :       // instruction or live out of the block, so ask for the previous
    1147             :       // instruction's live-out.
    1148      720496 :       const LiveInterval &LI = LIS->getInterval(Reg);
    1149             :       VNInfo *VNI;
    1150             :       MachineBasicBlock::const_iterator I =
    1151      720496 :         nextIfDebug(BotRPTracker.getPos(), BB->end());
    1152     1440992 :       if (I == BB->end())
    1153       74826 :         VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
    1154             :       else {
    1155     1366166 :         LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
    1156      683083 :         VNI = LRQ.valueIn();
    1157             :       }
    1158             :       // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
    1159             :       assert(VNI && "No live value at use.");
    1160             :       for (const VReg2SUnit &V2SU
    1161      720496 :            : make_range(VRegUses.find(Reg), VRegUses.end())) {
    1162     1637861 :         SUnit *SU = V2SU.SU;
    1163             :         // If this use comes before the reaching def, it cannot be a last use,
    1164             :         // so decrease its pressure change.
    1165     1637861 :         if (!SU->isScheduled && SU != &ExitSU) {
    1166             :           LiveQueryResult LRQ =
    1167     2710958 :               LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
    1168     1355479 :           if (LRQ.valueIn() == VNI) {
    1169     1070466 :             PressureDiff &PDiff = getPressureDiff(SU);
    1170     1070466 :             PDiff.addPressureChange(Reg, true, &MRI);
    1171             :             DEBUG(
    1172             :               dbgs() << "  UpdateRegP: SU(" << SU->NodeNum << ") "
    1173             :                      << *SU->getInstr();
    1174             :               dbgs() << "              to ";
    1175             :               PDiff.dump(*TRI);
    1176             :             );
    1177             :           }
    1178             :         }
    1179             :       }
    1180             :     }
    1181             :   }
    1182     1424363 : }
    1183             : 
    1184             : /// schedule - Called back from MachineScheduler::runOnMachineFunction
    1185             : /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
    1186             : /// only includes instructions that have DAG nodes, not scheduling boundaries.
    1187             : ///
    1188             : /// This is a skeletal driver, with all the functionality pushed into helpers,
    1189             : /// so that it can be easily extended by experimental schedulers. Generally,
    1190             : /// implementing MachineSchedStrategy should be sufficient to implement a new
    1191             : /// scheduling algorithm. However, if a scheduler further subclasses
    1192             : /// ScheduleDAGMILive then it will want to override this virtual method in order
    1193             : /// to update any specialized state.
    1194      383626 : void ScheduleDAGMILive::schedule() {
    1195             :   DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
    1196             :   DEBUG(SchedImpl->dumpPolicy());
    1197      383626 :   buildDAGWithRegPressure();
    1198             : 
    1199      383626 :   Topo.InitDAGTopologicalSorting();
    1200             : 
    1201      383626 :   postprocessDAG();
    1202             : 
    1203             :   SmallVector<SUnit*, 8> TopRoots, BotRoots;
    1204      383626 :   findRootsAndBiasEdges(TopRoots, BotRoots);
    1205             : 
    1206             :   // Initialize the strategy before modifying the DAG.
    1207             :   // This may initialize a DFSResult to be used for queue priority.
    1208      383626 :   SchedImpl->initialize(this);
    1209             : 
    1210             :   DEBUG(
    1211             :     if (EntrySU.getInstr() != nullptr)
    1212             :       EntrySU.dumpAll(this);
    1213             :     for (const SUnit &SU : SUnits) {
    1214             :       SU.dumpAll(this);
    1215             :       if (ShouldTrackPressure) {
    1216             :         dbgs() << "  Pressure Diff      : ";
    1217             :         getPressureDiff(&SU).dump(*TRI);
    1218             :       }
    1219             :       dbgs() << "  Single Issue       : ";
    1220             :       if (SchedModel.mustBeginGroup(SU.getInstr()) &&
    1221             :          SchedModel.mustEndGroup(SU.getInstr()))
    1222             :         dbgs() << "true;";
    1223             :       else
    1224             :         dbgs() << "false;";
    1225             :       dbgs() << '\n';
    1226             :     }
    1227             :     if (ExitSU.getInstr() != nullptr)
    1228             :       ExitSU.dumpAll(this);
    1229             :   );
    1230      383626 :   if (ViewMISchedDAGs) viewGraph();
    1231             : 
    1232             :   // Initialize ready queues now that the DAG and priority data are finalized.
    1233      383626 :   initQueues(TopRoots, BotRoots);
    1234             : 
    1235      383626 :   bool IsTopNode = false;
    1236             :   while (true) {
    1237             :     DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
    1238     2660220 :     SUnit *SU = SchedImpl->pickNode(IsTopNode);
    1239     2660220 :     if (!SU) break;
    1240             : 
    1241             :     assert(!SU->isScheduled && "Node already scheduled");
    1242     2276594 :     if (!checkSchedLimit())
    1243             :       break;
    1244             : 
    1245     2276594 :     scheduleMI(SU, IsTopNode);
    1246             : 
    1247     2276595 :     if (DFSResult) {
    1248             :       unsigned SubtreeID = DFSResult->getSubtreeID(SU);
    1249         176 :       if (!ScheduledTrees.test(SubtreeID)) {
    1250             :         ScheduledTrees.set(SubtreeID);
    1251          38 :         DFSResult->scheduleTree(SubtreeID);
    1252          38 :         SchedImpl->scheduleTree(SubtreeID);
    1253             :       }
    1254             :     }
    1255             : 
    1256             :     // Notify the scheduling strategy after updating the DAG.
    1257     2276595 :     SchedImpl->schedNode(SU, IsTopNode);
    1258             : 
    1259     2276595 :     updateQueues(SU, IsTopNode);
    1260     2276594 :   }
    1261             :   assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
    1262             : 
    1263      383626 :   placeDebugValues();
    1264             : 
    1265             :   DEBUG({
    1266             :     dbgs() << "*** Final schedule for "
    1267             :            << printMBBReference(*begin()->getParent()) << " ***\n";
    1268             :     dumpSchedule();
    1269             :     dbgs() << '\n';
    1270             :   });
    1271      383626 : }
    1272             : 
     1273             : /// Build the DAG and set up three register pressure trackers.
    1274      386192 : void ScheduleDAGMILive::buildDAGWithRegPressure() {
    1275      386192 :   if (!ShouldTrackPressure) {
    1276      285718 :     RPTracker.reset();
    1277             :     RegionCriticalPSets.clear();
    1278      285718 :     buildSchedGraph(AA);
    1279      285718 :     return;
    1280             :   }
    1281             : 
    1282             :   // Initialize the register pressure tracker used by buildSchedGraph.
    1283      301422 :   RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
    1284      100474 :                  ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
    1285             : 
    1286             :   // Account for liveness generate by the region boundary.
    1287      100474 :   if (LiveRegionEnd != RegionEnd)
    1288       94734 :     RPTracker.recede();
    1289             : 
    1290             :   // Build the DAG, and compute current register pressure.
    1291      100474 :   buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);
    1292             : 
    1293             :   // Initialize top/bottom trackers after computing region pressure.
    1294      100474 :   initRegPressure();
    1295             : }
    1296             : 
    1297          10 : void ScheduleDAGMILive::computeDFSResult() {
    1298          10 :   if (!DFSResult)
    1299          16 :     DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
    1300          10 :   DFSResult->clear();
    1301             :   ScheduledTrees.clear();
    1302          10 :   DFSResult->resize(SUnits.size());
    1303          20 :   DFSResult->compute(SUnits);
    1304          20 :   ScheduledTrees.resize(DFSResult->getNumSubtrees());
    1305          10 : }
    1306             : 
    1307             : /// Compute the max cyclic critical path through the DAG. The scheduling DAG
    1308             : /// only provides the critical path for single block loops. To handle loops that
    1309             : /// span blocks, we could use the vreg path latencies provided by
    1310             : /// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
    1311             : /// available for use in the scheduler.
    1312             : ///
    1313             : /// The cyclic path estimation identifies a def-use pair that crosses the back
    1314             : /// edge and considers the depth and height of the nodes. For example, consider
    1315             : /// the following instruction sequence where each instruction has unit latency
     1316             : /// and defines an eponymous virtual register:
    1317             : ///
    1318             : /// a->b(a,c)->c(b)->d(c)->exit
    1319             : ///
     1320             : /// The cyclic critical path is two cycles: b->c->b
    1321             : /// The acyclic critical path is four cycles: a->b->c->d->exit
    1322             : /// LiveOutHeight = height(c) = len(c->d->exit) = 2
    1323             : /// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
    1324             : /// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
    1325             : /// LiveInDepth = depth(b) = len(a->b) = 1
    1326             : ///
    1327             : /// LiveOutDepth - LiveInDepth = 3 - 1 = 2
    1328             : /// LiveInHeight - LiveOutHeight = 4 - 2 = 2
    1329             : /// CyclicCriticalPath = min(2, 2) = 2
    1330             : ///
    1331             : /// This could be relevant to PostRA scheduling, but is currently implemented
    1332             : /// assuming LiveIntervals.
    1333      350918 : unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
     1334             :   // This only applies to single block loops.
    1335      350918 :   if (!BB->isSuccessor(BB))
    1336             :     return 0;
    1337             : 
    1338             :   unsigned MaxCyclicLatency = 0;
    1339             :   // Visit each live out vreg def to find def/use pairs that cross iterations.
    1340       11528 :   for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
    1341        3692 :     unsigned Reg = P.RegUnit;
    1342        3692 :     if (!TRI->isVirtualRegister(Reg))
     1343           5 :       continue;
    1344        3687 :     const LiveInterval &LI = LIS->getInterval(Reg);
    1345        7374 :     const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
    1346        3687 :     if (!DefVNI)
    1347         113 :       continue;
    1348             : 
    1349             :     MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
    1350             :     const SUnit *DefSU = getSUnit(DefMI);
    1351        1517 :     if (!DefSU)
    1352        2057 :       continue;
    1353             : 
    1354             :     unsigned LiveOutHeight = DefSU->getHeight();
    1355        1517 :     unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
    1356             :     // Visit all local users of the vreg def.
    1357             :     for (const VReg2SUnit &V2SU
    1358        1517 :          : make_range(VRegUses.find(Reg), VRegUses.end())) {
    1359        4993 :       SUnit *SU = V2SU.SU;
    1360        4993 :       if (SU == &ExitSU)
    1361         813 :         continue;
    1362             : 
    1363             :       // Only consider uses of the phi.
    1364        9986 :       LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
    1365        9986 :       if (!LRQ.valueIn()->isPHIDef())
    1366         813 :         continue;
    1367             : 
    1368             :       // Assume that a path spanning two iterations is a cycle, which could
    1369             :       // overestimate in strange cases. This allows cyclic latency to be
    1370             :       // estimated as the minimum slack of the vreg's depth or height.
    1371             :       unsigned CyclicLatency = 0;
    1372        4180 :       if (LiveOutDepth > SU->getDepth())
    1373        4178 :         CyclicLatency = LiveOutDepth - SU->getDepth();
    1374             : 
    1375        4180 :       unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
    1376        4180 :       if (LiveInHeight > LiveOutHeight) {
    1377        4179 :         if (LiveInHeight - LiveOutHeight < CyclicLatency)
    1378             :           CyclicLatency = LiveInHeight - LiveOutHeight;
    1379             :       } else
    1380             :         CyclicLatency = 0;
    1381             : 
    1382             :       DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
    1383             :             << SU->NodeNum << ") = " << CyclicLatency << "c\n");
    1384        4180 :       if (CyclicLatency > MaxCyclicLatency)
    1385             :         MaxCyclicLatency = CyclicLatency;
    1386             :     }
    1387             :   }
    1388             :   DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
    1389             :   return MaxCyclicLatency;
    1390             : }
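// The per-pair formula above, checked against the worked example in the doc
// comment (a minimal constexpr sketch, assuming C++14): the cyclic latency of
// a def/use pair is the smaller of its depth slack and its height slack, with
// each slack clamped at zero.
constexpr unsigned cyclicLatency(unsigned LiveOutDepth, unsigned LiveInDepth,
                                 unsigned LiveInHeight, unsigned LiveOutHeight) {
  unsigned DepthSlack  = LiveOutDepth > LiveInDepth   ? LiveOutDepth - LiveInDepth   : 0;
  unsigned HeightSlack = LiveInHeight > LiveOutHeight ? LiveInHeight - LiveOutHeight : 0;
  return DepthSlack < HeightSlack ? DepthSlack : HeightSlack;
}
// Doc-comment values: LiveOutDepth=3, LiveInDepth=1, LiveInHeight=4,
// LiveOutHeight=2 => min(2, 2) = 2 cycles.
static_assert(cyclicLatency(3, 1, 4, 2) == 2, "matches the worked example");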
    1391             : 
    1392             : /// Release ExitSU predecessors and setup scheduler queues. Re-position
    1393             : /// the Top RP tracker in case the region beginning has changed.
    1394      386192 : void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots,
    1395             :                                    ArrayRef<SUnit*> BotRoots) {
    1396      386192 :   ScheduleDAGMI::initQueues(TopRoots, BotRoots);
    1397      386192 :   if (ShouldTrackPressure) {
    1398             :     assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
    1399             :     TopRPTracker.setPos(CurrentTop);
    1400             :   }
    1401      386192 : }
    1402             : 
    1403             : /// Move an instruction and update register pressure.
    1404     2292054 : void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
    1405             :   // Move the instruction to its new location in the instruction stream.
    1406     2292054 :   MachineInstr *MI = SU->getInstr();
    1407             : 
    1408     2292054 :   if (IsTopNode) {
    1409             :     assert(SU->isTopReady() && "node still has unscheduled dependencies");
    1410       80852 :     if (&*CurrentTop == MI)
    1411      129672 :       CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
    1412             :     else {
    1413       16016 :       moveInstruction(MI, CurrentTop);
    1414             :       TopRPTracker.setPos(MI);
    1415             :     }
    1416             : 
    1417       80852 :     if (ShouldTrackPressure) {
    1418             :       // Update top scheduled pressure.
    1419       67995 :       RegisterOperands RegOpers;
    1420       67995 :       RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
    1421       67995 :       if (ShouldTrackLaneMasks) {
    1422             :         // Adjust liveness and add missing dead+read-undef flags.
    1423      110016 :         SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
    1424       55008 :         RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
    1425             :       } else {
    1426             :         // Adjust for missing dead-def flags.
    1427       12987 :         RegOpers.detectDeadDefs(*MI, *LIS);
    1428             :       }
    1429             : 
    1430       67995 :       TopRPTracker.advance(RegOpers);
    1431             :       assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
    1432             :       DEBUG(
    1433             :         dbgs() << "Top Pressure:\n";
    1434             :         dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
    1435             :       );
    1436             : 
    1437       67995 :       updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
    1438             :     }
    1439             :   } else {
    1440             :     assert(SU->isBottomReady() && "node still has unscheduled dependencies");
    1441             :     MachineBasicBlock::iterator priorII =
    1442     2211202 :       priorNonDebug(CurrentBottom, CurrentTop);
    1443     2211203 :     if (&*priorII == MI)
    1444     2032447 :       CurrentBottom = priorII;
    1445             :     else {
    1446      178756 :       if (&*CurrentTop == MI) {
    1447       36446 :         CurrentTop = nextIfDebug(++CurrentTop, priorII);
    1448             :         TopRPTracker.setPos(CurrentTop);
    1449             :       }
    1450      178756 :       moveInstruction(MI, CurrentBottom);
    1451      178756 :       CurrentBottom = MI;
    1452             :       BotRPTracker.setPos(CurrentBottom);
    1453             :     }
    1454     2211203 :     if (ShouldTrackPressure) {
    1455     1229155 :       RegisterOperands RegOpers;
    1456     1229155 :       RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
    1457     1229155 :       if (ShouldTrackLaneMasks) {
    1458             :         // Adjust liveness and add missing dead+read-undef flags.
    1459      455272 :         SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
    1460      227636 :         RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
    1461             :       } else {
    1462             :         // Adjust for missing dead-def flags.
    1463     1001519 :         RegOpers.detectDeadDefs(*MI, *LIS);
    1464             :       }
    1465             : 
    1466     1229155 :       if (BotRPTracker.getPos() != CurrentBottom)
    1467     1085896 :         BotRPTracker.recedeSkipDebugValues();
    1468             :       SmallVector<RegisterMaskPair, 8> LiveUses;
    1469     1229155 :       BotRPTracker.recede(RegOpers, &LiveUses);
    1470             :       assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
    1471             :       DEBUG(
    1472             :         dbgs() << "Bottom Pressure:\n";
    1473             :         dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI);
    1474             :       );
    1475             : 
    1476     1229155 :       updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
    1477     1229155 :       updatePressureDiffs(LiveUses);
    1478             :     }
    1479             :   }
    1480     2292055 : }
    1481             : 
    1482             : //===----------------------------------------------------------------------===//
    1483             : // BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
    1484             : //===----------------------------------------------------------------------===//
    1485             : 
    1486             : namespace {
    1487             : 
    1488             : /// \brief Post-process the DAG to create cluster edges between neighboring
    1489             : /// loads or between neighboring stores.
    1490             : class BaseMemOpClusterMutation : public ScheduleDAGMutation {
    1491             :   struct MemOpInfo {
    1492             :     SUnit *SU;
    1493             :     unsigned BaseReg;
    1494             :     int64_t Offset;
    1495             : 
    1496             :     MemOpInfo(SUnit *su, unsigned reg, int64_t ofs)
    1497       59336 :         : SU(su), BaseReg(reg), Offset(ofs) {}
    1498             : 
    1499             :     bool operator<(const MemOpInfo&RHS) const {
    1500      129459 :       return std::tie(BaseReg, Offset, SU->NodeNum) <
    1501      133417 :              std::tie(RHS.BaseReg, RHS.Offset, RHS.SU->NodeNum);
    1502             :     }
    1503             :   };
    1504             : 
    1505             :   const TargetInstrInfo *TII;
    1506             :   const TargetRegisterInfo *TRI;
    1507             :   bool IsLoad;
    1508             : 
    1509             : public:
    1510             :   BaseMemOpClusterMutation(const TargetInstrInfo *tii,
    1511             :                            const TargetRegisterInfo *tri, bool IsLoad)
    1512       57488 :       : TII(tii), TRI(tri), IsLoad(IsLoad) {}
    1513             : 
    1514             :   void apply(ScheduleDAGInstrs *DAGInstrs) override;
    1515             : 
    1516             : protected:
    1517             :   void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG);
    1518             : };
    1519             : 
    1520       28744 : class StoreClusterMutation : public BaseMemOpClusterMutation {
    1521             : public:
    1522             :   StoreClusterMutation(const TargetInstrInfo *tii,
    1523             :                        const TargetRegisterInfo *tri)
    1524       28744 :       : BaseMemOpClusterMutation(tii, tri, false) {}
    1525             : };
    1526             : 
    1527       28744 : class LoadClusterMutation : public BaseMemOpClusterMutation {
    1528             : public:
    1529             :   LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
    1530       28744 :       : BaseMemOpClusterMutation(tii, tri, true) {}
    1531             : };
    1532             : 
    1533             : } // end anonymous namespace
    1534             : 
    1535             : namespace llvm {
    1536             : 
    1537             : std::unique_ptr<ScheduleDAGMutation>
    1538       28744 : createLoadClusterDAGMutation(const TargetInstrInfo *TII,
    1539             :                              const TargetRegisterInfo *TRI) {
    1540       28744 :   return EnableMemOpCluster ? llvm::make_unique<LoadClusterMutation>(TII, TRI)
    1541       28744 :                             : nullptr;
    1542             : }
    1543             : 
    1544             : std::unique_ptr<ScheduleDAGMutation>
    1545       28744 : createStoreClusterDAGMutation(const TargetInstrInfo *TII,
    1546             :                               const TargetRegisterInfo *TRI) {
    1547       28744 :   return EnableMemOpCluster ? llvm::make_unique<StoreClusterMutation>(TII, TRI)
    1548       28744 :                             : nullptr;
    1549             : }
    1550             : 
    1551             : } // end namespace llvm
    1552             : 
    1553       52740 : void BaseMemOpClusterMutation::clusterNeighboringMemOps(
    1554             :     ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) {
    1555             :   SmallVector<MemOpInfo, 32> MemOpRecords;
    1556      251630 :   for (SUnit *SU : MemOps) {
    1557             :     unsigned BaseReg;
    1558             :     int64_t Offset;
    1559       99445 :     if (TII->getMemOpBaseRegImmOfs(*SU->getInstr(), BaseReg, Offset, TRI))
    1560      118672 :       MemOpRecords.push_back(MemOpInfo(SU, BaseReg, Offset));
    1561             :   }
    1562       52740 :   if (MemOpRecords.size() < 2)
    1563             :     return;
    1564             : 
    1565             :   std::sort(MemOpRecords.begin(), MemOpRecords.end());
    1566             :   unsigned ClusterLength = 1;
    1567       13230 :   for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
    1568       57872 :     SUnit *SUa = MemOpRecords[Idx].SU;
    1569       57872 :     SUnit *SUb = MemOpRecords[Idx+1].SU;
    1570       86808 :     if (TII->shouldClusterMemOps(*SUa->getInstr(), MemOpRecords[Idx].BaseReg,
    1571       28936 :                                  *SUb->getInstr(), MemOpRecords[Idx+1].BaseReg,
    1572       75562 :                                  ClusterLength) &&
    1573       46626 :         DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
    1574             :       DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
    1575             :             << SUb->NodeNum << ")\n");
    1576             :       // Copy successor edges from SUa to SUb. Interleaving computation
    1577             :       // dependent on SUa can prevent load combining due to register reuse.
    1578             :       // Predecessor edges do not need to be copied from SUb to SUa since nearby
    1579             :       // loads should have effectively the same inputs.
    1580      150441 :       for (const SDep &Succ : SUa->Succs) {
    1581       66376 :         if (Succ.getSUnit() == SUb)
    1582             :           continue;
    1583             :         DEBUG(dbgs() << "  Copy Succ SU(" << Succ.getSUnit()->NodeNum << ")\n");
    1584       48682 :         DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
    1585             :       }
    1586       17689 :       ++ClusterLength;
    1587             :     } else
    1588             :       ClusterLength = 1;
    1589             :   }
    1590             : }
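// Hedged sketch of the sort-then-adjacent-pairs clustering above: records are
// ordered by (BaseReg, Offset, NodeNum) so candidates on the same base
// register become neighbors, and ClusterLength grows while the target
// predicate accepts the next pair. ShouldCluster is a stand-in for
// TII->shouldClusterMemOps; the real code also adds the cluster edges.
#include <algorithm>
#include <cstdint>
#include <tuple>
#include <vector>

struct MemRec { unsigned BaseReg; int64_t Offset; unsigned NodeNum; };

template <typename Pred>
void clusterSketch(std::vector<MemRec> &Recs, Pred ShouldCluster) {
  std::sort(Recs.begin(), Recs.end(), [](const MemRec &A, const MemRec &B) {
    return std::tie(A.BaseReg, A.Offset, A.NodeNum) <
           std::tie(B.BaseReg, B.Offset, B.NodeNum);
  });
  unsigned ClusterLength = 1;
  for (size_t I = 0; I + 1 < Recs.size(); ++I)
    ClusterLength = ShouldCluster(Recs[I], Recs[I + 1], ClusterLength)
                        ? ClusterLength + 1 // extend the current run
                        : 1;                // start a new cluster
}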
    1591             : 
     1592             : /// \brief Callback from DAG postProcessing to create cluster edges for loads
     1593             : /// or stores.
    1593       62606 : void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
    1594             :   ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
    1595             : 
    1596             :   // Map DAG NodeNum to store chain ID.
    1597             :   DenseMap<unsigned, unsigned> StoreChainIDs;
    1598             :   // Map each store chain to a set of dependent MemOps.
    1599       62606 :   SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
    1600      765670 :   for (SUnit &SU : DAG->SUnits) {
    1601     1718952 :     if ((IsLoad && !SU.getInstr()->mayLoad()) ||
    1602      763801 :         (!IsLoad && !SU.getInstr()->mayStore()))
    1603      603619 :       continue;
    1604             : 
    1605      198890 :     unsigned ChainPredID = DAG->SUnits.size();
    1606      417037 :     for (const SDep &Pred : SU.Preds) {
    1607      192854 :       if (Pred.isCtrl()) {
    1608       34058 :         ChainPredID = Pred.getSUnit()->NodeNum;
    1609       34058 :         break;
    1610             :       }
    1611             :     }
    1612             :     // Check if this chain-like pred has been seen
    1613             :     // before. ChainPredID==MaxNodeID at the top of the schedule.
    1614       99445 :     unsigned NumChains = StoreChainDependents.size();
    1615             :     std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
    1616      198890 :       StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
    1617       99445 :     if (Result.second)
    1618       52740 :       StoreChainDependents.resize(NumChains + 1);
    1619      198890 :     StoreChainDependents[Result.first->second].push_back(&SU);
    1620             :   }
    1621             : 
    1622             :   // Iterate over the store chains.
    1623      168086 :   for (auto &SCD : StoreChainDependents)
    1624       52740 :     clusterNeighboringMemOps(SCD, DAG);
    1625       62606 : }
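// Hedged sketch of the chain bucketing above: each mem op is keyed by the
// node id of its first control ("chain") predecessor, so clustering only
// considers ops hanging off the same chain. Illustrative types; bucketByChain
// is a hypothetical helper, not LLVM API.
#include <unordered_map>
#include <utility>
#include <vector>

std::vector<std::vector<unsigned>>
bucketByChain(const std::vector<std::pair<unsigned, unsigned>> &MemOps) {
  // Each element is (mem-op node id, chain-predecessor node id).
  std::unordered_map<unsigned, unsigned> ChainIDs; // chain pred -> bucket idx
  std::vector<std::vector<unsigned>> Buckets;
  for (const auto &Op : MemOps) {
    auto Res = ChainIDs.emplace(Op.second, Buckets.size());
    if (Res.second)
      Buckets.emplace_back();                 // first op seen on this chain
    Buckets[Res.first->second].push_back(Op.first);
  }
  return Buckets;
}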
    1626             : 
    1627             : //===----------------------------------------------------------------------===//
    1628             : // CopyConstrain - DAG post-processing to encourage copy elimination.
    1629             : //===----------------------------------------------------------------------===//
    1630             : 
    1631             : namespace {
    1632             : 
    1633             : /// \brief Post-process the DAG to create weak edges from all uses of a copy to
    1634             : /// the one use that defines the copy's source vreg, most likely an induction
    1635             : /// variable increment.
    1636      114100 : class CopyConstrain : public ScheduleDAGMutation {
    1637             :   // Transient state.
    1638             :   SlotIndex RegionBeginIdx;
    1639             : 
    1640             :   // RegionEndIdx is the slot index of the last non-debug instruction in the
    1641             :   // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
    1642             :   SlotIndex RegionEndIdx;
    1643             : 
    1644             : public:
    1645      114101 :   CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
    1646             : 
    1647             :   void apply(ScheduleDAGInstrs *DAGInstrs) override;
    1648             : 
    1649             : protected:
    1650             :   void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
    1651             : };
    1652             : 
    1653             : } // end anonymous namespace
    1654             : 
    1655             : namespace llvm {
    1656             : 
    1657             : std::unique_ptr<ScheduleDAGMutation>
    1658      114101 : createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
    1659             :                                const TargetRegisterInfo *TRI) {
    1660      114101 :   return llvm::make_unique<CopyConstrain>(TII, TRI);
    1661             : }
    1662             : 
    1663             : } // end namespace llvm
    1664             : 
    1665             : /// constrainLocalCopy handles two possibilities:
    1666             : /// 1) Local src:
    1667             : /// I0:     = dst
    1668             : /// I1: src = ...
    1669             : /// I2:     = dst
    1670             : /// I3: dst = src (copy)
    1671             : /// (create pred->succ edges I0->I1, I2->I1)
    1672             : ///
    1673             : /// 2) Local copy:
    1674             : /// I0: dst = src (copy)
    1675             : /// I1:     = dst
    1676             : /// I2: src = ...
    1677             : /// I3:     = dst
    1678             : /// (create pred->succ edges I1->I2, I3->I2)
    1679             : ///
    1680             : /// Although the MachineScheduler is currently constrained to single blocks,
    1681             : /// this algorithm should handle extended blocks. An EBB is a set of
    1682             : /// contiguously numbered blocks such that the previous block in the EBB is
    1683             : /// always the single predecessor.
    1684      583382 : void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
    1685      583382 :   LiveIntervals *LIS = DAG->getLIS();
    1686      583382 :   MachineInstr *Copy = CopySU->getInstr();
    1687             : 
    1688             :   // Check for pure vreg copies.
    1689      583382 :   const MachineOperand &SrcOp = Copy->getOperand(1);
    1690      583382 :   unsigned SrcReg = SrcOp.getReg();
    1691      583382 :   if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
    1692      574077 :     return;
    1693             : 
    1694             :   const MachineOperand &DstOp = Copy->getOperand(0);
    1695      385918 :   unsigned DstReg = DstOp.getReg();
    1696      415898 :   if (!TargetRegisterInfo::isVirtualRegister(DstReg) || DstOp.isDead())
    1697             :     return;
    1698             : 
    1699             :   // Check if either the dest or source is local. If it's live across a back
    1700             :   // edge, it's not local. Note that if both vregs are live across the back
     1701             :   // edge, we cannot successfully constrain the copy without cyclic scheduling.
    1702             :   // If both the copy's source and dest are local live intervals, then we
    1703             :   // should treat the dest as the global for the purpose of adding
    1704             :   // constraints. This adds edges from source's other uses to the copy.
    1705             :   unsigned LocalReg = SrcReg;
    1706             :   unsigned GlobalReg = DstReg;
    1707       29954 :   LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
    1708       29954 :   if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
    1709             :     LocalReg = DstReg;
    1710             :     GlobalReg = SrcReg;
    1711       10588 :     LocalLI = &LIS->getInterval(LocalReg);
    1712       10588 :     if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
    1713             :       return;
    1714             :   }
    1715       25579 :   LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
    1716             : 
    1717             :   // Find the global segment after the start of the local LI.
    1718       51158 :   LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
    1719             :   // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
    1720             :   // local live range. We could create edges from other global uses to the local
    1721             :   // start, but the coalescer should have already eliminated these cases, so
    1722             :   // don't bother dealing with it.
    1723       25579 :   if (GlobalSegment == GlobalLI->end())
    1724             :     return;
    1725             : 
    1726             :   // If GlobalSegment is killed at the LocalLI->start, the call to find()
    1727             :   // returned the next global segment. But if GlobalSegment overlaps with
     1728             :   // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
    1729             :   // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
    1730       25565 :   if (GlobalSegment->contains(LocalLI->beginIndex()))
    1731        6300 :     ++GlobalSegment;
    1732             : 
    1733       25565 :   if (GlobalSegment == GlobalLI->end())
    1734             :     return;
    1735             : 
    1736             :   // Check if GlobalLI contains a hole in the vicinity of LocalLI.
    1737       21892 :   if (GlobalSegment != GlobalLI->begin()) {
    1738             :     // Two address defs have no hole.
    1739        2682 :     if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
    1740             :                                GlobalSegment->start)) {
    1741             :       return;
    1742             :     }
    1743             :     // If the prior global segment may be defined by the same two-address
    1744             :     // instruction that also defines LocalLI, then can't make a hole here.
    1745        1564 :     if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
    1746             :                                LocalLI->beginIndex())) {
    1747             :       return;
    1748             :     }
    1749             :     // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
    1750             :     // it would be a disconnected component in the live range.
    1751             :     assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
    1752             :            "Disconnected LRG within the scheduling region.");
    1753             :   }
    1754             :   MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
    1755       20774 :   if (!GlobalDef)
    1756             :     return;
    1757             : 
    1758             :   SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
    1759       19653 :   if (!GlobalSU)
    1760             :     return;
    1761             : 
    1762             :   // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
    1763             :   // constraining the uses of the last local def to precede GlobalDef.
    1764             :   SmallVector<SUnit*,8> LocalUses;
    1765       39306 :   const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
    1766             :   MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
    1767             :   SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
    1768       57107 :   for (const SDep &Succ : LastLocalSU->Succs) {
    1769       30772 :     if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
    1770             :       continue;
    1771       36804 :     if (Succ.getSUnit() == GlobalSU)
    1772             :       continue;
    1773       17728 :     if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
    1774             :       return;
    1775        7436 :     LocalUses.push_back(Succ.getSUnit());
    1776             :   }
    1777             :   // Open the top of the GlobalLI hole by constraining any earlier global uses
    1778             :   // to precede the start of LocalLI.
    1779             :   SmallVector<SUnit*,8> GlobalUses;
    1780             :   MachineInstr *FirstLocalDef =
    1781             :     LIS->getInstructionFromIndex(LocalLI->beginIndex());
    1782             :   SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
    1783       29489 :   for (const SDep &Pred : GlobalSU->Preds) {
    1784       19472 :     if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
    1785             :       continue;
    1786        1091 :     if (Pred.getSUnit() == FirstLocalSU)
    1787             :       continue;
    1788         445 :     if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
    1789             :       return;
    1790         389 :     GlobalUses.push_back(Pred.getSUnit());
    1791             :   }
    1792             :   DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
    1793             :   // Add the weak edges.
    1794        1771 :   for (SmallVectorImpl<SUnit*>::const_iterator
    1795       11076 :          I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
    1796             :     DEBUG(dbgs() << "  Local use SU(" << (*I)->NodeNum << ") -> SU("
    1797             :           << GlobalSU->NodeNum << ")\n");
    1798        3542 :     DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
    1799             :   }
    1800         384 :   for (SmallVectorImpl<SUnit*>::const_iterator
    1801        9689 :          I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
    1802             :     DEBUG(dbgs() << "  Global use SU(" << (*I)->NodeNum << ") -> SU("
    1803             :           << FirstLocalSU->NodeNum << ")\n");
    1804         768 :     DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
    1805             :   }
    1806             : }
    1807             : 
    1808             : /// \brief Callback from DAG postProcessing to create weak edges to encourage
    1809             : /// copy elimination.
    1810      363616 : void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
    1811             :   ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
    1812             :   assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
    1813             : 
    1814      363616 :   MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
    1815      363616 :   if (FirstPos == DAG->end())
    1816             :     return;
    1817      726772 :   RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
    1818      726772 :   RegionEndIdx = DAG->getLIS()->getInstructionIndex(
    1819     1090158 :       *priorNonDebug(DAG->end(), DAG->begin()));
    1820             : 
    1821     2304441 :   for (SUnit &SU : DAG->SUnits) {
    1822     3882110 :     if (!SU.getInstr()->isCopy())
    1823     1357673 :       continue;
    1824             : 
    1825      583382 :     constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
    1826             :   }
    1827             : }
    1828             : 
    1829             : //===----------------------------------------------------------------------===//
    1830             : // MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
    1831             : // and possibly other custom schedulers.
    1832             : //===----------------------------------------------------------------------===//
    1833             : 
    1834             : static const unsigned InvalidCycle = ~0U;
    1835             : 
    1836      533327 : SchedBoundary::~SchedBoundary() { delete HazardRec; }
    1837             : 
    1838             : /// Given a Count of resource usage and a Latency value, return true if a
    1839             : /// SchedBoundary becomes resource limited.
    1840             : static bool checkResourceLimit(unsigned LFactor, unsigned Count,
    1841             :                                unsigned Latency) {
    1842     3293558 :   return (int)(Count - (Latency * LFactor)) > (int)LFactor;
    1843             : }
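// Worked example with illustrative numbers: LFactor = 2, Count = 10 executed
// resource units, Latency = 3 cycles gives 10 - 3*2 = 4 > 2, so the zone is
// resource limited; with Latency = 5 the difference is 0 <= 2 and the zone is
// latency limited instead.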
    1844             : 
    1845     1036136 : void SchedBoundary::reset() {
    1846             :   // A new HazardRec is created for each DAG and owned by SchedBoundary.
    1847             :   // Destroying and reconstructing it is very expensive though. So keep
    1848             :   // invalid, placeholder HazardRecs.
    1849     1036136 :   if (HazardRec && HazardRec->isEnabled()) {
    1850        3566 :     delete HazardRec;
    1851        3566 :     HazardRec = nullptr;
    1852             :   }
    1853             :   Available.clear();
    1854             :   Pending.clear();
    1855     1036136 :   CheckPending = false;
    1856     1036136 :   CurrCycle = 0;
    1857     1036136 :   CurrMOps = 0;
    1858     1036136 :   MinReadyCycle = std::numeric_limits<unsigned>::max();
    1859     1036136 :   ExpectedLatency = 0;
    1860     1036136 :   DependentLatency = 0;
    1861     1036136 :   RetiredMOps = 0;
    1862     1036136 :   MaxExecutedResCount = 0;
    1863     1036136 :   ZoneCritResIdx = 0;
    1864     1036136 :   IsResourceLimited = false;
    1865             :   ReservedCycles.clear();
    1866             : #ifndef NDEBUG
    1867             :   // Track the maximum number of stall cycles that could arise either from the
    1868             :   // latency of a DAG edge or the number of cycles that a processor resource is
    1869             :   // reserved (SchedBoundary::ReservedCycles).
    1870             :   MaxObservedStall = 0;
    1871             : #endif
    1872             :   // Reserve a zero-count for invalid CritResIdx.
    1873     1036136 :   ExecutedResCounts.resize(1);
    1874             :   assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
    1875     1036136 : }
    1876             : 
    1877      388097 : void SchedRemainder::
    1878             : init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
    1879             :   reset();
    1880      388097 :   if (!SchedModel->hasInstrSchedModel())
    1881             :     return;
    1882      310262 :   RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
    1883     1127225 :   for (SUnit &SU : DAG->SUnits) {
    1884      972094 :     const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
    1885     1944188 :     RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
    1886      972094 :       * SchedModel->getMicroOpFactor();
    1887     1698885 :     for (TargetSchedModel::ProcResIter
    1888      972094 :            PI = SchedModel->getWriteProcResBegin(SC),
    1889     2670979 :            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
    1890     1698885 :       unsigned PIdx = PI->ProcResourceIdx;
    1891             :       unsigned Factor = SchedModel->getResourceFactor(PIdx);
    1892     1698885 :       RemainingCounts[PIdx] += (Factor * PI->Cycles);
    1893             :     }
    1894             :   }
    1895             : }
    1896             : 
    1897      769472 : void SchedBoundary::
    1898             : init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
    1899      769472 :   reset();
    1900      769472 :   DAG = dag;
    1901      769472 :   SchedModel = smodel;
    1902      769472 :   Rem = rem;
    1903      769472 :   if (SchedModel->hasInstrSchedModel()) {
    1904      619236 :     ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
    1905      619236 :     ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle);
    1906             :   }
    1907      769472 : }
    1908             : 
    1909             : /// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
    1910             : /// these "soft stalls" differently than the hard stall cycles based on CPU
    1911             : /// resources and computed by checkHazard(). A fully in-order model
    1912             : /// (MicroOpBufferSize==0) will not make use of this since instructions are not
    1913             : /// available for scheduling until they are ready. However, a weaker in-order
    1914             : /// model may use this for heuristics. For example, if a processor has in-order
    1915             : /// behavior when reading certain resources, this may come into play.
    1916    20593728 : unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
    1917    20593728 :   if (!SU->isUnbuffered)
    1918             :     return 0;
    1919             : 
    1920     2173758 :   unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
    1921     2173758 :   if (ReadyCycle > CurrCycle)
    1922       53216 :     return ReadyCycle - CurrCycle;
    1923             :   return 0;
    1924             : }
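                     : 
                     : // A sketch of the soft-stall computation above with plain values in
                     : // place of SUnit state (the names are illustrative only): the stall is
                     : // simply how far the node's ready cycle lies beyond the zone's cycle.
                     : unsigned latencyStallCycles(unsigned ReadyCycle, unsigned CurrCycle,
                     :                             bool IsUnbuffered) {
                     :   if (!IsUnbuffered)
                     :     return 0; // buffered resources hide the latency from the heuristic
                     :   return ReadyCycle > CurrCycle ? ReadyCycle - CurrCycle : 0;
                     : }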
    1925             : 
    1926             : /// Compute the next cycle at which the given processor resource can be
    1927             : /// scheduled.
    1928     1701605 : unsigned SchedBoundary::
    1929             : getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
    1930     3403210 :   unsigned NextUnreserved = ReservedCycles[PIdx];
    1931             :   // If this resource has never been used, always return cycle zero.
    1932     1701605 :   if (NextUnreserved == InvalidCycle)
    1933             :     return 0;
    1934             :   // For bottom-up scheduling add the cycles needed for the current operation.
    1935        2062 :   if (!isTop())
    1936         927 :     NextUnreserved += Cycles;
    1937             :   return NextUnreserved;
    1938             : }
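                     : 
                     : // Sketch of the lookup above, assuming Invalid is the "never used"
                     : // sentinel; for a bottom-up zone the operation's own cycles are added
                     : // because the reservation extends away from the recorded cycle.
                     : unsigned nextResourceCycle(unsigned Reserved, unsigned Cycles,
                     :                            bool Top, unsigned Invalid) {
                     :   if (Reserved == Invalid)
                     :     return 0; // resource never used: free at cycle zero
                     :   return Top ? Reserved : Reserved + Cycles;
                     : }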
    1939             : 
    1940             : /// Does this SU have a hazard within the current instruction group.
    1941             : ///
    1942             : /// The scheduler supports two modes of hazard recognition. The first is the
    1943             : /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
    1944             : /// supports highly complicated in-order reservation tables
    1945             : /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
    1946             : ///
    1947             : /// The second is a streamlined mechanism that checks for hazards based on
    1948             : /// simple counters that the scheduler itself maintains. It explicitly checks
    1949             : /// for instruction dispatch limitations, including the number of micro-ops that
    1950             : /// can dispatch per cycle.
    1951             : ///
    1952             : /// TODO: Also check whether the SU must start a new group.
    1953     9692698 : bool SchedBoundary::checkHazard(SUnit *SU) {
    1954     9692698 :   if (HazardRec->isEnabled()
    1955     9692698 :       && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
    1956             :     return true;
    1957             :   }
    1958             : 
    1959     9688042 :   unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
    1960     9688041 :   if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
    1961             :     DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="
    1962             :           << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
    1963             :     return true;
    1964             :   }
    1965             : 
    1966    16338320 :   if (CurrMOps > 0 &&
    1967     6811093 :       ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
    1968     6774997 :        (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
    1969             :     DEBUG(dbgs() << "  hazard: SU(" << SU->NodeNum << ") must "
    1970             :                  << (isTop()? "begin" : "end") << " group\n");
    1971             :     return true;
    1972             :   }
    1973             : 
    1974     9544995 :   if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
    1975        1788 :     const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
    1976        1824 :     for (const MCWriteProcResEntry &PE :
    1977             :           make_range(SchedModel->getWriteProcResBegin(SC),
    1978        5400 :                      SchedModel->getWriteProcResEnd(SC))) {
    1979        2418 :       unsigned ResIdx = PE.ProcResourceIdx;
    1980        2418 :       unsigned Cycles = PE.Cycles;
    1981        2418 :       unsigned NRCycle = getNextResourceCycle(ResIdx, Cycles);
    1982        2418 :       if (NRCycle > CurrCycle) {
    1983             : #ifndef NDEBUG
    1984             :         MaxObservedStall = std::max(Cycles, MaxObservedStall);
    1985             : #endif
    1986             :         DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") "
    1987             :               << SchedModel->getResourceName(ResIdx)
    1988             :               << "=" << NRCycle << "c\n");
    1989             :         return true;
    1990             :       }
    1991             :     }
    1992             :   }
    1993             :   return false;
    1994             : }
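                     : 
                     : // Sketch of the counter-based dispatch check described above, with a
                     : // hypothetical issue width: a node whose micro-ops would overflow the
                     : // current issue group is a hazard and must wait for the next cycle.
                     : bool microOpHazard(unsigned CurrMOps, unsigned UOps,
                     :                    unsigned IssueWidth) {
                     :   return CurrMOps > 0 && CurrMOps + UOps > IssueWidth;
                     : }
                     : // e.g. with IssueWidth 4, a 2-uop instruction cannot join a group that
                     : // already holds 3 micro-ops: microOpHazard(3, 2, 4) is true.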
    1995             : 
    1996             : // Find the unscheduled node in ReadySUs with the highest latency.
    1997     1070682 : unsigned SchedBoundary::
    1998             : findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
    1999             :   SUnit *LateSU = nullptr;
    2000             :   unsigned RemLatency = 0;
    2001    11388650 :   for (SUnit *SU : ReadySUs) {
    2002     5158984 :     unsigned L = getUnscheduledLatency(SU);
    2003     5158984 :     if (L > RemLatency) {
    2004             :       RemLatency = L;
    2005             :       LateSU = SU;
    2006             :     }
    2007             :   }
    2008             :   if (LateSU) {
    2009             :     DEBUG(dbgs() << Available.getName() << " RemLatency SU("
    2010             :           << LateSU->NodeNum << ") " << RemLatency << "c\n");
    2011             :   }
    2012     1070682 :   return RemLatency;
    2013             : }
    2014             : 
    2015             : // Count resources in this zone and the remaining unscheduled
    2016             : // instructions. Return the max count, scaled. Set OtherCritIdx to the critical
    2017             : // resource index, or zero if the zone is issue limited.
    2018      526190 : unsigned SchedBoundary::
    2019             : getOtherResourceCount(unsigned &OtherCritIdx) {
    2020      526190 :   OtherCritIdx = 0;
    2021      526190 :   if (!SchedModel->hasInstrSchedModel())
    2022             :     return 0;
    2023             : 
    2024      353822 :   unsigned OtherCritCount = Rem->RemIssueCount
    2025      353822 :     + (RetiredMOps * SchedModel->getMicroOpFactor());
    2026             :   DEBUG(dbgs() << "  " << Available.getName() << " + Remain MOps: "
    2027             :         << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
    2028     2273020 :   for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
    2029     2626842 :        PIdx != PEnd; ++PIdx) {
    2030     4546040 :     unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
    2031     2273020 :     if (OtherCount > OtherCritCount) {
    2032             :       OtherCritCount = OtherCount;
    2033       14375 :       OtherCritIdx = PIdx;
    2034             :     }
    2035             :   }
    2036             :   if (OtherCritIdx) {
    2037             :     DEBUG(dbgs() << "  " << Available.getName() << " + Remain CritRes: "
    2038             :           << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
    2039             :           << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
    2040             :   }
    2041      353822 :   return OtherCritCount;
    2042             : }
    2043             : 
    2044     3289573 : void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
    2045             :   assert(SU->getInstr() && "Scheduled SUnit must have instr");
    2046             : 
    2047             : #ifndef NDEBUG
    2048             :   // ReadyCycle had been bumped up to the CurrCycle when this node was
    2049             :   // scheduled, but CurrCycle may have been eagerly advanced immediately after
    2050             :   // scheduling, so may now be greater than ReadyCycle.
    2051             :   if (ReadyCycle > CurrCycle)
    2052             :     MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
    2053             : #endif
    2054             : 
    2055     3289573 :   if (ReadyCycle < MinReadyCycle)
    2056      788145 :     MinReadyCycle = ReadyCycle;
    2057             : 
    2058             :   // Check for interlocks first. For the purpose of other heuristics, an
    2059             :   // instruction that cannot issue appears as if it's not in the ReadyQueue.
    2060     3289573 :   bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
    2061     6475393 :   if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU) ||
    2062             :       Available.size() >= ReadyListLimit)
    2063             :     Pending.push(SU);
    2064             :   else
    2065             :     Available.push(SU);
    2066     3289572 : }
    2067             : 
    2068             : /// Move the boundary of scheduled code by one cycle.
    2069      707721 : void SchedBoundary::bumpCycle(unsigned NextCycle) {
    2070      707721 :   if (SchedModel->getMicroOpBufferSize() == 0) {
    2071             :     assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
    2072             :            "MinReadyCycle uninitialized");
    2073      196348 :     if (MinReadyCycle > NextCycle)
    2074             :       NextCycle = MinReadyCycle;
    2075             :   }
    2076             :   // Update the current micro-ops, which will issue in the next cycle.
    2077      707721 :   unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
    2078      707721 :   CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
    2079             : 
    2080             :   // Decrement DependentLatency based on the next cycle.
    2081      707721 :   if ((NextCycle - CurrCycle) > DependentLatency)
    2082       93664 :     DependentLatency = 0;
    2083             :   else
    2084      614057 :     DependentLatency -= (NextCycle - CurrCycle);
    2085             : 
    2086      707721 :   if (!HazardRec->isEnabled()) {
    2087             :     // Bypass HazardRec virtual calls.
    2088      678717 :     CurrCycle = NextCycle;
    2089             :   } else {
    2090             :     // Bypass getHazardType calls in case of long latency.
    2091      117728 :     for (; CurrCycle != NextCycle; ++CurrCycle) {
    2092       44362 :       if (isTop())
    2093        1202 :         HazardRec->AdvanceCycle();
    2094             :       else
    2095       43160 :         HazardRec->RecedeCycle();
    2096             :     }
    2097             :   }
    2098      707721 :   CheckPending = true;
    2099      707721 :   IsResourceLimited =
    2100      707721 :       checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
    2101             :                          getScheduledLatency());
    2102             : 
    2103             :   DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n');
    2104      707721 : }
    2105             : 
    2106     1698885 : void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
    2107     3397770 :   ExecutedResCounts[PIdx] += Count;
    2108     1698885 :   if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
    2109      561595 :     MaxExecutedResCount = ExecutedResCounts[PIdx];
    2110     1698885 : }
    2111             : 
    2112             : /// Add the given processor resource to this scheduled zone.
    2113             : ///
    2114             : /// \param Cycles indicates the number of consecutive (non-pipelined) cycles
    2115             : /// during which this resource is consumed.
    2116             : ///
    2117             : /// \return the next cycle at which the instruction may execute without
    2118             : /// oversubscribing resources.
    2119     1698885 : unsigned SchedBoundary::
    2120             : countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
    2121     1698885 :   unsigned Factor = SchedModel->getResourceFactor(PIdx);
    2122     1698885 :   unsigned Count = Factor * Cycles;
    2123             :   DEBUG(dbgs() << "  " << SchedModel->getResourceName(PIdx)
    2124             :         << " +" << Cycles << "x" << Factor << "u\n");
    2125             : 
    2126             :   // Update Executed resources counts.
    2127     1698885 :   incExecutedResources(PIdx, Count);
    2128             :   assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
    2129     3397770 :   Rem->RemainingCounts[PIdx] -= Count;
    2130             : 
    2131             :   // Check if this resource exceeds the current critical resource. If so, it
    2132             :   // becomes the critical resource.
    2133     3234600 :   if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
    2134      207512 :     ZoneCritResIdx = PIdx;
    2135             :     DEBUG(dbgs() << "  *** Critical resource "
    2136             :           << SchedModel->getResourceName(PIdx) << ": "
    2137             :           << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n");
    2138             :   }
    2139             :   // For reserved resources, record the highest cycle using the resource.
    2140     1698885 :   unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles);
    2141             :   if (NextAvailable > CurrCycle) {
    2142             :     DEBUG(dbgs() << "  Resource conflict: "
    2143             :           << SchedModel->getProcResource(PIdx)->Name << " reserved until @"
    2144             :           << NextAvailable << "\n");
    2145             :   }
    2146     1698885 :   return NextAvailable;
    2147             : }
    2148             : 
    2149             : /// Move the boundary of scheduled code by one SUnit.
    2150     2252100 : void SchedBoundary::bumpNode(SUnit *SU) {
    2151             :   // Update the reservation table.
    2152     2252100 :   if (HazardRec->isEnabled()) {
    2153       39369 :     if (!isTop() && SU->isCall) {
    2154             :       // Calls are scheduled with their preceding instructions. For bottom-up
    2155             :       // scheduling, clear the pipeline state before emitting.
    2156           0 :       HazardRec->Reset();
    2157             :     }
    2158       39369 :     HazardRec->EmitInstruction(SU);
    2159             :   }
    2160             :   // checkHazard should prevent scheduling multiple instructions per cycle that
    2161             :   // exceed the issue width.
    2162     2252100 :   const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
    2163     2252100 :   unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
    2164             :   assert(
    2165             :       (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
    2166             :       "Cannot schedule this instruction's MicroOps in the current cycle.");
    2167             : 
    2168     2252100 :   unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
    2169             :   DEBUG(dbgs() << "  Ready @" << ReadyCycle << "c\n");
    2170             : 
    2171     2252100 :   unsigned NextCycle = CurrCycle;
    2172     2252100 :   switch (SchedModel->getMicroOpBufferSize()) {
    2173             :   case 0:
    2174             :     assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
    2175             :     break;
    2176      215105 :   case 1:
    2177      215105 :     if (ReadyCycle > NextCycle) {
    2178             :       NextCycle = ReadyCycle;
    2179             :       DEBUG(dbgs() << "  *** Stall until: " << ReadyCycle << "\n");
    2180             :     }
    2181             :     break;
    2182     1803030 :   default:
    2183             :     // We don't currently model the OOO reorder buffer, so consider all
    2184             :     // scheduled MOps to be "retired". We do loosely model in-order resource
    2185             :     // latency. If this instruction uses an in-order resource, account for any
    2186             :     // likely stall cycles.
    2187     1803030 :     if (SU->isUnbuffered && ReadyCycle > NextCycle)
    2188             :       NextCycle = ReadyCycle;
    2189             :     break;
    2190             :   }
    2191     2252100 :   RetiredMOps += IncMOps;
    2192             : 
    2193             :   // Update resource counts and critical resource.
    2194     2252100 :   if (SchedModel->hasInstrSchedModel()) {
    2195      972094 :     unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
    2196             :     assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
    2197      972094 :     Rem->RemIssueCount -= DecRemIssue;
    2198      972094 :     if (ZoneCritResIdx) {
    2199             :       // Scale scheduled micro-ops for comparing with the critical resource.
    2200             :       unsigned ScaledMOps =
    2201      398525 :         RetiredMOps * SchedModel->getMicroOpFactor();
    2202             : 
    2203             :       // If scaled micro-ops are now more than the previous critical resource by
    2204             :       // a full cycle, then micro-ops issue becomes critical.
    2205      797050 :       if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
    2206      398525 :           >= (int)SchedModel->getLatencyFactor()) {
    2207       10730 :         ZoneCritResIdx = 0;
    2208             :         DEBUG(dbgs() << "  *** Critical resource NumMicroOps: "
    2209             :               << ScaledMOps / SchedModel->getLatencyFactor() << "c\n");
    2210             :       }
    2211             :     }
    2212     1698885 :     for (TargetSchedModel::ProcResIter
    2213      972094 :            PI = SchedModel->getWriteProcResBegin(SC),
    2214     2670979 :            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
    2215             :       unsigned RCycle =
    2216     1698885 :         countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
    2217     1698885 :       if (RCycle > NextCycle)
    2218             :         NextCycle = RCycle;
    2219             :     }
    2220      972094 :     if (SU->hasReservedResource) {
    2221             :       // For reserved resources, record the highest cycle using the resource.
    2222             :       // For top-down scheduling, this is the cycle in which we schedule this
    2223             :       // instruction plus the number of cycles the operation reserves the
    2224             :       // resource. For bottom-up, it is simply the instruction's cycle.
    2225         736 :       for (TargetSchedModel::ProcResIter
    2226         588 :              PI = SchedModel->getWriteProcResBegin(SC),
    2227        1324 :              PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
    2228         736 :         unsigned PIdx = PI->ProcResourceIdx;
    2229        1472 :         if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
    2230         588 :           if (isTop()) {
    2231         302 :             ReservedCycles[PIdx] =
    2232         906 :               std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles);
    2233             :           }
    2234             :           else
    2235         286 :             ReservedCycles[PIdx] = NextCycle;
    2236             :         }
    2237             :       }
    2238             :     }
    2239             :   }
    2240             :   // Update ExpectedLatency and DependentLatency.
    2241     2252100 :   unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
    2242     2252100 :   unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
    2243     2252100 :   if (SU->getDepth() > TopLatency) {
    2244      449595 :     TopLatency = SU->getDepth();
    2245             :     DEBUG(dbgs() << "  " << Available.getName()
    2246             :           << " TopLatency SU(" << SU->NodeNum << ") " << TopLatency << "c\n");
    2247             :   }
    2248     2252100 :   if (SU->getHeight() > BotLatency) {
    2249      794888 :     BotLatency = SU->getHeight();
    2250             :     DEBUG(dbgs() << "  " << Available.getName()
    2251             :           << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n");
    2252             :   }
    2253             :   // If we stall for any reason, bump the cycle.
    2254     2252100 :   if (NextCycle > CurrCycle)
    2255       20713 :     bumpCycle(NextCycle);
    2256             :   else
    2257             :     // After updating ZoneCritResIdx and ExpectedLatency, check if we're
    2258             :     // resource limited. If a stall occurred, bumpCycle does this.
    2259     2231387 :     IsResourceLimited =
    2260     2231387 :         checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
    2261             :                            getScheduledLatency());
    2262             : 
    2263             :   // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
    2264             :   // resets CurrMOps. Loop to handle instructions with more MOps than can issue
    2265             :   // in one cycle.  Since we commonly reach the max MOps here, opportunistically
    2266             :   // bump the cycle to avoid uselessly checking everything in the readyQ.
    2267     2252100 :   CurrMOps += IncMOps;
    2268             : 
    2269             :   // Bump the cycle count for issue group constraints.
    2270             :   // This must be done after NextCycle has been adjust for all other stalls.
    2271             :   // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
    2272             :   // currCycle to X.
    2273     4504200 :   if ((isTop() &&  SchedModel->mustEndGroup(SU->getInstr())) ||
    2274     2146806 :       (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
    2275             :     DEBUG(dbgs() << "  Bump cycle to "
    2276             :                  << (isTop() ? "end" : "begin") << " group\n");
    2277         325 :     bumpCycle(++NextCycle);
    2278             :   }
    2279             : 
    2280     3443344 :   while (CurrMOps >= SchedModel->getIssueWidth()) {
    2281             :     DEBUG(dbgs() << "  *** Max MOps " << CurrMOps
    2282             :           << " at cycle " << CurrCycle << '\n');
    2283      595622 :     bumpCycle(++NextCycle);
    2284             :   }
    2285             :   DEBUG(dumpScheduledState());
    2286     2252100 : }
    2287             : 
    2288             : /// Release pending ready nodes into the available queue. This makes them
    2289             : /// visible to heuristics.
    2290      610357 : void SchedBoundary::releasePending() {
    2291             :   // If the available queue is empty, it is safe to reset MinReadyCycle.
    2292      610357 :   if (Available.empty())
    2293      108073 :     MinReadyCycle = std::numeric_limits<unsigned>::max();
    2294             : 
    2295             :   // Check to see if any of the pending instructions are ready to issue.  If
    2296             :   // so, add them to the available queue.
    2297      610357 :   bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
    2298     1212265 :   for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
    2299      304711 :     SUnit *SU = *(Pending.begin()+i);
    2300      304711 :     unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
    2301             : 
    2302      304711 :     if (ReadyCycle < MinReadyCycle)
    2303      115663 :       MinReadyCycle = ReadyCycle;
    2304             : 
    2305      304711 :     if (!IsBuffered && ReadyCycle > CurrCycle)
    2306       61945 :       continue;
    2307             : 
    2308      242766 :     if (checkHazard(SU))
    2309         171 :       continue;
    2310             : 
    2311      242595 :     if (Available.size() >= ReadyListLimit)
    2312             :       break;
    2313             : 
    2314             :     Available.push(SU);
    2315             :     Pending.remove(Pending.begin()+i);
    2316      238838 :     --i; --e;
    2317             :   }
    2318      610357 :   CheckPending = false;
    2319      610357 : }
    2320             : 
    2321             : /// Remove SU from the ready set for this boundary.
    2322     3289562 : void SchedBoundary::removeReady(SUnit *SU) {
    2323     6579124 :   if (Available.isInQueue(SU))
    2324             :     Available.remove(Available.find(SU));
    2325             :   else {
    2326             :     assert(Pending.isInQueue(SU) && "bad ready count");
    2327             :     Pending.remove(Pending.find(SU));
    2328             :   }
    2329     3289562 : }
    2330             : 
    2331             : /// If this queue only has one ready candidate, return it. As a side effect,
    2332             : /// defer any nodes that now hit a hazard, and advance the cycle until at least
    2333             : /// one node is ready. If multiple instructions are ready, return NULL.
    2334     2523210 : SUnit *SchedBoundary::pickOnlyChoice() {
    2335     2523210 :   if (CheckPending)
    2336      519296 :     releasePending();
    2337             : 
    2338     2523210 :   if (CurrMOps > 0) {
    2339             :     // Defer any ready instrs that now have a hazard.
    2340     7184913 :     for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
    2341     6367299 :       if (checkHazard(*I)) {
    2342      125656 :         Pending.push(*I);
    2343             :         I = Available.remove(I);
    2344      125656 :         continue;
    2345             :       }
    2346             :       ++I;
    2347             :     }
    2348             :   }
    2349     2705332 :   for (unsigned i = 0; Available.empty(); ++i) {
    2350             : //  FIXME: Re-enable assert once PR20057 is resolved.
    2351             : //    assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
    2352             : //           "permanent hazard");
    2353             :     (void)i;
    2354       91061 :     bumpCycle(CurrCycle + 1);
    2355       91061 :     releasePending();
    2356             :   }
    2357             : 
    2358             :   DEBUG(Pending.dump());
    2359             :   DEBUG(Available.dump());
    2360             : 
    2361     2523210 :   if (Available.size() == 1)
    2362      909845 :     return *Available.begin();
    2363             :   return nullptr;
    2364             : }
    2365             : 
    2366             : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    2367             : // This is useful information to dump after bumpNode.
    2368             : // Note that the Queue contents are more useful before pickNodeFromQueue.
    2369             : LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {
    2370             :   unsigned ResFactor;
    2371             :   unsigned ResCount;
    2372             :   if (ZoneCritResIdx) {
    2373             :     ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
    2374             :     ResCount = getResourceCount(ZoneCritResIdx);
    2375             :   } else {
    2376             :     ResFactor = SchedModel->getMicroOpFactor();
    2377             :     ResCount = RetiredMOps * ResFactor;
    2378             :   }
    2379             :   unsigned LFactor = SchedModel->getLatencyFactor();
    2380             :   dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
    2381             :          << "  Retired: " << RetiredMOps;
    2382             :   dbgs() << "\n  Executed: " << getExecutedCount() / LFactor << "c";
    2383             :   dbgs() << "\n  Critical: " << ResCount / LFactor << "c, "
    2384             :          << ResCount / ResFactor << " "
    2385             :          << SchedModel->getResourceName(ZoneCritResIdx)
    2386             :          << "\n  ExpectedLatency: " << ExpectedLatency << "c\n"
    2387             :          << (IsResourceLimited ? "  - Resource" : "  - Latency")
    2388             :          << " limited.\n";
    2389             : }
    2390             : #endif
    2391             : 
    2392             : //===----------------------------------------------------------------------===//
    2393             : // GenericScheduler - Generic implementation of MachineSchedStrategy.
    2394             : //===----------------------------------------------------------------------===//
    2395             : 
    2396    13862383 : void GenericSchedulerBase::SchedCandidate::
    2397             : initResourceDelta(const ScheduleDAGMI *DAG,
    2398             :                   const TargetSchedModel *SchedModel) {
    2399    13862383 :   if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
    2400             :     return;
    2401             : 
    2402       11345 :   const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
    2403       28936 :   for (TargetSchedModel::ProcResIter
    2404       11345 :          PI = SchedModel->getWriteProcResBegin(SC),
    2405       40281 :          PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
    2406       28936 :     if (PI->ProcResourceIdx == Policy.ReduceResIdx)
    2407         499 :       ResDelta.CritResources += PI->Cycles;
    2408       28936 :     if (PI->ProcResourceIdx == Policy.DemandResIdx)
    2409        5844 :       ResDelta.DemandedResources += PI->Cycles;
    2410             :   }
    2411             : }
    2412             : 
    2413             : /// Set the CandPolicy for a scheduling zone given the current resources and
    2414             : /// latencies inside and outside the zone.
    2415      535341 : void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
    2416             :                                      SchedBoundary &CurrZone,
    2417             :                                      SchedBoundary *OtherZone) {
    2418             :   // Apply preemptive heuristics based on the total latency and resources
    2419             :   // inside and outside this zone. Potential stalls should be considered before
    2420             :   // following this policy.
    2421             : 
    2422             :   // Compute remaining latency. We need this both to determine whether the
    2423             :   // overall schedule has become latency-limited and whether the instructions
    2424             :   // outside this zone are resource or latency limited.
    2425             :   //
    2426             :   // The "dependent" latency is updated incrementally during scheduling as the
    2427             :   // max height/depth of scheduled nodes minus the cycles since it was
    2428             :   // scheduled:
    2429             :   //   DLat = max(N.depth - (CurrCycle - N.ReadyCycle) for N in Zone)
    2430             :   //
    2431             :   // The "independent" latency is the max ready queue depth:
    2432             :   //   ILat = max N.depth for N in Available|Pending
    2433             :   //
    2434             :   // RemainingLatency is the greater of independent and dependent latency.
    2435      535341 :   unsigned RemLatency = CurrZone.getDependentLatency();
    2436      535341 :   RemLatency = std::max(RemLatency,
    2437     1070682 :                         CurrZone.findMaxLatency(CurrZone.Available.elements()));
    2438      535341 :   RemLatency = std::max(RemLatency,
    2439     1070682 :                         CurrZone.findMaxLatency(CurrZone.Pending.elements()));
    2440             : 
    2441             :   // Compute the critical resource outside the zone.
    2442      535341 :   unsigned OtherCritIdx = 0;
    2443             :   unsigned OtherCount =
    2444      535341 :     OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
    2445             : 
    2446             :   bool OtherResLimited = false;
    2447      535341 :   if (SchedModel->hasInstrSchedModel())
    2448      354450 :     OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
    2449             :                                          OtherCount, RemLatency);
    2450             : 
    2451             :   // Schedule aggressively for latency in PostRA mode. We don't check for
    2452             :   // acyclic latency during PostRA, and highly out-of-order processors will
    2453             :   // skip PostRA scheduling.
    2454      354450 :   if (!OtherResLimited) {
    2455      359744 :     if (IsPostRA || (RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath)) {
    2456      151742 :       Policy.ReduceLatency |= true;
    2457             :       DEBUG(dbgs() << "  " << CurrZone.Available.getName()
    2458             :             << " RemainingLatency " << RemLatency << " + "
    2459             :             << CurrZone.getCurrCycle() << "c > CritPath "
    2460             :             << Rem.CriticalPath << "\n");
    2461             :     }
    2462             :   }
    2463             :   // If the same resource is limiting inside and outside the zone, do nothing.
    2464      535341 :   if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
    2465      525595 :     return;
    2466             : 
    2467             :   DEBUG(
    2468             :     if (CurrZone.isResourceLimited()) {
    2469             :       dbgs() << "  " << CurrZone.Available.getName() << " ResourceLimited: "
    2470             :              << SchedModel->getResourceName(CurrZone.getZoneCritResIdx())
    2471             :              << "\n";
    2472             :     }
    2473             :     if (OtherResLimited)
    2474             :       dbgs() << "  RemainingLimit: "
    2475             :              << SchedModel->getResourceName(OtherCritIdx) << "\n";
    2476             :     if (!CurrZone.isResourceLimited() && !OtherResLimited)
    2477             :       dbgs() << "  Latency limited both directions.\n");
    2478             : 
    2479        9746 :   if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
    2480         817 :     Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
    2481             : 
    2482        9746 :   if (OtherResLimited)
    2483        1197 :     Policy.DemandResIdx = OtherCritIdx;
    2484             : }
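                     : 
                     : // Sketch of the RemainingLatency estimate above: the zone's dependent
                     : // latency combined with the deepest node still waiting in either the
                     : // available or the pending queue (names here are illustrative).
                     : #include <algorithm>
                     : 
                     : unsigned remainingLatency(unsigned DependentLatency,
                     :                           unsigned MaxAvailableDepth,
                     :                           unsigned MaxPendingDepth) {
                     :   return std::max(DependentLatency,
                     :                   std::max(MaxAvailableDepth, MaxPendingDepth));
                     : }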
    2485             : 
    2486             : #ifndef NDEBUG
    2487             : const char *GenericSchedulerBase::getReasonStr(
    2488             :   GenericSchedulerBase::CandReason Reason) {
    2489             :   switch (Reason) {
    2490             :   case NoCand:         return "NOCAND    ";
    2491             :   case Only1:          return "ONLY1     ";
    2492             :   case PhysRegCopy:    return "PREG-COPY ";
    2493             :   case RegExcess:      return "REG-EXCESS";
    2494             :   case RegCritical:    return "REG-CRIT  ";
    2495             :   case Stall:          return "STALL     ";
    2496             :   case Cluster:        return "CLUSTER   ";
    2497             :   case Weak:           return "WEAK      ";
    2498             :   case RegMax:         return "REG-MAX   ";
    2499             :   case ResourceReduce: return "RES-REDUCE";
    2500             :   case ResourceDemand: return "RES-DEMAND";
    2501             :   case TopDepthReduce: return "TOP-DEPTH ";
    2502             :   case TopPathReduce:  return "TOP-PATH  ";
    2503             :   case BotHeightReduce:return "BOT-HEIGHT";
    2504             :   case BotPathReduce:  return "BOT-PATH  ";
    2505             :   case NextDefUse:     return "DEF-USE   ";
    2506             :   case NodeOrder:      return "ORDER     ";
    2507             :   }
    2508             :   llvm_unreachable("Unknown reason!");
    2509             : }
    2510             : 
    2511             : void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
    2512             :   PressureChange P;
    2513             :   unsigned ResIdx = 0;
    2514             :   unsigned Latency = 0;
    2515             :   switch (Cand.Reason) {
    2516             :   default:
    2517             :     break;
    2518             :   case RegExcess:
    2519             :     P = Cand.RPDelta.Excess;
    2520             :     break;
    2521             :   case RegCritical:
    2522             :     P = Cand.RPDelta.CriticalMax;
    2523             :     break;
    2524             :   case RegMax:
    2525             :     P = Cand.RPDelta.CurrentMax;
    2526             :     break;
    2527             :   case ResourceReduce:
    2528             :     ResIdx = Cand.Policy.ReduceResIdx;
    2529             :     break;
    2530             :   case ResourceDemand:
    2531             :     ResIdx = Cand.Policy.DemandResIdx;
    2532             :     break;
    2533             :   case TopDepthReduce:
    2534             :     Latency = Cand.SU->getDepth();
    2535             :     break;
    2536             :   case TopPathReduce:
    2537             :     Latency = Cand.SU->getHeight();
    2538             :     break;
    2539             :   case BotHeightReduce:
    2540             :     Latency = Cand.SU->getHeight();
    2541             :     break;
    2542             :   case BotPathReduce:
    2543             :     Latency = Cand.SU->getDepth();
    2544             :     break;
    2545             :   }
    2546             :   dbgs() << "  Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
    2547             :   if (P.isValid())
    2548             :     dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
    2549             :            << ":" << P.getUnitInc() << " ";
    2550             :   else
    2551             :     dbgs() << "      ";
    2552             :   if (ResIdx)
    2553             :     dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
    2554             :   else
    2555             :     dbgs() << "         ";
    2556             :   if (Latency)
    2557             :     dbgs() << " " << Latency << " cycles ";
    2558             :   else
    2559             :     dbgs() << "          ";
    2560             :   dbgs() << '\n';
    2561             : }
    2562             : #endif
    2563             : 
    2564             : /// Return true if this heuristic determines order.
    2565             : static bool tryLess(int TryVal, int CandVal,
    2566             :                     GenericSchedulerBase::SchedCandidate &TryCand,
    2567             :                     GenericSchedulerBase::SchedCandidate &Cand,
    2568             :                     GenericSchedulerBase::CandReason Reason) {
    2569    59929770 :   if (TryVal < CandVal) {
    2570       64999 :     TryCand.Reason = Reason;
    2571             :     return true;
    2572             :   }
    2573    59864771 :   if (TryVal > CandVal) {
    2574      229998 :     if (Cand.Reason > Reason)
    2575       77740 :       Cand.Reason = Reason;
    2576             :     return true;
    2577             :   }
    2578             :   return false;
    2579             : }
    2580             : 
    2581             : static bool tryGreater(int TryVal, int CandVal,
    2582             :                        GenericSchedulerBase::SchedCandidate &TryCand,
    2583             :                        GenericSchedulerBase::SchedCandidate &Cand,
    2584             :                        GenericSchedulerBase::CandReason Reason) {
    2585    62598288 :   if (TryVal > CandVal) {
    2586      283638 :     TryCand.Reason = Reason;
    2587             :     return true;
    2588             :   }
    2589    62314650 :   if (TryVal < CandVal) {
    2590      853646 :     if (Cand.Reason > Reason)
    2591      314789 :       Cand.Reason = Reason;
    2592             :     return true;
    2593             :   }
    2594             :   return false;
    2595             : }
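                     : 
                     : // Sketch (with illustrative metrics) of how tryLess/tryGreater compose
                     : // into the hierarchical comparison used below: each tier either decides
                     : // the order or defers to the next, weaker tier; a full tie keeps the
                     : // incumbent candidate.
                     : bool tryCandWins(int TryA, int CandA, int TryB, int CandB) {
                     :   if (TryA != CandA)
                     :     return TryA < CandA; // tier 1: smaller metric A wins
                     :   return TryB > CandB;   // tier 2: larger metric B breaks the tie
                     : }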
    2596             : 
    2597      757629 : static bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
    2598             :                        GenericSchedulerBase::SchedCandidate &Cand,
    2599             :                        SchedBoundary &Zone) {
    2600      757629 :   if (Zone.isTop()) {
    2601      380338 :     if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
    2602        9597 :       if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
    2603             :                   TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
    2604             :         return true;
    2605             :     }
    2606      570165 :     if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
    2607             :                    TryCand, Cand, GenericSchedulerBase::TopPathReduce))
    2608             :       return true;
    2609             :   } else {
    2610     1134920 :     if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
    2611       24417 :       if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
    2612             :                   TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
    2613             :         return true;
    2614             :     }
    2615     1693392 :     if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
    2616             :                    TryCand, Cand, GenericSchedulerBase::BotPathReduce))
    2617             :       return true;
    2618             :   }
    2619             :   return false;
    2620             : }
    2621             : 
    2622             : static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
    2623             :   DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
    2624             :         << GenericSchedulerBase::getReasonStr(Reason) << '\n');
    2625             : }
    2626             : 
    2627             : static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
    2628             :   tracePick(Cand.Reason, Cand.AtTop);
    2629             : }
    2630             : 
    2631      381375 : void GenericScheduler::initialize(ScheduleDAGMI *dag) {
    2632             :   assert(dag->hasVRegLiveness() &&
    2633             :          "(PreRA)GenericScheduler needs vreg liveness");
    2634      381375 :   DAG = static_cast<ScheduleDAGMILive*>(dag);
    2635      381375 :   SchedModel = DAG->getSchedModel();
    2636      381375 :   TRI = DAG->TRI;
    2637             : 
    2638      381375 :   Rem.init(DAG, SchedModel);
    2639      381375 :   Top.init(DAG, SchedModel, &Rem);
    2640      381375 :   Bot.init(DAG, SchedModel, &Rem);
    2641             : 
    2642             :   // Initialize resource counts.
    2643             : 
    2644             :   // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
    2645             :   // are disabled, then these HazardRecs will be disabled.
    2646      381375 :   const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
    2647      381375 :   if (!Top.HazardRec) {
    2648      124356 :     Top.HazardRec =
    2649      248712 :         DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
    2650      124356 :             Itin, DAG);
    2651             :   }
    2652      381375 :   if (!Bot.HazardRec) {
    2653      124356 :     Bot.HazardRec =
    2654      248712 :         DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
    2655      124356 :             Itin, DAG);
    2656             :   }
    2657      381375 :   TopCand.SU = nullptr;
    2658      381375 :   BotCand.SU = nullptr;
    2659      381375 : }
    2660             : 
    2661             : /// Initialize the per-region scheduling policy.
    2662     1102182 : void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
    2663             :                                   MachineBasicBlock::iterator End,
    2664             :                                   unsigned NumRegionInstrs) {
    2665             :   const MachineFunction &MF = *Begin->getMF();
    2666     1102183 :   const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
    2667             : 
    2668             :   // Avoid setting up the register pressure tracker for small regions to save
    2669             :   // compile time. As a rough heuristic, only track pressure when the number of
    2670             :   // schedulable instructions exceeds half the integer register file.
    2671     1102183 :   RegionPolicy.ShouldTrackPressure = true;
    2672     7715281 :   for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
    2673     3306549 :     MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
    2674     3306549 :     if (TLI->isTypeLegal(LegalIntVT)) {
    2675     9461520 :       unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
    2676     6307680 :         TLI->getRegClassFor(LegalIntVT));
    2677     3153840 :       RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
    2678             :     }
    2679             :   }
    2680             : 
    2681             :   // For generic targets, default to bottom-up: it is simpler, and more
    2682             :   // compile-time optimizations have been implemented in that direction.
    2683     1102183 :   RegionPolicy.OnlyBottomUp = true;
    2684             : 
    2685             :   // Allow the subtarget to override default policy.
    2686     1102183 :   MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
    2687             : 
    2688             :   // After subtarget overrides, apply command line options.
    2689     1102183 :   if (!EnableRegPressure)
    2690           0 :     RegionPolicy.ShouldTrackPressure = false;
    2691             : 
    2692             :   // Check -misched-topdown/bottomup can force or unforce scheduling direction.
    2693             :   // e.g. -misched-bottomup=false allows scheduling in both directions.
    2694             :   assert((!ForceTopDown || !ForceBottomUp) &&
    2695             :          "-misched-topdown incompatible with -misched-bottomup");
    2696     1102183 :   if (ForceBottomUp.getNumOccurrences() > 0) {
    2697           0 :     RegionPolicy.OnlyBottomUp = ForceBottomUp;
    2698           0 :     if (RegionPolicy.OnlyBottomUp)
    2699           0 :       RegionPolicy.OnlyTopDown = false;
    2700             :   }
    2701     1102183 :   if (ForceTopDown.getNumOccurrences() > 0) {
    2702           4 :     RegionPolicy.OnlyTopDown = ForceTopDown;
    2703           4 :     if (RegionPolicy.OnlyTopDown)
    2704           4 :       RegionPolicy.OnlyBottomUp = false;
    2705             :   }
    2706     1102183 : }
    2707             : 
    2708           0 : void GenericScheduler::dumpPolicy() const {
    2709             :   // Cannot completely remove virtual function even in release mode.
    2710             : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    2711             :   dbgs() << "GenericScheduler RegionPolicy: "
    2712             :          << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
    2713             :          << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
    2714             :          << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
    2715             :          << "\n";
    2716             : #endif
    2717           0 : }
    2718             : 
    2719             : /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
    2720             : /// critical path by more cycles than it takes to drain the instruction buffer.
    2721             : /// We estimate an upper bound on in-flight instructions as:
    2722             : ///
    2723             : /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
    2724             : /// InFlightIterations = AcyclicPath / CyclesPerIteration
    2725             : /// InFlightResources = InFlightIterations * LoopResources
    2726             : ///
    2727             : /// TODO: Check execution resources in addition to IssueCount.
    2728      350918 : void GenericScheduler::checkAcyclicLatency() {
    2729      350918 :   if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
    2730             :     return;
    2731             : 
    2732             :   // Scaled number of cycles per loop iteration.
    2733             :   unsigned IterCount =
    2734         720 :     std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
    2735         720 :              Rem.RemIssueCount);
    2736             :   // Scaled acyclic critical path.
    2737         360 :   unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
    2738             :   // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
    2739         360 :   unsigned InFlightCount =
    2740         360 :     (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
    2741             :   unsigned BufferLimit =
    2742         360 :     SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
    2743             : 
    2744         360 :   Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
    2745             : 
    2746             :   DEBUG(dbgs() << "IssueCycles="
    2747             :         << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
    2748             :         << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
    2749             :         << "c NumIters=" << (AcyclicCount + IterCount-1) / IterCount
    2750             :         << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
    2751             :         << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
    2752             :         if (Rem.IsAcyclicLatencyLimited)
    2753             :           dbgs() << "  ACYCLIC LATENCY LIMIT\n");
    2754             : }
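                     : 
                     : // Worked instance of the formulas above with hypothetical scaled
                     : // counts: IterCount = max(40, 60) = 60 and AcyclicCount = 120, so
                     : // InFlightCount = (120*60 + 59)/60 = 120, which exceeds a BufferLimit
                     : // of 80 and marks the loop acyclic-latency limited.
                     : unsigned inFlightCount(unsigned AcyclicCount, unsigned RemIssueCount,
                     :                        unsigned IterCount) {
                     :   return (AcyclicCount * RemIssueCount + IterCount - 1) / IterCount;
                     : }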
    2755             : 
    2756      381375 : void GenericScheduler::registerRoots() {
    2757      762750 :   Rem.CriticalPath = DAG->ExitSU.getDepth();
    2758             : 
    2759             :   // Some roots may not feed into ExitSU. Check all of them in case.
    2760     1193411 :   for (const SUnit *SU : Bot.Available) {
    2761      812036 :     if (SU->getDepth() > Rem.CriticalPath)
    2762       50341 :       Rem.CriticalPath = SU->getDepth();
    2763             :   }
    2764             :   DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
    2765      381375 :   if (DumpCriticalPathLength) {
    2766           0 :     errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
    2767             :   }
    2768             : 
    2769      381375 :   if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
    2770      350918 :     Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
    2771      350918 :     checkAcyclicLatency();
    2772             :   }
    2773      381375 : }
    2774             : 
    2775    30019699 : static bool tryPressure(const PressureChange &TryP,
    2776             :                         const PressureChange &CandP,
    2777             :                         GenericSchedulerBase::SchedCandidate &TryCand,
    2778             :                         GenericSchedulerBase::SchedCandidate &Cand,
    2779             :                         GenericSchedulerBase::CandReason Reason,
    2780             :                         const TargetRegisterInfo *TRI,
    2781             :                         const MachineFunction &MF) {
    2782             :   // If one candidate decreases and the other increases, go with it.
    2783             :   // Invalid candidates have UnitInc==0.
    2784    90059097 :   if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
    2785             :                  Reason)) {
    2786             :     return true;
    2787             :   }
    2788             :   // Do not compare the magnitude of pressure changes between top and bottom
    2789             :   // boundary.
    2790    30000279 :   if (Cand.AtTop != TryCand.AtTop)
    2791             :     return false;
    2792             : 
    2793             :   // If both candidates affect the same set in the same boundary, go with the
    2794             :   // smallest increase.
    2795    29715770 :   unsigned TryPSet = TryP.getPSetOrMax();
    2796    29715770 :   unsigned CandPSet = CandP.getPSetOrMax();
    2797    29715770 :   if (TryPSet == CandPSet) {
    2798             :     return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
    2799             :                    Reason);
    2800             :   }
    2801             : 
    2802      275054 :   int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
    2803             :                                  std::numeric_limits<int>::max();
    2804             : 
    2805      275054 :   int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
    2806             :                                    std::numeric_limits<int>::max();
    2807             : 
    2808             :   // If the candidates are decreasing pressure, reverse priority.
    2809      275054 :   if (TryP.getUnitInc() < 0)
    2810             :     std::swap(TryRank, CandRank);
    2811             :   return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
    2812             : }
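                     : 
                     : // Sketch of the final ranking step above: when both candidates are
                     : // decreasing pressure the ranks are swapped, so the larger reduction
                     : // wins the tryGreater comparison (the ranks here are illustrative).
                     : #include <utility>
                     : 
                     : bool tryCandRanksHigher(int TryRank, int CandRank, bool Decreasing) {
                     :   if (Decreasing)
                     :     std::swap(TryRank, CandRank); // reverse priority on decreases
                     :   return TryRank > CandRank;
                     : }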
    2813             : 
    2814             : static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
    2815    20402166 :   return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
    2816             : }
    2817             : 
    2818             : /// Minimize physical register live ranges. Regalloc wants them adjacent to
    2819             : /// their physreg def/use.
    2820             : ///
    2821             : /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
    2822             : /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
    2823             : /// with the operation that produces or consumes the physreg. We'll do this when
    2824             : /// regalloc has support for parallel copies.
    2825    22400904 : static int biasPhysRegCopy(const SUnit *SU, bool isTop) {
    2826    22400904 :   const MachineInstr *MI = SU->getInstr();
    2827    22400904 :   if (!MI->isCopy())
    2828             :     return 0;
    2829             : 
    2830     2471952 :   unsigned ScheduledOper = isTop ? 1 : 0;
    2831     2471952 :   unsigned UnscheduledOper = isTop ? 0 : 1;
    2832             :   // If we have already scheduled the physreg produce/consumer, immediately
    2833             :   // schedule the copy.
    2834     4943904 :   if (TargetRegisterInfo::isPhysicalRegister(
    2835     2471952 :         MI->getOperand(ScheduledOper).getReg()))
    2836             :     return 1;
    2837             :   // If the physreg is at the boundary, defer it. Otherwise schedule it
    2838             :   // immediately to free the dependent. We can hoist the copy later.
    2839     1587823 :   bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
    2840     3175646 :   if (TargetRegisterInfo::isPhysicalRegister(
    2841             :         MI->getOperand(UnscheduledOper).getReg()))
    2842      596642 :     return AtBoundary ? -1 : 1;
    2843             :   return 0;
    2844             : }
    2845             : 
    2846     9601031 : void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
    2847             :                                      bool AtTop,
    2848             :                                      const RegPressureTracker &RPTracker,
    2849             :                                      RegPressureTracker &TempTracker) {
    2850     9601031 :   Cand.SU = SU;
    2851     9601031 :   Cand.AtTop = AtTop;
    2852     9601031 :   if (DAG->isTrackingPressure()) {
    2853     8415087 :     if (AtTop) {
    2854      119932 :       TempTracker.getMaxDownwardPressureDelta(
    2855       59966 :         Cand.SU->getInstr(),
    2856             :         Cand.RPDelta,
    2857             :         DAG->getRegionCriticalPSets(),
    2858             :         DAG->getRegPressure().MaxSetPressure);
    2859             :     } else {
    2860     8355121 :       if (VerifyScheduling) {
    2861          38 :         TempTracker.getMaxUpwardPressureDelta(
    2862          19 :           Cand.SU->getInstr(),
    2863          19 :           &DAG->getPressureDiff(Cand.SU),
    2864             :           Cand.RPDelta,
    2865             :           DAG->getRegionCriticalPSets(),
    2866             :           DAG->getRegPressure().MaxSetPressure);
    2867             :       } else {
    2868    16710204 :         RPTracker.getUpwardPressureDelta(
    2869     8355102 :           Cand.SU->getInstr(),
    2870             :           DAG->getPressureDiff(Cand.SU),
    2871             :           Cand.RPDelta,
    2872             :           DAG->getRegionCriticalPSets(),
    2873             :           DAG->getRegPressure().MaxSetPressure);
    2874             :       }
    2875             :     }
    2876             :   }
    2877             :   DEBUG(if (Cand.RPDelta.Excess.isValid())
    2878             :           dbgs() << "  Try  SU(" << Cand.SU->NodeNum << ") "
    2879             :                  << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet())
    2880             :                  << ":" << Cand.RPDelta.Excess.getUnitInc() << "\n");
    2881     9601031 : }
    2882             : 
    2883             : /// Apply a set of heuristics to a new candidate. Heuristics are currently
    2884             : /// hierarchical. This may be more efficient than a graduated cost model because
    2885             : /// we don't need to evaluate all aspects of the model for each node in the
    2886             : /// queue. But it's really done to make the heuristics easier to debug and
    2887             : /// statistically analyze.
    2888             : ///
    2889             : /// \param Cand provides the policy and current best candidate.
    2890             : /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
    2891             : /// \param Zone describes the scheduled zone that we are extending, or nullptr
    2892             : ///             if Cand is from a different zone than TryCand.
    2893    12600185 : void GenericScheduler::tryCandidate(SchedCandidate &Cand,
    2894             :                                     SchedCandidate &TryCand,
    2895             :                                     SchedBoundary *Zone) {
    2896             :   // Initialize the candidate if needed.
    2897    12600185 :   if (!Cand.isValid()) {
    2898     1399733 :     TryCand.Reason = NodeOrder;
    2899     1399733 :     return;
    2900             :   }
    2901             : 
    2902    11200452 :   if (tryGreater(biasPhysRegCopy(TryCand.SU, TryCand.AtTop),
    2903    11200452 :                  biasPhysRegCopy(Cand.SU, Cand.AtTop),
    2904             :                  TryCand, Cand, PhysRegCopy))
    2905             :     return;
    2906             : 
    2907             :   // Avoid exceeding the target's limit.
    2908    20884452 :   if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
    2909             :                                                Cand.RPDelta.Excess,
    2910             :                                                TryCand, Cand, RegExcess, TRI,
    2911    10194140 :                                                DAG->MF))
    2912             :     return;
    2913             : 
    2914             :   // Avoid increasing the max critical pressure in the scheduled region.
    2915    20786632 :   if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
    2916             :                                                Cand.RPDelta.CriticalMax,
    2917             :                                                TryCand, Cand, RegCritical, TRI,
    2918    10145230 :                                                DAG->MF))
    2919             :     return;
    2920             : 
    2921             :   // We only compare a subset of features when comparing nodes between the
    2922             :   // Top and Bottom boundaries. Some properties are simply incomparable; in
    2923             :   // many other instances we should only override the other boundary if
    2924             :   // something is a clearly good pick on one boundary. Skip heuristics that
    2925             :   // are more "tie-breaking" in nature.
    2926             :   bool SameBoundary = Zone != nullptr;
    2927    10376451 :   if (SameBoundary) {
    2928             :     // For loops that are acyclic path limited, aggressively schedule for
    2929             :     // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
    2930             :     // heuristics to take precedence.
    2931    10276243 :     if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
    2932         879 :         tryLatency(TryCand, Cand, *Zone))
    2933             :       return;
    2934             : 
    2935             :     // Prioritize instructions that read unbuffered resources by stall cycles.
    2936    10274670 :     if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
    2937    10274670 :                 Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
    2938             :       return;
    2939             :   }
    2940             : 
    2941             :   // Keep clustered nodes together to encourage downstream peephole
    2942             :   // optimizations which may reduce resource requirements.
    2943             :   //
    2944             :   // This is a best effort to set things up for a post-RA pass. Optimizations
    2945             :   // like generating loads of multiple registers should ideally be done within
    2946             :   // the scheduler pass by combining the loads during DAG postprocessing.
    2947             :   const SUnit *CandNextClusterSU =
    2948    10346732 :     Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
    2949             :   const SUnit *TryCandNextClusterSU =
    2950    10346732 :     TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
    2951    10346732 :   if (tryGreater(TryCand.SU == TryCandNextClusterSU,
    2952    10346732 :                  Cand.SU == CandNextClusterSU,
    2953             :                  TryCand, Cand, Cluster))
    2954             :     return;
    2955             : 
    2956    10301841 :   if (SameBoundary) {
    2957             :     // Weak edges are for clustering and other constraints.
    2958    20402166 :     if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
    2959             :                 getWeakLeft(Cand.SU, Cand.AtTop),
    2960             :                 TryCand, Cand, Weak))
    2961             :       return;
    2962             :   }
    2963             : 
    2964             :   // Avoid increasing the max pressure of the entire region.
    2965    19855101 :   if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
    2966             :                                                Cand.RPDelta.CurrentMax,
    2967             :                                                TryCand, Cand, RegMax, TRI,
    2968     9680329 :                                                DAG->MF))
    2969             :     return;
    2970             : 
    2971    10058497 :   if (SameBoundary) {
    2972             :     // Avoid critical resource consumption and balance the schedule.
    2973     9957739 :     TryCand.initResourceDelta(DAG, SchedModel);
    2974     9957739 :     if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
    2975             :                 TryCand, Cand, ResourceReduce))
    2976             :       return;
    2977     9957675 :     if (tryGreater(TryCand.ResDelta.DemandedResources,
    2978     9957675 :                    Cand.ResDelta.DemandedResources,
    2979             :                    TryCand, Cand, ResourceDemand))
    2980             :       return;
    2981             : 
    2982             :     // Avoid serializing long latency dependence chains.
    2983             :     // For acyclic path limited loops, latency was already checked above.
    2984    20648253 :     if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
    2985    11427233 :         !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
    2986             :       return;
    2987             : 
    2988             :     // Fall through to original instruction order.
    2989      237267 :     if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
    2990    19259102 :         || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
    2991     2154839 :       TryCand.Reason = NodeOrder;
    2992             :     }
    2993             :   }
    2994             : }
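                     : 
                     : // For contrast with the hierarchical chain above, a graduated cost model
                     : // would rank every node by one blended score instead of stopping at the
                     : // first decisive criterion. A sketch only; the weights and inputs below
                     : // are hypothetical, not values used by this scheduler:
                     : namespace sched_sketch {
                     : static int weightedScore(int PressureExcess, int StallCycles, int Depth) {
                     :   // Hypothetical weights; a real model would tune these per target.
                     :   return 8 * PressureExcess + 4 * StallCycles + Depth;
                     : }
                     : } // end namespace sched_sketch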
    2995             : 
    2996             : /// Pick the best candidate from the queue.
    2997             : ///
    2998             : /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
    2999             : /// DAG building. To adjust for the current scheduling location we need to
    3000             : /// maintain the number of vreg uses remaining to be top-scheduled.
    3001     1127006 : void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
    3002             :                                          const CandPolicy &ZonePolicy,
    3003             :                                          const RegPressureTracker &RPTracker,
    3004             :                                          SchedCandidate &Cand) {
    3005             :   // getMaxPressureDelta temporarily modifies the tracker.
    3006             :   RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
    3007             : 
    3008             :   ReadyQueue &Q = Zone.Available;
    3009    10728037 :   for (SUnit *SU : Q) {
    3010             : 
    3011             :     SchedCandidate TryCand(ZonePolicy);
    3012     9601031 :     initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
    3013             :     // Pass SchedBoundary only when comparing nodes from the same boundary.
    3014     9601031 :     SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
    3015     9601031 :     tryCandidate(Cand, TryCand, ZoneArg);
    3016     9601031 :     if (TryCand.Reason != NoCand) {
    3017             :       // Initialize resource delta if needed in case future heuristics query it.
    3018             :       if (TryCand.ResDelta == SchedResourceDelta())
    3019     2856826 :         TryCand.initResourceDelta(DAG, SchedModel);
    3020             :       Cand.setBest(TryCand);
    3021             :       DEBUG(traceCandidate(Cand));
    3022             :     }
    3023             :   }
    3024     1127006 : }
    3025             : 
    3026             : /// Pick the best candidate node from either the top or bottom queue.
    3027      116728 : SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
    3028             :   // Schedule as far as possible in the direction of no choice. This is most
    3029             :   // efficient, but also provides the best heuristics for CriticalPSets.
    3030      116728 :   if (SUnit *SU = Bot.pickOnlyChoice()) {
    3031       73302 :     IsTopNode = false;
    3032             :     tracePick(Only1, false);
    3033       73302 :     return SU;
    3034             :   }
    3035       43426 :   if (SUnit *SU = Top.pickOnlyChoice()) {
    3036        2617 :     IsTopNode = true;
    3037             :     tracePick(Only1, true);
    3038        2617 :     return SU;
    3039             :   }
    3040             :   // Set the bottom-up policy based on the state of the current bottom zone and
    3041             :   // the instructions outside the zone, including the top zone.
    3042       40809 :   CandPolicy BotPolicy;
    3043       40809 :   setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
    3044             :   // Set the top-down policy based on the state of the current top zone and
    3045             :   // the instructions outside the zone, including the bottom zone.
    3046       40809 :   CandPolicy TopPolicy;
    3047       40809 :   setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
    3048             : 
    3049             :   // See if BotCand is still valid (because we previously scheduled from Top).
    3050             :   DEBUG(dbgs() << "Picking from Bot:\n");
    3051       40809 :   if (!BotCand.isValid() || BotCand.SU->isScheduled ||
    3052             :       BotCand.Policy != BotPolicy) {
    3053       30732 :     BotCand.reset(CandPolicy());
    3054       61464 :     pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
    3055             :     assert(BotCand.Reason != NoCand && "failed to find the first candidate");
    3056             :   } else {
    3057             :     DEBUG(traceCandidate(BotCand));
    3058             : #ifndef NDEBUG
    3059             :     if (VerifyScheduling) {
    3060             :       SchedCandidate TCand;
    3061             :       TCand.reset(CandPolicy());
    3062             :       pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
    3063             :       assert(TCand.SU == BotCand.SU &&
    3064             :              "Last pick result should correspond to re-picking right now");
    3065             :     }
    3066             : #endif
    3067             :   }
    3068             : 
    3069             :   // Check if the top Q has a better candidate.
    3070             :   DEBUG(dbgs() << "Picking from Top:\n");
    3071       40809 :   if (!TopCand.isValid() || TopCand.SU->isScheduled ||
    3072             :       TopCand.Policy != TopPolicy) {
    3073       26266 :     TopCand.reset(CandPolicy());
    3074       52532 :     pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
    3075             :     assert(TopCand.Reason != NoCand && "failed to find the first candidate");
    3076             :   } else {
    3077             :     DEBUG(traceCandidate(TopCand));
    3078             : #ifndef NDEBUG
    3079             :     if (VerifyScheduling) {
    3080             :       SchedCandidate TCand;
    3081             :       TCand.reset(CandPolicy());
    3082             :       pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
    3083             :       assert(TCand.SU == TopCand.SU &&
    3084             :            "Last pick result should correspond to re-picking right now");
    3085             :     }
    3086             : #endif
    3087             :   }
    3088             : 
    3089             :   // Pick best from BotCand and TopCand.
    3090             :   assert(BotCand.isValid());
    3091             :   assert(TopCand.isValid());
    3092       40809 :   SchedCandidate Cand = BotCand;
    3093       40809 :   TopCand.Reason = NoCand;
    3094       40809 :   tryCandidate(Cand, TopCand, nullptr);
    3095       40809 :   if (TopCand.Reason != NoCand) {
    3096             :     Cand.setBest(TopCand);
    3097             :     DEBUG(traceCandidate(Cand));
    3098             :   }
    3099             : 
    3100       40809 :   IsTopNode = Cand.AtTop;
    3101             :   tracePick(Cand);
    3102       40809 :   return Cand.SU;
    3103             : }
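                     : 
                     : // The reuse tests above amount to a small cache-validity predicate, shown
                     : // here as a sketch (CurrentZonePolicy stands in for BotPolicy/TopPolicy;
                     : // CandPolicy compares by value):
                     : //   bool CacheValid = Cand.isValid() && !Cand.SU->isScheduled &&
                     : //                     Cand.Policy == CurrentZonePolicy;
                     : // Only when it fails do we pay for a full pickNodeFromQueue() scan.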
    3104             : 
    3105             : /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
    3106     2304670 : SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
    3107     2304670 :   if (DAG->top() == DAG->bottom()) {
    3108             :     assert(Top.Available.empty() && Top.Pending.empty() &&
    3109             :            Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    3110             :     return nullptr;
    3111             :   }
    3112             :   SUnit *SU;
    3113             :   do {
    3114     1941054 :     if (RegionPolicy.OnlyTopDown) {
    3115          54 :       SU = Top.pickOnlyChoice();
    3116          54 :       if (!SU) {
    3117          46 :         CandPolicy NoPolicy;
    3118          46 :         TopCand.reset(NoPolicy);
    3119          92 :         pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
    3120             :         assert(TopCand.Reason != NoCand && "failed to find a candidate");
    3121             :         tracePick(TopCand);
    3122          46 :         SU = TopCand.SU;
    3123             :       }
    3124          54 :       IsTopNode = true;
    3125     1941000 :     } else if (RegionPolicy.OnlyBottomUp) {
    3126     1824272 :       SU = Bot.pickOnlyChoice();
    3127     1824272 :       if (!SU) {
    3128     1069962 :         CandPolicy NoPolicy;
    3129     1069962 :         BotCand.reset(NoPolicy);
    3130     2139924 :         pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
    3131             :         assert(BotCand.Reason != NoCand && "failed to find a candidate");
    3132             :         tracePick(BotCand);
    3133     1069962 :         SU = BotCand.SU;
    3134             :       }
    3135     1824272 :       IsTopNode = false;
    3136             :     } else {
    3137      116728 :       SU = pickNodeBidirectional(IsTopNode);
    3138             :     }
    3139     1941054 :   } while (SU->isScheduled);
    3140             : 
    3141     1941054 :   if (SU->isTopReady())
    3142      965134 :     Top.removeReady(SU);
    3143     1941054 :   if (SU->isBottomReady())
    3144     1931100 :     Bot.removeReady(SU);
    3145             : 
    3146             :   DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
    3147             :   return SU;
    3148             : }
    3149             : 
    3150      151438 : void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
    3151      151438 :   MachineBasicBlock::iterator InsertPos = SU->getInstr();
    3152      151438 :   if (!isTop)
    3153             :     ++InsertPos;
    3154      151438 :   SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
    3155             : 
    3156             :   // Find already scheduled copies with a single physreg dependence and move
    3157             :   // them immediately adjacent to the scheduled instruction.
    3158      905120 :   for (SDep &Dep : Deps) {
    3159      499909 :     if (Dep.getKind() != SDep::Data || !TRI->isPhysicalRegister(Dep.getReg()))
    3160      263507 :       continue;
    3161             :     SUnit *DepSU = Dep.getSUnit();
    3162      226668 :     if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
    3163      105160 :       continue;
    3164        8174 :     MachineInstr *Copy = DepSU->getInstr();
    3165        8174 :     if (!Copy->isCopy())
    3166        3765 :       continue;
    3167             :     DEBUG(dbgs() << "  Rescheduling physreg copy ";
    3168             :           Dep.getSUnit()->dump(DAG));
    3169        4409 :     DAG->moveInstruction(Copy, InsertPos);
    3170             :   }
    3171      151438 : }
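                     : 
                     : // Illustrative effect in pseudo-MIR (registers are hypothetical): given
                     : //   %0 = COPY $physreg0        ; copy with a single data successor
                     : //   ...                        ; unrelated instructions
                     : //   %1 = ADD %0, %2            ; just-scheduled user of the copy
                     : // the COPY is moved next to the ADD, keeping the physreg live range as
                     : // short as regalloc wants it.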
    3172             : 
    3173             : /// Update the scheduler's state after scheduling a node. This is the same node
    3174             : /// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
    3175             : /// update its state based on the current cycle before MachineSchedStrategy
    3176             : /// does.
    3177             : ///
    3178             : /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
    3179             : /// them here. See comments in biasPhysRegCopy.
    3180     2223699 : void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
    3181     2223699 :   if (IsTopNode) {
    3182      153786 :     SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
    3183       76893 :     Top.bumpNode(SU);
    3184       76893 :     if (SU->hasPhysRegUses)
    3185       44303 :       reschedulePhysRegCopies(SU, true);
    3186             :   } else {
    3187     4293612 :     SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
    3188     2146806 :     Bot.bumpNode(SU);
    3189     2146806 :     if (SU->hasPhysRegDefs)
    3190      107135 :       reschedulePhysRegCopies(SU, false);
    3191             :   }
    3192     2223699 : }
    3193             : 
    3194             : /// Create the standard converging machine scheduler. This will be used as the
    3195             : /// default scheduler if the target does not set a default.
    3196      112275 : ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
    3197             :   ScheduleDAGMILive *DAG =
    3198      224549 :       new ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C));
    3199             :   // Register DAG post-processors.
    3200             :   //
    3201             :   // FIXME: extend the mutation API to allow earlier mutations to instantiate
    3202             :   // data and pass it to later mutations. Have a single mutation that gathers
    3203             :   // the interesting nodes in one pass.
    3204      224549 :   DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
    3205      112275 :   return DAG;
    3206             : }
    3207             : 
    3208           0 : static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
    3209           0 :   return createGenericSchedLive(C);
    3210             : }
    3211             : 
    3212             : static MachineSchedRegistry
    3213       81774 : GenericSchedRegistry("converge", "Standard converging scheduler.",
    3214             :                      createConvergingSched);
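                     : 
                     : // Targets can plug in their own strategy the same way; the registered
                     : // name then selects the scheduler on the command line via -misched=<name>.
                     : // A sketch, where MyStrategy is a hypothetical MachineSchedStrategy:
                     : //
                     : //   static ScheduleDAGInstrs *createMySched(MachineSchedContext *C) {
                     : //     return new ScheduleDAGMILive(C, llvm::make_unique<MyStrategy>(C));
                     : //   }
                     : //   static MachineSchedRegistry
                     : //   MySchedRegistry("mysched", "My custom scheduler.", createMySched);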
    3215             : 
    3216             : //===----------------------------------------------------------------------===//
    3217             : // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
    3218             : //===----------------------------------------------------------------------===//
    3219             : 
    3220        6722 : void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
    3221        6722 :   DAG = Dag;
    3222        6722 :   SchedModel = DAG->getSchedModel();
    3223        6722 :   TRI = DAG->TRI;
    3224             : 
    3225        6722 :   Rem.init(DAG, SchedModel);
    3226        6722 :   Top.init(DAG, SchedModel, &Rem);
    3227             :   BotRoots.clear();
    3228             : 
    3229             :   // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
    3230             :   // or are disabled, then these HazardRecs will be disabled.
    3231        6722 :   const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
    3232        6722 :   if (!Top.HazardRec) {
    3233        5702 :     Top.HazardRec =
    3234       11404 :         DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
    3235        5702 :             Itin, DAG);
    3236             :   }
    3237        6722 : }
    3238             : 
    3239        6722 : void PostGenericScheduler::registerRoots() {
    3240       13444 :   Rem.CriticalPath = DAG->ExitSU.getDepth();
    3241             : 
    3242             :   // Some roots may not feed into ExitSU. Check all of them in case.
    3243       24324 :   for (const SUnit *SU : BotRoots) {
    3244        8801 :     if (SU->getDepth() > Rem.CriticalPath)
    3245        1073 :       Rem.CriticalPath = SU->getDepth();
    3246             :   }
    3247             :   DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
    3248        6722 :   if (DumpCriticalPathLength) {
    3249           0 :     errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
    3250             :   }
    3251        6722 : }
    3252             : 
    3253             : /// Apply a set of heuristics to a new candidate for PostRA scheduling.
    3254             : ///
    3255             : /// \param Cand provides the policy and current best candidate.
    3256             : /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
    3257       31345 : void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
    3258             :                                         SchedCandidate &TryCand) {
    3259             :   // Initialize the candidate if needed.
    3260       31345 :   if (!Cand.isValid()) {
    3261        9151 :     TryCand.Reason = NodeOrder;
    3262        9151 :     return;
    3263             :   }
    3264             : 
    3265             :   // Prioritize instructions that read unbuffered resources by stall cycles.
    3266       22194 :   if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
    3267       22194 :               Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
    3268             :     return;
    3269             : 
    3270             :   // Keep clustered nodes together.
    3271       22169 :   if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
    3272       22169 :                  Cand.SU == DAG->getNextClusterSucc(),
    3273             :                  TryCand, Cand, Cluster))
    3274             :     return;
    3275             : 
    3276             :   // Avoid critical resource consumption and balance the schedule.
    3277       22030 :   if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
    3278             :               TryCand, Cand, ResourceReduce))
    3279             :     return;
    3280       21988 :   if (tryGreater(TryCand.ResDelta.DemandedResources,
    3281       21988 :                  Cand.ResDelta.DemandedResources,
    3282             :                  TryCand, Cand, ResourceDemand))
    3283             :     return;
    3284             : 
    3285             :   // Avoid serializing long latency dependence chains.
    3286       21988 :   if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
    3287             :     return;
    3288             :   }
    3289             : 
    3290             :   // Fall through to original instruction order.
    3291       13504 :   if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
    3292        5231 :     TryCand.Reason = NodeOrder;
    3293             : }
    3294             : 
    3295        9151 : void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
    3296             :   ReadyQueue &Q = Top.Available;
    3297       40496 :   for (SUnit *SU : Q) {
    3298             :     SchedCandidate TryCand(Cand.Policy);
    3299       31345 :     TryCand.SU = SU;
    3300       31345 :     TryCand.AtTop = true;
    3301       31345 :     TryCand.initResourceDelta(DAG, SchedModel);
    3302       31345 :     tryCandidate(Cand, TryCand);
    3303       31345 :     if (TryCand.Reason != NoCand) {
    3304             :       Cand.setBest(TryCand);
    3305             :       DEBUG(traceCandidate(Cand));
    3306             :     }
    3307             :   }
    3308        9151 : }
    3309             : 
    3310             : /// Pick the next node to schedule.
    3311       35123 : SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
    3312       35123 :   if (DAG->top() == DAG->bottom()) {
    3313             :     assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
    3314             :     return nullptr;
    3315             :   }
    3316             :   SUnit *SU;
    3317             :   do {
    3318       28401 :     SU = Top.pickOnlyChoice();
    3319       28401 :     if (SU) {
    3320             :       tracePick(Only1, true);
    3321             :     } else {
    3322             :       CandPolicy NoPolicy;
    3323             :       SchedCandidate TopCand(NoPolicy);
    3324             :       // Set the top-down policy based on the state of the current top zone and
    3325             :       // the instructions outside the zone, including the bottom zone.
    3326        9151 :       setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
    3327        9151 :       pickNodeFromQueue(TopCand);
    3328             :       assert(TopCand.Reason != NoCand && "failed to find a candidate");
    3329             :       tracePick(TopCand);
    3330        9151 :       SU = TopCand.SU;
    3331             :     }
    3332       28401 :   } while (SU->isScheduled);
    3333             : 
    3334       28401 :   IsTopNode = true;
    3335       28401 :   Top.removeReady(SU);
    3336             : 
    3337             :   DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
    3338       28401 :   return SU;
    3339             : }
    3340             : 
    3341             : /// Called after ScheduleDAGMI has scheduled an instruction and updated
    3342             : /// scheduled/remaining flags in the DAG nodes.
    3343       28401 : void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
    3344       56802 :   SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
    3345       28401 :   Top.bumpNode(SU);
    3346       28401 : }
    3347             : 
    3348        9280 : ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
    3349             :   return new ScheduleDAGMI(C, llvm::make_unique<PostGenericScheduler>(C),
    3350       18560 :                            /*RemoveKillFlags=*/true);
    3351             : }
    3352             : 
    3353             : //===----------------------------------------------------------------------===//
    3354             : // ILP Scheduler. Currently for experimental analysis of heuristics.
    3355             : //===----------------------------------------------------------------------===//
    3356             : 
    3357             : namespace {
    3358             : 
    3359             : /// \brief Order nodes by the ILP metric.
    3360             : struct ILPOrder {
    3361             :   const SchedDFSResult *DFSResult = nullptr;
    3362             :   const BitVector *ScheduledTrees = nullptr;
    3363             :   bool MaximizeILP;
    3364             : 
    3365           8 :   ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
    3366             : 
    3367             :   /// \brief Apply a less-than relation on node priority.
    3368             :   ///
    3369             :   /// (Return true if A comes after B in the Q.)
    3370         300 :   bool operator()(const SUnit *A, const SUnit *B) const {
    3371         300 :     unsigned SchedTreeA = DFSResult->getSubtreeID(A);
    3372             :     unsigned SchedTreeB = DFSResult->getSubtreeID(B);
    3373         300 :     if (SchedTreeA != SchedTreeB) {
    3374             :       // Unscheduled trees have lower priority.
    3375         318 :       if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
    3376             :         return ScheduledTrees->test(SchedTreeB);
    3377             : 
    3378             :       // Trees with shallower connections have lower priority.
    3379          58 :       if (DFSResult->getSubtreeLevel(SchedTreeA)
    3380             :           != DFSResult->getSubtreeLevel(SchedTreeB)) {
    3381             :         return DFSResult->getSubtreeLevel(SchedTreeA)
    3382           0 :           < DFSResult->getSubtreeLevel(SchedTreeB);
    3383             :       }
    3384             :     }
    3385         199 :     if (MaximizeILP)
    3386         232 :       return DFSResult->getILP(A) < DFSResult->getILP(B);
    3387             :     else
    3388         166 :       return DFSResult->getILP(A) > DFSResult->getILP(B);
    3389             :   }
    3390             : };
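                     : 
                     : // Self-contained sketch (needs <algorithm> and <vector>) of how a
                     : // comparator like ILPOrder drives the standard heap algorithms used
                     : // below: operator() returning true means "A has lower priority", so the
                     : // best element surfaces first. Q must already be heap-ordered by Lower
                     : // (e.g. via std::make_heap):
                     : namespace sched_sketch {
                     : inline int popBest(std::vector<int> &Q) {
                     :   auto Lower = [](int A, int B) { return A < B; }; // max-heap on value
                     :   std::pop_heap(Q.begin(), Q.end(), Lower);        // best moves to back
                     :   int Best = Q.back();
                     :   Q.pop_back();
                     :   return Best;
                     : }
                     : } // end namespace sched_sketch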
    3391             : 
    3392             : /// \brief Schedule based on the ILP metric.
    3393          16 : class ILPScheduler : public MachineSchedStrategy {
    3394             :   ScheduleDAGMILive *DAG = nullptr;
    3395             :   ILPOrder Cmp;
    3396             : 
    3397             :   std::vector<SUnit*> ReadyQ;
    3398             : 
    3399             : public:
    3400           8 :   ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
    3401             : 
    3402          10 :   void initialize(ScheduleDAGMI *dag) override {
    3403             :     assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
    3404          10 :     DAG = static_cast<ScheduleDAGMILive*>(dag);
    3405          10 :     DAG->computeDFSResult();
    3406          10 :     Cmp.DFSResult = DAG->getDFSResult();
    3407          10 :     Cmp.ScheduledTrees = &DAG->getScheduledTrees();
    3408             :     ReadyQ.clear();
    3409          10 :   }
    3410             : 
    3411          10 :   void registerRoots() override {
    3412             :     // Restore the heap in ReadyQ with the updated DFS results.
    3413          10 :     std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
    3414          10 :   }
    3415             : 
    3416             :   /// Implement MachineSchedStrategy interface.
    3417             :   /// -----------------------------------------
    3418             : 
    3419             :   /// Callback to select the highest priority node from the ready Q.
    3420         186 :   SUnit *pickNode(bool &IsTopNode) override {
    3421         186 :     if (ReadyQ.empty()) return nullptr;
    3422         176 :     std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
    3423         176 :     SUnit *SU = ReadyQ.back();
    3424             :     ReadyQ.pop_back();
    3425         176 :     IsTopNode = false;
    3426             :     DEBUG(dbgs() << "Pick node " << "SU(" << SU->NodeNum << ") "
    3427             :           << " ILP: " << DAG->getDFSResult()->getILP(SU)
    3428             :           << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @"
    3429             :           << DAG->getDFSResult()->getSubtreeLevel(
    3430             :             DAG->getDFSResult()->getSubtreeID(SU)) << '\n'
    3431             :           << "Scheduling " << *SU->getInstr());
    3432         176 :     return SU;
    3433             :   }
    3434             : 
    3435             :   /// \brief Scheduler callback to notify that a new subtree is scheduled.
    3436          38 :   void scheduleTree(unsigned SubtreeID) override {
    3437          38 :     std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
    3438          38 :   }
    3439             : 
    3440             :   /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
    3441             :   /// DFSResults, and resort the priority Q.
    3442         176 :   void schedNode(SUnit *SU, bool IsTopNode) override {
    3443             :     assert(!IsTopNode && "SchedDFSResult needs bottom-up");
    3444         176 :   }
    3445             : 
    3446          64 :   void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
    3447             : 
    3448         176 :   void releaseBottomNode(SUnit *SU) override {
    3449         176 :     ReadyQ.push_back(SU);
    3450         176 :     std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
    3451         176 :   }
    3452             : };
    3453             : 
    3454             : } // end anonymous namespace
    3455             : 
    3456           6 : static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
    3457          24 :   return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(true));
    3458             : }
    3459           2 : static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
    3460           8 :   return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(false));
    3461             : }
    3462             : 
    3463       81774 : static MachineSchedRegistry ILPMaxRegistry(
    3464             :   "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
    3465       81774 : static MachineSchedRegistry ILPMinRegistry(
    3466             :   "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
    3467             : 
    3468             : //===----------------------------------------------------------------------===//
    3469             : // Machine Instruction Shuffler for Correctness Testing
    3470             : //===----------------------------------------------------------------------===//
    3471             : 
    3472             : #ifndef NDEBUG
    3473             : namespace {
    3474             : 
    3475             : /// Apply a less-than relation on the node order, which corresponds to the
    3476             : /// instruction order prior to scheduling. IsReverse implements greater-than.
    3477             : template<bool IsReverse>
    3478             : struct SUnitOrder {
    3479             :   bool operator()(SUnit *A, SUnit *B) const {
    3480             :     if (IsReverse)
    3481             :       return A->NodeNum > B->NodeNum;
    3482             :     else
    3483             :       return A->NodeNum < B->NodeNum;
    3484             :   }
    3485             : };
    3486             : 
    3487             : /// Reorder instructions as much as possible.
    3488             : class InstructionShuffler : public MachineSchedStrategy {
    3489             :   bool IsAlternating;
    3490             :   bool IsTopDown;
    3491             : 
    3492             :   // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
    3493             :   // gives nodes with a higher number higher priority, causing the latest
    3494             :   // instructions to be scheduled first.
    3495             :   PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
    3496             :     TopQ;
    3497             : 
    3498             :   // When scheduling bottom-up, use greater-than as the queue priority.
    3499             :   PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
    3500             :     BottomQ;
    3501             : 
    3502             : public:
    3503             :   InstructionShuffler(bool alternate, bool topdown)
    3504             :     : IsAlternating(alternate), IsTopDown(topdown) {}
    3505             : 
    3506             :   void initialize(ScheduleDAGMI*) override {
    3507             :     TopQ.clear();
    3508             :     BottomQ.clear();
    3509             :   }
    3510             : 
    3511             :   /// Implement MachineSchedStrategy interface.
    3512             :   /// -----------------------------------------
    3513             : 
    3514             :   SUnit *pickNode(bool &IsTopNode) override {
    3515             :     SUnit *SU;
    3516             :     if (IsTopDown) {
    3517             :       do {
    3518             :         if (TopQ.empty()) return nullptr;
    3519             :         SU = TopQ.top();
    3520             :         TopQ.pop();
    3521             :       } while (SU->isScheduled);
    3522             :       IsTopNode = true;
    3523             :     } else {
    3524             :       do {
    3525             :         if (BottomQ.empty()) return nullptr;
    3526             :         SU = BottomQ.top();
    3527             :         BottomQ.pop();
    3528             :       } while (SU->isScheduled);
    3529             :       IsTopNode = false;
    3530             :     }
    3531             :     if (IsAlternating)
    3532             :       IsTopDown = !IsTopDown;
    3533             :     return SU;
    3534             :   }
    3535             : 
    3536             :   void schedNode(SUnit *SU, bool IsTopNode) override {}
    3537             : 
    3538             :   void releaseTopNode(SUnit *SU) override {
    3539             :     TopQ.push(SU);
    3540             :   }
    3541             :   void releaseBottomNode(SUnit *SU) override {
    3542             :     BottomQ.push(SU);
    3543             :   }
    3544             : };
    3545             : 
    3546             : } // end anonymous namespace
    3547             : 
    3548             : static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
    3549             :   bool Alternate = !ForceTopDown && !ForceBottomUp;
    3550             :   bool TopDown = !ForceBottomUp;
    3551             :   assert((TopDown || !ForceTopDown) &&
    3552             :          "-misched-topdown incompatible with -misched-bottomup");
    3553             :   return new ScheduleDAGMILive(
    3554             :       C, llvm::make_unique<InstructionShuffler>(Alternate, TopDown));
    3555             : }
    3556             : 
    3557             : static MachineSchedRegistry ShufflerRegistry(
    3558             :   "shuffle", "Shuffle machine instructions alternating directions",
    3559             :   createInstructionShuffler);
    3560             : #endif // !NDEBUG
    3561             : 
    3562             : //===----------------------------------------------------------------------===//
    3563             : // GraphWriter support for ScheduleDAGMILive.
    3564             : //===----------------------------------------------------------------------===//
    3565             : 
    3566             : #ifndef NDEBUG
    3567             : namespace llvm {
    3568             : 
    3569             : template<> struct GraphTraits<
    3570             :   ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
    3571             : 
    3572             : template<>
    3573             : struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
    3574             :   DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
    3575             : 
    3576             :   static std::string getGraphName(const ScheduleDAG *G) {
    3577             :     return G->MF.getName();
    3578             :   }
    3579             : 
    3580             :   static bool renderGraphFromBottomUp() {
    3581             :     return true;
    3582             :   }
    3583             : 
    3584             :   static bool isNodeHidden(const SUnit *Node) {
    3585             :     if (ViewMISchedCutoff == 0)
    3586             :       return false;
    3587             :     return (Node->Preds.size() > ViewMISchedCutoff
    3588             :          || Node->Succs.size() > ViewMISchedCutoff);
    3589             :   }
    3590             : 
    3591             :   /// If you want to override the dot attributes printed for a particular
    3592             :   /// edge, override this method.
    3593             :   static std::string getEdgeAttributes(const SUnit *Node,
    3594             :                                        SUnitIterator EI,
    3595             :                                        const ScheduleDAG *Graph) {
    3596             :     if (EI.isArtificialDep())
    3597             :       return "color=cyan,style=dashed";
    3598             :     if (EI.isCtrlDep())
    3599             :       return "color=blue,style=dashed";
    3600             :     return "";
    3601             :   }
    3602             : 
    3603             :   static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
    3604             :     std::string Str;
    3605             :     raw_string_ostream SS(Str);
    3606             :     const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
    3607             :     const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
    3608             :       static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
    3609             :     SS << "SU:" << SU->NodeNum;
    3610             :     if (DFS)
    3611             :       SS << " I:" << DFS->getNumInstrs(SU);
    3612             :     return SS.str();
    3613             :   }
    3614             : 
    3615             :   static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
    3616             :     return G->getGraphNodeLabel(SU);
    3617             :   }
    3618             : 
    3619             :   static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
    3620             :     std::string Str("shape=Mrecord");
    3621             :     const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
    3622             :     const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
    3623             :       static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
    3624             :     if (DFS) {
    3625             :       Str += ",style=filled,fillcolor=\"#";
    3626             :       Str += DOT::getColorString(DFS->getSubtreeID(N));
    3627             :       Str += '"';
    3628             :     }
    3629             :     return Str;
    3630             :   }
    3631             : };
    3632             : 
    3633             : } // end namespace llvm
    3634             : #endif // NDEBUG
    3635             : 
    3636             : /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
    3637             : /// rendered using 'dot'.
    3638           0 : void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
    3639             : #ifndef NDEBUG
    3640             :   ViewGraph(this, Name, false, Title);
    3641             : #else
    3642           0 :   errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
    3643           0 :          << "systems with Graphviz or gv!\n";
    3644             : #endif  // NDEBUG
    3645           0 : }
    3646             : 
    3647             : /// Out-of-line implementation with no arguments is handy for gdb.
    3648           0 : void ScheduleDAGMI::viewGraph() {
    3649           0 :   viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
    3650      245322 : }
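                     : 
                     : // Usage sketch (illustrative): in a debug build, this overload can be
                     : // invoked directly from a debugger while stopped inside the scheduler,
                     : // e.g.:
                     : //   (gdb) call this->viewGraph()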

Generated by: LCOV version 1.13