LCOV - code coverage report
Current view: top level - lib/CodeGen - MachineScheduler.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 1282 1320 97.1 %
Date: 2017-09-14 15:23:50 Functions: 115 130 88.5 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // MachineScheduler schedules machine instructions after phi elimination. It
      11             : // preserves LiveIntervals so it can be invoked before register allocation.
      12             : //
      13             : //===----------------------------------------------------------------------===//
      14             : 
      15             : #include "llvm/CodeGen/MachineScheduler.h"
      16             : #include "llvm/ADT/ArrayRef.h"
      17             : #include "llvm/ADT/BitVector.h"
      18             : #include "llvm/ADT/DenseMap.h"
      19             : #include "llvm/ADT/PriorityQueue.h"
      20             : #include "llvm/ADT/STLExtras.h"
      21             : #include "llvm/ADT/SmallVector.h"
      22             : #include "llvm/ADT/iterator_range.h"
      23             : #include "llvm/Analysis/AliasAnalysis.h"
      24             : #include "llvm/CodeGen/LiveInterval.h"
      25             : #include "llvm/CodeGen/LiveIntervalAnalysis.h"
      26             : #include "llvm/CodeGen/MachineBasicBlock.h"
      27             : #include "llvm/CodeGen/MachineDominators.h"
      28             : #include "llvm/CodeGen/MachineFunction.h"
      29             : #include "llvm/CodeGen/MachineFunctionPass.h"
      30             : #include "llvm/CodeGen/MachineInstr.h"
      31             : #include "llvm/CodeGen/MachineLoopInfo.h"
      32             : #include "llvm/CodeGen/MachineOperand.h"
      33             : #include "llvm/CodeGen/MachinePassRegistry.h"
      34             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      35             : #include "llvm/CodeGen/MachineValueType.h"
      36             : #include "llvm/CodeGen/Passes.h"
      37             : #include "llvm/CodeGen/RegisterClassInfo.h"
      38             : #include "llvm/CodeGen/RegisterPressure.h"
      39             : #include "llvm/CodeGen/ScheduleDAG.h"
      40             : #include "llvm/CodeGen/ScheduleDAGInstrs.h"
      41             : #include "llvm/CodeGen/ScheduleDAGMutation.h"
      42             : #include "llvm/CodeGen/ScheduleDFS.h"
      43             : #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
      44             : #include "llvm/CodeGen/SlotIndexes.h"
      45             : #include "llvm/CodeGen/TargetPassConfig.h"
      46             : #include "llvm/CodeGen/TargetSchedule.h"
      47             : #include "llvm/MC/LaneBitmask.h"
      48             : #include "llvm/Pass.h"
      49             : #include "llvm/Support/CommandLine.h"
      50             : #include "llvm/Support/Compiler.h"
      51             : #include "llvm/Support/Debug.h"
      52             : #include "llvm/Support/ErrorHandling.h"
      53             : #include "llvm/Support/GraphWriter.h"
      54             : #include "llvm/Support/raw_ostream.h"
      55             : #include "llvm/Target/TargetInstrInfo.h"
      56             : #include "llvm/Target/TargetLowering.h"
      57             : #include "llvm/Target/TargetRegisterInfo.h"
      58             : #include "llvm/Target/TargetSubtargetInfo.h"
      59             : #include <algorithm>
      60             : #include <cassert>
      61             : #include <cstdint>
      62             : #include <iterator>
      63             : #include <limits>
      64             : #include <memory>
      65             : #include <string>
      66             : #include <tuple>
      67             : #include <utility>
      68             : #include <vector>
      69             : 
      70             : using namespace llvm;
      71             : 
      72             : #define DEBUG_TYPE "machine-scheduler"
      73             : 
      74             : namespace llvm {
      75             : 
      76       72306 : cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
      77      144612 :                            cl::desc("Force top-down list scheduling"));
      78       72306 : cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
      79      144612 :                             cl::desc("Force bottom-up list scheduling"));
      80             : cl::opt<bool>
      81       72306 : DumpCriticalPathLength("misched-dcpl", cl::Hidden,
      82      144612 :                        cl::desc("Print critical path length to stdout"));
      83             : 
      84             : } // end namespace llvm
      85             : 
      86             : #ifndef NDEBUG
      87             : static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
      88             :   cl::desc("Pop up a window to show MISched dags after they are processed"));
      89             : 
      90             : /// In some situations a few uninteresting nodes depend on nearly all other
      91             : /// nodes in the graph, provide a cutoff to hide them.
      92             : static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
      93             :   cl::desc("Hide nodes with more predecessor/successor than cutoff"));
      94             : 
      95             : static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
      96             :   cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
      97             : 
      98             : static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
      99             :   cl::desc("Only schedule this function"));
     100             : static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
     101             :   cl::desc("Only schedule this MBB#"));
     102             : #else
     103             : static bool ViewMISchedDAGs = false;
     104             : #endif // NDEBUG
     105             : 
     106             : /// Avoid quadratic complexity in unusually large basic blocks by limiting the
     107             : /// size of the ready lists.
     108       72306 : static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
     109      216918 :   cl::desc("Limit ready list to N instructions"), cl::init(256));
     110             : 
     111       72306 : static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
     112      216918 :   cl::desc("Enable register pressure scheduling."), cl::init(true));
     113             : 
     114       72306 : static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
     115      216918 :   cl::desc("Enable cyclic critical path analysis."), cl::init(true));
     116             : 
     117       72306 : static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
     118      216918 :                                         cl::desc("Enable memop clustering."),
     119      289224 :                                         cl::init(true));
     120             : 
     121       72306 : static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
     122      144612 :   cl::desc("Verify machine instrs before and after machine scheduling"));
     123             : 
     124             : // DAG subtrees must have at least this many nodes.
     125             : static const unsigned MinSubtreeSize = 8;
     126             : 
     127             : // Pin the vtables to this file.
     128           0 : void MachineSchedStrategy::anchor() {}
     129             : 
     130           0 : void ScheduleDAGMutation::anchor() {}
     131             : 
     132             : //===----------------------------------------------------------------------===//
     133             : // Machine Instruction Scheduling Pass and Registry
     134             : //===----------------------------------------------------------------------===//
     135             : 
     136       17265 : MachineSchedContext::MachineSchedContext() {
     137       17265 :   RegClassInfo = new RegisterClassInfo();
     138       17265 : }
     139             : 
     140       34303 : MachineSchedContext::~MachineSchedContext() {
     141       17151 :   delete RegClassInfo;
     142       17152 : }
     143             : 
     144             : namespace {
     145             : 
     146             : /// Base class for a machine scheduler class that can run at any point.
     147       17151 : class MachineSchedulerBase : public MachineSchedContext,
     148             :                              public MachineFunctionPass {
     149             : public:
     150       17265 :   MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
     151             : 
     152             :   void print(raw_ostream &O, const Module* = nullptr) const override;
     153             : 
     154             : protected:
     155             :   void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
     156             : };
     157             : 
     158             : /// MachineScheduler runs after coalescing and before register allocation.
     159       31007 : class MachineScheduler : public MachineSchedulerBase {
     160             : public:
     161             :   MachineScheduler();
     162             : 
     163             :   void getAnalysisUsage(AnalysisUsage &AU) const override;
     164             : 
     165             :   bool runOnMachineFunction(MachineFunction&) override;
     166             : 
     167             :   static char ID; // Class identification, replacement for typeinfo
     168             : 
     169             : protected:
     170             :   ScheduleDAGInstrs *createMachineScheduler();
     171             : };
     172             : 
     173             : /// PostMachineScheduler runs after shortly before code emission.
     174        3296 : class PostMachineScheduler : public MachineSchedulerBase {
     175             : public:
     176             :   PostMachineScheduler();
     177             : 
     178             :   void getAnalysisUsage(AnalysisUsage &AU) const override;
     179             : 
     180             :   bool runOnMachineFunction(MachineFunction&) override;
     181             : 
     182             :   static char ID; // Class identification, replacement for typeinfo
     183             : 
     184             : protected:
     185             :   ScheduleDAGInstrs *createPostMachineScheduler();
     186             : };
     187             : 
     188             : } // end anonymous namespace
     189             : 
     190             : char MachineScheduler::ID = 0;
     191             : 
     192             : char &llvm::MachineSchedulerID = MachineScheduler::ID;
     193             : 
     194       20212 : INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,
     195             :                       "Machine Instruction Scheduler", false, false)
     196       20212 : INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
     197       20212 : INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
     198       20212 : INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
     199       20212 : INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
     200      319277 : INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,
     201             :                     "Machine Instruction Scheduler", false, false)
     202             : 
     203       31204 : MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
     204       15602 :   initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
     205       15602 : }
     206             : 
     207       15551 : void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
     208       15551 :   AU.setPreservesCFG();
     209       15551 :   AU.addRequiredID(MachineDominatorsID);
     210       15551 :   AU.addRequired<MachineLoopInfo>();
     211       15551 :   AU.addRequired<AAResultsWrapperPass>();
     212       15551 :   AU.addRequired<TargetPassConfig>();
     213       15551 :   AU.addRequired<SlotIndexes>();
     214       15551 :   AU.addPreserved<SlotIndexes>();
     215       15551 :   AU.addRequired<LiveIntervals>();
     216       15551 :   AU.addPreserved<LiveIntervals>();
     217       15551 :   MachineFunctionPass::getAnalysisUsage(AU);
     218       15551 : }
     219             : 
     220             : char PostMachineScheduler::ID = 0;
     221             : 
     222             : char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
     223             : 
     224      156188 : INITIALIZE_PASS(PostMachineScheduler, "postmisched",
     225             :                 "PostRA Machine Instruction Scheduler", false, false)
     226             : 
     227        3326 : PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
     228        1663 :   initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
     229        1663 : }
     230             : 
     231        1653 : void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
     232        1653 :   AU.setPreservesCFG();
     233        1653 :   AU.addRequiredID(MachineDominatorsID);
     234        1653 :   AU.addRequired<MachineLoopInfo>();
     235        1653 :   AU.addRequired<TargetPassConfig>();
     236        1653 :   MachineFunctionPass::getAnalysisUsage(AU);
     237        1653 : }
     238             : 
     239             : MachinePassRegistry MachineSchedRegistry::Registry;
     240             : 
     241             : /// A dummy default scheduler factory indicates whether the scheduler
     242             : /// is overridden on the command line.
     243           0 : static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
     244           0 :   return nullptr;
     245             : }
     246             : 
     247             : /// MachineSchedOpt allows command line selection of the scheduler.
     248             : static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
     249             :                RegisterPassParser<MachineSchedRegistry>>
     250       72306 : MachineSchedOpt("misched",
     251      216918 :                 cl::init(&useDefaultMachineSched), cl::Hidden,
     252      289224 :                 cl::desc("Machine instruction scheduler to use"));
     253             : 
     254             : static MachineSchedRegistry
     255       72306 : DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
     256       72306 :                      useDefaultMachineSched);
     257             : 
     258       72306 : static cl::opt<bool> EnableMachineSched(
     259             :     "enable-misched",
     260      289224 :     cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
     261      216918 :     cl::Hidden);
     262             : 
     263       72306 : static cl::opt<bool> EnablePostRAMachineSched(
     264             :     "enable-post-misched",
     265      216918 :     cl::desc("Enable the post-ra machine instruction scheduling pass."),
     266      289224 :     cl::init(true), cl::Hidden);
     267             : 
     268             : /// Decrement this iterator until reaching the top or a non-debug instr.
     269             : static MachineBasicBlock::const_iterator
     270             : priorNonDebug(MachineBasicBlock::const_iterator I,
     271             :               MachineBasicBlock::const_iterator Beg) {
     272             :   assert(I != Beg && "reached the top of the region, cannot decrement");
     273     4753926 :   while (--I != Beg) {
     274     4059290 :     if (!I->isDebugValue())
     275             :       break;
     276             :   }
     277             :   return I;
     278             : }
     279             : 
     280             : /// Non-const version.
     281             : static MachineBasicBlock::iterator
     282     2363269 : priorNonDebug(MachineBasicBlock::iterator I,
     283             :               MachineBasicBlock::const_iterator Beg) {
     284     4726538 :   return priorNonDebug(MachineBasicBlock::const_iterator(I), Beg)
     285     2363269 :       .getNonConstIterator();
     286             : }
     287             : 
     288             : /// If this iterator is a debug value, increment until reaching the End or a
     289             : /// non-debug instruction.
     290             : static MachineBasicBlock::const_iterator
     291             : nextIfDebug(MachineBasicBlock::const_iterator I,
     292             :             MachineBasicBlock::const_iterator End) {
     293     1489322 :   for(; I != End; ++I) {
     294     2874140 :     if (!I->isDebugValue())
     295             :       break;
     296             :   }
     297             :   return I;
     298             : }
     299             : 
     300             : /// Non-const version.
     301             : static MachineBasicBlock::iterator
     302      789139 : nextIfDebug(MachineBasicBlock::iterator I,
     303             :             MachineBasicBlock::const_iterator End) {
     304     1578278 :   return nextIfDebug(MachineBasicBlock::const_iterator(I), End)
     305      789139 :       .getNonConstIterator();
     306             : }
     307             : 
     308             : /// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
     309      103161 : ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
     310             :   // Select the scheduler, or set the default.
     311      103161 :   MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
     312      103161 :   if (Ctor != useDefaultMachineSched)
     313          15 :     return Ctor(this);
     314             : 
     315             :   // Get the default scheduler set by the target for this function.
     316      103146 :   ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
     317      103146 :   if (Scheduler)
     318             :     return Scheduler;
     319             : 
     320             :   // Default to GenericScheduler.
     321        5865 :   return createGenericSchedLive(this);
     322             : }
     323             : 
     324             : /// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
     325             : /// the caller. We don't have a command line option to override the postRA
     326             : /// scheduler. The Target must configure it.
     327       11364 : ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
     328             :   // Get the postRA scheduler set by the target for this function.
     329       11364 :   ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
     330       11364 :   if (Scheduler)
     331             :     return Scheduler;
     332             : 
     333             :   // Default to GenericScheduler.
     334          65 :   return createGenericSchedPostRA(this);
     335             : }
     336             : 
     337             : /// Top-level MachineScheduler pass driver.
     338             : ///
     339             : /// Visit blocks in function order. Divide each block into scheduling regions
     340             : /// and visit them bottom-up. Visiting regions bottom-up is not required, but is
     341             : /// consistent with the DAG builder, which traverses the interior of the
     342             : /// scheduling regions bottom-up.
     343             : ///
     344             : /// This design avoids exposing scheduling boundaries to the DAG builder,
     345             : /// simplifying the DAG builder's support for "special" target instructions.
     346             : /// At the same time the design allows target schedulers to operate across
     347             : /// scheduling boundaries, for example to bundle the boudary instructions
     348             : /// without reordering them. This creates complexity, because the target
     349             : /// scheduler must update the RegionBegin and RegionEnd positions cached by
     350             : /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
     351             : /// design would be to split blocks at scheduling boundaries, but LLVM has a
     352             : /// general bias against block splitting purely for implementation simplicity.
     353      135783 : bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
     354      135783 :   if (skipFunction(*mf.getFunction()))
     355             :     return false;
     356             : 
     357      135726 :   if (EnableMachineSched.getNumOccurrences()) {
     358         459 :     if (!EnableMachineSched)
     359             :       return false;
     360      135267 :   } else if (!mf.getSubtarget().enableMachineScheduler())
     361             :     return false;
     362             : 
     363             :   DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
     364             : 
     365             :   // Initialize the context of the pass.
     366      103161 :   MF = &mf;
     367      103161 :   MLI = &getAnalysis<MachineLoopInfo>();
     368      103161 :   MDT = &getAnalysis<MachineDominatorTree>();
     369      103161 :   PassConfig = &getAnalysis<TargetPassConfig>();
     370      206322 :   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
     371             : 
     372      103161 :   LIS = &getAnalysis<LiveIntervals>();
     373             : 
     374      103161 :   if (VerifyScheduling) {
     375             :     DEBUG(LIS->dump());
     376           1 :     MF->verify(this, "Before machine scheduling.");
     377             :   }
     378      103161 :   RegClassInfo->runOnMachineFunction(*MF);
     379             : 
     380             :   // Instantiate the selected scheduler for this target, function, and
     381             :   // optimization level.
     382      206322 :   std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
     383      206322 :   scheduleRegions(*Scheduler, false);
     384             : 
     385             :   DEBUG(LIS->dump());
     386      103161 :   if (VerifyScheduling)
     387           1 :     MF->verify(this, "After machine scheduling.");
     388      103161 :   return true;
     389             : }
     390             : 
     391       15572 : bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
     392       15572 :   if (skipFunction(*mf.getFunction()))
     393             :     return false;
     394             : 
     395       15571 :   if (EnablePostRAMachineSched.getNumOccurrences()) {
     396          45 :     if (!EnablePostRAMachineSched)
     397             :       return false;
     398       15526 :   } else if (!mf.getSubtarget().enablePostRAScheduler()) {
     399             :     DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
     400             :     return false;
     401             :   }
     402             :   DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
     403             : 
     404             :   // Initialize the context of the pass.
     405       11364 :   MF = &mf;
     406       11364 :   MLI = &getAnalysis<MachineLoopInfo>();
     407       11364 :   PassConfig = &getAnalysis<TargetPassConfig>();
     408             : 
     409       11364 :   if (VerifyScheduling)
     410           0 :     MF->verify(this, "Before post machine scheduling.");
     411             : 
     412             :   // Instantiate the selected scheduler for this target, function, and
     413             :   // optimization level.
     414       22728 :   std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
     415       22728 :   scheduleRegions(*Scheduler, true);
     416             : 
     417       11364 :   if (VerifyScheduling)
     418           0 :     MF->verify(this, "After post machine scheduling.");
     419       11364 :   return true;
     420             : }
     421             : 
     422             : /// Return true of the given instruction should not be included in a scheduling
     423             : /// region.
     424             : ///
     425             : /// MachineScheduler does not currently support scheduling across calls. To
     426             : /// handle calls, the DAG builder needs to be modified to create register
     427             : /// anti/output dependencies on the registers clobbered by the call's regmask
     428             : /// operand. In PreRA scheduling, the stack pointer adjustment already prevents
     429             : /// scheduling across calls. In PostRA scheduling, we need the isCall to enforce
     430             : /// the boundary, but there would be no benefit to postRA scheduling across
     431             : /// calls this late anyway.
     432     3277251 : static bool isSchedBoundary(MachineBasicBlock::iterator MI,
     433             :                             MachineBasicBlock *MBB,
     434             :                             MachineFunction *MF,
     435             :                             const TargetInstrInfo *TII) {
     436     9652916 :   return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
     437             : }
     438             : 
     439             : /// A region of an MBB for scheduling.
     440             : namespace {
     441             : struct SchedRegion {
     442             :   /// RegionBegin is the first instruction in the scheduling region, and
     443             :   /// RegionEnd is either MBB->end() or the scheduling boundary after the
     444             :   /// last instruction in the scheduling region. These iterators cannot refer
     445             :   /// to instructions outside of the identified scheduling region because
     446             :   /// those may be reordered before scheduling this region.
     447             :   MachineBasicBlock::iterator RegionBegin;
     448             :   MachineBasicBlock::iterator RegionEnd;
     449             :   unsigned NumRegionInstrs;
     450             : 
     451             :   SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E,
     452     1040622 :               unsigned N) :
     453     1040622 :     RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
     454             : };
     455             : } // end anonymous namespace
     456             : 
     457             : using MBBRegionsVector = SmallVector<SchedRegion, 16>;
     458             : 
     459             : static void
     460      252590 : getSchedRegions(MachineBasicBlock *MBB,
     461             :                 MBBRegionsVector &Regions,
     462             :                 bool RegionsTopDown) {
     463      252590 :   MachineFunction *MF = MBB->getParent();
     464      252590 :   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
     465             : 
     466      252590 :   MachineBasicBlock::iterator I = nullptr;
     467     1293212 :   for(MachineBasicBlock::iterator RegionEnd = MBB->end();
     468     3627046 :       RegionEnd != MBB->begin(); RegionEnd = I) {
     469             : 
     470             :     // Avoid decrementing RegionEnd for blocks with no terminator.
     471     2332353 :     if (RegionEnd != MBB->end() ||
     472     1793949 :         isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
     473             :       --RegionEnd;
     474             :     }
     475             : 
     476             :     // The next region starts above the previous region. Look backward in the
     477             :     // instruction stream until we find the nearest boundary.
     478     1040622 :     unsigned NumRegionInstrs = 0;
     479     1040622 :     I = RegionEnd;
     480     6554502 :     for (;I != MBB->begin(); --I) {
     481     6052284 :       MachineInstr &MI = *std::prev(I);
     482     3026142 :       if (isSchedBoundary(&MI, &*MBB, MF, TII))
     483             :         break;
     484     2236629 :       if (!MI.isDebugValue())
     485             :         // MBB::size() uses instr_iterator to count. Here we need a bundle to
     486             :         // count as a single instruction.
     487     2218893 :         ++NumRegionInstrs;
     488             :     }
     489             : 
     490     2081244 :     Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
     491             :   }
     492             : 
     493      252590 :   if (RegionsTopDown)
     494        9753 :     std::reverse(Regions.begin(), Regions.end());
     495      252590 : }
     496             : 
     497             : /// Main driver for both MachineScheduler and PostMachineScheduler.
     498      114525 : void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
     499             :                                            bool FixKillFlags) {
     500             :   // Visit all machine basic blocks.
     501             :   //
     502             :   // TODO: Visit blocks in global postorder or postorder within the bottom-up
     503             :   // loop tree. Then we can optionally compute global RegPressure.
     504      343575 :   for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
     505      367115 :        MBB != MBBEnd; ++MBB) {
     506             : 
     507      505180 :     Scheduler.startBlock(&*MBB);
     508             : 
     509             : #ifndef NDEBUG
     510             :     if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
     511             :       continue;
     512             :     if (SchedOnlyBlock.getNumOccurrences()
     513             :         && (int)SchedOnlyBlock != MBB->getNumber())
     514             :       continue;
     515             : #endif
     516             : 
     517             :     // Break the block into scheduling regions [I, RegionEnd). RegionEnd
     518             :     // points to the scheduling boundary at the bottom of the region. The DAG
     519             :     // does not include RegionEnd, but the region does (i.e. the next
     520             :     // RegionEnd is above the previous RegionBegin). If the current block has
     521             :     // no terminator then RegionEnd == MBB->end() for the bottom region.
     522             :     //
     523             :     // All the regions of MBB are first found and stored in MBBRegions, which
     524             :     // will be processed (MBB) top-down if initialized with true.
     525             :     //
     526             :     // The Scheduler may insert instructions during either schedule() or
     527             :     // exitRegion(), even for empty regions. So the local iterators 'I' and
     528             :     // 'RegionEnd' are invalid across these calls. Instructions must not be
     529             :     // added to other regions than the current one without updating MBBRegions.
     530             : 
     531      505180 :     MBBRegionsVector MBBRegions;
     532      505180 :     getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
     533     1293212 :     for (MBBRegionsVector::iterator R = MBBRegions.begin();
     534     1293212 :          R != MBBRegions.end(); ++R) {
     535     1040622 :       MachineBasicBlock::iterator I = R->RegionBegin;
     536     1040622 :       MachineBasicBlock::iterator RegionEnd = R->RegionEnd;
     537     1040622 :       unsigned NumRegionInstrs = R->NumRegionInstrs;
     538             : 
     539             :       // Notify the scheduler of the region, even if we may skip scheduling
     540             :       // it. Perhaps it still needs to be bundled.
     541     2081244 :       Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
     542             : 
     543             :       // Skip empty scheduling regions (0 or 1 schedulable instructions).
     544     2629539 :       if (I == RegionEnd || I == std::prev(RegionEnd)) {
     545             :         // Close the current region. Bundle the terminator if needed.
     546             :         // This invalidates 'RegionEnd' and 'I'.
     547      685971 :         Scheduler.exitRegion();
     548      685971 :         continue;
     549             :       }
     550             :       DEBUG(dbgs() << "********** MI Scheduling **********\n");
     551             :       DEBUG(dbgs() << MF->getName()
     552             :             << ":BB#" << MBB->getNumber() << " " << MBB->getName()
     553             :             << "\n  From: " << *I << "    To: ";
     554             :             if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
     555             :             else dbgs() << "End";
     556             :             dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
     557      354651 :       if (DumpCriticalPathLength) {
     558           0 :         errs() << MF->getName();
     559           0 :         errs() << ":BB# " << MBB->getNumber();
     560           0 :         errs() << " " << MBB->getName() << " \n";
     561             :       }
     562             : 
     563             :       // Schedule a region: possibly reorder instructions.
     564             :       // This invalidates the original region iterators.
     565      354651 :       Scheduler.schedule();
     566             : 
     567             :       // Close the current region.
     568      354651 :       Scheduler.exitRegion();
     569             :     }
     570      252590 :     Scheduler.finishBlock();
     571             :     // FIXME: Ideally, no further passes should rely on kill flags. However,
     572             :     // thumb2 size reduction is currently an exception, so the PostMIScheduler
     573             :     // needs to do this.
     574      252590 :     if (FixKillFlags)
     575       13161 :       Scheduler.fixupKills(*MBB);
     576             :   }
     577      114525 :   Scheduler.finalizeSchedule();
     578      114525 : }
     579             : 
     580           0 : void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
     581             :   // unimplemented
     582           0 : }
     583             : 
     584             : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
     585             : LLVM_DUMP_METHOD void ReadyQueue::dump() const {
     586             :   dbgs() << "Queue " << Name << ": ";
     587             :   for (const SUnit *SU : Queue)
     588             :     dbgs() << SU->NodeNum << " ";
     589             :   dbgs() << "\n";
     590             : }
     591             : #endif
     592             : 
     593             : //===----------------------------------------------------------------------===//
     594             : // ScheduleDAGMI - Basic machine instruction scheduling. This is
     595             : // independent of PreRA/PostRA scheduling and involves no extra book-keeping for
     596             : // virtual registers.
     597             : // ===----------------------------------------------------------------------===/
     598             : 
     599             : // Provide a vtable anchor.
     600             : ScheduleDAGMI::~ScheduleDAGMI() = default;
     601             : 
     602       26507 : bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
     603       26507 :   return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
     604             : }
     605             : 
     606       82974 : bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
     607       82974 :   if (SuccSU != &ExitSU) {
     608             :     // Do not use WillCreateCycle, it assumes SD scheduling.
     609             :     // If Pred is reachable from Succ, then the edge creates a cycle.
     610      141140 :     if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
     611             :       return false;
     612      129354 :     Topo.AddPred(SuccSU, PredDep.getSUnit());
     613             :   }
     614       77081 :   SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
     615             :   // Return true regardless of whether a new edge needed to be inserted.
     616       77081 :   return true;
     617             : }
     618             : 
     619             : /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
     620             : /// NumPredsLeft reaches zero, release the successor node.
     621             : ///
     622             : /// FIXME: Adjust SuccSU height based on MinLatency.
     623      295580 : void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
     624      295580 :   SUnit *SuccSU = SuccEdge->getSUnit();
     625             : 
     626      295580 :   if (SuccEdge->isWeak()) {
     627        5264 :     --SuccSU->WeakPredsLeft;
     628        5264 :     if (SuccEdge->isCluster())
     629        5264 :       NextClusterSucc = SuccSU;
     630             :     return;
     631             :   }
     632             : #ifndef NDEBUG
     633             :   if (SuccSU->NumPredsLeft == 0) {
     634             :     dbgs() << "*** Scheduling failed! ***\n";
     635             :     SuccSU->dump(this);
     636             :     dbgs() << " has been released too many times!\n";
     637             :     llvm_unreachable(nullptr);
     638             :   }
     639             : #endif
     640             :   // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
     641             :   // CurrCycle may have advanced since then.
     642      290316 :   if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
     643      200854 :     SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
     644             : 
     645      290316 :   --SuccSU->NumPredsLeft;
     646      290316 :   if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
     647      185276 :     SchedImpl->releaseTopNode(SuccSU);
     648             : }
     649             : 
     650             : /// releaseSuccessors - Call releaseSucc on each of SU's successors.
     651      461592 : void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
     652     1680356 :   for (SDep &Succ : SU->Succs)
     653      295580 :     releaseSucc(SU, &Succ);
     654      461592 : }
     655             : 
     656             : /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
     657             : /// NumSuccsLeft reaches zero, release the predecessor node.
     658             : ///
     659             : /// FIXME: Adjust PredSU height based on MinLatency.
     660     3641937 : void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
     661     3641937 :   SUnit *PredSU = PredEdge->getSUnit();
     662             : 
     663     3641937 :   if (PredEdge->isWeak()) {
     664       31381 :     --PredSU->WeakSuccsLeft;
     665       28511 :     if (PredEdge->isCluster())
     666       28511 :       NextClusterPred = PredSU;
     667             :     return;
     668             :   }
     669             : #ifndef NDEBUG
     670             :   if (PredSU->NumSuccsLeft == 0) {
     671             :     dbgs() << "*** Scheduling failed! ***\n";
     672             :     PredSU->dump(this);
     673             :     dbgs() << " has been released too many times!\n";
     674             :     llvm_unreachable(nullptr);
     675             :   }
     676             : #endif
     677             :   // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
     678             :   // CurrCycle may have advanced since then.
     679     3610556 :   if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
     680     2373229 :     PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
     681             : 
     682     3610556 :   --PredSU->NumSuccsLeft;
     683     3610556 :   if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
     684     3577912 :     SchedImpl->releaseBottomNode(PredSU);
     685             : }
     686             : 
     687             : /// releasePredecessors - Call releasePred on each of SU's predecessors.
     688     2390786 : void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
     689    10814295 :   for (SDep &Pred : SU->Preds)
     690     3641937 :     releasePred(SU, &Pred);
     691     2390786 : }
     692             : 
     693      268121 : void ScheduleDAGMI::startBlock(MachineBasicBlock *bb) {
     694      268121 :   ScheduleDAGInstrs::startBlock(bb);
     695      536242 :   SchedImpl->enterMBB(bb);
     696      268121 : }
     697             : 
     698      268714 : void ScheduleDAGMI::finishBlock() {
     699      537428 :   SchedImpl->leaveMBB();
     700      268714 :   ScheduleDAGInstrs::finishBlock();
     701      268714 : }
     702             : 
     703             : /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
     704             : /// crossing a scheduling boundary. [begin, end) includes all instructions in
     705             : /// the region, including the boundary itself and single-instruction regions
     706             : /// that don't get scheduled.
     707     1056612 : void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
     708             :                                      MachineBasicBlock::iterator begin,
     709             :                                      MachineBasicBlock::iterator end,
     710             :                                      unsigned regioninstrs)
     711             : {
     712     1056612 :   ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
     713             : 
     714     2113224 :   SchedImpl->initPolicy(begin, end, regioninstrs);
     715     1056612 : }
     716             : 
     717             : /// This is normally called from the main scheduler loop but may also be invoked
     718             : /// by the scheduling strategy to perform additional code motion.
     719      195279 : void ScheduleDAGMI::moveInstruction(
     720             :   MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
     721             :   // Advance RegionBegin if the first instruction moves down.
     722      390558 :   if (&*RegionBegin == MI)
     723       10545 :     ++RegionBegin;
     724             : 
     725             :   // Update the instruction stream.
     726      390558 :   BB->splice(InsertPos, BB, MI);
     727             : 
     728             :   // Update LiveIntervals
     729      195279 :   if (LIS)
     730      191385 :     LIS->handleMove(*MI, /*UpdateFlags=*/true);
     731             : 
     732             :   // Recede RegionBegin if an instruction moves above the first.
     733      390558 :   if (RegionBegin == InsertPos)
     734        1124 :     RegionBegin = MI;
     735      195279 : }
     736             : 
     737     2142556 : bool ScheduleDAGMI::checkSchedLimit() {
     738             : #ifndef NDEBUG
     739             :   if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
     740             :     CurrentTop = CurrentBottom;
     741             :     return false;
     742             :   }
     743             :   ++NumInstrsScheduled;
     744             : #endif
     745     2142556 :   return true;
     746             : }
     747             : 
     748             : /// Per-region scheduling driver, called back from
     749             : /// MachineScheduler::runOnMachineFunction. This is a simplified driver that
     750             : /// does not consider liveness or register pressure. It is useful for PostRA
     751             : /// scheduling and potentially other custom schedulers.
     752        7652 : void ScheduleDAGMI::schedule() {
     753             :   DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
     754             :   DEBUG(SchedImpl->dumpPolicy());
     755             : 
     756             :   // Build the DAG.
     757        7652 :   buildSchedGraph(AA);
     758             : 
     759        7652 :   Topo.InitDAGTopologicalSorting();
     760             : 
     761        7652 :   postprocessDAG();
     762             : 
     763       30608 :   SmallVector<SUnit*, 8> TopRoots, BotRoots;
     764        7652 :   findRootsAndBiasEdges(TopRoots, BotRoots);
     765             : 
     766             :   // Initialize the strategy before modifying the DAG.
     767             :   // This may initialize a DFSResult to be used for queue priority.
     768       15304 :   SchedImpl->initialize(this);
     769             : 
     770             :   DEBUG(
     771             :     if (EntrySU.getInstr() != nullptr)
     772             :       EntrySU.dumpAll(this);
     773             :     for (const SUnit &SU : SUnits)
     774             :       SU.dumpAll(this);
     775             :     if (ExitSU.getInstr() != nullptr)
     776             :       ExitSU.dumpAll(this);
     777             :   );
     778        7652 :   if (ViewMISchedDAGs) viewGraph();
     779             : 
     780             :   // Initialize ready queues now that the DAG and priority data are finalized.
     781       15304 :   initQueues(TopRoots, BotRoots);
     782             : 
     783        7652 :   bool IsTopNode = false;
     784             :   while (true) {
     785             :     DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
     786       90750 :     SUnit *SU = SchedImpl->pickNode(IsTopNode);
     787       45375 :     if (!SU) break;
     788             : 
     789             :     assert(!SU->isScheduled && "Node already scheduled");
     790       37723 :     if (!checkSchedLimit())
     791             :       break;
     792             : 
     793       37723 :     MachineInstr *MI = SU->getInstr();
     794       37723 :     if (IsTopNode) {
     795             :       assert(SU->isTopReady() && "node still has unscheduled dependencies");
     796       75446 :       if (&*CurrentTop == MI)
     797      101487 :         CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
     798             :       else
     799        3894 :         moveInstruction(MI, CurrentTop);
     800             :     } else {
     801             :       assert(SU->isBottomReady() && "node still has unscheduled dependencies");
     802             :       MachineBasicBlock::iterator priorII =
     803           0 :         priorNonDebug(CurrentBottom, CurrentTop);
     804           0 :       if (&*priorII == MI)
     805           0 :         CurrentBottom = priorII;
     806             :       else {
     807           0 :         if (&*CurrentTop == MI)
     808           0 :           CurrentTop = nextIfDebug(++CurrentTop, priorII);
     809           0 :         moveInstruction(MI, CurrentBottom);
     810           0 :         CurrentBottom = MI;
     811             :       }
     812             :     }
     813             :     // Notify the scheduling strategy before updating the DAG.
     814             :     // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
     815             :     // runs, it can then use the accurate ReadyCycle time to determine whether
     816             :     // newly released nodes can move to the readyQ.
     817       75446 :     SchedImpl->schedNode(SU, IsTopNode);
     818             : 
     819       37723 :     updateQueues(SU, IsTopNode);
     820       37723 :   }
     821             :   assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
     822             : 
     823        7652 :   placeDebugValues();
     824             : 
     825             :   DEBUG({
     826             :       unsigned BBNum = begin()->getParent()->getNumber();
     827             :       dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
     828             :       dumpSchedule();
     829             :       dbgs() << '\n';
     830             :     });
     831        7652 : }
     832             : 
     833             : /// Apply each ScheduleDAGMutation step in order.
     834      353326 : void ScheduleDAGMI::postprocessDAG() {
     835     2137568 :   for (auto &m : Mutations)
     836      724264 :     m->apply(this);
     837      353326 : }
     838             : 
     839      354917 : void ScheduleDAGMI::
     840             : findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
     841             :                       SmallVectorImpl<SUnit*> &BotRoots) {
     842     3564298 :   for (SUnit &SU : SUnits) {
     843             :     assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
     844             : 
     845             :     // Order predecessors so DFSResult follows the critical path.
     846     2144630 :     SU.biasCriticalPath();
     847             : 
     848             :     // A SUnit is ready to top schedule if it has no predecessors.
     849     2144630 :     if (!SU.NumPredsLeft)
     850      956930 :       TopRoots.push_back(&SU);
     851             :     // A SUnit is ready to bottom schedule if it has no successors.
     852     2144630 :     if (!SU.NumSuccsLeft)
     853      304342 :       BotRoots.push_back(&SU);
     854             :   }
     855      354917 :   ExitSU.biasCriticalPath();
     856      354917 : }
     857             : 
     858             : /// Identify DAG roots and setup scheduler queues.
     859      354911 : void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
     860             :                                ArrayRef<SUnit*> BotRoots) {
     861      354911 :   NextClusterSucc = nullptr;
     862      354911 :   NextClusterPred = nullptr;
     863             : 
     864             :   // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
     865             :   //
     866             :   // Nodes with unreleased weak edges can still be roots.
     867             :   // Release top roots in forward order.
     868     1666736 :   for (SUnit *SU : TopRoots)
     869     1913828 :     SchedImpl->releaseTopNode(SU);
     870             : 
     871             :   // Release bottom roots in reverse order so the higher priority nodes appear
     872             :   // first. This is more natural and slightly more efficient.
     873             :   for (SmallVectorImpl<SUnit*>::const_reverse_iterator
     874     1014158 :          I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
     875      913008 :     SchedImpl->releaseBottomNode(*I);
     876             :   }
     877             : 
     878      354911 :   releaseSuccessors(&EntrySU);
     879      354911 :   releasePredecessors(&ExitSU);
     880             : 
     881      709822 :   SchedImpl->registerRoots();
     882             : 
     883             :   // Advance past initial DebugValues.
     884      709822 :   CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
     885      354911 :   CurrentBottom = RegionEnd;
     886      354911 : }
     887             : 
     888             : /// Update scheduler queues after scheduling an instruction.
     889     2142556 : void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
     890             :   // Release dependent instructions for scheduling.
     891     2142556 :   if (IsTopNode)
     892      106681 :     releaseSuccessors(SU);
     893             :   else
     894     2035875 :     releasePredecessors(SU);
     895             : 
     896     2142556 :   SU->isScheduled = true;
     897     2142556 : }
     898             : 
     899             : /// Reinsert any remaining debug_values, just like the PostRA scheduler.
     900      354952 : void ScheduleDAGMI::placeDebugValues() {
     901             :   // If first instruction was a DBG_VALUE then put it back.
     902      354952 :   if (FirstDbgValue) {
     903        4564 :     BB->splice(RegionBegin, BB, FirstDbgValue);
     904        4564 :     RegionBegin = FirstDbgValue;
     905             :   }
     906             : 
     907             :   for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
     908     1435166 :          DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
     909       15358 :     std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
     910       15358 :     MachineInstr *DbgValue = P.first;
     911       30716 :     MachineBasicBlock::iterator OrigPrevMI = P.second;
     912       30716 :     if (&*RegionBegin == DbgValue)
     913          55 :       ++RegionBegin;
     914       46074 :     BB->splice(++OrigPrevMI, BB, DbgValue);
     915       30716 :     if (OrigPrevMI == std::prev(RegionEnd))
     916         643 :       RegionEnd = DbgValue;
     917             :   }
     918      709904 :   DbgValues.clear();
     919      354952 :   FirstDbgValue = nullptr;
     920      354952 : }
     921             : 
     922             : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
     923             : LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
     924             :   for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
     925             :     if (SUnit *SU = getSUnit(&(*MI)))
     926             :       SU->dump(this);
     927             :     else
     928             :       dbgs() << "Missing SUnit\n";
     929             :   }
     930             : }
     931             : #endif
     932             : 
     933             : //===----------------------------------------------------------------------===//
     934             : // ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
     935             : // preservation.
     936             : //===----------------------------------------------------------------------===//
     937             : 
     938      912937 : ScheduleDAGMILive::~ScheduleDAGMILive() {
     939      103161 :   delete DFSResult;
     940      190810 : }
     941             : 
     942     1235727 : void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
     943     1235727 :   const MachineInstr &MI = *SU.getInstr();
     944     7234467 :   for (const MachineOperand &MO : MI.operands()) {
     945     5998740 :     if (!MO.isReg())
     946     6880038 :       continue;
     947     1006336 :     if (!MO.readsReg())
     948     1006336 :       continue;
     949     2870183 :     if (TrackLaneMasks && !MO.isUse())
     950       92009 :       continue;
     951             : 
     952     2686165 :     unsigned Reg = MO.getReg();
     953     6718960 :     if (!TargetRegisterInfo::isVirtualRegister(Reg))
     954     1346630 :       continue;
     955             : 
     956             :     // Ignore re-defs.
     957     1339535 :     if (TrackLaneMasks) {
     958      284645 :       bool FoundDef = false;
     959     1732029 :       for (const MachineOperand &MO2 : MI.operands()) {
     960     2418188 :         if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {
     961             :           FoundDef = true;
     962             :           break;
     963             :         }
     964             :       }
     965      284645 :       if (FoundDef)
     966        6603 :         continue;
     967             :     }
     968             : 
     969             :     // Record this local VReg use.
     970     2665864 :     VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
     971    41033868 :     for (; UI != VRegUses.end(); ++UI) {
     972    12375428 :       if (UI->SU == &SU)
     973             :         break;
     974             :     }
     975     2696268 :     if (UI == VRegUses.end())
     976     2605056 :       VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU));
     977             :   }
     978     1235727 : }
     979             : 
     980             : /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
     981             : /// crossing a scheduling boundary. [begin, end) includes all instructions in
     982             : /// the region, including the boundary itself and single-instruction regions
     983             : /// that don't get scheduled.
     984     1038143 : void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
     985             :                                 MachineBasicBlock::iterator begin,
     986             :                                 MachineBasicBlock::iterator end,
     987             :                                 unsigned regioninstrs)
     988             : {
     989             :   // ScheduleDAGMI initializes SchedImpl's per-region policy.
     990     1038143 :   ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
     991             : 
     992             :   // For convenience remember the end of the liveness region.
     993     3100591 :   LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
     994             : 
     995     1038143 :   SUPressureDiffs.clear();
     996             : 
     997     2076286 :   ShouldTrackPressure = SchedImpl->shouldTrackPressure();
     998     2076286 :   ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
     999             : 
    1000             :   assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&
    1001             :          "ShouldTrackLaneMasks requires ShouldTrackPressure");
    1002     1038143 : }
    1003             : 
    1004             : // Setup the register pressure trackers for the top scheduled top and bottom
    1005             : // scheduled regions.
    1006       92177 : void ScheduleDAGMILive::initRegPressure() {
    1007      184354 :   VRegUses.clear();
    1008      184354 :   VRegUses.setUniverse(MRI.getNumVirtRegs());
    1009     1604435 :   for (SUnit &SU : SUnits)
    1010     1235727 :     collectVRegUses(SU);
    1011             : 
    1012      276531 :   TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,
    1013       92177 :                     ShouldTrackLaneMasks, false);
    1014      276531 :   BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
    1015       92177 :                     ShouldTrackLaneMasks, false);
    1016             : 
    1017             :   // Close the RPTracker to finalize live ins.
    1018       92177 :   RPTracker.closeRegion();
    1019             : 
    1020             :   DEBUG(RPTracker.dump());
    1021             : 
    1022             :   // Initialize the live ins and live outs.
    1023      184354 :   TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
    1024      184354 :   BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
    1025             : 
    1026             :   // Close one end of the tracker so we can call
    1027             :   // getMaxUpward/DownwardPressureDelta before advancing across any
    1028             :   // instructions. This converts currently live regs into live ins/outs.
    1029       92177 :   TopRPTracker.closeTop();
    1030       92177 :   BotRPTracker.closeBottom();
    1031             : 
    1032       92177 :   BotRPTracker.initLiveThru(RPTracker);
    1033      184354 :   if (!BotRPTracker.getLiveThru().empty()) {
    1034      184354 :     TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
    1035             :     DEBUG(dbgs() << "Live Thru: ";
    1036             :           dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
    1037             :   };
    1038             : 
    1039             :   // For each live out vreg reduce the pressure change associated with other
    1040             :   // uses of the same vreg below the live-out reaching def.
    1041      184354 :   updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);
    1042             : 
    1043             :   // Account for liveness generated by the region boundary.
    1044      184354 :   if (LiveRegionEnd != RegionEnd) {
    1045      173250 :     SmallVector<RegisterMaskPair, 8> LiveUses;
    1046       86625 :     BotRPTracker.recede(&LiveUses);
    1047       86625 :     updatePressureDiffs(LiveUses);
    1048             :   }
    1049             : 
    1050             :   DEBUG(
    1051             :     dbgs() << "Top Pressure:\n";
    1052             :     dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
    1053             :     dbgs() << "Bottom Pressure:\n";
    1054             :     dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI);
    1055             :   );
    1056             : 
    1057             :   assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");
    1058             : 
    1059             :   // Cache the list of excess pressure sets in this region. This will also track
    1060             :   // the max pressure in the scheduled code for these sets.
    1061      184354 :   RegionCriticalPSets.clear();
    1062             :   const std::vector<unsigned> &RegionPressure =
    1063       92177 :     RPTracker.getPressure().MaxSetPressure;
    1064     2697853 :   for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
    1065     2513499 :     unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
    1066     5026998 :     if (RegionPressure[i] > Limit) {
    1067             :       DEBUG(dbgs() << TRI->getRegPressureSetName(i)
    1068             :             << " Limit " << Limit
    1069             :             << " Actual " << RegionPressure[i] << "\n");
    1070        7161 :       RegionCriticalPSets.push_back(PressureChange(i));
    1071             :     }
    1072             :   }
    1073             :   DEBUG(dbgs() << "Excess PSets: ";
    1074             :         for (const PressureChange &RCPS : RegionCriticalPSets)
    1075             :           dbgs() << TRI->getRegPressureSetName(
    1076             :             RCPS.getPSet()) << " ";
    1077             :         dbgs() << "\n");
    1078       92177 : }
    1079             : 
    1080     1235727 : void ScheduleDAGMILive::
    1081             : updateScheduledPressure(const SUnit *SU,
    1082             :                         const std::vector<unsigned> &NewMaxPressure) {
    1083     2471454 :   const PressureDiff &PDiff = getPressureDiff(SU);
    1084     2471454 :   unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
    1085     6041482 :   for (const PressureChange &PC : PDiff) {
    1086     3540280 :     if (!PC.isValid())
    1087             :       break;
    1088     4668602 :     unsigned ID = PC.getPSet();
    1089     2890079 :     while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
    1090       48144 :       ++CritIdx;
    1091     2697503 :     if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
    1092      213206 :       if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
    1093      106603 :           && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
    1094       18691 :         RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
    1095             :     }
    1096     2334301 :     unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
    1097     2334301 :     if (NewMaxPressure[ID] >= Limit - 2) {
    1098             :       DEBUG(dbgs() << "  " << TRI->getRegPressureSetName(ID) << ": "
    1099             :             << NewMaxPressure[ID]
    1100             :             << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ") << Limit
    1101             :             << "(+ " << BotRPTracker.getLiveThru()[ID] << " livethru)\n");
    1102             :     }
    1103             :   }
    1104     1235727 : }
    1105             : 
    1106             : /// Update the PressureDiff array for liveness after scheduling this
    1107             : /// instruction.
    1108     1356227 : void ScheduleDAGMILive::updatePressureDiffs(
    1109             :     ArrayRef<RegisterMaskPair> LiveUses) {
    1110     3866368 :   for (const RegisterMaskPair &P : LiveUses) {
    1111     1153914 :     unsigned Reg = P.RegUnit;
    1112             :     /// FIXME: Currently assuming single-use physregs.
    1113     2307828 :     if (!TRI->isVirtualRegister(Reg))
    1114      167037 :       continue;
    1115             : 
    1116      986877 :     if (ShouldTrackLaneMasks) {
    1117             :       // If the register has just become live then other uses won't change
    1118             :       // this fact anymore => decrement pressure.
    1119             :       // If the register has just become dead then other uses make it come
    1120             :       // back to life => increment pressure.
    1121      594254 :       bool Decrement = P.LaneMask.any();
    1122             : 
    1123             :       for (const VReg2SUnit &V2SU
    1124     2376019 :            : make_range(VRegUses.find(Reg), VRegUses.end())) {
    1125      445192 :         SUnit &SU = *V2SU.SU;
    1126      445192 :         if (SU.isScheduled || &SU == &ExitSU)
    1127      185822 :           continue;
    1128             : 
    1129      518740 :         PressureDiff &PDiff = getPressureDiff(&SU);
    1130      259370 :         PDiff.addPressureChange(Reg, Decrement, &MRI);
    1131             :         DEBUG(
    1132             :           dbgs() << "  UpdateRegP: SU(" << SU.NodeNum << ") "
    1133             :                  << PrintReg(Reg, TRI) << ':' << PrintLaneMask(P.LaneMask)
    1134             :                  << ' ' << *SU.getInstr();
    1135             :           dbgs() << "              to ";
    1136             :           PDiff.dump(*TRI);
    1137             :         );
    1138             :       }
    1139             :     } else {
    1140             :       assert(P.LaneMask.any());
    1141             :       DEBUG(dbgs() << "  LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n");
    1142             :       // This may be called before CurrentBottom has been initialized. However,
    1143             :       // BotRPTracker must have a valid position. We want the value live into the
    1144             :       // instruction or live out of the block, so ask for the previous
    1145             :       // instruction's live-out.
    1146      689750 :       const LiveInterval &LI = LIS->getInterval(Reg);
    1147             :       VNInfo *VNI;
    1148             :       MachineBasicBlock::const_iterator I =
    1149     2759000 :         nextIfDebug(BotRPTracker.getPos(), BB->end());
    1150     2759000 :       if (I == BB->end())
    1151       72888 :         VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
    1152             :       else {
    1153     1959918 :         LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
    1154      653306 :         VNI = LRQ.valueIn();
    1155             :       }
    1156             :       // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
    1157             :       assert(VNI && "No live value at use.");
    1158             :       for (const VReg2SUnit &V2SU
    1159     6525614 :            : make_range(VRegUses.find(Reg), VRegUses.end())) {
    1160     1538432 :         SUnit *SU = V2SU.SU;
    1161             :         // If this use comes before the reaching def, it cannot be a last use,
    1162             :         // so decrease its pressure change.
    1163     2820134 :         if (!SU->isScheduled && SU != &ExitSU) {
    1164             :           LiveQueryResult LRQ =
    1165     2563404 :               LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
    1166     1281702 :           if (LRQ.valueIn() == VNI) {
    1167     2048316 :             PressureDiff &PDiff = getPressureDiff(SU);
    1168     1024158 :             PDiff.addPressureChange(Reg, true, &MRI);
    1169             :             DEBUG(
    1170             :               dbgs() << "  UpdateRegP: SU(" << SU->NodeNum << ") "
    1171             :                      << *SU->getInstr();
    1172             :               dbgs() << "              to ";
    1173             :               PDiff.dump(*TRI);
    1174             :             );
    1175             :           }
    1176             :         }
    1177             :       }
    1178             :     }
    1179             :   }
    1180     1356227 : }
    1181             : 
    1182             : /// schedule - Called back from MachineScheduler::runOnMachineFunction
    1183             : /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
    1184             : /// only includes instructions that have DAG nodes, not scheduling boundaries.
    1185             : ///
    1186             : /// This is a skeletal driver, with all the functionality pushed into helpers,
    1187             : /// so that it can be easily extended by experimental schedulers. Generally,
    1188             : /// implementing MachineSchedStrategy should be sufficient to implement a new
    1189             : /// scheduling algorithm. However, if a scheduler further subclasses
    1190             : /// ScheduleDAGMILive then it will want to override this virtual method in order
    1191             : /// to update any specialized state.
    1192      345674 : void ScheduleDAGMILive::schedule() {
    1193             :   DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
    1194             :   DEBUG(SchedImpl->dumpPolicy());
    1195      345674 :   buildDAGWithRegPressure();
    1196             : 
    1197      345674 :   Topo.InitDAGTopologicalSorting();
    1198             : 
    1199      345674 :   postprocessDAG();
    1200             : 
    1201     1382696 :   SmallVector<SUnit*, 8> TopRoots, BotRoots;
    1202      345674 :   findRootsAndBiasEdges(TopRoots, BotRoots);
    1203             : 
    1204             :   // Initialize the strategy before modifying the DAG.
    1205             :   // This may initialize a DFSResult to be used for queue priority.
    1206      691348 :   SchedImpl->initialize(this);
    1207             : 
    1208             :   DEBUG(
    1209             :     if (EntrySU.getInstr() != nullptr)
    1210             :       EntrySU.dumpAll(this);
    1211             :     for (const SUnit &SU : SUnits) {
    1212             :       SU.dumpAll(this);
    1213             :       if (ShouldTrackPressure) {
    1214             :         dbgs() << "  Pressure Diff      : ";
    1215             :         getPressureDiff(&SU).dump(*TRI);
    1216             :       }
    1217             :       dbgs() << "  Single Issue       : ";
    1218             :       if (SchedModel.mustBeginGroup(SU.getInstr()) &&
    1219             :          SchedModel.mustEndGroup(SU.getInstr()))
    1220             :         dbgs() << "true;";
    1221             :       else
    1222             :         dbgs() << "false;";
    1223             :       dbgs() << '\n';
    1224             :     }
    1225             :     if (ExitSU.getInstr() != nullptr)
    1226             :       ExitSU.dumpAll(this);
    1227             :   );
    1228      345674 :   if (ViewMISchedDAGs) viewGraph();
    1229             : 
    1230             :   // Initialize ready queues now that the DAG and priority data are finalized.
    1231      691348 :   initQueues(TopRoots, BotRoots);
    1232             : 
    1233      345674 :   bool IsTopNode = false;
    1234             :   while (true) {
    1235             :     DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
    1236     4879438 :     SUnit *SU = SchedImpl->pickNode(IsTopNode);
    1237     2439719 :     if (!SU) break;
    1238             : 
    1239             :     assert(!SU->isScheduled && "Node already scheduled");
    1240     2094045 :     if (!checkSchedLimit())
    1241             :       break;
    1242             : 
    1243     2094045 :     scheduleMI(SU, IsTopNode);
    1244             : 
    1245     2094045 :     if (DFSResult) {
    1246         352 :       unsigned SubtreeID = DFSResult->getSubtreeID(SU);
    1247         352 :       if (!ScheduledTrees.test(SubtreeID)) {
    1248          76 :         ScheduledTrees.set(SubtreeID);
    1249          38 :         DFSResult->scheduleTree(SubtreeID);
    1250          76 :         SchedImpl->scheduleTree(SubtreeID);
    1251             :       }
    1252             :     }
    1253             : 
    1254             :     // Notify the scheduling strategy after updating the DAG.
    1255     4188090 :     SchedImpl->schedNode(SU, IsTopNode);
    1256             : 
    1257     2094045 :     updateQueues(SU, IsTopNode);
    1258     2094045 :   }
    1259             :   assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
    1260             : 
    1261      345674 :   placeDebugValues();
    1262             : 
    1263             :   DEBUG({
    1264             :       unsigned BBNum = begin()->getParent()->getNumber();
    1265             :       dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
    1266             :       dumpSchedule();
    1267             :       dbgs() << '\n';
    1268             :     });
    1269      345674 : }
    1270             : 
    1271             : /// Build the DAG and setup three register pressure trackers.
    1272      347259 : void ScheduleDAGMILive::buildDAGWithRegPressure() {
    1273      347259 :   if (!ShouldTrackPressure) {
    1274      255082 :     RPTracker.reset();
    1275      510164 :     RegionCriticalPSets.clear();
    1276      255082 :     buildSchedGraph(AA);
    1277      255082 :     return;
    1278             :   }
    1279             : 
    1280             :   // Initialize the register pressure tracker used by buildSchedGraph.
    1281      276531 :   RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
    1282       92177 :                  ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
    1283             : 
    1284             :   // Account for liveness generate by the region boundary.
    1285      184354 :   if (LiveRegionEnd != RegionEnd)
    1286       86625 :     RPTracker.recede();
    1287             : 
    1288             :   // Build the DAG, and compute current register pressure.
    1289       92177 :   buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);
    1290             : 
    1291             :   // Initialize top/bottom trackers after computing region pressure.
    1292       92177 :   initRegPressure();
    1293             : }
    1294             : 
    1295          10 : void ScheduleDAGMILive::computeDFSResult() {
    1296          10 :   if (!DFSResult)
    1297          16 :     DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
    1298          20 :   DFSResult->clear();
    1299          10 :   ScheduledTrees.clear();
    1300          30 :   DFSResult->resize(SUnits.size());
    1301          20 :   DFSResult->compute(SUnits);
    1302          20 :   ScheduledTrees.resize(DFSResult->getNumSubtrees());
    1303          10 : }
    1304             : 
    1305             : /// Compute the max cyclic critical path through the DAG. The scheduling DAG
    1306             : /// only provides the critical path for single block loops. To handle loops that
    1307             : /// span blocks, we could use the vreg path latencies provided by
    1308             : /// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
    1309             : /// available for use in the scheduler.
    1310             : ///
    1311             : /// The cyclic path estimation identifies a def-use pair that crosses the back
    1312             : /// edge and considers the depth and height of the nodes. For example, consider
    1313             : /// the following instruction sequence where each instruction has unit latency
    1314             : /// and defines an epomymous virtual register:
    1315             : ///
    1316             : /// a->b(a,c)->c(b)->d(c)->exit
    1317             : ///
    1318             : /// The cyclic critical path is a two cycles: b->c->b
    1319             : /// The acyclic critical path is four cycles: a->b->c->d->exit
    1320             : /// LiveOutHeight = height(c) = len(c->d->exit) = 2
    1321             : /// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
    1322             : /// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
    1323             : /// LiveInDepth = depth(b) = len(a->b) = 1
    1324             : ///
    1325             : /// LiveOutDepth - LiveInDepth = 3 - 1 = 2
    1326             : /// LiveInHeight - LiveOutHeight = 4 - 2 = 2
    1327             : /// CyclicCriticalPath = min(2, 2) = 2
    1328             : ///
    1329             : /// This could be relevant to PostRA scheduling, but is currently implemented
    1330             : /// assuming LiveIntervals.
    1331      320484 : unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
    1332             :   // This only applies to single block loop.
    1333      320484 :   if (!BB->isSuccessor(BB))
    1334             :     return 0;
    1335             : 
    1336        1752 :   unsigned MaxCyclicLatency = 0;
    1337             :   // Visit each live out vreg def to find def/use pairs that cross iterations.
    1338        8316 :   for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
    1339        3060 :     unsigned Reg = P.RegUnit;
    1340        6120 :     if (!TRI->isVirtualRegister(Reg))
    1341           5 :         continue;
    1342        3055 :     const LiveInterval &LI = LIS->getInterval(Reg);
    1343        6110 :     const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
    1344        3055 :     if (!DefVNI)
    1345          77 :       continue;
    1346             : 
    1347        5956 :     MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
    1348        5956 :     const SUnit *DefSU = getSUnit(DefMI);
    1349        1290 :     if (!DefSU)
    1350        1688 :       continue;
    1351             : 
    1352        1290 :     unsigned LiveOutHeight = DefSU->getHeight();
    1353        1290 :     unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
    1354             :     // Visit all local users of the vreg def.
    1355             :     for (const VReg2SUnit &V2SU
    1356       13912 :          : make_range(VRegUses.find(Reg), VRegUses.end())) {
    1357        3731 :       SUnit *SU = V2SU.SU;
    1358        3731 :       if (SU == &ExitSU)
    1359         667 :         continue;
    1360             : 
    1361             :       // Only consider uses of the phi.
    1362        7462 :       LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
    1363        7462 :       if (!LRQ.valueIn()->isPHIDef())
    1364         667 :         continue;
    1365             : 
    1366             :       // Assume that a path spanning two iterations is a cycle, which could
    1367             :       // overestimate in strange cases. This allows cyclic latency to be
    1368             :       // estimated as the minimum slack of the vreg's depth or height.
    1369        3064 :       unsigned CyclicLatency = 0;
    1370        3064 :       if (LiveOutDepth > SU->getDepth())
    1371        3061 :         CyclicLatency = LiveOutDepth - SU->getDepth();
    1372             : 
    1373        3064 :       unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
    1374        3064 :       if (LiveInHeight > LiveOutHeight) {
    1375        3062 :         if (LiveInHeight - LiveOutHeight < CyclicLatency)
    1376         344 :           CyclicLatency = LiveInHeight - LiveOutHeight;
    1377             :       } else
    1378             :         CyclicLatency = 0;
    1379             : 
    1380             :       DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
    1381             :             << SU->NodeNum << ") = " << CyclicLatency << "c\n");
    1382        3064 :       if (CyclicLatency > MaxCyclicLatency)
    1383         840 :         MaxCyclicLatency = CyclicLatency;
    1384             :     }
    1385             :   }
    1386             :   DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
    1387             :   return MaxCyclicLatency;
    1388             : }
    1389             : 
    1390             : /// Release ExitSU predecessors and setup scheduler queues. Re-position
    1391             : /// the Top RP tracker in case the region beginning has changed.
    1392      347259 : void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots,
    1393             :                                    ArrayRef<SUnit*> BotRoots) {
    1394      347259 :   ScheduleDAGMI::initQueues(TopRoots, BotRoots);
    1395      347259 :   if (ShouldTrackPressure) {
    1396             :     assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
    1397      184354 :     TopRPTracker.setPos(CurrentTop);
    1398             :   }
    1399      347259 : }
    1400             : 
    1401             : /// Move an instruction and update register pressure.
    1402     2104849 : void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
    1403             :   // Move the instruction to its new location in the instruction stream.
    1404     2104849 :   MachineInstr *MI = SU->getInstr();
    1405             : 
    1406     2104849 :   if (IsTopNode) {
    1407             :     assert(SU->isTopReady() && "node still has unscheduled dependencies");
    1408      137948 :     if (&*CurrentTop == MI)
    1409      171312 :       CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
    1410             :     else {
    1411       11870 :       moveInstruction(MI, CurrentTop);
    1412       23740 :       TopRPTracker.setPos(MI);
    1413             :     }
    1414             : 
    1415       68974 :     if (ShouldTrackPressure) {
    1416             :       // Update top scheduled pressure.
    1417      116604 :       RegisterOperands RegOpers;
    1418       58302 :       RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
    1419       58302 :       if (ShouldTrackLaneMasks) {
    1420             :         // Adjust liveness and add missing dead+read-undef flags.
    1421      142164 :         SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
    1422       47388 :         RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
    1423             :       } else {
    1424             :         // Adjust for missing dead-def flags.
    1425       10914 :         RegOpers.detectDeadDefs(*MI, *LIS);
    1426             :       }
    1427             : 
    1428       58302 :       TopRPTracker.advance(RegOpers);
    1429             :       assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
    1430             :       DEBUG(
    1431             :         dbgs() << "Top Pressure:\n";
    1432             :         dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
    1433             :       );
    1434             : 
    1435       58302 :       updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
    1436             :     }
    1437             :   } else {
    1438             :     assert(SU->isBottomReady() && "node still has unscheduled dependencies");
    1439             :     MachineBasicBlock::iterator priorII =
    1440     4071750 :       priorNonDebug(CurrentBottom, CurrentTop);
    1441     2035875 :     if (&*priorII == MI)
    1442     1859100 :       CurrentBottom = priorII;
    1443             :     else {
    1444      353550 :       if (&*CurrentTop == MI) {
    1445       31564 :         CurrentTop = nextIfDebug(++CurrentTop, priorII);
    1446       31564 :         TopRPTracker.setPos(CurrentTop);
    1447             :       }
    1448      176775 :       moveInstruction(MI, CurrentBottom);
    1449      176775 :       CurrentBottom = MI;
    1450             :     }
    1451     2035875 :     if (ShouldTrackPressure) {
    1452     2354850 :       RegisterOperands RegOpers;
    1453     1177425 :       RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
    1454     1177425 :       if (ShouldTrackLaneMasks) {
    1455             :         // Adjust liveness and add missing dead+read-undef flags.
    1456      658473 :         SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
    1457      219491 :         RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
    1458             :       } else {
    1459             :         // Adjust for missing dead-def flags.
    1460      957934 :         RegOpers.detectDeadDefs(*MI, *LIS);
    1461             :       }
    1462             : 
    1463     1177425 :       BotRPTracker.recedeSkipDebugValues();
    1464     2354850 :       SmallVector<RegisterMaskPair, 8> LiveUses;
    1465     1177425 :       BotRPTracker.recede(RegOpers, &LiveUses);
    1466             :       assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
    1467             :       DEBUG(
    1468             :         dbgs() << "Bottom Pressure:\n";
    1469             :         dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI);
    1470             :       );
    1471             : 
    1472     1177425 :       updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
    1473     1177425 :       updatePressureDiffs(LiveUses);
    1474             :     }
    1475             :   }
    1476     2104849 : }
    1477             : 
    1478             : //===----------------------------------------------------------------------===//
    1479             : // BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
    1480             : //===----------------------------------------------------------------------===//
    1481             : 
    1482             : namespace {
    1483             : 
    1484             : /// \brief Post-process the DAG to create cluster edges between neighboring
    1485             : /// loads or between neighboring stores.
    1486       50634 : class BaseMemOpClusterMutation : public ScheduleDAGMutation {
    1487             :   struct MemOpInfo {
    1488             :     SUnit *SU;
    1489             :     unsigned BaseReg;
    1490             :     int64_t Offset;
    1491             : 
    1492             :     MemOpInfo(SUnit *su, unsigned reg, int64_t ofs)
    1493       54054 :         : SU(su), BaseReg(reg), Offset(ofs) {}
    1494             : 
    1495             :     bool operator<(const MemOpInfo&RHS) const {
    1496      191838 :       return std::tie(BaseReg, Offset, SU->NodeNum) <
    1497      255784 :              std::tie(RHS.BaseReg, RHS.Offset, RHS.SU->NodeNum);
    1498             :     }
    1499             :   };
    1500             : 
    1501             :   const TargetInstrInfo *TII;
    1502             :   const TargetRegisterInfo *TRI;
    1503             :   bool IsLoad;
    1504             : 
    1505             : public:
    1506             :   BaseMemOpClusterMutation(const TargetInstrInfo *tii,
    1507             :                            const TargetRegisterInfo *tri, bool IsLoad)
    1508       50634 :       : TII(tii), TRI(tri), IsLoad(IsLoad) {}
    1509             : 
    1510             :   void apply(ScheduleDAGInstrs *DAGInstrs) override;
    1511             : 
    1512             : protected:
    1513             :   void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG);
    1514             : };
    1515             : 
    1516       50634 : class StoreClusterMutation : public BaseMemOpClusterMutation {
    1517             : public:
    1518             :   StoreClusterMutation(const TargetInstrInfo *tii,
    1519             :                        const TargetRegisterInfo *tri)
    1520       50634 :       : BaseMemOpClusterMutation(tii, tri, false) {}
    1521             : };
    1522             : 
    1523       50634 : class LoadClusterMutation : public BaseMemOpClusterMutation {
    1524             : public:
    1525             :   LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
    1526       50634 :       : BaseMemOpClusterMutation(tii, tri, true) {}
    1527             : };
    1528             : 
    1529             : } // end anonymous namespace
    1530             : 
    1531             : namespace llvm {
    1532             : 
    1533             : std::unique_ptr<ScheduleDAGMutation>
    1534       25317 : createLoadClusterDAGMutation(const TargetInstrInfo *TII,
    1535             :                              const TargetRegisterInfo *TRI) {
    1536       50634 :   return EnableMemOpCluster ? llvm::make_unique<LoadClusterMutation>(TII, TRI)
    1537       75951 :                             : nullptr;
    1538             : }
    1539             : 
    1540             : std::unique_ptr<ScheduleDAGMutation>
    1541       25317 : createStoreClusterDAGMutation(const TargetInstrInfo *TII,
    1542             :                               const TargetRegisterInfo *TRI) {
    1543       50634 :   return EnableMemOpCluster ? llvm::make_unique<StoreClusterMutation>(TII, TRI)
    1544       75951 :                             : nullptr;
    1545             : }
    1546             : 
    1547             : } // end namespace llvm
    1548             : 
    1549       46736 : void BaseMemOpClusterMutation::clusterNeighboringMemOps(
    1550             :     ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) {
    1551       58930 :   SmallVector<MemOpInfo, 32> MemOpRecords;
    1552      184668 :   for (SUnit *SU : MemOps) {
    1553             :     unsigned BaseReg;
    1554             :     int64_t Offset;
    1555       91196 :     if (TII->getMemOpBaseRegImmOfs(*SU->getInstr(), BaseReg, Offset, TRI))
    1556      108108 :       MemOpRecords.push_back(MemOpInfo(SU, BaseReg, Offset));
    1557             :   }
    1558       46736 :   if (MemOpRecords.size() < 2)
    1559       34542 :     return;
    1560             : 
    1561       24388 :   std::sort(MemOpRecords.begin(), MemOpRecords.end());
    1562       12194 :   unsigned ClusterLength = 1;
    1563       24388 :   for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
    1564       89638 :     if (MemOpRecords[Idx].BaseReg != MemOpRecords[Idx+1].BaseReg) {
    1565        8629 :       ClusterLength = 1;
    1566             :       continue;
    1567             :     }
    1568             : 
    1569       36748 :     SUnit *SUa = MemOpRecords[Idx].SU;
    1570       36748 :     SUnit *SUb = MemOpRecords[Idx+1].SU;
    1571       36748 :     if (TII->shouldClusterMemOps(*SUa->getInstr(), *SUb->getInstr(),
    1572       51981 :                                  ClusterLength) &&
    1573       33607 :         DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
    1574             :       DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
    1575             :             << SUb->NodeNum << ")\n");
    1576             :       // Copy successor edges from SUa to SUb. Interleaving computation
    1577             :       // dependent on SUa can prevent load combining due to register reuse.
    1578             :       // Predecessor edges do not need to be copied from SUb to SUa since nearby
    1579             :       // loads should have effectively the same inputs.
    1580      166090 :       for (const SDep &Succ : SUa->Succs) {
    1581       52579 :         if (Succ.getSUnit() == SUb)
    1582             :           continue;
    1583             :         DEBUG(dbgs() << "  Copy Succ SU(" << Succ.getSUnit()->NodeNum << ")\n");
    1584       74692 :         DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
    1585             :       }
    1586       15233 :       ++ClusterLength;
    1587             :     } else
    1588             :       ClusterLength = 1;
    1589             :   }
    1590             : }
    1591             : 
    1592             : /// \brief Callback from DAG postProcessing to create cluster edges for loads.
    1593       55714 : void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
    1594       55714 :   ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
    1595             : 
    1596             :   // Map DAG NodeNum to store chain ID.
    1597      111428 :   DenseMap<unsigned, unsigned> StoreChainIDs;
    1598             :   // Map each store chain to a set of dependent MemOps.
    1599      111428 :   SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
    1600      877242 :   for (SUnit &SU : DAG->SUnits) {
    1601     1600877 :     if ((IsLoad && !SU.getInstr()->mayLoad()) ||
    1602      710494 :         (!IsLoad && !SU.getInstr()->mayStore()))
    1603      563190 :       continue;
    1604             : 
    1605      182392 :     unsigned ChainPredID = DAG->SUnits.size();
    1606      422584 :     for (const SDep &Pred : SU.Preds) {
    1607      180391 :       if (Pred.isCtrl()) {
    1608       31395 :         ChainPredID = Pred.getSUnit()->NodeNum;
    1609       31395 :         break;
    1610             :       }
    1611             :     }
    1612             :     // Check if this chain-like pred has been seen
    1613             :     // before. ChainPredID==MaxNodeID at the top of the schedule.
    1614       91196 :     unsigned NumChains = StoreChainDependents.size();
    1615             :     std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
    1616      273588 :       StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
    1617       91196 :     if (Result.second)
    1618       46736 :       StoreChainDependents.resize(NumChains + 1);
    1619      182392 :     StoreChainDependents[Result.first->second].push_back(&SU);
    1620             :   }
    1621             : 
    1622             :   // Iterate over the store chains.
    1623      213878 :   for (auto &SCD : StoreChainDependents)
    1624       93472 :     clusterNeighboringMemOps(SCD, DAG);
    1625       55714 : }
    1626             : 
    1627             : //===----------------------------------------------------------------------===//
    1628             : // CopyConstrain - DAG post-processing to encourage copy elimination.
    1629             : //===----------------------------------------------------------------------===//
    1630             : 
    1631             : namespace {
    1632             : 
    1633             : /// \brief Post-process the DAG to create weak edges from all uses of a copy to
    1634             : /// the one use that defines the copy's source vreg, most likely an induction
    1635             : /// variable increment.
    1636       86442 : class CopyConstrain : public ScheduleDAGMutation {
    1637             :   // Transient state.
    1638             :   SlotIndex RegionBeginIdx;
    1639             : 
    1640             :   // RegionEndIdx is the slot index of the last non-debug instruction in the
    1641             :   // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
    1642             :   SlotIndex RegionEndIdx;
    1643             : 
    1644             : public:
    1645      259326 :   CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
    1646             : 
    1647             :   void apply(ScheduleDAGInstrs *DAGInstrs) override;
    1648             : 
    1649             : protected:
    1650             :   void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
    1651             : };
    1652             : 
    1653             : } // end anonymous namespace
    1654             : 
    1655             : namespace llvm {
    1656             : 
    1657             : std::unique_ptr<ScheduleDAGMutation>
    1658       86442 : createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
    1659             :                                const TargetRegisterInfo *TRI) {
    1660      345768 :   return llvm::make_unique<CopyConstrain>(TII, TRI);
    1661             : }
    1662             : 
    1663             : } // end namespace llvm
    1664             : 
    1665             : /// constrainLocalCopy handles two possibilities:
    1666             : /// 1) Local src:
    1667             : /// I0:     = dst
    1668             : /// I1: src = ...
    1669             : /// I2:     = dst
    1670             : /// I3: dst = src (copy)
    1671             : /// (create pred->succ edges I0->I1, I2->I1)
    1672             : ///
    1673             : /// 2) Local copy:
    1674             : /// I0: dst = src (copy)
    1675             : /// I1:     = dst
    1676             : /// I2: src = ...
    1677             : /// I3:     = dst
    1678             : /// (create pred->succ edges I1->I2, I3->I2)
    1679             : ///
    1680             : /// Although the MachineScheduler is currently constrained to single blocks,
    1681             : /// this algorithm should handle extended blocks. An EBB is a set of
    1682             : /// contiguously numbered blocks such that the previous block in the EBB is
    1683             : /// always the single predecessor.
    1684      509828 : void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
    1685      509828 :   LiveIntervals *LIS = DAG->getLIS();
    1686      509828 :   MachineInstr *Copy = CopySU->getInstr();
    1687             : 
    1688             :   // Check for pure vreg copies.
    1689     1019656 :   const MachineOperand &SrcOp = Copy->getOperand(1);
    1690      509828 :   unsigned SrcReg = SrcOp.getReg();
    1691      509828 :   if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
    1692      494533 :     return;
    1693             : 
    1694      363438 :   const MachineOperand &DstOp = Copy->getOperand(0);
    1695      363438 :   unsigned DstReg = DstOp.getReg();
    1696      398355 :   if (!TargetRegisterInfo::isVirtualRegister(DstReg) || DstOp.isDead())
    1697             :     return;
    1698             : 
    1699             :   // Check if either the dest or source is local. If it's live across a back
    1700             :   // edge, it's not local. Note that if both vregs are live across the back
    1701             :   // edge, we cannot successfully contrain the copy without cyclic scheduling.
    1702             :   // If both the copy's source and dest are local live intervals, then we
    1703             :   // should treat the dest as the global for the purpose of adding
    1704             :   // constraints. This adds edges from source's other uses to the copy.
    1705       34891 :   unsigned LocalReg = SrcReg;
    1706       34891 :   unsigned GlobalReg = DstReg;
    1707       34891 :   LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
    1708       34891 :   if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
    1709        9931 :     LocalReg = DstReg;
    1710        9931 :     GlobalReg = SrcReg;
    1711        9931 :     LocalLI = &LIS->getInterval(LocalReg);
    1712        9931 :     if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
    1713             :       return;
    1714             :   }
    1715       30915 :   LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
    1716             : 
    1717             :   // Find the global segment after the start of the local LI.
    1718       61830 :   LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
    1719             :   // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
    1720             :   // local live range. We could create edges from other global uses to the local
    1721             :   // start, but the coalescer should have already eliminated these cases, so
    1722             :   // don't bother dealing with it.
    1723       61830 :   if (GlobalSegment == GlobalLI->end())
    1724             :     return;
    1725             : 
    1726             :   // If GlobalSegment is killed at the LocalLI->start, the call to find()
    1727             :   // returned the next global segment. But if GlobalSegment overlaps with
    1728             :   // LocalLI->start, then advance to the next segement. If a hole in GlobalLI
    1729             :   // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
    1730       61790 :   if (GlobalSegment->contains(LocalLI->beginIndex()))
    1731        6027 :     ++GlobalSegment;
    1732             : 
    1733       61790 :   if (GlobalSegment == GlobalLI->end())
    1734             :     return;
    1735             : 
    1736             :   // Check if GlobalLI contains a hole in the vicinity of LocalLI.
    1737       54652 :   if (GlobalSegment != GlobalLI->begin()) {
    1738             :     // Two address defs have no hole.
    1739        5028 :     if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
    1740             :                                GlobalSegment->start)) {
    1741             :       return;
    1742             :     }
    1743             :     // If the prior global segment may be defined by the same two-address
    1744             :     // instruction that also defines LocalLI, then can't make a hole here.
    1745        6284 :     if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
    1746             :                                LocalLI->beginIndex())) {
    1747             :       return;
    1748             :     }
    1749             :     // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
    1750             :     // it would be a disconnected component in the live range.
    1751             :     assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
    1752             :            "Disconnected LRG within the scheduling region.");
    1753             :   }
    1754       26383 :   MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
    1755       26383 :   if (!GlobalDef)
    1756             :     return;
    1757             : 
    1758       50624 :   SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
    1759       25273 :   if (!GlobalSU)
    1760             :     return;
    1761             : 
    1762             :   // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
    1763             :   // constraining the uses of the last local def to precede GlobalDef.
    1764       40568 :   SmallVector<SUnit*,8> LocalUses;
    1765       50546 :   const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
    1766       25273 :   MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
    1767       50546 :   SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
    1768      109728 :   for (const SDep &Succ : LastLocalSU->Succs) {
    1769       45389 :     if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
    1770             :       continue;
    1771       58531 :     if (Succ.getSUnit() == GlobalSU)
    1772             :       continue;
    1773       52038 :     if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
    1774             :       return;
    1775       16096 :     LocalUses.push_back(Succ.getSUnit());
    1776             :   }
    1777             :   // Open the top of the GlobalLI hole by constraining any earlier global uses
    1778             :   // to precede the start of LocalLI.
    1779       30645 :   SmallVector<SUnit*,8> GlobalUses;
    1780             :   MachineInstr *FirstLocalDef =
    1781       46050 :     LIS->getInstructionFromIndex(LocalLI->beginIndex());
    1782       30700 :   SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
    1783       62161 :   for (const SDep &Pred : GlobalSU->Preds) {
    1784       31500 :     if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
    1785             :       continue;
    1786        1176 :     if (Pred.getSUnit() == FirstLocalSU)
    1787             :       continue;
    1788         976 :     if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
    1789             :       return;
    1790         433 :     GlobalUses.push_back(Pred.getSUnit());
    1791             :   }
    1792             :   DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
    1793             :   // Add the weak edges.
    1794        2443 :   for (SmallVectorImpl<SUnit*>::const_iterator
    1795       30590 :          I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
    1796             :     DEBUG(dbgs() << "  Local use SU(" << (*I)->NodeNum << ") -> SU("
    1797             :           << GlobalSU->NodeNum << ")\n");
    1798        4886 :     DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
    1799             :   }
    1800         428 :   for (SmallVectorImpl<SUnit*>::const_iterator
    1801       30590 :          I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
    1802             :     DEBUG(dbgs() << "  Global use SU(" << (*I)->NodeNum << ") -> SU("
    1803             :           << FirstLocalSU->NodeNum << ")\n");
    1804         856 :     DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
    1805             :   }
    1806             : }
    1807             : 
    1808             : /// \brief Callback from DAG postProcessing to create weak edges to encourage
    1809             : /// copy elimination.
    1810      327513 : void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
    1811      327513 :   ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
    1812             :   assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
    1813             : 
    1814      655026 :   MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
    1815      655026 :   if (FirstPos == DAG->end())
    1816             :     return;
    1817      982182 :   RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
    1818      654788 :   RegionEndIdx = DAG->getLIS()->getInstructionIndex(
    1819     1309576 :       *priorNonDebug(DAG->end(), DAG->begin()));
    1820             : 
    1821     3084830 :   for (SUnit &SU : DAG->SUnits) {
    1822     3550508 :     if (!SU.getInstr()->isCopy())
    1823     1265426 :       continue;
    1824             : 
    1825      509828 :     constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
    1826             :   }
    1827             : }
    1828             : 
    1829             : //===----------------------------------------------------------------------===//
    1830             : // MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
    1831             : // and possibly other custom schedulers.
    1832             : //===----------------------------------------------------------------------===//
    1833             : 
    1834             : static const unsigned InvalidCycle = ~0U;
    1835             : 
    1836      626925 : SchedBoundary::~SchedBoundary() { delete HazardRec; }
    1837             : 
    1838      902035 : void SchedBoundary::reset() {
    1839             :   // A new HazardRec is created for each DAG and owned by SchedBoundary.
    1840             :   // Destroying and reconstructing it is very expensive though. So keep
    1841             :   // invalid, placeholder HazardRecs.
    1842      902035 :   if (HazardRec && HazardRec->isEnabled()) {
    1843        3308 :     delete HazardRec;
    1844        3308 :     HazardRec = nullptr;
    1845             :   }
    1846     1804070 :   Available.clear();
    1847     1804070 :   Pending.clear();
    1848      902035 :   CheckPending = false;
    1849      902035 :   CurrCycle = 0;
    1850      902035 :   CurrMOps = 0;
    1851      902035 :   MinReadyCycle = std::numeric_limits<unsigned>::max();
    1852      902035 :   ExpectedLatency = 0;
    1853      902035 :   DependentLatency = 0;
    1854      902035 :   RetiredMOps = 0;
    1855      902035 :   MaxExecutedResCount = 0;
    1856      902035 :   ZoneCritResIdx = 0;
    1857      902035 :   IsResourceLimited = false;
    1858     1804070 :   ReservedCycles.clear();
    1859             : #ifndef NDEBUG
    1860             :   // Track the maximum number of stall cycles that could arise either from the
    1861             :   // latency of a DAG edge or the number of cycles that a processor resource is
    1862             :   // reserved (SchedBoundary::ReservedCycles).
    1863             :   MaxObservedStall = 0;
    1864             : #endif
    1865             :   // Reserve a zero-count for invalid CritResIdx.
    1866      902035 :   ExecutedResCounts.resize(1);
    1867             :   assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
    1868      902035 : }
    1869             : 
    1870      349561 : void SchedRemainder::
    1871             : init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
    1872      349561 :   reset();
    1873      349561 :   if (!SchedModel->hasInstrSchedModel())
    1874             :     return;
    1875      274966 :   RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
    1876     1462184 :   for (SUnit &SU : DAG->SUnits) {
    1877      912252 :     const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
    1878     1824504 :     RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
    1879      912252 :       * SchedModel->getMicroOpFactor();
    1880     1492618 :     for (TargetSchedModel::ProcResIter
    1881     1824504 :            PI = SchedModel->getWriteProcResBegin(SC),
    1882     1824504 :            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
    1883     1492618 :       unsigned PIdx = PI->ProcResourceIdx;
    1884     1492618 :       unsigned Factor = SchedModel->getResourceFactor(PIdx);
    1885     2985236 :       RemainingCounts[PIdx] += (Factor * PI->Cycles);
    1886             :     }
    1887             :   }
    1888             : }
    1889             : 
    1890      693060 : void SchedBoundary::
    1891             : init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
    1892      693060 :   reset();
    1893      693060 :   DAG = dag;
    1894      693060 :   SchedModel = smodel;
    1895      693060 :   Rem = rem;
    1896      693060 :   if (SchedModel->hasInstrSchedModel()) {
    1897      549260 :     ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
    1898      549260 :     ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle);
    1899             :   }
    1900      693060 : }
    1901             : 
    1902             : /// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
    1903             : /// these "soft stalls" differently than the hard stall cycles based on CPU
    1904             : /// resources and computed by checkHazard(). A fully in-order model
    1905             : /// (MicroOpBufferSize==0) will not make use of this since instructions are not
    1906             : /// available for scheduling until they are ready. However, a weaker in-order
    1907             : /// model may use this for heuristics. For example, if a processor has in-order
    1908             : /// behavior when reading certain resources, this may come into play.
    1909    19093860 : unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
    1910    19093860 :   if (!SU->isUnbuffered)
    1911             :     return 0;
    1912             : 
    1913     2040397 :   unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
    1914     2040397 :   if (ReadyCycle > CurrCycle)
    1915       39665 :     return ReadyCycle - CurrCycle;
    1916             :   return 0;
    1917             : }
    1918             : 
    1919             : /// Compute the next cycle at which the given processor resource can be
    1920             : /// scheduled.
    1921     1495347 : unsigned SchedBoundary::
    1922             : getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
    1923     2990694 :   unsigned NextUnreserved = ReservedCycles[PIdx];
    1924             :   // If this resource has never been used, always return cycle zero.
    1925     1495347 :   if (NextUnreserved == InvalidCycle)
    1926             :     return 0;
    1927             :   // For bottom-up scheduling add the cycles needed for the current operation.
    1928        2070 :   if (!isTop())
    1929         927 :     NextUnreserved += Cycles;
    1930             :   return NextUnreserved;
    1931             : }
    1932             : 
    1933             : /// Does this SU have a hazard within the current instruction group.
    1934             : ///
    1935             : /// The scheduler supports two modes of hazard recognition. The first is the
    1936             : /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
    1937             : /// supports highly complicated in-order reservation tables
    1938             : /// (ScoreboardHazardRecognizer) and arbitraty target-specific logic.
    1939             : ///
    1940             : /// The second is a streamlined mechanism that checks for hazards based on
    1941             : /// simple counters that the scheduler itself maintains. It explicitly checks
    1942             : /// for instruction dispatch limitations, including the number of micro-ops that
    1943             : /// can dispatch per cycle.
    1944             : ///
    1945             : /// TODO: Also check whether the SU must start a new group.
    1946     8812177 : bool SchedBoundary::checkHazard(SUnit *SU) {
    1947     8812177 :   if (HazardRec->isEnabled()
    1948     8812177 :       && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
    1949             :     return true;
    1950             :   }
    1951             : 
    1952     8808118 :   unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
    1953     8808118 :   if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
    1954             :     DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="
    1955             :           << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
    1956             :     return true;
    1957             :   }
    1958             : 
    1959    14794483 :   if (CurrMOps > 0 &&
    1960    12258771 :       ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
    1961    12241289 :        (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
    1962             :     DEBUG(dbgs() << "  hazard: SU(" << SU->NodeNum << ") must "
    1963             :                  << (isTop()? "begin" : "end") << " group\n");
    1964             :     return true;
    1965             :   }
    1966             : 
    1967     8669468 :   if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
    1968        1796 :     const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
    1969        1829 :     for (TargetSchedModel::ProcResIter
    1970        3592 :            PI = SchedModel->getWriteProcResBegin(SC),
    1971        3592 :            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
    1972        2426 :       unsigned NRCycle = getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles);
    1973        2426 :       if (NRCycle > CurrCycle) {
    1974             : #ifndef NDEBUG
    1975             :         MaxObservedStall = std::max(PI->Cycles, MaxObservedStall);
    1976             : #endif
    1977             :         DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") "
    1978             :               << SchedModel->getResourceName(PI->ProcResourceIdx)
    1979             :               << "=" << NRCycle << "c\n");
    1980             :         return true;
    1981             :       }
    1982             :     }
    1983             :   }
    1984             :   return false;
    1985             : }
    1986             : 
    1987             : // Find the unscheduled node in ReadySUs with the highest latency.
    1988      997190 : unsigned SchedBoundary::
    1989             : findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
    1990      997190 :   SUnit *LateSU = nullptr;
    1991      997190 :   unsigned RemLatency = 0;
    1992     7044932 :   for (SUnit *SU : ReadySUs) {
    1993     5050552 :     unsigned L = getUnscheduledLatency(SU);
    1994     5050552 :     if (L > RemLatency) {
    1995      729848 :       RemLatency = L;
    1996      729848 :       LateSU = SU;
    1997             :     }
    1998             :   }
    1999             :   if (LateSU) {
    2000             :     DEBUG(dbgs() << Available.getName() << " RemLatency SU("
    2001             :           << LateSU->NodeNum << ") " << RemLatency << "c\n");
    2002             :   }
    2003      997190 :   return RemLatency;
    2004             : }
    2005             : 
    2006             : // Count resources in this zone and the remaining unscheduled
    2007             : // instruction. Return the max count, scaled. Set OtherCritIdx to the critical
    2008             : // resource index, or zero if the zone is issue limited.
    2009      490356 : unsigned SchedBoundary::
    2010             : getOtherResourceCount(unsigned &OtherCritIdx) {
    2011      490356 :   OtherCritIdx = 0;
    2012      490356 :   if (!SchedModel->hasInstrSchedModel())
    2013             :     return 0;
    2014             : 
    2015      325802 :   unsigned OtherCritCount = Rem->RemIssueCount
    2016      325802 :     + (RetiredMOps * SchedModel->getMicroOpFactor());
    2017             :   DEBUG(dbgs() << "  " << Available.getName() << " + Remain MOps: "
    2018             :         << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
    2019     2678368 :   for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
    2020     2352566 :        PIdx != PEnd; ++PIdx) {
    2021     4053528 :     unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
    2022     2026764 :     if (OtherCount > OtherCritCount) {
    2023        8038 :       OtherCritCount = OtherCount;
    2024        8038 :       OtherCritIdx = PIdx;
    2025             :     }
    2026             :   }
    2027             :   if (OtherCritIdx) {
    2028             :     DEBUG(dbgs() << "  " << Available.getName() << " + Remain CritRes: "
    2029             :           << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
    2030             :           << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
    2031             :   }
    2032      325802 :   return OtherCritCount;
    2033             : }
    2034             : 
    2035     3013721 : void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
    2036             :   assert(SU->getInstr() && "Scheduled SUnit must have instr");
    2037             : 
    2038             : #ifndef NDEBUG
    2039             :   // ReadyCycle was been bumped up to the CurrCycle when this node was
    2040             :   // scheduled, but CurrCycle may have been eagerly advanced immediately after
    2041             :   // scheduling, so may now be greater than ReadyCycle.
    2042             :   if (ReadyCycle > CurrCycle)
    2043             :     MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
    2044             : #endif
    2045             : 
    2046     3013721 :   if (ReadyCycle < MinReadyCycle)
    2047      710822 :     MinReadyCycle = ReadyCycle;
    2048             : 
    2049             :   // Check for interlocks first. For the purpose of other heuristics, an
    2050             :   // instruction that cannot issue appears as if it's not in the ReadyQueue.
    2051     3013721 :   bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
    2052     5941204 :   if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU) ||
    2053     8782449 :       Available.size() >= ReadyListLimit)
    2054      103997 :     Pending.push(SU);
    2055             :   else
    2056     2909724 :     Available.push(SU);
    2057     3013721 : }
    2058             : 
    2059             : /// Move the boundary of scheduled code by one cycle.
    2060      635509 : void SchedBoundary::bumpCycle(unsigned NextCycle) {
    2061      635509 :   if (SchedModel->getMicroOpBufferSize() == 0) {
    2062             :     assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
    2063             :            "MinReadyCycle uninitialized");
    2064      159702 :     if (MinReadyCycle > NextCycle)
    2065       14969 :       NextCycle = MinReadyCycle;
    2066             :   }
    2067             :   // Update the current micro-ops, which will issue in the next cycle.
    2068      635509 :   unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
    2069      635509 :   CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
    2070             : 
    2071             :   // Decrement DependentLatency based on the next cycle.
    2072      635509 :   if ((NextCycle - CurrCycle) > DependentLatency)
    2073       76110 :     DependentLatency = 0;
    2074             :   else
    2075      559399 :     DependentLatency -= (NextCycle - CurrCycle);
    2076             : 
    2077      635509 :   if (!HazardRec->isEnabled()) {
    2078             :     // Bypass HazardRec virtual calls.
    2079      610986 :     CurrCycle = NextCycle;
    2080             :   } else {
    2081             :     // Bypass getHazardType calls in case of long latency.
    2082      103095 :     for (; CurrCycle != NextCycle; ++CurrCycle) {
    2083       39286 :       if (isTop())
    2084        1020 :         HazardRec->AdvanceCycle();
    2085             :       else
    2086       38266 :         HazardRec->RecedeCycle();
    2087             :     }
    2088             :   }
    2089      635509 :   CheckPending = true;
    2090      635509 :   unsigned LFactor = SchedModel->getLatencyFactor();
    2091      635509 :   IsResourceLimited =
    2092     1271018 :     (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
    2093      635509 :     > (int)LFactor;
    2094             : 
    2095             :   DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n');
    2096      635509 : }
    2097             : 
    2098     1492618 : void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
    2099     2985236 :   ExecutedResCounts[PIdx] += Count;
    2100     2985236 :   if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
    2101      976224 :     MaxExecutedResCount = ExecutedResCounts[PIdx];
    2102     1492618 : }
    2103             : 
    2104             : /// Add the given processor resource to this scheduled zone.
    2105             : ///
    2106             : /// \param Cycles indicates the number of consecutive (non-pipelined) cycles
    2107             : /// during which this resource is consumed.
    2108             : ///
    2109             : /// \return the next cycle at which the instruction may execute without
    2110             : /// oversubscribing resources.
    2111     1492618 : unsigned SchedBoundary::
    2112             : countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
    2113     2985236 :   unsigned Factor = SchedModel->getResourceFactor(PIdx);
    2114     1492618 :   unsigned Count = Factor * Cycles;
    2115             :   DEBUG(dbgs() << "  " << SchedModel->getResourceName(PIdx)
    2116             :         << " +" << Cycles << "x" << Factor << "u\n");
    2117             : 
    2118             :   // Update Executed resources counts.
    2119     1492618 :   incExecutedResources(PIdx, Count);
    2120             :   assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
    2121     2985236 :   Rem->RemainingCounts[PIdx] -= Count;
    2122             : 
    2123             :   // Check if this resource exceeds the current critical resource. If so, it
    2124             :   // becomes the critical resource.
    2125     4193376 :   if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
    2126      170014 :     ZoneCritResIdx = PIdx;
    2127             :     DEBUG(dbgs() << "  *** Critical resource "
    2128             :           << SchedModel->getResourceName(PIdx) << ": "
    2129             :           << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n");
    2130             :   }
    2131             :   // For reserved resources, record the highest cycle using the resource.
    2132     1492618 :   unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles);
    2133             :   if (NextAvailable > CurrCycle) {
    2134             :     DEBUG(dbgs() << "  Resource conflict: "
    2135             :           << SchedModel->getProcResource(PIdx)->Name << " reserved until @"
    2136             :           << NextAvailable << "\n");
    2137             :   }
    2138     1492618 :   return NextAvailable;
    2139             : }
    2140             : 
    2141             : /// Move the boundary of scheduled code by one SUnit.
    2142     2067556 : void SchedBoundary::bumpNode(SUnit *SU) {
    2143             :   // Update the reservation table.
    2144     2067556 :   if (HazardRec->isEnabled()) {
    2145       32645 :     if (!isTop() && SU->isCall) {
    2146             :       // Calls are scheduled with their preceding instructions. For bottom-up
    2147             :       // scheduling, clear the pipeline state before emitting.
    2148           0 :       HazardRec->Reset();
    2149             :     }
    2150       32645 :     HazardRec->EmitInstruction(SU);
    2151             :   }
    2152             :   // checkHazard should prevent scheduling multiple instructions per cycle that
    2153             :   // exceed the issue width.
    2154     2067556 :   const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
    2155     2067556 :   unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
    2156             :   assert(
    2157             :       (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
    2158             :       "Cannot schedule this instruction's MicroOps in the current cycle.");
    2159             : 
    2160     2067556 :   unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
    2161             :   DEBUG(dbgs() << "  Ready @" << ReadyCycle << "c\n");
    2162             : 
    2163     2067556 :   unsigned NextCycle = CurrCycle;
    2164     2067556 :   switch (SchedModel->getMicroOpBufferSize()) {
    2165             :   case 0:
    2166             :     assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
    2167             :     break;
    2168      199910 :   case 1:
    2169      199910 :     if (ReadyCycle > NextCycle) {
    2170       17548 :       NextCycle = ReadyCycle;
    2171             :       DEBUG(dbgs() << "  *** Stall until: " << ReadyCycle << "\n");
    2172             :     }
    2173             :     break;
    2174     1672925 :   default:
    2175             :     // We don't currently model the OOO reorder buffer, so consider all
    2176             :     // scheduled MOps to be "retired". We do loosely model in-order resource
    2177             :     // latency. If this instruction uses an in-order resource, account for any
    2178             :     // likely stall cycles.
    2179     1672925 :     if (SU->isUnbuffered && ReadyCycle > NextCycle)
    2180         314 :       NextCycle = ReadyCycle;
    2181             :     break;
    2182             :   }
    2183     2067556 :   RetiredMOps += IncMOps;
    2184             : 
    2185             :   // Update resource counts and critical resource.
    2186     2067556 :   if (SchedModel->hasInstrSchedModel()) {
    2187      912252 :     unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
    2188             :     assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
    2189      912252 :     Rem->RemIssueCount -= DecRemIssue;
    2190      912252 :     if (ZoneCritResIdx) {
    2191             :       // Scale scheduled micro-ops for comparing with the critical resource.
    2192             :       unsigned ScaledMOps =
    2193      346948 :         RetiredMOps * SchedModel->getMicroOpFactor();
    2194             : 
    2195             :       // If scaled micro-ops are now more than the previous critical resource by
    2196             :       // a full cycle, then micro-ops issue becomes critical.
    2197     1040844 :       if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
    2198      346948 :           >= (int)SchedModel->getLatencyFactor()) {
    2199       10213 :         ZoneCritResIdx = 0;
    2200             :         DEBUG(dbgs() << "  *** Critical resource NumMicroOps: "
    2201             :               << ScaledMOps / SchedModel->getLatencyFactor() << "c\n");
    2202             :       }
    2203             :     }
    2204     1492618 :     for (TargetSchedModel::ProcResIter
    2205     1824504 :            PI = SchedModel->getWriteProcResBegin(SC),
    2206     1824504 :            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
    2207             :       unsigned RCycle =
    2208     1492618 :         countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
    2209     1492618 :       if (RCycle > NextCycle)
    2210           4 :         NextCycle = RCycle;
    2211             :     }
    2212      912252 :     if (SU->hasReservedResource) {
    2213             :       // For reserved resources, record the highest cycle using the resource.
    2214             :       // For top-down scheduling, this is the cycle in which we schedule this
    2215             :       // instruction plus the number of cycles the operations reserves the
    2216             :       // resource. For bottom-up is it simply the instruction's cycle.
    2217         737 :       for (TargetSchedModel::ProcResIter
    2218        1178 :              PI = SchedModel->getWriteProcResBegin(SC),
    2219        1178 :              PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
    2220         737 :         unsigned PIdx = PI->ProcResourceIdx;
    2221        1474 :         if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
    2222         589 :           if (isTop()) {
    2223         909 :             ReservedCycles[PIdx] =
    2224        1212 :               std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles);
    2225             :           }
    2226             :           else
    2227         572 :             ReservedCycles[PIdx] = NextCycle;
    2228             :         }
    2229             :       }
    2230             :     }
    2231             :   }
    2232             :   // Update ExpectedLatency and DependentLatency.
    2233     2067556 :   unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
    2234     2067556 :   unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
    2235     2067556 :   if (SU->getDepth() > TopLatency) {
    2236      411545 :     TopLatency = SU->getDepth();
    2237             :     DEBUG(dbgs() << "  " << Available.getName()
    2238             :           << " TopLatency SU(" << SU->NodeNum << ") " << TopLatency << "c\n");
    2239             :   }
    2240     2067556 :   if (SU->getHeight() > BotLatency) {
    2241      708056 :     BotLatency = SU->getHeight();
    2242             :     DEBUG(dbgs() << "  " << Available.getName()
    2243             :           << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n");
    2244             :   }
    2245             :   // If we stall for any reason, bump the cycle.
    2246     2067556 :   if (NextCycle > CurrCycle) {
    2247       17866 :     bumpCycle(NextCycle);
    2248             :   } else {
    2249             :     // After updating ZoneCritResIdx and ExpectedLatency, check if we're
    2250             :     // resource limited. If a stall occurred, bumpCycle does this.
    2251     2049690 :     unsigned LFactor = SchedModel->getLatencyFactor();
    2252     2049690 :     IsResourceLimited =
    2253     4099380 :       (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
    2254     2049690 :       > (int)LFactor;
    2255             :   }
    2256             :   // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
    2257             :   // resets CurrMOps. Loop to handle instructions with more MOps than issue in
    2258             :   // one cycle.  Since we commonly reach the max MOps here, opportunistically
    2259             :   // bump the cycle to avoid uselessly checking everything in the readyQ.
    2260     2067556 :   CurrMOps += IncMOps;
    2261             : 
    2262             :   // Bump the cycle count for issue group constraints.
    2263             :   // This must be done after NextCycle has been adjust for all other stalls.
    2264             :   // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
    2265             :   // currCycle to X.
    2266     4135112 :   if ((isTop() &&  SchedModel->mustEndGroup(SU->getInstr())) ||
    2267     4043910 :       (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
    2268             :     DEBUG(dbgs() << "  Bump cycle to "
    2269             :                  << (isTop() ? "end" : "begin") << " group\n");
    2270           0 :     bumpCycle(++NextCycle);
    2271             :   }
    2272             : 
    2273     3159592 :   while (CurrMOps >= SchedModel->getIssueWidth()) {
    2274             :     DEBUG(dbgs() << "  *** Max MOps " << CurrMOps
    2275             :           << " at cycle " << CurrCycle << '\n');
    2276      546018 :     bumpCycle(++NextCycle);
    2277             :   }
    2278             :   DEBUG(dumpScheduledState());
    2279     2067556 : }
    2280             : 
    2281             : /// Release pending ready nodes in to the available queue. This makes them
    2282             : /// visible to heuristics.
    2283      550788 : void SchedBoundary::releasePending() {
    2284             :   // If the available queue is empty, it is safe to reset MinReadyCycle.
    2285     1101576 :   if (Available.empty())
    2286       87180 :     MinReadyCycle = std::numeric_limits<unsigned>::max();
    2287             : 
    2288             :   // Check to see if any of the pending instructions are ready to issue.  If
    2289             :   // so, add them to the available queue.
    2290      550788 :   bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
    2291     1368815 :   for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
    2292      811647 :     SUnit *SU = *(Pending.begin()+i);
    2293      270549 :     unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
    2294             : 
    2295      270549 :     if (ReadyCycle < MinReadyCycle)
    2296       93370 :       MinReadyCycle = ReadyCycle;
    2297             : 
    2298      270549 :     if (!IsBuffered && ReadyCycle > CurrCycle)
    2299       50630 :       continue;
    2300             : 
    2301      219919 :     if (checkHazard(SU))
    2302         145 :       continue;
    2303             : 
    2304      659322 :     if (Available.size() >= ReadyListLimit)
    2305             :       break;
    2306             : 
    2307      432928 :     Available.push(SU);
    2308      865856 :     Pending.remove(Pending.begin()+i);
    2309      216464 :     --i; --e;
    2310             :   }
    2311      550788 :   CheckPending = false;
    2312      550788 : }
    2313             : 
    2314             : /// Remove SU from the ready set for this boundary.
    2315     3013711 : void SchedBoundary::removeReady(SUnit *SU) {
    2316     6027422 :   if (Available.isInQueue(SU))
    2317     6008336 :     Available.remove(Available.find(SU));
    2318             :   else {
    2319             :     assert(Pending.isInQueue(SU) && "bad ready count");
    2320       19086 :     Pending.remove(Pending.find(SU));
    2321             :   }
    2322     3013711 : }
    2323             : 
    2324             : /// If this queue only has one ready candidate, return it. As a side effect,
    2325             : /// defer any nodes that now hit a hazard, and advance the cycle until at least
    2326             : /// one node is ready. If multiple instructions are ready, return NULL.
    2327     2320007 : SUnit *SchedBoundary::pickOnlyChoice() {
    2328     2320007 :   if (CheckPending)
    2329      479163 :     releasePending();
    2330             : 
    2331     2320007 :   if (CurrMOps > 0) {
    2332             :     // Defer any ready instrs that now have a hazard.
    2333    14698850 :     for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
    2334     5765640 :       if (checkHazard(*I)) {
    2335      244026 :         Pending.push(*I);
    2336      244026 :         I = Available.remove(I);
    2337      122013 :         continue;
    2338             :       }
    2339             :       ++I;
    2340             :     }
    2341             :   }
    2342     2534882 :   for (unsigned i = 0; Available.empty(); ++i) {
    2343             : //  FIXME: Re-enable assert once PR20057 is resolved.
    2344             : //    assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
    2345             : //           "permanent hazard");
    2346             :     (void)i;
    2347       71625 :     bumpCycle(CurrCycle + 1);
    2348       71625 :     releasePending();
    2349             :   }
    2350             : 
    2351             :   DEBUG(Pending.dump());
    2352             :   DEBUG(Available.dump());
    2353             : 
    2354     4640014 :   if (Available.size() == 1)
    2355     1598538 :     return *Available.begin();
    2356             :   return nullptr;
    2357             : }
    2358             : 
    2359             : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    2360             : // This is useful information to dump after bumpNode.
    2361             : // Note that the Queue contents are more useful before pickNodeFromQueue.
    2362             : LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {
    2363             :   unsigned ResFactor;
    2364             :   unsigned ResCount;
    2365             :   if (ZoneCritResIdx) {
    2366             :     ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
    2367             :     ResCount = getResourceCount(ZoneCritResIdx);
    2368             :   } else {
    2369             :     ResFactor = SchedModel->getMicroOpFactor();
    2370             :     ResCount = RetiredMOps * SchedModel->getMicroOpFactor();
    2371             :   }
    2372             :   unsigned LFactor = SchedModel->getLatencyFactor();
    2373             :   dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
    2374             :          << "  Retired: " << RetiredMOps;
    2375             :   dbgs() << "\n  Executed: " << getExecutedCount() / LFactor << "c";
    2376             :   dbgs() << "\n  Critical: " << ResCount / LFactor << "c, "
    2377             :          << ResCount / ResFactor << " "
    2378             :          << SchedModel->getResourceName(ZoneCritResIdx)
    2379             :          << "\n  ExpectedLatency: " << ExpectedLatency << "c\n"
    2380             :          << (IsResourceLimited ? "  - Resource" : "  - Latency")
    2381             :          << " limited.\n";
    2382             : }
    2383             : #endif
    2384             : 
    2385             : //===----------------------------------------------------------------------===//
    2386             : // GenericScheduler - Generic implementation of MachineSchedStrategy.
    2387             : //===----------------------------------------------------------------------===//
    2388             : 
    2389    12990172 : void GenericSchedulerBase::SchedCandidate::
    2390             : initResourceDelta(const ScheduleDAGMI *DAG,
    2391             :                   const TargetSchedModel *SchedModel) {
    2392    12990172 :   if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
    2393             :     return;
    2394             : 
    2395        8992 :   const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
    2396       15110 :   for (TargetSchedModel::ProcResIter
    2397       17984 :          PI = SchedModel->getWriteProcResBegin(SC),
    2398       17984 :          PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
    2399       15110 :     if (PI->ProcResourceIdx == Policy.ReduceResIdx)
    2400        2202 :       ResDelta.CritResources += PI->Cycles;
    2401       15110 :     if (PI->ProcResourceIdx == Policy.DemandResIdx)
    2402        2573 :       ResDelta.DemandedResources += PI->Cycles;
    2403             :   }
    2404             : }
    2405             : 
    2406             : /// Set the CandPolicy given a scheduling zone given the current resources and
    2407             : /// latencies inside and outside the zone.
    2408      498595 : void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
    2409             :                                      SchedBoundary &CurrZone,
    2410             :                                      SchedBoundary *OtherZone) {
    2411             :   // Apply preemptive heuristics based on the total latency and resources
    2412             :   // inside and outside this zone. Potential stalls should be considered before
    2413             :   // following this policy.
    2414             : 
    2415             :   // Compute remaining latency. We need this both to determine whether the
    2416             :   // overall schedule has become latency-limited and whether the instructions
    2417             :   // outside this zone are resource or latency limited.
    2418             :   //
    2419             :   // The "dependent" latency is updated incrementally during scheduling as the
    2420             :   // max height/depth of scheduled nodes minus the cycles since it was
    2421             :   // scheduled:
    2422             :   //   DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
    2423             :   //
    2424             :   // The "independent" latency is the max ready queue depth:
    2425             :   //   ILat = max N.depth for N in Available|Pending
    2426             :   //
    2427             :   // RemainingLatency is the greater of independent and dependent latency.
    2428      498595 :   unsigned RemLatency = CurrZone.getDependentLatency();
    2429      498595 :   RemLatency = std::max(RemLatency,
    2430     1994380 :                         CurrZone.findMaxLatency(CurrZone.Available.elements()));
    2431      498595 :   RemLatency = std::max(RemLatency,
    2432     1994380 :                         CurrZone.findMaxLatency(CurrZone.Pending.elements()));
    2433             : 
    2434             :   // Compute the critical resource outside the zone.
    2435      498595 :   unsigned OtherCritIdx = 0;
    2436             :   unsigned OtherCount =
    2437      498595 :     OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
    2438             : 
    2439      498595 :   bool OtherResLimited = false;
    2440      498595 :   if (SchedModel->hasInstrSchedModel()) {
    2441      326292 :     unsigned LFactor = SchedModel->getLatencyFactor();
    2442      326292 :     OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor;
    2443             :   }
    2444             :   // Schedule aggressively for latency in PostRA mode. We don't check for
    2445             :   // acyclic latency during PostRA, and highly out-of-order processors will
    2446             :   // skip PostRA scheduling.
    2447      326292 :   if (!OtherResLimited) {
    2448      335690 :     if (IsPostRA || (RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath)) {
    2449      145939 :       Policy.ReduceLatency |= true;
    2450             :       DEBUG(dbgs() << "  " << CurrZone.Available.getName()
    2451             :             << " RemainingLatency " << RemLatency << " + "
    2452             :             << CurrZone.getCurrCycle() << "c > CritPath "
    2453             :             << Rem.CriticalPath << "\n");
    2454             :     }
    2455             :   }
    2456             :   // If the same resource is limiting inside and outside the zone, do nothing.
    2457      498595 :   if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
    2458      491604 :     return;
    2459             : 
    2460             :   DEBUG(
    2461             :     if (CurrZone.isResourceLimited()) {
    2462             :       dbgs() << "  " << CurrZone.Available.getName() << " ResourceLimited: "
    2463             :              << SchedModel->getResourceName(CurrZone.getZoneCritResIdx())
    2464             :              << "\n";
    2465             :     }
    2466             :     if (OtherResLimited)
    2467             :       dbgs() << "  RemainingLimit: "
    2468             :              << SchedModel->getResourceName(OtherCritIdx) << "\n";
    2469             :     if (!CurrZone.isResourceLimited() && !OtherResLimited)
    2470             :       dbgs() << "  Latency limited both directions.\n");
    2471             : 
    2472        6991 :   if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
    2473         666 :     Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
    2474             : 
    2475        6991 :   if (OtherResLimited)
    2476        1173 :     Policy.DemandResIdx = OtherCritIdx;
    2477             : }
    2478             : 
    2479             : #ifndef NDEBUG
    2480             : const char *GenericSchedulerBase::getReasonStr(
    2481             :   GenericSchedulerBase::CandReason Reason) {
    2482             :   switch (Reason) {
    2483             :   case NoCand:         return "NOCAND    ";
    2484             :   case Only1:          return "ONLY1     ";
    2485             :   case PhysRegCopy:    return "PREG-COPY ";
    2486             :   case RegExcess:      return "REG-EXCESS";
    2487             :   case RegCritical:    return "REG-CRIT  ";
    2488             :   case Stall:          return "STALL     ";
    2489             :   case Cluster:        return "CLUSTER   ";
    2490             :   case Weak:           return "WEAK      ";
    2491             :   case RegMax:         return "REG-MAX   ";
    2492             :   case ResourceReduce: return "RES-REDUCE";
    2493             :   case ResourceDemand: return "RES-DEMAND";
    2494             :   case TopDepthReduce: return "TOP-DEPTH ";
    2495             :   case TopPathReduce:  return "TOP-PATH  ";
    2496             :   case BotHeightReduce:return "BOT-HEIGHT";
    2497             :   case BotPathReduce:  return "BOT-PATH  ";
    2498             :   case NextDefUse:     return "DEF-USE   ";
    2499             :   case NodeOrder:      return "ORDER     ";
    2500             :   };
    2501             :   llvm_unreachable("Unknown reason!");
    2502             : }
    2503             : 
    2504             : void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
    2505             :   PressureChange P;
    2506             :   unsigned ResIdx = 0;
    2507             :   unsigned Latency = 0;
    2508             :   switch (Cand.Reason) {
    2509             :   default:
    2510             :     break;
    2511             :   case RegExcess:
    2512             :     P = Cand.RPDelta.Excess;
    2513             :     break;
    2514             :   case RegCritical:
    2515             :     P = Cand.RPDelta.CriticalMax;
    2516             :     break;
    2517             :   case RegMax:
    2518             :     P = Cand.RPDelta.CurrentMax;
    2519             :     break;
    2520             :   case ResourceReduce:
    2521             :     ResIdx = Cand.Policy.ReduceResIdx;
    2522             :     break;
    2523             :   case ResourceDemand:
    2524             :     ResIdx = Cand.Policy.DemandResIdx;
    2525             :     break;
    2526             :   case TopDepthReduce:
    2527             :     Latency = Cand.SU->getDepth();
    2528             :     break;
    2529             :   case TopPathReduce:
    2530             :     Latency = Cand.SU->getHeight();
    2531             :     break;
    2532             :   case BotHeightReduce:
    2533             :     Latency = Cand.SU->getHeight();
    2534             :     break;
    2535             :   case BotPathReduce:
    2536             :     Latency = Cand.SU->getDepth();
    2537             :     break;
    2538             :   }
    2539             :   dbgs() << "  Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
    2540             :   if (P.isValid())
    2541             :     dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
    2542             :            << ":" << P.getUnitInc() << " ";
    2543             :   else
    2544             :     dbgs() << "      ";
    2545             :   if (ResIdx)
    2546             :     dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
    2547             :   else
    2548             :     dbgs() << "         ";
    2549             :   if (Latency)
    2550             :     dbgs() << " " << Latency << " cycles ";
    2551             :   else
    2552             :     dbgs() << "          ";
    2553             :   dbgs() << '\n';
    2554             : }
    2555             : #endif
    2556             : 
    2557             : /// Return true if this heuristic determines order.
    2558             : static bool tryLess(int TryVal, int CandVal,
    2559             :                     GenericSchedulerBase::SchedCandidate &TryCand,
    2560             :                     GenericSchedulerBase::SchedCandidate &Cand,
    2561             :                     GenericSchedulerBase::CandReason Reason) {
    2562    55682772 :   if (TryVal < CandVal) {
    2563       58665 :     TryCand.Reason = Reason;
    2564             :     return true;
    2565             :   }
    2566    55624107 :   if (TryVal > CandVal) {
    2567      255768 :     if (Cand.Reason > Reason)
    2568       72321 :       Cand.Reason = Reason;
    2569             :     return true;
    2570             :   }
    2571             :   return false;
    2572             : }
    2573             : 
    2574             : static bool tryGreater(int TryVal, int CandVal,
    2575             :                        GenericSchedulerBase::SchedCandidate &TryCand,
    2576             :                        GenericSchedulerBase::SchedCandidate &Cand,
    2577             :                        GenericSchedulerBase::CandReason Reason) {
    2578    58333607 :   if (TryVal > CandVal) {
    2579      271425 :     TryCand.Reason = Reason;
    2580             :     return true;
    2581             :   }
    2582    58062182 :   if (TryVal < CandVal) {
    2583      806325 :     if (Cand.Reason > Reason)
    2584      291750 :       Cand.Reason = Reason;
    2585             :     return true;
    2586             :   }
    2587             :   return false;
    2588             : }
    2589             : 
    2590      762100 : static bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
    2591             :                        GenericSchedulerBase::SchedCandidate &Cand,
    2592             :                        SchedBoundary &Zone) {
    2593      762100 :   if (Zone.isTop()) {
    2594      525843 :     if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
    2595        5264 :       if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
    2596             :                   TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
    2597             :         return true;
    2598             :     }
    2599      559214 :     if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
    2600             :                    TryCand, Cand, GenericSchedulerBase::TopPathReduce))
    2601             :       return true;
    2602             :   } else {
    2603     1760457 :     if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
    2604       28392 :       if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
    2605             :                   TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
    2606             :         return true;
    2607             :     }
    2608     1822207 :     if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
    2609             :                    TryCand, Cand, GenericSchedulerBase::BotPathReduce))
    2610             :       return true;
    2611             :   }
    2612             :   return false;
    2613             : }
    2614             : 
    2615             : static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
    2616             :   DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
    2617             :         << GenericSchedulerBase::getReasonStr(Reason) << '\n');
    2618             : }
    2619             : 
    2620             : static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
    2621     1058629 :   tracePick(Cand.Reason, Cand.AtTop);
    2622             : }
    2623             : 
    2624      343499 : void GenericScheduler::initialize(ScheduleDAGMI *dag) {
    2625             :   assert(dag->hasVRegLiveness() &&
    2626             :          "(PreRA)GenericScheduler needs vreg liveness");
    2627      343499 :   DAG = static_cast<ScheduleDAGMILive*>(dag);
    2628      686998 :   SchedModel = DAG->getSchedModel();
    2629      343499 :   TRI = DAG->TRI;
    2630             : 
    2631      343499 :   Rem.init(DAG, SchedModel);
    2632      343499 :   Top.init(DAG, SchedModel, &Rem);
    2633      343499 :   Bot.init(DAG, SchedModel, &Rem);
    2634             : 
    2635             :   // Initialize resource counts.
    2636             : 
    2637             :   // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
    2638             :   // are disabled, then these HazardRecs will be disabled.
    2639      686998 :   const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
    2640      343499 :   if (!Top.HazardRec) {
    2641       97633 :     Top.HazardRec =
    2642      195266 :         DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
    2643       97633 :             Itin, DAG);
    2644             :   }
    2645      343499 :   if (!Bot.HazardRec) {
    2646       97633 :     Bot.HazardRec =
    2647      195266 :         DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
    2648       97633 :             Itin, DAG);
    2649             :   }
    2650      343499 :   TopCand.SU = nullptr;
    2651      343499 :   BotCand.SU = nullptr;
    2652      343499 : }
    2653             : 
    2654             : /// Initialize the per-region scheduling policy.
    2655     1033169 : void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
    2656             :                                   MachineBasicBlock::iterator End,
    2657             :                                   unsigned NumRegionInstrs) {
    2658     1033169 :   const MachineFunction &MF = *Begin->getParent()->getParent();
    2659     1033169 :   const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
    2660             : 
    2661             :   // Avoid setting up the register pressure tracker for small regions to save
    2662             :   // compile time. As a rough heuristic, only track pressure when the number of
    2663             :   // schedulable instructions exceeds half the integer register file.
    2664     1033169 :   RegionPolicy.ShouldTrackPressure = true;
    2665     4132676 :   for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
    2666     3099507 :     MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
    2667     6317169 :     if (TLI->isTypeLegal(LegalIntVT)) {
    2668    11925408 :       unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
    2669     5962704 :         TLI->getRegClassFor(LegalIntVT));
    2670     2981352 :       RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
    2671             :     }
    2672             :   }
    2673             : 
    2674             :   // For generic targets, we default to bottom-up, because it's simpler and more
    2675             :   // compile-time optimizations have been implemented in that direction.
    2676     1033169 :   RegionPolicy.OnlyBottomUp = true;
    2677             : 
    2678             :   // Allow the subtarget to override default policy.
    2679     1033169 :   MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
    2680             : 
    2681             :   // After subtarget overrides, apply command line options.
    2682     1033169 :   if (!EnableRegPressure)
    2683           0 :     RegionPolicy.ShouldTrackPressure = false;
    2684             : 
    2685             :   // Check -misched-topdown/bottomup can force or unforce scheduling direction.
    2686             :   // e.g. -misched-bottomup=false allows scheduling in both directions.
    2687             :   assert((!ForceTopDown || !ForceBottomUp) &&
    2688             :          "-misched-topdown incompatible with -misched-bottomup");
    2689     1033169 :   if (ForceBottomUp.getNumOccurrences() > 0) {
    2690           0 :     RegionPolicy.OnlyBottomUp = ForceBottomUp;
    2691           0 :     if (RegionPolicy.OnlyBottomUp)
    2692           0 :       RegionPolicy.OnlyTopDown = false;
    2693             :   }
    2694     1033169 :   if (ForceTopDown.getNumOccurrences() > 0) {
    2695           4 :     RegionPolicy.OnlyTopDown = ForceTopDown;
    2696           4 :     if (RegionPolicy.OnlyTopDown)
    2697           4 :       RegionPolicy.OnlyBottomUp = false;
    2698             :   }
    2699     1033169 : }
    2700             : 
    2701           0 : void GenericScheduler::dumpPolicy() const {
    2702             :   // Cannot completely remove virtual function even in release mode.
    2703             : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    2704             :   dbgs() << "GenericScheduler RegionPolicy: "
    2705             :          << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
    2706             :          << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
    2707             :          << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
    2708             :          << "\n";
    2709             : #endif
    2710           0 : }
    2711             : 
    2712             : /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
    2713             : /// critical path by more cycles than it takes to drain the instruction buffer.
    2714             : /// We estimate an upper bounds on in-flight instructions as:
    2715             : ///
    2716             : /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
    2717             : /// InFlightIterations = AcyclicPath / CyclesPerIteration
    2718             : /// InFlightResources = InFlightIterations * LoopResources
    2719             : ///
    2720             : /// TODO: Check execution resources in addition to IssueCount.
    2721      320484 : void GenericScheduler::checkAcyclicLatency() {
    2722      320484 :   if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
    2723             :     return;
    2724             : 
    2725             :   // Scaled number of cycles per loop iteration.
    2726             :   unsigned IterCount =
    2727         666 :     std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
    2728         666 :              Rem.RemIssueCount);
    2729             :   // Scaled acyclic critical path.
    2730         333 :   unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
    2731             :   // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
    2732         333 :   unsigned InFlightCount =
    2733         333 :     (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
    2734             :   unsigned BufferLimit =
    2735         333 :     SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
    2736             : 
    2737         333 :   Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
    2738             : 
    2739             :   DEBUG(dbgs() << "IssueCycles="
    2740             :         << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
    2741             :         << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
    2742             :         << "c NumIters=" << (AcyclicCount + IterCount-1) / IterCount
    2743             :         << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
    2744             :         << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
    2745             :         if (Rem.IsAcyclicLatencyLimited)
    2746             :           dbgs() << "  ACYCLIC LATENCY LIMIT\n");
    2747             : }
    2748             : 
    2749      343499 : void GenericScheduler::registerRoots() {
    2750      686998 :   Rem.CriticalPath = DAG->ExitSU.getDepth();
    2751             : 
    2752             :   // Some roots may not feed into ExitSU. Check all of them in case.
    2753     2133054 :   for (const SUnit *SU : Bot.Available) {
    2754      759058 :     if (SU->getDepth() > Rem.CriticalPath)
    2755       46017 :       Rem.CriticalPath = SU->getDepth();
    2756             :   }
    2757             :   DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
    2758      343499 :   if (DumpCriticalPathLength) {
    2759           0 :     errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
    2760             :   }
    2761             : 
    2762      343499 :   if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
    2763      320484 :     Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
    2764      320484 :     checkAcyclicLatency();
    2765             :   }
    2766      343499 : }
    2767             : 
    2768    27989579 : static bool tryPressure(const PressureChange &TryP,
    2769             :                         const PressureChange &CandP,
    2770             :                         GenericSchedulerBase::SchedCandidate &TryCand,
    2771             :                         GenericSchedulerBase::SchedCandidate &Cand,
    2772             :                         GenericSchedulerBase::CandReason Reason,
    2773             :                         const TargetRegisterInfo *TRI,
    2774             :                         const MachineFunction &MF) {
    2775             :   // If one candidate decreases and the other increases, go with it.
    2776             :   // Invalid candidates have UnitInc==0.
    2777    83977080 :   if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
    2778             :                  Reason)) {
    2779             :     return true;
    2780             :   }
    2781             :   // Do not compare the magnitude of pressure changes between top and bottom
    2782             :   // boundary.
    2783    27970272 :   if (Cand.AtTop != TryCand.AtTop)
    2784             :     return false;
    2785             : 
    2786             :   // If both candidates affect the same set in the same boundary, go with the
    2787             :   // smallest increase.
    2788    55380234 :   unsigned TryPSet = TryP.getPSetOrMax();
    2789    55380234 :   unsigned CandPSet = CandP.getPSetOrMax();
    2790    27690117 :   if (TryPSet == CandPSet) {
    2791    27410081 :     return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
    2792             :                    Reason);
    2793             :   }
    2794             : 
    2795      280036 :   int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
    2796      280036 :                                  std::numeric_limits<int>::max();
    2797             : 
    2798      280036 :   int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
    2799      280036 :                                    std::numeric_limits<int>::max();
    2800             : 
    2801             :   // If the candidates are decreasing pressure, reverse priority.
    2802      280036 :   if (TryP.getUnitInc() < 0)
    2803             :     std::swap(TryRank, CandRank);
    2804      280036 :   return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
    2805             : }
    2806             : 
    2807             : static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
    2808    18931342 :   return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
    2809             : }
    2810             : 
    2811             : /// Minimize physical register live ranges. Regalloc wants them adjacent to
    2812             : /// their physreg def/use.
    2813             : ///
    2814             : /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
    2815             : /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
    2816             : /// with the operation that produces or consumes the physreg. We'll do this when
    2817             : /// regalloc has support for parallel copies.
    2818    20866552 : static int biasPhysRegCopy(const SUnit *SU, bool isTop) {
    2819    20866552 :   const MachineInstr *MI = SU->getInstr();
    2820    20866552 :   if (!MI->isCopy())
    2821             :     return 0;
    2822             : 
    2823     2423086 :   unsigned ScheduledOper = isTop ? 1 : 0;
    2824     2423086 :   unsigned UnscheduledOper = isTop ? 0 : 1;
    2825             :   // If we have already scheduled the physreg produce/consumer, immediately
    2826             :   // schedule the copy.
    2827     4846172 :   if (TargetRegisterInfo::isPhysicalRegister(
    2828     4846172 :         MI->getOperand(ScheduledOper).getReg()))
    2829             :     return 1;
    2830             :   // If the physreg is at the boundary, defer it. Otherwise schedule it
    2831             :   // immediately to free the dependent. We can hoist the copy later.
    2832     1589323 :   bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
    2833     3178646 :   if (TargetRegisterInfo::isPhysicalRegister(
    2834     3178646 :         MI->getOperand(UnscheduledOper).getReg()))
    2835      484930 :     return AtBoundary ? -1 : 1;
    2836             :   return 0;
    2837             : }
    2838             : 
    2839     8806327 : void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
    2840             :                                      bool AtTop,
    2841             :                                      const RegPressureTracker &RPTracker,
    2842             :                                      RegPressureTracker &TempTracker) {
    2843     8806327 :   Cand.SU = SU;
    2844     8806327 :   Cand.AtTop = AtTop;
    2845     8806327 :   if (DAG->isTrackingPressure()) {
    2846     7743689 :     if (AtTop) {
    2847      257915 :       TempTracker.getMaxDownwardPressureDelta(
    2848       51583 :         Cand.SU->getInstr(),
    2849             :         Cand.RPDelta,
    2850       51583 :         DAG->getRegionCriticalPSets(),
    2851       51583 :         DAG->getRegPressure().MaxSetPressure);
    2852             :     } else {
    2853     7692106 :       if (VerifyScheduling) {
    2854          95 :         TempTracker.getMaxUpwardPressureDelta(
    2855          19 :           Cand.SU->getInstr(),
    2856          38 :           &DAG->getPressureDiff(Cand.SU),
    2857             :           Cand.RPDelta,
    2858          19 :           DAG->getRegionCriticalPSets(),
    2859          19 :           DAG->getRegPressure().MaxSetPressure);
    2860             :       } else {
    2861    46152522 :         RPTracker.getUpwardPressureDelta(
    2862     7692087 :           Cand.SU->getInstr(),
    2863             :           DAG->getPressureDiff(Cand.SU),
    2864             :           Cand.RPDelta,
    2865     7692087 :           DAG->getRegionCriticalPSets(),
    2866     7692087 :           DAG->getRegPressure().MaxSetPressure);
    2867             :       }
    2868             :     }
    2869             :   }
    2870             :   DEBUG(if (Cand.RPDelta.Excess.isValid())
    2871             :           dbgs() << "  Try  SU(" << Cand.SU->NodeNum << ") "
    2872             :                  << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet())
    2873             :                  << ":" << Cand.RPDelta.Excess.getUnitInc() << "\n");
    2874     8806327 : }
    2875             : 
    2876             : /// Apply a set of heursitics to a new candidate. Heuristics are currently
    2877             : /// hierarchical. This may be more efficient than a graduated cost model because
    2878             : /// we don't need to evaluate all aspects of the model for each node in the
    2879             : /// queue. But it's really done to make the heuristics easier to debug and
    2880             : /// statistically analyze.
    2881             : ///
    2882             : /// \param Cand provides the policy and current best candidate.
    2883             : /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
    2884             : /// \param Zone describes the scheduled zone that we are extending, or nullptr
    2885             : //              if Cand is from a different zone than TryCand.
    2886    11755979 : void GenericScheduler::tryCandidate(SchedCandidate &Cand,
    2887             :                                     SchedCandidate &TryCand,
    2888             :                                     SchedBoundary *Zone) {
    2889             :   // Initialize the candidate if needed.
    2890    11755979 :   if (!Cand.isValid()) {
    2891     1322703 :     TryCand.Reason = NodeOrder;
    2892     1322703 :     return;
    2893             :   }
    2894             : 
    2895    10745193 :   if (tryGreater(biasPhysRegCopy(TryCand.SU, TryCand.AtTop),
    2896    10433276 :                  biasPhysRegCopy(Cand.SU, Cand.AtTop),
    2897             :                  TryCand, Cand, PhysRegCopy))
    2898             :     return;
    2899             : 
    2900             :   // Avoid exceeding the target's limit.
    2901    19511093 :   if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
    2902             :                                                Cand.RPDelta.Excess,
    2903             :                                                TryCand, Cand, RegExcess, TRI,
    2904     9535147 :                                                DAG->MF))
    2905             :     return;
    2906             : 
    2907             :   // Avoid increasing the max critical pressure in the scheduled region.
    2908    19365415 :   if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
    2909             :                                                Cand.RPDelta.CriticalMax,
    2910             :                                                TryCand, Cand, RegCritical, TRI,
    2911     9462308 :                                                DAG->MF))
    2912             :     return;
    2913             : 
    2914             :   // We only compare a subset of features when comparing nodes between
    2915             :   // Top and Bottom boundary. Some properties are simply incomparable, in many
    2916             :   // other instances we should only override the other boundary if something
    2917             :   // is a clear good pick on one boundary. Skip heuristics that are more
    2918             :   // "tie-breaking" in nature.
    2919     9625535 :   bool SameBoundary = Zone != nullptr;
    2920     9625535 :   if (SameBoundary) {
    2921             :     // For loops that are acyclic path limited, aggressively schedule for
    2922             :     // latency. Within an single cycle, whenever CurrMOps > 0, allow normal
    2923             :     // heuristics to take precedence.
    2924     9527704 :     if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
    2925         889 :         tryLatency(TryCand, Cand, *Zone))
    2926             :       return;
    2927             : 
    2928             :     // Prioritize instructions that read unbuffered resources by stall cycles.
    2929     9540232 :     if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
    2930     9526128 :                 Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
    2931             :       return;
    2932             :   }
    2933             : 
    2934             :   // Keep clustered nodes together to encourage downstream peephole
    2935             :   // optimizations which may reduce resource requirements.
    2936             :   //
    2937             :   // This is a best effort to set things up for a post-RA pass. Optimizations
    2938             :   // like generating loads of multiple registers should ideally be done within
    2939             :   // the scheduler pass by combining the loads during DAG postprocessing.
    2940             :   const SUnit *CandNextClusterSU =
    2941     9601430 :     Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
    2942             :   const SUnit *TryCandNextClusterSU =
    2943     9601430 :     TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
    2944     9617942 :   if (tryGreater(TryCand.SU == TryCandNextClusterSU,
    2945     9601430 :                  Cand.SU == CandNextClusterSU,
    2946             :                  TryCand, Cand, Cluster))
    2947             :     return;
    2948             : 
    2949     9564047 :   if (SameBoundary) {
    2950             :     // Weak edges are for clustering and other constraints.
    2951    19021272 :     if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
    2952    18931342 :                 getWeakLeft(Cand.SU, Cand.AtTop),
    2953             :                 TryCand, Cand, Weak))
    2954             :       return;
    2955             :   }
    2956             : 
    2957             :   // Avoid increasing the max pressure of the entire region.
    2958    18423441 :   if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
    2959             :                                                Cand.RPDelta.CurrentMax,
    2960             :                                                TryCand, Cand, RegMax, TRI,
    2961     8992124 :                                                DAG->MF))
    2962             :     return;
    2963             : 
    2964     9327575 :   if (SameBoundary) {
    2965             :     // Avoid critical resource consumption and balance the schedule.
    2966     9229199 :     TryCand.initResourceDelta(DAG, SchedModel);
    2967     9229245 :     if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
    2968             :                 TryCand, Cand, ResourceReduce))
    2969             :       return;
    2970     9229165 :     if (tryGreater(TryCand.ResDelta.DemandedResources,
    2971     9229131 :                    Cand.ResDelta.DemandedResources,
    2972             :                    TryCand, Cand, ResourceDemand))
    2973             :       return;
    2974             : 
    2975             :     // Avoid serializing long latency dependence chains.
    2976             :     // For acyclic path limited loops, latency was already checked above.
    2977    19197137 :     if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
    2978    10710348 :         !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
    2979             :       return;
    2980             : 
    2981             :     // Fall through to original instruction order.
    2982     9169094 :     if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
    2983    17814931 :         || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
    2984     2103761 :       TryCand.Reason = NodeOrder;
    2985             :     }
    2986             :   }
    2987             : }
    2988             : 
    2989             : /// Pick the best candidate from the queue.
    2990             : ///
    2991             : /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
    2992             : /// DAG building. To adjust for the current scheduling location we need to
    2993             : /// maintain the number of vreg uses remaining to be top-scheduled.
    2994     1064040 : void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
    2995             :                                          const CandPolicy &ZonePolicy,
    2996             :                                          const RegPressureTracker &RPTracker,
    2997             :                                          SchedCandidate &Cand) {
    2998             :   // getMaxPressureDelta temporarily modifies the tracker.
    2999     1064040 :   RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
    3000             : 
    3001     1064040 :   ReadyQueue &Q = Zone.Available;
    3002    13062487 :   for (SUnit *SU : Q) {
    3003             : 
    3004     8806327 :     SchedCandidate TryCand(ZonePolicy);
    3005     8806327 :     initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
    3006             :     // Pass SchedBoundary only when comparing nodes from the same boundary.
    3007     8806327 :     SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
    3008     8806327 :     tryCandidate(Cand, TryCand, ZoneArg);
    3009     8806327 :     if (TryCand.Reason != NoCand) {
    3010             :       // Initialize resource delta if needed in case future heuristics query it.
    3011     5505902 :       if (TryCand.ResDelta == SchedResourceDelta())
    3012     2752032 :         TryCand.initResourceDelta(DAG, SchedModel);
    3013             :       Cand.setBest(TryCand);
    3014             :       DEBUG(traceCandidate(Cand));
    3015             :     }
    3016             :   }
    3017     1064040 : }
    3018             : 
    3019             : /// Pick the best candidate node from either the top or bottom queue.
    3020      100710 : SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
    3021             :   // Schedule as far as possible in the direction of no choice. This is most
    3022             :   // efficient, but also provides the best heuristics for CriticalPSets.
    3023      100710 :   if (SUnit *SU = Bot.pickOnlyChoice()) {
    3024       62950 :     IsTopNode = false;
    3025       62950 :     tracePick(Only1, false);
    3026       62950 :     return SU;
    3027             :   }
    3028       37760 :   if (SUnit *SU = Top.pickOnlyChoice()) {
    3029        2240 :     IsTopNode = true;
    3030        2240 :     tracePick(Only1, true);
    3031        2240 :     return SU;
    3032             :   }
    3033             :   // Set the bottom-up policy based on the state of the current bottom zone and
    3034             :   // the instructions outside the zone, including the top zone.
    3035       35520 :   CandPolicy BotPolicy;
    3036       35520 :   setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
    3037             :   // Set the top-down policy based on the state of the current top zone and
    3038             :   // the instructions outside the zone, including the bottom zone.
    3039       35520 :   CandPolicy TopPolicy;
    3040       35520 :   setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
    3041             : 
    3042             :   // See if BotCand is still valid (because we previously scheduled from Top).
    3043             :   DEBUG(dbgs() << "Picking from Bot:\n");
    3044       35520 :   if (!BotCand.isValid() || BotCand.SU->isScheduled ||
    3045        9113 :       BotCand.Policy != BotPolicy) {
    3046       53932 :     BotCand.reset(CandPolicy());
    3047       53932 :     pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
    3048             :     assert(BotCand.Reason != NoCand && "failed to find the first candidate");
    3049             :   } else {
    3050             :     DEBUG(traceCandidate(BotCand));
    3051             : #ifndef NDEBUG
    3052             :     if (VerifyScheduling) {
    3053             :       SchedCandidate TCand;
    3054             :       TCand.reset(CandPolicy());
    3055             :       pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
    3056             :       assert(TCand.SU == BotCand.SU &&
    3057             :              "Last pick result should correspond to re-picking right now");
    3058             :     }
    3059             : #endif
    3060             :   }
    3061             : 
    3062             :   // Check if the top Q has a better candidate.
    3063             :   DEBUG(dbgs() << "Picking from Top:\n");
    3064       35520 :   if (!TopCand.isValid() || TopCand.SU->isScheduled ||
    3065       14089 :       TopCand.Policy != TopPolicy) {
    3066       44408 :     TopCand.reset(CandPolicy());
    3067       44408 :     pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
    3068             :     assert(TopCand.Reason != NoCand && "failed to find the first candidate");
    3069             :   } else {
    3070             :     DEBUG(traceCandidate(TopCand));
    3071             : #ifndef NDEBUG
    3072             :     if (VerifyScheduling) {
    3073             :       SchedCandidate TCand;
    3074             :       TCand.reset(CandPolicy());
    3075             :       pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
    3076             :       assert(TCand.SU == TopCand.SU &&
    3077             :            "Last pick result should correspond to re-picking right now");
    3078             :     }
    3079             : #endif
    3080             :   }
    3081             : 
    3082             :   // Pick best from BotCand and TopCand.
    3083             :   assert(BotCand.isValid());
    3084             :   assert(TopCand.isValid());
    3085       35520 :   SchedCandidate Cand = BotCand;
    3086       35520 :   TopCand.Reason = NoCand;
    3087       35520 :   tryCandidate(Cand, TopCand, nullptr);
    3088       35520 :   if (TopCand.Reason != NoCand) {
    3089       16097 :     Cand.setBest(TopCand);
    3090             :     DEBUG(traceCandidate(Cand));
    3091             :   }
    3092             : 
    3093       35520 :   IsTopNode = Cand.AtTop;
    3094       35520 :   tracePick(Cand);
    3095       35520 :   return Cand.SU;
    3096             : }
    3097             : 
    3098             : /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
    3099     2102767 : SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
    3100     4205534 :   if (DAG->top() == DAG->bottom()) {
    3101             :     assert(Top.Available.empty() && Top.Pending.empty() &&
    3102             :            Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    3103             :     return nullptr;
    3104             :   }
    3105             :   SUnit *SU;
    3106             :   do {
    3107     1775254 :     if (RegionPolicy.OnlyTopDown) {
    3108          54 :       SU = Top.pickOnlyChoice();
    3109          54 :       if (!SU) {
    3110          46 :         CandPolicy NoPolicy;
    3111          92 :         TopCand.reset(NoPolicy);
    3112          92 :         pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
    3113             :         assert(TopCand.Reason != NoCand && "failed to find a candidate");
    3114          46 :         tracePick(TopCand);
    3115          46 :         SU = TopCand.SU;
    3116             :       }
    3117          54 :       IsTopNode = true;
    3118     1775200 :     } else if (RegionPolicy.OnlyBottomUp) {
    3119     1674490 :       SU = Bot.pickOnlyChoice();
    3120     1674490 :       if (!SU) {
    3121     1014824 :         CandPolicy NoPolicy;
    3122     2029648 :         BotCand.reset(NoPolicy);
    3123     2029648 :         pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
    3124             :         assert(BotCand.Reason != NoCand && "failed to find a candidate");
    3125     1014824 :         tracePick(BotCand);
    3126     1014824 :         SU = BotCand.SU;
    3127             :       }
    3128     1674490 :       IsTopNode = false;
    3129             :     } else {
    3130      100710 :       SU = pickNodeBidirectional(IsTopNode);
    3131             :     }
    3132     1775254 :   } while (SU->isScheduled);
    3133             : 
    3134     1775254 :   if (SU->isTopReady())
    3135      878553 :     Top.removeReady(SU);
    3136     1775254 :   if (SU->isBottomReady())
    3137     1766586 :     Bot.removeReady(SU);
    3138             : 
    3139             :   DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
    3140             :   return SU;
    3141             : }
    3142             : 
    3143      143118 : void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
    3144      286236 :   MachineBasicBlock::iterator InsertPos = SU->getInstr();
    3145      143118 :   if (!isTop)
    3146             :     ++InsertPos;
    3147      143118 :   SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
    3148             : 
    3149             :   // Find already scheduled copies with a single physreg dependence and move
    3150             :   // them just above the scheduled instruction.
    3151      824472 :   for (SDep &Dep : Deps) {
    3152      520029 :     if (Dep.getKind() != SDep::Data || !TRI->isPhysicalRegister(Dep.getReg()))
    3153      284300 :       continue;
    3154      110818 :     SUnit *DepSU = Dep.getSUnit();
    3155      221636 :     if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
    3156      105075 :       continue;
    3157        5743 :     MachineInstr *Copy = DepSU->getInstr();
    3158        5743 :     if (!Copy->isCopy())
    3159        3003 :       continue;
    3160             :     DEBUG(dbgs() << "  Rescheduling physreg copy ";
    3161             :           Dep.getSUnit()->dump(DAG));
    3162        2740 :     DAG->moveInstruction(Copy, InsertPos);
    3163             :   }
    3164      143118 : }
    3165             : 
    3166             : /// Update the scheduler's state after scheduling a node. This is the same node
    3167             : /// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
    3168             : /// update it's state based on the current cycle before MachineSchedStrategy
    3169             : /// does.
    3170             : ///
    3171             : /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
    3172             : /// them here. See comments in biasPhysRegCopy.
    3173     2042133 : void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
    3174     2042133 :   if (IsTopNode) {
    3175      131558 :     SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
    3176       65779 :     Top.bumpNode(SU);
    3177       65779 :     if (SU->hasPhysRegUses)
    3178       38560 :       reschedulePhysRegCopies(SU, true);
    3179             :   } else {
    3180     3952708 :     SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
    3181     1976354 :     Bot.bumpNode(SU);
    3182     1976354 :     if (SU->hasPhysRegDefs)
    3183      104558 :       reschedulePhysRegCopies(SU, false);
    3184             :   }
    3185     2042133 : }
    3186             : 
    3187             : /// Create the standard converging machine scheduler. This will be used as the
    3188             : /// default scheduler if the target does not set a default.
    3189       85584 : ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
    3190             :   ScheduleDAGMILive *DAG =
    3191      427920 :       new ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C));
    3192             :   // Register DAG post-processors.
    3193             :   //
    3194             :   // FIXME: extend the mutation API to allow earlier mutations to instantiate
    3195             :   // data and pass it to later mutations. Have a single mutation that gathers
    3196             :   // the interesting nodes in one pass.
    3197      256752 :   DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
    3198       85584 :   return DAG;
    3199             : }
    3200             : 
    3201           0 : static ScheduleDAGInstrs *createConveringSched(MachineSchedContext *C) {
    3202           0 :   return createGenericSchedLive(C);
    3203             : }
    3204             : 
    3205             : static MachineSchedRegistry
    3206       72306 : GenericSchedRegistry("converge", "Standard converging scheduler.",
    3207       72306 :                      createConveringSched);
    3208             : 
    3209             : //===----------------------------------------------------------------------===//
    3210             : // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
    3211             : //===----------------------------------------------------------------------===//
    3212             : 
    3213        6062 : void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
    3214        6062 :   DAG = Dag;
    3215       12124 :   SchedModel = DAG->getSchedModel();
    3216        6062 :   TRI = DAG->TRI;
    3217             : 
    3218        6062 :   Rem.init(DAG, SchedModel);
    3219        6062 :   Top.init(DAG, SchedModel, &Rem);
    3220       12124 :   BotRoots.clear();
    3221             : 
    3222             :   // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
    3223             :   // or are disabled, then these HazardRecs will be disabled.
    3224       12124 :   const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
    3225        6062 :   if (!Top.HazardRec) {
    3226        5114 :     Top.HazardRec =
    3227       10228 :         DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
    3228        5114 :             Itin, DAG);
    3229             :   }
    3230        6062 : }
    3231             : 
    3232        6062 : void PostGenericScheduler::registerRoots() {
    3233       12124 :   Rem.CriticalPath = DAG->ExitSU.getDepth();
    3234             : 
    3235             :   // Some roots may not feed into ExitSU. Check all of them in case.
    3236       26198 :   for (const SUnit *SU : BotRoots) {
    3237        8012 :     if (SU->getDepth() > Rem.CriticalPath)
    3238         986 :       Rem.CriticalPath = SU->getDepth();
    3239             :   }
    3240             :   DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
    3241        6062 :   if (DumpCriticalPathLength) {
    3242           0 :     errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
    3243             :   }
    3244        6062 : }
    3245             : 
    3246             : /// Apply a set of heursitics to a new candidate for PostRA scheduling.
    3247             : ///
    3248             : /// \param Cand provides the policy and current best candidate.
    3249             : /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
    3250       29041 : void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
    3251             :                                         SchedCandidate &TryCand) {
    3252             :   // Initialize the candidate if needed.
    3253       29041 :   if (!Cand.isValid()) {
    3254        8239 :     TryCand.Reason = NodeOrder;
    3255        8239 :     return;
    3256             :   }
    3257             : 
    3258             :   // Prioritize instructions that read unbuffered resources by stall cycles.
    3259       20827 :   if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
    3260       20802 :               Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
    3261             :     return;
    3262             : 
    3263             :   // Keep clustered nodes together.
    3264       20880 :   if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
    3265       20777 :                  Cand.SU == DAG->getNextClusterSucc(),
    3266             :                  TryCand, Cand, Cluster))
    3267             :     return;
    3268             : 
    3269             :   // Avoid critical resource consumption and balance the schedule.
    3270       20695 :   if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
    3271             :               TryCand, Cand, ResourceReduce))
    3272             :     return;
    3273       20625 :   if (tryGreater(TryCand.ResDelta.DemandedResources,
    3274       20625 :                  Cand.ResDelta.DemandedResources,
    3275             :                  TryCand, Cand, ResourceDemand))
    3276             :     return;
    3277             : 
    3278             :   // Avoid serializing long latency dependence chains.
    3279       20625 :   if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
    3280             :     return;
    3281             :   }
    3282             : 
    3283             :   // Fall through to original instruction order.
    3284       12539 :   if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
    3285        4693 :     TryCand.Reason = NodeOrder;
    3286             : }
    3287             : 
    3288        8239 : void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
    3289        8239 :   ReadyQueue &Q = Top.Available;
    3290       61997 :   for (SUnit *SU : Q) {
    3291       58082 :     SchedCandidate TryCand(Cand.Policy);
    3292       29041 :     TryCand.SU = SU;
    3293       29041 :     TryCand.AtTop = true;
    3294       29041 :     TryCand.initResourceDelta(DAG, SchedModel);
    3295       29041 :     tryCandidate(Cand, TryCand);
    3296       29041 :     if (TryCand.Reason != NoCand) {
    3297             :       Cand.setBest(TryCand);
    3298             :       DEBUG(traceCandidate(Cand));
    3299             :     }
    3300             :   }
    3301        8239 : }
    3302             : 
    3303             : /// Pick the next node to schedule.
    3304       31485 : SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
    3305       62970 :   if (DAG->top() == DAG->bottom()) {
    3306             :     assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
    3307             :     return nullptr;
    3308             :   }
    3309             :   SUnit *SU;
    3310             :   do {
    3311       25423 :     SU = Top.pickOnlyChoice();
    3312       25423 :     if (SU) {
    3313             :       tracePick(Only1, true);
    3314             :     } else {
    3315        8239 :       CandPolicy NoPolicy;
    3316        8239 :       SchedCandidate TopCand(NoPolicy);
    3317             :       // Set the top-down policy based on the state of the current top zone and
    3318             :       // the instructions outside the zone, including the bottom zone.
    3319        8239 :       setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
    3320        8239 :       pickNodeFromQueue(TopCand);
    3321             :       assert(TopCand.Reason != NoCand && "failed to find a candidate");
    3322        8239 :       tracePick(TopCand);
    3323        8239 :       SU = TopCand.SU;
    3324             :     }
    3325       25423 :   } while (SU->isScheduled);
    3326             : 
    3327       25423 :   IsTopNode = true;
    3328       25423 :   Top.removeReady(SU);
    3329             : 
    3330             :   DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
    3331       25423 :   return SU;
    3332             : }
    3333             : 
    3334             : /// Called after ScheduleDAGMI has scheduled an instruction and updated
    3335             : /// scheduled/remaining flags in the DAG nodes.
    3336       25423 : void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
    3337       50846 :   SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
    3338       25423 :   Top.bumpNode(SU);
    3339       25423 : }
    3340             : 
    3341        8505 : ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
    3342       25515 :   return new ScheduleDAGMI(C, llvm::make_unique<PostGenericScheduler>(C),
    3343       25515 :                            /*RemoveKillFlags=*/true);
    3344             : }
    3345             : 
    3346             : //===----------------------------------------------------------------------===//
    3347             : // ILP Scheduler. Currently for experimental analysis of heuristics.
    3348             : //===----------------------------------------------------------------------===//
    3349             : 
    3350             : namespace {
    3351             : 
    3352             : /// \brief Order nodes by the ILP metric.
    3353             : struct ILPOrder {
    3354             :   const SchedDFSResult *DFSResult = nullptr;
    3355             :   const BitVector *ScheduledTrees = nullptr;
    3356             :   bool MaximizeILP;
    3357             : 
    3358           8 :   ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
    3359             : 
    3360             :   /// \brief Apply a less-than relation on node priority.
    3361             :   ///
    3362             :   /// (Return true if A comes after B in the Q.)
    3363         300 :   bool operator()(const SUnit *A, const SUnit *B) const {
    3364         600 :     unsigned SchedTreeA = DFSResult->getSubtreeID(A);
    3365         600 :     unsigned SchedTreeB = DFSResult->getSubtreeID(B);
    3366         300 :     if (SchedTreeA != SchedTreeB) {
    3367             :       // Unscheduled trees have lower priority.
    3368         477 :       if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
    3369             :         return ScheduledTrees->test(SchedTreeB);
    3370             : 
    3371             :       // Trees with shallower connections have have lower priority.
    3372         174 :       if (DFSResult->getSubtreeLevel(SchedTreeA)
    3373         116 :           != DFSResult->getSubtreeLevel(SchedTreeB)) {
    3374           0 :         return DFSResult->getSubtreeLevel(SchedTreeA)
    3375           0 :           < DFSResult->getSubtreeLevel(SchedTreeB);
    3376             :       }
    3377             :     }
    3378         199 :     if (MaximizeILP)
    3379         232 :       return DFSResult->getILP(A) < DFSResult->getILP(B);
    3380             :     else
    3381         166 :       return DFSResult->getILP(A) > DFSResult->getILP(B);
    3382             :   }
    3383             : };
    3384             : 
    3385             : /// \brief Schedule based on the ILP metric.
    3386          16 : class ILPScheduler : public MachineSchedStrategy {
    3387             :   ScheduleDAGMILive *DAG = nullptr;
    3388             :   ILPOrder Cmp;
    3389             : 
    3390             :   std::vector<SUnit*> ReadyQ;
    3391             : 
    3392             : public:
    3393          24 :   ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
    3394             : 
    3395          10 :   void initialize(ScheduleDAGMI *dag) override {
    3396             :     assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
    3397          10 :     DAG = static_cast<ScheduleDAGMILive*>(dag);
    3398          10 :     DAG->computeDFSResult();
    3399          10 :     Cmp.DFSResult = DAG->getDFSResult();
    3400          20 :     Cmp.ScheduledTrees = &DAG->getScheduledTrees();
    3401          20 :     ReadyQ.clear();
    3402          10 :   }
    3403             : 
    3404          10 :   void registerRoots() override {
    3405             :     // Restore the heap in ReadyQ with the updated DFS results.
    3406          40 :     std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
    3407          10 :   }
    3408             : 
    3409             :   /// Implement MachineSchedStrategy interface.
    3410             :   /// -----------------------------------------
    3411             : 
    3412             :   /// Callback to select the highest priority node from the ready Q.
    3413         186 :   SUnit *pickNode(bool &IsTopNode) override {
    3414         372 :     if (ReadyQ.empty()) return nullptr;
    3415         528 :     std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
    3416         352 :     SUnit *SU = ReadyQ.back();
    3417         176 :     ReadyQ.pop_back();
    3418         176 :     IsTopNode = false;
    3419             :     DEBUG(dbgs() << "Pick node " << "SU(" << SU->NodeNum << ") "
    3420             :           << " ILP: " << DAG->getDFSResult()->getILP(SU)
    3421             :           << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @"
    3422             :           << DAG->getDFSResult()->getSubtreeLevel(
    3423             :             DAG->getDFSResult()->getSubtreeID(SU)) << '\n'
    3424             :           << "Scheduling " << *SU->getInstr());
    3425         176 :     return SU;
    3426             :   }
    3427             : 
    3428             :   /// \brief Scheduler callback to notify that a new subtree is scheduled.
    3429          38 :   void scheduleTree(unsigned SubtreeID) override {
    3430         152 :     std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
    3431          38 :   }
    3432             : 
    3433             :   /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
    3434             :   /// DFSResults, and resort the priority Q.
    3435         176 :   void schedNode(SUnit *SU, bool IsTopNode) override {
    3436             :     assert(!IsTopNode && "SchedDFSResult needs bottom-up");
    3437         176 :   }
    3438             : 
    3439          64 :   void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
    3440             : 
    3441         176 :   void releaseBottomNode(SUnit *SU) override {
    3442         176 :     ReadyQ.push_back(SU);
    3443         528 :     std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
    3444         176 :   }
    3445             : };
    3446             : 
    3447             : } // end anonymous namespace
    3448             : 
    3449           6 : static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
    3450          24 :   return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(true));
    3451             : }
    3452           2 : static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
    3453           8 :   return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(false));
    3454             : }
    3455             : 
    3456       72306 : static MachineSchedRegistry ILPMaxRegistry(
    3457       72306 :   "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
    3458       72306 : static MachineSchedRegistry ILPMinRegistry(
    3459       72306 :   "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
    3460             : 
    3461             : //===----------------------------------------------------------------------===//
    3462             : // Machine Instruction Shuffler for Correctness Testing
    3463             : //===----------------------------------------------------------------------===//
    3464             : 
    3465             : #ifndef NDEBUG
    3466             : namespace {
    3467             : 
    3468             : /// Apply a less-than relation on the node order, which corresponds to the
    3469             : /// instruction order prior to scheduling. IsReverse implements greater-than.
    3470             : template<bool IsReverse>
    3471             : struct SUnitOrder {
    3472             :   bool operator()(SUnit *A, SUnit *B) const {
    3473             :     if (IsReverse)
    3474             :       return A->NodeNum > B->NodeNum;
    3475             :     else
    3476             :       return A->NodeNum < B->NodeNum;
    3477             :   }
    3478             : };
    3479             : 
    3480             : /// Reorder instructions as much as possible.
    3481             : class InstructionShuffler : public MachineSchedStrategy {
    3482             :   bool IsAlternating;
    3483             :   bool IsTopDown;
    3484             : 
    3485             :   // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
    3486             :   // gives nodes with a higher number higher priority causing the latest
    3487             :   // instructions to be scheduled first.
    3488             :   PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
    3489             :     TopQ;
    3490             : 
    3491             :   // When scheduling bottom-up, use greater-than as the queue priority.
    3492             :   PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
    3493             :     BottomQ;
    3494             : 
    3495             : public:
    3496             :   InstructionShuffler(bool alternate, bool topdown)
    3497             :     : IsAlternating(alternate), IsTopDown(topdown) {}
    3498             : 
    3499             :   void initialize(ScheduleDAGMI*) override {
    3500             :     TopQ.clear();
    3501             :     BottomQ.clear();
    3502             :   }
    3503             : 
    3504             :   /// Implement MachineSchedStrategy interface.
    3505             :   /// -----------------------------------------
    3506             : 
    3507             :   SUnit *pickNode(bool &IsTopNode) override {
    3508             :     SUnit *SU;
    3509             :     if (IsTopDown) {
    3510             :       do {
    3511             :         if (TopQ.empty()) return nullptr;
    3512             :         SU = TopQ.top();
    3513             :         TopQ.pop();
    3514             :       } while (SU->isScheduled);
    3515             :       IsTopNode = true;
    3516             :     } else {
    3517             :       do {
    3518             :         if (BottomQ.empty()) return nullptr;
    3519             :         SU = BottomQ.top();
    3520             :         BottomQ.pop();
    3521             :       } while (SU->isScheduled);
    3522             :       IsTopNode = false;
    3523             :     }
    3524             :     if (IsAlternating)
    3525             :       IsTopDown = !IsTopDown;
    3526             :     return SU;
    3527             :   }
    3528             : 
    3529             :   void schedNode(SUnit *SU, bool IsTopNode) override {}
    3530             : 
    3531             :   void releaseTopNode(SUnit *SU) override {
    3532             :     TopQ.push(SU);
    3533             :   }
    3534             :   void releaseBottomNode(SUnit *SU) override {
    3535             :     BottomQ.push(SU);
    3536             :   }
    3537             : };
    3538             : 
    3539             : } // end anonymous namespace
    3540             : 
    3541             : static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
    3542             :   bool Alternate = !ForceTopDown && !ForceBottomUp;
    3543             :   bool TopDown = !ForceBottomUp;
    3544             :   assert((TopDown || !ForceTopDown) &&
    3545             :          "-misched-topdown incompatible with -misched-bottomup");
    3546             :   return new ScheduleDAGMILive(
    3547             :       C, llvm::make_unique<InstructionShuffler>(Alternate, TopDown));
    3548             : }
    3549             : 
    3550             : static MachineSchedRegistry ShufflerRegistry(
    3551             :   "shuffle", "Shuffle machine instructions alternating directions",
    3552             :   createInstructionShuffler);
    3553             : #endif // !NDEBUG
    3554             : 
    3555             : //===----------------------------------------------------------------------===//
    3556             : // GraphWriter support for ScheduleDAGMILive.
    3557             : //===----------------------------------------------------------------------===//
    3558             : 
    3559             : #ifndef NDEBUG
    3560             : namespace llvm {
    3561             : 
    3562             : template<> struct GraphTraits<
    3563             :   ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
    3564             : 
    3565             : template<>
    3566             : struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
    3567             :   DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
    3568             : 
    3569             :   static std::string getGraphName(const ScheduleDAG *G) {
    3570             :     return G->MF.getName();
    3571             :   }
    3572             : 
    3573             :   static bool renderGraphFromBottomUp() {
    3574             :     return true;
    3575             :   }
    3576             : 
    3577             :   static bool isNodeHidden(const SUnit *Node) {
    3578             :     if (ViewMISchedCutoff == 0)
    3579             :       return false;
    3580             :     return (Node->Preds.size() > ViewMISchedCutoff
    3581             :          || Node->Succs.size() > ViewMISchedCutoff);
    3582             :   }
    3583             : 
    3584             :   /// If you want to override the dot attributes printed for a particular
    3585             :   /// edge, override this method.
    3586             :   static std::string getEdgeAttributes(const SUnit *Node,
    3587             :                                        SUnitIterator EI,
    3588             :                                        const ScheduleDAG *Graph) {
    3589             :     if (EI.isArtificialDep())
    3590             :       return "color=cyan,style=dashed";
    3591             :     if (EI.isCtrlDep())
    3592             :       return "color=blue,style=dashed";
    3593             :     return "";
    3594             :   }
    3595             : 
    3596             :   static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
    3597             :     std::string Str;
    3598             :     raw_string_ostream SS(Str);
    3599             :     const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
    3600             :     const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
    3601             :       static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
    3602             :     SS << "SU:" << SU->NodeNum;
    3603             :     if (DFS)
    3604             :       SS << " I:" << DFS->getNumInstrs(SU);
    3605             :     return SS.str();
    3606             :   }
    3607             : 
    3608             :   static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
    3609             :     return G->getGraphNodeLabel(SU);
    3610             :   }
    3611             : 
    3612             :   static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
    3613             :     std::string Str("shape=Mrecord");
    3614             :     const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
    3615             :     const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
    3616             :       static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
    3617             :     if (DFS) {
    3618             :       Str += ",style=filled,fillcolor=\"#";
    3619             :       Str += DOT::getColorString(DFS->getSubtreeID(N));
    3620             :       Str += '"';
    3621             :     }
    3622             :     return Str;
    3623             :   }
    3624             : };
    3625             : 
    3626             : } // end namespace llvm
    3627             : #endif // NDEBUG
    3628             : 
    3629             : /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
    3630             : /// rendered using 'dot'.
    3631           0 : void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
    3632             : #ifndef NDEBUG
    3633             :   ViewGraph(this, Name, false, Title);
    3634             : #else
    3635           0 :   errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
    3636           0 :          << "systems with Graphviz or gv!\n";
    3637             : #endif  // NDEBUG
    3638           0 : }
    3639             : 
    3640             : /// Out-of-line implementation with no arguments is handy for gdb.
    3641           0 : void ScheduleDAGMI::viewGraph() {
    3642           0 :   viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
    3643      216918 : }

Generated by: LCOV version 1.13