LCOV - code coverage report
Current view: top level - lib/CodeGen - MachineScheduler.cpp (source / functions)
Test: llvm-toolchain.info
Date: 2018-10-20 13:21:21
Coverage:  Lines: 1065 of 1191 hit (89.4 %)   Functions: 110 of 124 hit (88.7 %)
Legend: Lines: hit / not hit

          Line data    Source code
       1             : //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // MachineScheduler schedules machine instructions after phi elimination. It
      11             : // preserves LiveIntervals so it can be invoked before register allocation.
      12             : //
      13             : //===----------------------------------------------------------------------===//
      14             : 
      15             : #include "llvm/CodeGen/MachineScheduler.h"
      16             : #include "llvm/ADT/ArrayRef.h"
      17             : #include "llvm/ADT/BitVector.h"
      18             : #include "llvm/ADT/DenseMap.h"
      19             : #include "llvm/ADT/PriorityQueue.h"
      20             : #include "llvm/ADT/STLExtras.h"
      21             : #include "llvm/ADT/SmallVector.h"
      22             : #include "llvm/ADT/iterator_range.h"
      23             : #include "llvm/Analysis/AliasAnalysis.h"
      24             : #include "llvm/CodeGen/LiveInterval.h"
      25             : #include "llvm/CodeGen/LiveIntervals.h"
      26             : #include "llvm/CodeGen/MachineBasicBlock.h"
      27             : #include "llvm/CodeGen/MachineDominators.h"
      28             : #include "llvm/CodeGen/MachineFunction.h"
      29             : #include "llvm/CodeGen/MachineFunctionPass.h"
      30             : #include "llvm/CodeGen/MachineInstr.h"
      31             : #include "llvm/CodeGen/MachineLoopInfo.h"
      32             : #include "llvm/CodeGen/MachineOperand.h"
      33             : #include "llvm/CodeGen/MachinePassRegistry.h"
      34             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      35             : #include "llvm/CodeGen/Passes.h"
      36             : #include "llvm/CodeGen/RegisterClassInfo.h"
      37             : #include "llvm/CodeGen/RegisterPressure.h"
      38             : #include "llvm/CodeGen/ScheduleDAG.h"
      39             : #include "llvm/CodeGen/ScheduleDAGInstrs.h"
      40             : #include "llvm/CodeGen/ScheduleDAGMutation.h"
      41             : #include "llvm/CodeGen/ScheduleDFS.h"
      42             : #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
      43             : #include "llvm/CodeGen/SlotIndexes.h"
      44             : #include "llvm/CodeGen/TargetInstrInfo.h"
      45             : #include "llvm/CodeGen/TargetLowering.h"
      46             : #include "llvm/CodeGen/TargetPassConfig.h"
      47             : #include "llvm/CodeGen/TargetRegisterInfo.h"
      48             : #include "llvm/CodeGen/TargetSchedule.h"
      49             : #include "llvm/CodeGen/TargetSubtargetInfo.h"
      50             : #include "llvm/Config/llvm-config.h"
      51             : #include "llvm/MC/LaneBitmask.h"
      52             : #include "llvm/Pass.h"
      53             : #include "llvm/Support/CommandLine.h"
      54             : #include "llvm/Support/Compiler.h"
      55             : #include "llvm/Support/Debug.h"
      56             : #include "llvm/Support/ErrorHandling.h"
      57             : #include "llvm/Support/GraphWriter.h"
      58             : #include "llvm/Support/MachineValueType.h"
      59             : #include "llvm/Support/raw_ostream.h"
      60             : #include <algorithm>
      61             : #include <cassert>
      62             : #include <cstdint>
      63             : #include <iterator>
      64             : #include <limits>
      65             : #include <memory>
      66             : #include <string>
      67             : #include <tuple>
      68             : #include <utility>
      69             : #include <vector>
      70             : 
      71             : using namespace llvm;
      72             : 
      73             : #define DEBUG_TYPE "machine-scheduler"
      74             : 
      75             : namespace llvm {
      76             : 
      77             : cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
      78             :                            cl::desc("Force top-down list scheduling"));
      79             : cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
      80             :                             cl::desc("Force bottom-up list scheduling"));
      81             : cl::opt<bool>
      82             : DumpCriticalPathLength("misched-dcpl", cl::Hidden,
      83             :                        cl::desc("Print critical path length to stdout"));
      84             : 
      85             : } // end namespace llvm
      86             : 
      87             : #ifndef NDEBUG
      88             : static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
      89             :   cl::desc("Pop up a window to show MISched dags after they are processed"));
      90             : 
      91             : /// In some situations a few uninteresting nodes depend on nearly all other
      92             : /// nodes in the graph; provide a cutoff to hide them.
      93             : static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
      94             :   cl::desc("Hide nodes with more predecessors/successors than cutoff"));
      95             : 
      96             : static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
      97             :   cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
      98             : 
      99             : static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
     100             :   cl::desc("Only schedule this function"));
     101             : static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
     102             :                                         cl::desc("Only schedule this MBB#"));
     103             : static cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
     104             :                               cl::desc("Print schedule DAGs"));
     105             : #else
     106             : static const bool ViewMISchedDAGs = false;
     107             : static const bool PrintDAGs = false;
     108             : #endif // NDEBUG
     109             : 
     110             : /// Avoid quadratic complexity in unusually large basic blocks by limiting the
     111             : /// size of the ready lists.
     112             : static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
     113             :   cl::desc("Limit ready list to N instructions"), cl::init(256));
     114             : 
     115             : static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
     116             :   cl::desc("Enable register pressure scheduling."), cl::init(true));
     117             : 
     118             : static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
     119             :   cl::desc("Enable cyclic critical path analysis."), cl::init(true));
     120             : 
     121             : static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
     122             :                                         cl::desc("Enable memop clustering."),
     123             :                                         cl::init(true));
     124             : 
     125             : static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
     126             :   cl::desc("Verify machine instrs before and after machine scheduling"));
     127             : 
     128             : // DAG subtrees must have at least this many nodes.
     129             : static const unsigned MinSubtreeSize = 8;
     130             : 
     131             : // Pin the vtables to this file.
     132           0 : void MachineSchedStrategy::anchor() {}
     133             : 
     134           0 : void ScheduleDAGMutation::anchor() {}
     135             : 
     136             : //===----------------------------------------------------------------------===//
     137             : // Machine Instruction Scheduling Pass and Registry
     138             : //===----------------------------------------------------------------------===//
     139             : 
     140       23651 : MachineSchedContext::MachineSchedContext() {
     141       23651 :   RegClassInfo = new RegisterClassInfo();
     142       23651 : }
     143             : 
     144       46974 : MachineSchedContext::~MachineSchedContext() {
     145       23487 :   delete RegClassInfo;
     146       23487 : }
     147           0 : 
     148             : namespace {
     149           0 : 
     150       46974 : /// Base class for a machine scheduler class that can run at any point.
     151       23487 : class MachineSchedulerBase : public MachineSchedContext,
     152       23487 :                              public MachineFunctionPass {
     153             : public:
     154             :   MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
     155             : 
     156             :   void print(raw_ostream &O, const Module* = nullptr) const override;
     157             : 
     158             : protected:
     159             :   void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
     160       23651 : };
     161             : 
     162             : /// MachineScheduler runs after coalescing and before register allocation.
     163             : class MachineScheduler : public MachineSchedulerBase {
     164             : public:
     165             :   MachineScheduler();
     166             : 
     167             :   void getAnalysisUsage(AnalysisUsage &AU) const override;
     168             : 
     169             :   bool runOnMachineFunction(MachineFunction&) override;
     170             : 
     171             :   static char ID; // Class identification, replacement for typeinfo
     172             : 
     173             : protected:
     174             :   ScheduleDAGInstrs *createMachineScheduler();
     175             : };
     176             : 
     177             : /// PostMachineScheduler runs shortly before code emission.
     178             : class PostMachineScheduler : public MachineSchedulerBase {
     179             : public:
     180             :   PostMachineScheduler();
     181             : 
     182             :   void getAnalysisUsage(AnalysisUsage &AU) const override;
     183             : 
     184             :   bool runOnMachineFunction(MachineFunction&) override;
     185             : 
     186             :   static char ID; // Class identification, replacement for typeinfo
     187             : 
     188             : protected:
     189             :   ScheduleDAGInstrs *createPostMachineScheduler();
     190             : };
     191             : 
     192             : } // end anonymous namespace
     193             : 
     194             : char MachineScheduler::ID = 0;
     195             : 
     196             : char &llvm::MachineSchedulerID = MachineScheduler::ID;
     197             : 
     198             : INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,
     199             :                       "Machine Instruction Scheduler", false, false)
     200             : INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
     201             : INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
     202             : INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
     203             : INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
     204       31780 : INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,
     205             :                     "Machine Instruction Scheduler", false, false)
     206       31780 : 
     207       31780 : MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
     208       31780 :   initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
     209       31780 : }
     210      168905 : 
     211             : void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
     212             :   AU.setPreservesCFG();
     213       20198 :   AU.addRequiredID(MachineDominatorsID);
     214       20198 :   AU.addRequired<MachineLoopInfo>();
     215       20198 :   AU.addRequired<AAResultsWrapperPass>();
     216             :   AU.addRequired<TargetPassConfig>();
     217       20035 :   AU.addRequired<SlotIndexes>();
     218       20035 :   AU.addPreserved<SlotIndexes>();
     219       20035 :   AU.addRequired<LiveIntervals>();
     220             :   AU.addPreserved<LiveIntervals>();
     221             :   MachineFunctionPass::getAnalysisUsage(AU);
     222             : }
     223             : 
     224             : char PostMachineScheduler::ID = 0;
     225             : 
     226             : char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
     227       20035 : 
     228       20035 : INITIALIZE_PASS(PostMachineScheduler, "postmisched",
     229             :                 "PostRA Machine Instruction Scheduler", false, false)
     230             : 
     231             : PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
     232             :   initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
     233             : }
     234       88600 : 
     235             : void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
     236             :   AU.setPreservesCFG();
     237        3453 :   AU.addRequiredID(MachineDominatorsID);
     238        3453 :   AU.addRequired<MachineLoopInfo>();
     239        3453 :   AU.addRequired<TargetPassConfig>();
     240             :   MachineFunctionPass::getAnalysisUsage(AU);
     241        3424 : }
     242        3424 : 
     243        3424 : MachinePassRegistry MachineSchedRegistry::Registry;
     244             : 
     245             : /// A dummy default scheduler factory indicates whether the scheduler
     246        3424 : /// is overridden on the command line.
     247        3424 : static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
     248             :   return nullptr;
     249             : }
     250             : 
     251             : /// MachineSchedOpt allows command line selection of the scheduler.
     252             : static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
     253           0 :                RegisterPassParser<MachineSchedRegistry>>
     254           0 : MachineSchedOpt("misched",
     255             :                 cl::init(&useDefaultMachineSched), cl::Hidden,
     256             :                 cl::desc("Machine instruction scheduler to use"));
     257             : 
     258             : static MachineSchedRegistry
     259             : DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
     260             :                      useDefaultMachineSched);
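
The registry above is what makes alternative schedulers selectable with
-misched=<name>. A minimal sketch of registering one follows; MySchedStrategy
and the "myname" key are hypothetical, but the factory signature matches
useDefaultMachineSched and the ScheduleDAGMILive constructor is the same one
used by createGenericSchedLive:

    static ScheduleDAGInstrs *createMySched(MachineSchedContext *C) {
      // MySchedStrategy is an assumed MachineSchedStrategy subclass.
      return new ScheduleDAGMILive(C, llvm::make_unique<MySchedStrategy>());
    }
    static MachineSchedRegistry
    MySchedRegistry("myname", "An illustrative custom scheduler.", createMySched);
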
     261             : 
     262             : static cl::opt<bool> EnableMachineSched(
     263             :     "enable-misched",
     264             :     cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
     265             :     cl::Hidden);
     266             : 
     267             : static cl::opt<bool> EnablePostRAMachineSched(
     268             :     "enable-post-misched",
     269             :     cl::desc("Enable the post-ra machine instruction scheduling pass."),
     270             :     cl::init(true), cl::Hidden);
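
When neither flag is given on the command line, the subtarget decides (see
runOnMachineFunction below). A minimal sketch of a target opting in, assuming a
hypothetical MyTargetSubtarget derived from TargetSubtargetInfo:

    // These are the hooks queried by the two passes when the flags are unset.
    bool MyTargetSubtarget::enableMachineScheduler() const { return true; }
    bool MyTargetSubtarget::enablePostRAScheduler() const { return true; }
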
     271             : 
     272             : /// Decrement this iterator until reaching the top or a non-debug instr.
     273             : static MachineBasicBlock::const_iterator
     274             : priorNonDebug(MachineBasicBlock::const_iterator I,
     275             :               MachineBasicBlock::const_iterator Beg) {
     276             :   assert(I != Beg && "reached the top of the region, cannot decrement");
     277             :   while (--I != Beg) {
     278             :     if (!I->isDebugInstr())
     279             :       break;
     280     2897911 :   }
     281             :   return I;
     282             : }
     283     2981912 : 
     284             : /// Non-const version.
     285             : static MachineBasicBlock::iterator
     286             : priorNonDebug(MachineBasicBlock::iterator I,
     287     2897911 :               MachineBasicBlock::const_iterator Beg) {
     288             :   return priorNonDebug(MachineBasicBlock::const_iterator(I), Beg)
     289             :       .getNonConstIterator();
     290             : }
     291             : 
     292             : /// If this iterator is a debug value, increment until reaching the End or a
     293             : /// non-debug instruction.
     294     2897911 : static MachineBasicBlock::const_iterator
     295             : nextIfDebug(MachineBasicBlock::const_iterator I,
     296             :             MachineBasicBlock::const_iterator End) {
     297             :   for(; I != End; ++I) {
     298             :     if (!I->isDebugInstr())
     299             :       break;
     300             :   }
     301     1903830 :   return I;
     302             : }
     303     2001338 : 
     304             : /// Non-const version.
     305             : static MachineBasicBlock::iterator
     306             : nextIfDebug(MachineBasicBlock::iterator I,
     307     1903830 :             MachineBasicBlock::const_iterator End) {
     308             :   return nextIfDebug(MachineBasicBlock::const_iterator(I), End)
     309             :       .getNonConstIterator();
     310             : }
     311             : 
     312             : /// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
     313             : ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
     314     1053111 :   // Select the scheduler, or set the default.
     315             :   MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
     316             :   if (Ctor != useDefaultMachineSched)
     317             :     return Ctor(this);
     318             : 
     319      167111 :   // Get the default scheduler set by the target for this function.
     320             :   ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
     321             :   if (Scheduler)
     322      167111 :     return Scheduler;
     323          17 : 
     324             :   // Default to GenericScheduler.
     325             :   return createGenericSchedLive(this);
     326      167094 : }
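
The PassConfig hook consulted above is how a target substitutes or extends the
scheduler. A plausible override, assuming a hypothetical MyTargetPassConfig
(createGenericSchedLive and createLoadClusterDAGMutation are real helpers
declared alongside this pass):

    ScheduleDAGInstrs *
    MyTargetPassConfig::createMachineScheduler(MachineSchedContext *C) const {
      ScheduleDAGMILive *DAG = createGenericSchedLive(C);
      // Layer load clustering on top of the generic strategy.
      DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
      return DAG;
    }
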
     327      167094 : 
     328             : /// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
     329             : /// the caller. We don't have a command line option to override the postRA
     330             : /// scheduler. The Target must configure it.
     331       16520 : ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
     332             :   // Get the postRA scheduler set by the target for this function.
     333             :   ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
     334             :   if (Scheduler)
     335             :     return Scheduler;
     336             : 
     337       22418 :   // Default to GenericScheduler.
     338             :   return createGenericSchedPostRA(this);
     339       22418 : }
     340       22418 : 
     341             : /// Top-level MachineScheduler pass driver.
     342             : ///
     343             : /// Visit blocks in function order. Divide each block into scheduling regions
     344        8920 : /// and visit them bottom-up. Visiting regions bottom-up is not required, but is
     345             : /// consistent with the DAG builder, which traverses the interior of the
     346             : /// scheduling regions bottom-up.
     347             : ///
     348             : /// This design avoids exposing scheduling boundaries to the DAG builder,
     349             : /// simplifying the DAG builder's support for "special" target instructions.
     350             : /// At the same time the design allows target schedulers to operate across
     351             : /// scheduling boundaries, for example to bundle the boundary instructions
     352             : /// without reordering them. This creates complexity, because the target
     353             : /// scheduler must update the RegionBegin and RegionEnd positions cached by
     354             : /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
     355             : /// design would be to split blocks at scheduling boundaries, but LLVM has a
     356             : /// general bias against block splitting purely for implementation simplicity.
     357             : bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
     358             :   if (skipFunction(mf.getFunction()))
     359             :     return false;
     360             : 
     361             :   if (EnableMachineSched.getNumOccurrences()) {
     362             :     if (!EnableMachineSched)
     363      197898 :       return false;
     364      197898 :   } else if (!mf.getSubtarget().enableMachineScheduler())
     365             :     return false;
     366             : 
     367      197712 :   LLVM_DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
     368         451 : 
     369             :   // Initialize the context of the pass.
     370      197261 :   MF = &mf;
     371             :   MLI = &getAnalysis<MachineLoopInfo>();
     372             :   MDT = &getAnalysis<MachineDominatorTree>();
     373             :   PassConfig = &getAnalysis<TargetPassConfig>();
     374             :   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
     375             : 
     376      167111 :   LIS = &getAnalysis<LiveIntervals>();
     377      167111 : 
     378      167111 :   if (VerifyScheduling) {
     379      167111 :     LLVM_DEBUG(LIS->dump());
     380      167111 :     MF->verify(this, "Before machine scheduling.");
     381             :   }
     382      167111 :   RegClassInfo->runOnMachineFunction(*MF);
     383             : 
     384      167111 :   // Instantiate the selected scheduler for this target, function, and
     385             :   // optimization level.
     386           1 :   std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
     387             :   scheduleRegions(*Scheduler, false);
     388      167111 : 
     389             :   LLVM_DEBUG(LIS->dump());
     390             :   if (VerifyScheduling)
     391             :     MF->verify(this, "After machine scheduling.");
     392      167111 :   return true;
     393      167111 : }
     394             : 
     395             : bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
     396      167111 :   if (skipFunction(mf.getFunction()))
     397           1 :     return false;
     398             : 
     399             :   if (EnablePostRAMachineSched.getNumOccurrences()) {
     400             :     if (!EnablePostRAMachineSched)
     401       27273 :       return false;
     402       27273 :   } else if (!mf.getSubtarget().enablePostRAScheduler()) {
     403             :     LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
     404             :     return false;
     405       27266 :   }
     406          45 :   LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
     407             : 
     408       27221 :   // Initialize the context of the pass.
     409             :   MF = &mf;
     410             :   MLI = &getAnalysis<MachineLoopInfo>();
     411             :   PassConfig = &getAnalysis<TargetPassConfig>();
     412             : 
     413             :   if (VerifyScheduling)
     414             :     MF->verify(this, "Before post machine scheduling.");
     415       22418 : 
     416       22418 :   // Instantiate the selected scheduler for this target, function, and
     417       22418 :   // optimization level.
     418             :   std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
     419       22418 :   scheduleRegions(*Scheduler, true);
     420           0 : 
     421             :   if (VerifyScheduling)
     422             :     MF->verify(this, "After post machine scheduling.");
     423             :   return true;
     424       22418 : }
     425       22418 : 
     426             : /// Return true if the given instruction should not be included in a scheduling
     427       22418 : /// region.
     428           0 : ///
     429             : /// MachineScheduler does not currently support scheduling across calls. To
     430             : /// handle calls, the DAG builder needs to be modified to create register
     431             : /// anti/output dependencies on the registers clobbered by the call's regmask
     432             : /// operand. In PreRA scheduling, the stack pointer adjustment already prevents
     433             : /// scheduling across calls. In PostRA scheduling, we need the isCall check to
     434             : /// enforce the boundary, but there would be no benefit to postRA scheduling
     435             : /// across calls this late anyway.
     436             : static bool isSchedBoundary(MachineBasicBlock::iterator MI,
     437             :                             MachineBasicBlock *MBB,
     438             :                             MachineFunction *MF,
     439             :                             const TargetInstrInfo *TII) {
     440             :   return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
     441             : }
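
The TII hook gives targets the final say on boundaries. A hedged sketch of a
refinement, assuming a hypothetical MyTargetInstrInfo and barrier opcode:

    bool MyTargetInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
                                                 const MachineBasicBlock *MBB,
                                                 const MachineFunction &MF) const {
      if (MI.getOpcode() == MyTarget::SCHED_BARRIER) // hypothetical opcode
        return true; // never reorder across this barrier
      return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF);
    }
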
     442     3923495 : 
     443             : /// A region of an MBB for scheduling.
     444             : namespace {
     445             : struct SchedRegion {
     446     3923495 :   /// RegionBegin is the first instruction in the scheduling region, and
     447             :   /// RegionEnd is either MBB->end() or the scheduling boundary after the
     448             :   /// last instruction in the scheduling region. These iterators cannot refer
     449             :   /// to instructions outside of the identified scheduling region because
     450             :   /// those may be reordered before scheduling this region.
     451             :   MachineBasicBlock::iterator RegionBegin;
     452             :   MachineBasicBlock::iterator RegionEnd;
     453             :   unsigned NumRegionInstrs;
     454             : 
     455             :   SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E,
     456             :               unsigned N) :
     457             :     RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
     458             : };
     459             : } // end anonymous namespace
     460             : 
     461             : using MBBRegionsVector = SmallVector<SchedRegion, 16>;
     462      999719 : 
     463      999719 : static void
     464             : getSchedRegions(MachineBasicBlock *MBB,
     465             :                 MBBRegionsVector &Regions,
     466             :                 bool RegionsTopDown) {
     467             :   MachineFunction *MF = MBB->getParent();
     468             :   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
     469             : 
     470      451711 :   MachineBasicBlock::iterator I = nullptr;
     471             :   for(MachineBasicBlock::iterator RegionEnd = MBB->end();
     472             :       RegionEnd != MBB->begin(); RegionEnd = I) {
     473      451711 : 
     474      451711 :     // Avoid decrementing RegionEnd for blocks with no terminator.
     475             :     if (RegionEnd != MBB->end() ||
     476             :         isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
     477      451711 :       --RegionEnd;
     478     1451430 :     }
     479             : 
     480             :     // The next region starts above the previous region. Look backward in the
     481     1448090 :     // instruction stream until we find the nearest boundary.
     482     1896461 :     unsigned NumRegionInstrs = 0;
     483             :     I = RegionEnd;
     484             :     for (;I != MBB->begin(); --I) {
     485             :       MachineInstr &MI = *std::prev(I);
     486             :       if (isSchedBoundary(&MI, &*MBB, MF, TII))
     487             :         break;
     488             :       if (!MI.isDebugInstr())
     489      999719 :         // MBB::size() uses instr_iterator to count. Here we need a bundle to
     490     3923495 :         // count as a single instruction.
     491     3475124 :         ++NumRegionInstrs;
     492     3475124 :     }
     493             : 
     494             :     Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
     495             :   }
     496             : 
     497     2800923 :   if (RegionsTopDown)
     498             :     std::reverse(Regions.begin(), Regions.end());
     499             : }
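
To make the bottom-up walk concrete (an illustrative trace, not taken from this
report): for a block "a; b; call @f; c; ret" in which the call and the
terminator are the only boundaries, the loop first records the region [c, ret)
with one schedulable instruction, then [a, call) with two. Boundary
instructions mark a region's end but are never scheduled themselves.
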
     500      999719 : 
     501             : /// Main driver for both MachineScheduler and PostMachineScheduler.
     502             : void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
     503      451711 :                                            bool FixKillFlags) {
     504             :   // Visit all machine basic blocks.
     505      451711 :   //
     506             :   // TODO: Visit blocks in global postorder or postorder within the bottom-up
     507             :   // loop tree. Then we can optionally compute global RegPressure.
     508      189529 :   for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
     509             :        MBB != MBBEnd; ++MBB) {
     510             : 
     511             :     Scheduler.startBlock(&*MBB);
     512             : 
     513             : #ifndef NDEBUG
     514      189529 :     if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
     515      641240 :       continue;
     516             :     if (SchedOnlyBlock.getNumOccurrences()
     517      451711 :         && (int)SchedOnlyBlock != MBB->getNumber())
     518             :       continue;
     519             : #endif
     520             : 
     521             :     // Break the block into scheduling regions [I, RegionEnd). RegionEnd
     522             :     // points to the scheduling boundary at the bottom of the region. The DAG
     523             :     // does not include RegionEnd, but the region does (i.e. the next
     524             :     // RegionEnd is above the previous RegionBegin). If the current block has
     525             :     // no terminator then RegionEnd == MBB->end() for the bottom region.
     526             :     //
     527             :     // All the regions of MBB are first found and stored in MBBRegions, which
     528             :     // will be processed top-down within the MBB if RegionsTopDown is true.
     529             :     //
     530             :     // The Scheduler may insert instructions during either schedule() or
     531             :     // exitRegion(), even for empty regions. So the local iterators 'I' and
     532             :     // 'RegionEnd' are invalid across these calls. Instructions must not be
     533             :     // added to other regions than the current one without updating MBBRegions.
     534             : 
     535             :     MBBRegionsVector MBBRegions;
     536             :     getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
     537             :     for (MBBRegionsVector::iterator R = MBBRegions.begin();
     538             :          R != MBBRegions.end(); ++R) {
     539             :       MachineBasicBlock::iterator I = R->RegionBegin;
     540             :       MachineBasicBlock::iterator RegionEnd = R->RegionEnd;
     541      451711 :       unsigned NumRegionInstrs = R->NumRegionInstrs;
     542      451711 : 
     543      999719 :       // Notify the scheduler of the region, even if we may skip scheduling
     544     1451430 :       // it. Perhaps it still needs to be bundled.
     545      999719 :       Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
     546      999719 : 
     547      999719 :       // Skip empty scheduling regions (0 or 1 schedulable instructions).
     548             :       if (I == RegionEnd || I == std::prev(RegionEnd)) {
     549             :         // Close the current region. Bundle the terminator if needed.
     550             :         // This invalidates 'RegionEnd' and 'I'.
     551      999719 :         Scheduler.exitRegion();
     552             :         continue;
     553             :       }
     554     1595743 :       LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
     555             :       LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)
     556             :                         << " " << MBB->getName() << "\n  From: " << *I
     557      547993 :                         << "    To: ";
     558             :                  if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
     559             :                  else dbgs() << "End";
     560             :                  dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
     561             :       if (DumpCriticalPathLength) {
     562             :         errs() << MF->getName();
     563             :         errs() << ":%bb. " << MBB->getNumber();
     564             :         errs() << " " << MBB->getName() << " \n";
     565             :       }
     566             : 
     567      451726 :       // Schedule a region: possibly reorder instructions.
     568           0 :       // This invalidates the original region iterators.
     569           0 :       Scheduler.schedule();
     570           0 : 
     571             :       // Close the current region.
     572             :       Scheduler.exitRegion();
     573             :     }
     574             :     Scheduler.finishBlock();
     575      451726 :     // FIXME: Ideally, no further passes should rely on kill flags. However,
     576             :     // thumb2 size reduction is currently an exception, so the PostMIScheduler
     577             :     // needs to do this.
     578      451726 :     if (FixKillFlags)
     579             :       Scheduler.fixupKills(*MBB);
     580      451711 :   }
     581             :   Scheduler.finalizeSchedule();
     582             : }
     583             : 
     584      451711 : void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
     585       28811 :   // unimplemented
     586             : }
     587      189529 : 
     588      189529 : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
     589             : LLVM_DUMP_METHOD void ReadyQueue::dump() const {
     590           0 :   dbgs() << "Queue " << Name << ": ";
     591             :   for (const SUnit *SU : Queue)
     592           0 :     dbgs() << SU->NodeNum << " ";
     593             :   dbgs() << "\n";
     594             : }
     595             : #endif
     596             : 
     597             : //===----------------------------------------------------------------------===//
     598             : // ScheduleDAGMI - Basic machine instruction scheduling. This is
     599             : // independent of PreRA/PostRA scheduling and involves no extra book-keeping for
     600             : // virtual registers.
     601             : //===----------------------------------------------------------------------===//
     602             : 
     603             : // Provide a vtable anchor.
     604             : ScheduleDAGMI::~ScheduleDAGMI() = default;
     605             : 
     606             : bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
     607             :   return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
     608             : }
     609             : 
     610             : bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
     611             :   if (SuccSU != &ExitSU) {
     612       19971 :     // Do not use WillCreateCycle; it assumes SD scheduling.
     613       19971 :     // If Pred is reachable from Succ, then the edge creates a cycle.
     614             :     if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
     615             :       return false;
     616      146863 :     Topo.AddPred(SuccSU, PredDep.getSUnit());
     617      146863 :   }
     618             :   SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
     619             :   // Return true regardless of whether a new edge needed to be inserted.
     620      241948 :   return true;
     621             : }
     622      120105 : 
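
A sketch of how a ScheduleDAGMutation might use these helpers; the mutation
class and its pairing heuristic are hypothetical, while canAddEdge/addEdge and
SDep::Cluster are the real APIs shown above:

    void MyClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
      auto *DAG = static_cast<ScheduleDAGMI *>(DAGInstrs);
      for (SUnit &SU : DAG->SUnits) {
        SUnit *Partner = findPartner(&SU); // hypothetical pairing heuristic
        // Only add the weak cluster edge if it cannot create a cycle.
        if (Partner && DAG->canAddEdge(Partner, &SU))
          DAG->addEdge(Partner, SDep(&SU, SDep::Cluster));
      }
    }
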
     623             : /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
     624      145994 : /// NumPredsLeft reaches zero, release the successor node.
     625             : ///
     626      145994 : /// FIXME: Adjust SuccSU height based on MinLatency.
     627             : void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
     628             :   SUnit *SuccSU = SuccEdge->getSUnit();
     629             : 
     630             :   if (SuccEdge->isWeak()) {
     631             :     --SuccSU->WeakPredsLeft;
     632             :     if (SuccEdge->isCluster())
     633      366227 :       NextClusterSucc = SuccSU;
     634             :     return;
     635             :   }
     636             : #ifndef NDEBUG
     637        6917 :   if (SuccSU->NumPredsLeft == 0) {
     638             :     dbgs() << "*** Scheduling failed! ***\n";
     639        6849 :     dumpNode(*SuccSU);
     640        6917 :     dbgs() << " has been released too many times!\n";
     641             :     llvm_unreachable(nullptr);
     642             :   }
     643             : #endif
     644             :   // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
     645             :   // CurrCycle may have advanced since then.
     646             :   if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
     647             :     SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
     648             : 
     649             :   --SuccSU->NumPredsLeft;
     650             :   if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
     651             :     SchedImpl->releaseTopNode(SuccSU);
     652      359310 : }
     653      252269 : 
     654             : /// releaseSuccessors - Call releaseSucc on each of SU's successors.
     655      359310 : void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
     656      359310 :   for (SDep &Succ : SU->Succs)
     657      144753 :     releaseSucc(SU, &Succ);
     658             : }
     659             : 
     660             : /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
     661      647054 : /// NumSuccsLeft reaches zero, release the predecessor node.
     662     1013281 : ///
     663      366227 : /// FIXME: Adjust PredSU height based on MinLatency.
     664      647054 : void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
     665             :   SUnit *PredSU = PredEdge->getSUnit();
     666             : 
     667             :   if (PredEdge->isWeak()) {
     668             :     --PredSU->WeakSuccsLeft;
     669             :     if (PredEdge->isCluster())
     670     4191126 :       NextClusterPred = PredSU;
     671             :     return;
     672             :   }
     673             : #ifndef NDEBUG
     674       43345 :   if (PredSU->NumSuccsLeft == 0) {
     675             :     dbgs() << "*** Scheduling failed! ***\n";
     676       41093 :     dumpNode(*PredSU);
     677       43345 :     dbgs() << " has been released too many times!\n";
     678             :     llvm_unreachable(nullptr);
     679             :   }
     680             : #endif
     681             :   // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
     682             :   // CurrCycle may have advanced since then.
     683             :   if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
     684             :     PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
     685             : 
     686             :   --PredSU->NumSuccsLeft;
     687             :   if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
     688             :     SchedImpl->releaseBottomNode(PredSU);
     689     4147781 : }
     690     2671933 : 
     691             : /// releasePredecessors - Call releasePred on each of SU's predecessors.
     692     4147781 : void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
     693     4147781 :   for (SDep &Pred : SU->Preds)
     694     2160004 :     releasePred(SU, &Pred);
     695             : }
     696             : 
     697             : void ScheduleDAGMI::startBlock(MachineBasicBlock *bb) {
     698     2941566 :   ScheduleDAGInstrs::startBlock(bb);
     699     7132692 :   SchedImpl->enterMBB(bb);
     700     4191126 : }
     701     2941566 : 
     702             : void ScheduleDAGMI::finishBlock() {
     703      472445 :   SchedImpl->leaveMBB();
     704      472445 :   ScheduleDAGInstrs::finishBlock();
     705      472445 : }
     706      472445 : 
     707             : /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
     708      473137 : /// crossing a scheduling boundary. [begin, end) includes all instructions in
     709      473137 : /// the region, including the boundary itself and single-instruction regions
     710      473137 : /// that don't get scheduled.
     711      473137 : void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
     712             :                                      MachineBasicBlock::iterator begin,
     713             :                                      MachineBasicBlock::iterator end,
     714             :                                      unsigned regioninstrs)
     715             : {
     716             :   ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
     717     1021179 : 
     718             :   SchedImpl->initPolicy(begin, end, regioninstrs);
     719             : }
     720             : 
     721             : /// This is normally called from the main scheduler loop but may also be invoked
     722     1021179 : /// by the scheduling strategy to perform additional code motion.
     723             : void ScheduleDAGMI::moveInstruction(
     724     1021179 :   MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
     725     1021179 :   // Advance RegionBegin if the first instruction moves down.
     726             :   if (&*RegionBegin == MI)
     727             :     ++RegionBegin;
     728             : 
     729      215569 :   // Update the instruction stream.
     730             :   BB->splice(InsertPos, BB, MI);
     731             : 
     732      215569 :   // Update LiveIntervals
     733             :   if (LIS)
     734             :     LIS->handleMove(*MI, /*UpdateFlags=*/true);
     735             : 
     736      431138 :   // Recede RegionBegin if an instruction moves above the first.
     737             :   if (RegionBegin == InsertPos)
     738             :     RegionBegin = MI;
     739      215569 : }
     740      205042 : 
     741             : bool ScheduleDAGMI::checkSchedLimit() {
     742             : #ifndef NDEBUG
     743      215569 :   if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
     744        2216 :     CurrentTop = CurrentBottom;
     745      215569 :     return false;
     746             :   }
     747     2684496 :   ++NumInstrsScheduled;
     748             : #endif
     749             :   return true;
     750             : }
     751             : 
     752             : /// Per-region scheduling driver, called back from
     753             : /// MachineScheduler::runOnMachineFunction. This is a simplified driver that
     754             : /// does not consider liveness or register pressure. It is useful for PostRA
     755     2684496 : /// scheduling and potentially other custom schedulers.
     756             : void ScheduleDAGMI::schedule() {
     757             :   LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
     758             :   LLVM_DEBUG(SchedImpl->dumpPolicy());
     759             : 
     760             :   // Build the DAG.
     761             :   buildSchedGraph(AA);
     762       19370 : 
     763             :   Topo.InitDAGTopologicalSorting();
     764             : 
     765             :   postprocessDAG();
     766             : 
     767       19370 :   SmallVector<SUnit*, 8> TopRoots, BotRoots;
     768             :   findRootsAndBiasEdges(TopRoots, BotRoots);
     769       19370 : 
     770             :   LLVM_DEBUG(dump());
     771       19370 :   if (PrintDAGs) dump();
     772             :   if (ViewMISchedDAGs) viewGraph();
     773             : 
     774       19370 :   // Initialize the strategy before modifying the DAG.
     775             :   // This may initialize a DFSResult to be used for queue priority.
     776             :   SchedImpl->initialize(this);
     777             : 
     778             :   // Initialize ready queues now that the DAG and priority data are finalized.
     779             :   initQueues(TopRoots, BotRoots);
     780             : 
     781             :   bool IsTopNode = false;
     782       19370 :   while (true) {
     783             :     LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
     784             :     SUnit *SU = SchedImpl->pickNode(IsTopNode);
     785       19370 :     if (!SU) break;
     786             : 
     787       19370 :     assert(!SU->isScheduled && "Node already scheduled");
     788             :     if (!checkSchedLimit())
     789             :       break;
     790      113495 : 
     791      113495 :     MachineInstr *MI = SU->getInstr();
     792             :     if (IsTopNode) {
     793             :       assert(SU->isTopReady() && "node still has unscheduled dependencies");
     794       94125 :       if (&*CurrentTop == MI)
     795             :         CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
     796             :       else
     797       94125 :         moveInstruction(MI, CurrentTop);
     798       94125 :     } else {
     799             :       assert(SU->isBottomReady() && "node still has unscheduled dependencies");
     800       94125 :       MachineBasicBlock::iterator priorII =
     801       83598 :         priorNonDebug(CurrentBottom, CurrentTop);
     802             :       if (&*priorII == MI)
     803       10527 :         CurrentBottom = priorII;
     804             :       else {
     805             :         if (&*CurrentTop == MI)
     806             :           CurrentTop = nextIfDebug(++CurrentTop, priorII);
     807           0 :         moveInstruction(MI, CurrentBottom);
     808           0 :         CurrentBottom = MI;
     809           0 :       }
     810             :     }
     811           0 :     // Notify the scheduling strategy before updating the DAG.
     812           0 :     // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
     813           0 :     // runs, it can then use the accurate ReadyCycle time to determine whether
     814           0 :     // newly released nodes can move to the readyQ.
     815             :     SchedImpl->schedNode(SU, IsTopNode);
     816             : 
     817             :     updateQueues(SU, IsTopNode);
     818             :   }
     819             :   assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
     820             : 
     821       94125 :   placeDebugValues();
     822             : 
     823       94125 :   LLVM_DEBUG({
     824       94125 :     dbgs() << "*** Final schedule for "
     825             :            << printMBBReference(*begin()->getParent()) << " ***\n";
     826             :     dumpSchedule();
     827       19370 :     dbgs() << '\n';
     828             :   });
     829             : }
     830             : 
     831             : /// Apply each ScheduleDAGMutation step in order.
     832             : void ScheduleDAGMI::postprocessDAG() {
     833             :   for (auto &m : Mutations)
     834             :     m->apply(this);
     835       19370 : }
     836             : 
     837             : void ScheduleDAGMI::
     838      452060 : findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
     839     1359734 :                       SmallVectorImpl<SUnit*> &BotRoots) {
     840      907674 :   for (SUnit &SU : SUnits) {
     841      452060 :     assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
     842             : 
     843      452070 :     // Order predecessors so DFSResult follows the critical path.
     844             :     SU.biasCriticalPath();
     845             : 
     846     3139006 :     // A SUnit is ready to top schedule if it has no predecessors.
     847             :     if (!SU.NumPredsLeft)
     848             :       TopRoots.push_back(&SU);
     849             :     // A SUnit is ready to bottom schedule if it has no successors.
     850     2686936 :     if (!SU.NumSuccsLeft)
     851             :       BotRoots.push_back(&SU);
     852             :   }
     853     2686936 :   ExitSU.biasCriticalPath();
     854     1182130 : }
     855             : 
     856     2686936 : /// Identify DAG roots and setup scheduler queues.
     857      429784 : void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
     858             :                                ArrayRef<SUnit*> BotRoots) {
     859      452070 :   NextClusterSucc = nullptr;
     860      452070 :   NextClusterPred = nullptr;
     861             : 
     862             :   // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
     863      452062 :   //
     864             :   // Nodes with unreleased weak edges can still be roots.
     865      452062 :   // Release top roots in forward order.
     866      452062 :   for (SUnit *SU : TopRoots)
     867             :     SchedImpl->releaseTopNode(SU);
     868             : 
     869             :   // Release bottom roots in reverse order so the higher priority nodes appear
     870             :   // first. This is more natural and slightly more efficient.
     871             :   for (SmallVectorImpl<SUnit*>::const_reverse_iterator
     872     1634172 :          I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
     873     1182110 :     SchedImpl->releaseBottomNode(*I);
     874             :   }
     875             : 
     876             :   releaseSuccessors(&EntrySU);
     877             :   releasePredecessors(&ExitSU);
     878      881838 : 
     879      429776 :   SchedImpl->registerRoots();
     880             : 
     881             :   // Advance past initial DebugValues.
     882      452062 :   CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
     883      452062 :   CurrentBottom = RegionEnd;
     884             : }
     885      452062 : 
     886             : /// Update scheduler queues after scheduling an instruction.
     887             : void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
     888      452062 :   // Release dependent instructions for scheduling.
     889      452062 :   if (IsTopNode)
     890      452062 :     releaseSuccessors(SU);
     891             :   else
     892             :     releasePredecessors(SU);
     893     2684496 : 
     894             :   SU->isScheduled = true;
     895     2684496 : }
     896      194992 : 
     897             : /// Reinsert any remaining debug_values, just like the PostRA scheduler.
     898     2489504 : void ScheduleDAGMI::placeDebugValues() {
     899             :   // If first instruction was a DBG_VALUE then put it back.
     900     2684496 :   if (FirstDbgValue) {
     901     2684496 :     BB->splice(RegionBegin, BB, FirstDbgValue);
     902             :     RegionBegin = FirstDbgValue;
     903             :   }
     904      452081 : 
     905             :   for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
     906      452081 :          DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
     907       22164 :     std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
     908       11082 :     MachineInstr *DbgValue = P.first;
     909             :     MachineBasicBlock::iterator OrigPrevMI = P.second;
     910             :     if (&*RegionBegin == DbgValue)
     911             :       ++RegionBegin;
     912      563427 :     BB->splice(++OrigPrevMI, BB, DbgValue);
     913      111346 :     if (OrigPrevMI == std::prev(RegionEnd))
     914             :       RegionEnd = DbgValue;
     915             :   }
     916      111346 :   DbgValues.clear();
     917             :   FirstDbgValue = nullptr;
     918      222692 : }
     919      111346 : 
     920        4778 : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
     921             : LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
     922             :   for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
     923      452081 :     if (SUnit *SU = getSUnit(&(*MI)))
     924      452081 :       dumpNode(*SU);
     925             :     else
     926             :       dbgs() << "Missing SUnit\n";
     927             :   }
     928             : }
     929             : #endif
     930             : 
     931             : //===----------------------------------------------------------------------===//
     932             : // ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
     933             : // preservation.
     934             : //===----------------------------------------------------------------------===//
     935             : 
     936             : ScheduleDAGMILive::~ScheduleDAGMILive() {
     937             :   delete DFSResult;
     938             : }
     939             : 
     940             : void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
     941             :   const MachineInstr &MI = *SU.getInstr();
     942      812752 :   for (const MachineOperand &MO : MI.operands()) {
     943      167111 :     if (!MO.isReg())
     944      311419 :       continue;
     945      144308 :     if (!MO.readsReg())
     946             :       continue;
     947      144308 :     if (TrackLaneMasks && !MO.isUse())
     948      668444 :       continue;
     949      167111 : 
     950      167111 :     unsigned Reg = MO.getReg();
     951             :     if (!TargetRegisterInfo::isVirtualRegister(Reg))
     952     1536618 :       continue;
     953     1536618 : 
     954     9002609 :     // Ignore re-defs.
     955     7465991 :     if (TrackLaneMasks) {
     956             :       bool FoundDef = false;
     957             :       for (const MachineOperand &MO2 : MI.operands()) {
     958             :         if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {
     959     3546597 :           FoundDef = true;
     960             :           break;
     961             :         }
     962     3425453 :       }
     963     3425453 :       if (FoundDef)
     964             :         continue;
     965             :     }
     966             : 
     967     1609173 :     // Record this local VReg use.
     968             :     VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
     969     2155029 :     for (; UI != VRegUses.end(); ++UI) {
     970     1806181 :       if (UI->SU == &SU)
     971             :         break;
     972             :     }
     973             :     if (UI == VRegUses.end())
     974             :       VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU));
     975      354591 :   }
     976             : }
     977             : 
     978             : /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
     979             : /// crossing a scheduling boundary. [begin, end) includes all instructions in
     980     1603430 : /// the region, including the boundary itself and single-instruction regions
     981             : /// that don't get scheduled.
     982    14779093 : void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
     983             :                                 MachineBasicBlock::iterator begin,
     984             :                                 MachineBasicBlock::iterator end,
     985             :                                 unsigned regioninstrs)
     986     1581057 : {
     987             :   // ScheduleDAGMI initializes SchedImpl's per-region policy.
     988     1536618 :   ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
     989             : 
     990             :   // For convenience remember the end of the liveness region.
     991             :   LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
     992             : 
     993             :   SUPressureDiffs.clear();
     994      979712 : 
     995             :   ShouldTrackPressure = SchedImpl->shouldTrackPressure();
     996             :   ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
     997             : 
     998             :   assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&
     999             :          "ShouldTrackLaneMasks requires ShouldTrackPressure");
    1000      979712 : }
    1001             : 
     1002             : // Set up the register pressure trackers for the top and bottom scheduled
     1003     1919806 : // regions.
    1004             : void ScheduleDAGMILive::initRegPressure() {
    1005             :   VRegUses.clear();
    1006             :   VRegUses.setUniverse(MRI.getNumVirtRegs());
    1007      979712 :   for (SUnit &SU : SUnits)
    1008      979712 :     collectVRegUses(SU);
    1009             : 
    1010             :   TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,
    1011             :                     ShouldTrackLaneMasks, false);
    1012      979712 :   BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
    1013             :                     ShouldTrackLaneMasks, false);
    1014             : 
    1015             :   // Close the RPTracker to finalize live ins.
    1016      124424 :   RPTracker.closeRegion();
    1017             : 
    1018      248848 :   LLVM_DEBUG(RPTracker.dump());
    1019     1661042 : 
    1020     1536618 :   // Initialize the live ins and live outs.
    1021             :   TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
    1022      124424 :   BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
    1023      124424 : 
    1024      124424 :   // Close one end of the tracker so we can call
    1025      124424 :   // getMaxUpward/DownwardPressureDelta before advancing across any
    1026             :   // instructions. This converts currently live regs into live ins/outs.
    1027             :   TopRPTracker.closeTop();
    1028      124424 :   BotRPTracker.closeBottom();
    1029             : 
    1030             :   BotRPTracker.initLiveThru(RPTracker);
    1031             :   if (!BotRPTracker.getLiveThru().empty()) {
    1032             :     TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
    1033      248848 :     LLVM_DEBUG(dbgs() << "Live Thru: ";
    1034      248848 :                dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
     1035             :   }
    1036             : 
    1037             :   // For each live out vreg reduce the pressure change associated with other
    1038             :   // uses of the same vreg below the live-out reaching def.
    1039      124424 :   updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);
    1040      124424 : 
    1041             :   // Account for liveness generated by the region boundary.
    1042      124424 :   if (LiveRegionEnd != RegionEnd) {
    1043      124424 :     SmallVector<RegisterMaskPair, 8> LiveUses;
    1044             :     BotRPTracker.recede(&LiveUses);
    1045             :     updatePressureDiffs(LiveUses);
    1046             :   }
    1047             : 
    1048             :   LLVM_DEBUG(dbgs() << "Top Pressure:\n";
    1049             :              dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
    1050             :              dbgs() << "Bottom Pressure:\n";
    1051      248848 :              dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI););
    1052             : 
    1053             :   assert((BotRPTracker.getPos() == RegionEnd ||
    1054      124424 :           (RegionEnd->isDebugInstr() &&
    1055             :            BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) &&
    1056      113476 :          "Can't find the region bottom");
    1057      113476 : 
    1058             :   // Cache the list of excess pressure sets in this region. This will also track
    1059             :   // the max pressure in the scheduled code for these sets.
    1060             :   RegionCriticalPSets.clear();
    1061             :   const std::vector<unsigned> &RegionPressure =
    1062             :     RPTracker.getPressure().MaxSetPressure;
    1063             :   for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
    1064             :     unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
    1065             :     if (RegionPressure[i] > Limit) {
    1066             :       LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit
    1067             :                         << " Actual " << RegionPressure[i] << "\n");
    1068             :       RegionCriticalPSets.push_back(PressureChange(i));
    1069             :     }
    1070             :   }
    1071             :   LLVM_DEBUG(dbgs() << "Excess PSets: ";
    1072      124424 :              for (const PressureChange &RCPS
    1073             :                   : RegionCriticalPSets) dbgs()
    1074      124424 :              << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
    1075     3811047 :              dbgs() << "\n");
    1076     3562199 : }
    1077     7124398 : 
    1078             : void ScheduleDAGMILive::
    1079             : updateScheduledPressure(const SUnit *SU,
     1080             :                         const std::vector<unsigned> &NewMaxPressure) {
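                      :   // Raise the recorded max pressure of each region-critical pressure set
                      :   // if this instruction increased it, clamping to the int16_t range that
                      :   // PressureChange can represent.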
    1081             :   const PressureDiff &PDiff = getPressureDiff(SU);
    1082             :   unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
    1083             :   for (const PressureChange &PC : PDiff) {
    1084             :     if (!PC.isValid())
    1085             :       break;
    1086             :     unsigned ID = PC.getPSet();
    1087             :     while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
    1088      124424 :       ++CritIdx;
    1089             :     if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
    1090     1536618 :       if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
    1091             :           && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
    1092             :         RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
    1093     1536618 :     }
    1094     3073236 :     unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
    1095     4437033 :     if (NewMaxPressure[ID] >= Limit - 2) {
    1096     4378277 :       LLVM_DEBUG(dbgs() << "  " << TRI->getRegPressureSetName(ID) << ": "
    1097             :                         << NewMaxPressure[ID]
    1098             :                         << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")
    1099     2952903 :                         << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]
    1100       52488 :                         << " livethru)\n");
    1101     2900415 :     }
    1102      279626 :   }
    1103      139813 : }
    1104             : 
    1105             : /// Update the PressureDiff array for liveness after scheduling this
    1106     2900415 : /// instruction.
    1107             : void ScheduleDAGMILive::updatePressureDiffs(
    1108             :     ArrayRef<RegisterMaskPair> LiveUses) {
    1109             :   for (const RegisterMaskPair &P : LiveUses) {
    1110             :     unsigned Reg = P.RegUnit;
    1111             :     /// FIXME: Currently assuming single-use physregs.
    1112             :     if (!TRI->isVirtualRegister(Reg))
    1113             :       continue;
    1114             : 
    1115     1536618 :     if (ShouldTrackLaneMasks) {
    1116             :       // If the register has just become live then other uses won't change
    1117             :       // this fact anymore => decrement pressure.
    1118             :       // If the register has just become dead then other uses make it come
    1119     1687613 :       // back to life => increment pressure.
    1120             :       bool Decrement = P.LaneMask.any();
    1121     3192715 : 
    1122     1505102 :       for (const VReg2SUnit &V2SU
    1123             :            : make_range(VRegUses.find(Reg), VRegUses.end())) {
    1124     1505102 :         SUnit &SU = *V2SU.SU;
    1125             :         if (SU.isScheduled || &SU == &ExitSU)
    1126             :           continue;
    1127     1213583 : 
    1128             :         PressureDiff &PDiff = getPressureDiff(&SU);
    1129             :         PDiff.addPressureChange(Reg, Decrement, &MRI);
    1130             :         LLVM_DEBUG(dbgs() << "  UpdateRegP: SU(" << SU.NodeNum << ") "
    1131             :                           << printReg(Reg, TRI) << ':'
    1132      362864 :                           << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();
    1133             :                    dbgs() << "              to "; PDiff.dump(*TRI););
    1134             :       }
    1135      362864 :     } else {
    1136      560019 :       assert(P.LaneMask.any());
    1137      560019 :       LLVM_DEBUG(dbgs() << "  LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
    1138             :       // This may be called before CurrentBottom has been initialized. However,
    1139             :       // BotRPTracker must have a valid position. We want the value live into the
    1140      318726 :       // instruction or live out of the block, so ask for the previous
    1141      318726 :       // instruction's live-out.
    1142             :       const LiveInterval &LI = LIS->getInterval(Reg);
    1143             :       VNInfo *VNI;
    1144             :       MachineBasicBlock::const_iterator I =
    1145             :         nextIfDebug(BotRPTracker.getPos(), BB->end());
    1146             :       if (I == BB->end())
    1147             :         VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
    1148             :       else {
    1149             :         LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
    1150             :         VNI = LRQ.valueIn();
    1151             :       }
    1152             :       // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
    1153             :       assert(VNI && "No live value at use.");
    1154      850719 :       for (const VReg2SUnit &V2SU
    1155             :            : make_range(VRegUses.find(Reg), VRegUses.end())) {
    1156             :         SUnit *SU = V2SU.SU;
    1157     1701438 :         // If this use comes before the reaching def, it cannot be a last use,
    1158     1701438 :         // so decrease its pressure change.
    1159      156904 :         if (!SU->isScheduled && SU != &ExitSU) {
    1160             :           LiveQueryResult LRQ =
    1161      772267 :               LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
    1162      772267 :           if (LRQ.valueIn() == VNI) {
    1163             :             PressureDiff &PDiff = getPressureDiff(SU);
    1164             :             PDiff.addPressureChange(Reg, true, &MRI);
    1165             :             LLVM_DEBUG(dbgs() << "  UpdateRegP: SU(" << SU->NodeNum << ") "
    1166             :                               << *SU->getInstr();
    1167      850719 :                        dbgs() << "              to "; PDiff.dump(*TRI););
    1168     1858852 :           }
    1169             :         }
    1170             :       }
    1171     1858852 :     }
    1172             :   }
    1173     1558529 : }
    1174     1558529 : 
    1175     1231535 : void ScheduleDAGMILive::dump() const {
    1176     1231535 : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    1177             :   if (EntrySU.getInstr() != nullptr)
    1178             :     dumpNodeAll(EntrySU);
    1179             :   for (const SUnit &SU : SUnits) {
    1180             :     dumpNodeAll(SU);
    1181             :     if (ShouldTrackPressure) {
    1182             :       dbgs() << "  Pressure Diff      : ";
    1183             :       getPressureDiff(&SU).dump(*TRI);
    1184             :     }
    1185     1687613 :     dbgs() << "  Single Issue       : ";
    1186             :     if (SchedModel.mustBeginGroup(SU.getInstr()) &&
    1187           0 :         SchedModel.mustEndGroup(SU.getInstr()))
    1188             :       dbgs() << "true;";
    1189             :     else
    1190             :       dbgs() << "false;";
    1191             :     dbgs() << '\n';
    1192             :   }
    1193             :   if (ExitSU.getInstr() != nullptr)
    1194             :     dumpNodeAll(ExitSU);
    1195             : #endif
    1196             : }
    1197             : 
    1198             : /// schedule - Called back from MachineScheduler::runOnMachineFunction
    1199             : /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
    1200             : /// only includes instructions that have DAG nodes, not scheduling boundaries.
    1201             : ///
    1202             : /// This is a skeletal driver, with all the functionality pushed into helpers,
    1203             : /// so that it can be easily extended by experimental schedulers. Generally,
    1204             : /// implementing MachineSchedStrategy should be sufficient to implement a new
    1205             : /// scheduling algorithm. However, if a scheduler further subclasses
    1206             : /// ScheduleDAGMILive then it will want to override this virtual method in order
    1207             : /// to update any specialized state.
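                      : ///
                      : /// The MachineSchedStrategy hooks exercised by this driver are
                      : /// initialize(), pickNode(), schedNode(), and, when a DFSResult is in
                      : /// use, scheduleTree().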
    1208           0 : void ScheduleDAGMILive::schedule() {
    1209             :   LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
    1210             :   LLVM_DEBUG(SchedImpl->dumpPolicy());
    1211             :   buildDAGWithRegPressure();
    1212             : 
    1213             :   Topo.InitDAGTopologicalSorting();
    1214             : 
    1215             :   postprocessDAG();
    1216             : 
    1217             :   SmallVector<SUnit*, 8> TopRoots, BotRoots;
    1218             :   findRootsAndBiasEdges(TopRoots, BotRoots);
    1219             : 
    1220      427711 :   // Initialize the strategy before modifying the DAG.
    1221             :   // This may initialize a DFSResult to be used for queue priority.
    1222             :   SchedImpl->initialize(this);
    1223      427711 : 
    1224             :   LLVM_DEBUG(dump());
    1225      427711 :   if (PrintDAGs) dump();
    1226             :   if (ViewMISchedDAGs) viewGraph();
    1227      427711 : 
    1228             :   // Initialize ready queues now that the DAG and priority data are finalized.
    1229             :   initQueues(TopRoots, BotRoots);
    1230      427711 : 
    1231             :   bool IsTopNode = false;
    1232             :   while (true) {
    1233             :     LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
    1234      427711 :     SUnit *SU = SchedImpl->pickNode(IsTopNode);
    1235             :     if (!SU) break;
    1236             : 
    1237             :     assert(!SU->isScheduled && "Node already scheduled");
    1238             :     if (!checkSchedLimit())
    1239             :       break;
    1240             : 
    1241      427711 :     scheduleMI(SU, IsTopNode);
    1242             : 
    1243      427711 :     if (DFSResult) {
    1244             :       unsigned SubtreeID = DFSResult->getSubtreeID(SU);
    1245             :       if (!ScheduledTrees.test(SubtreeID)) {
    1246     2988665 :         ScheduledTrees.set(SubtreeID);
    1247     2988665 :         DFSResult->scheduleTree(SubtreeID);
    1248             :         SchedImpl->scheduleTree(SubtreeID);
    1249             :       }
    1250     2560954 :     }
    1251             : 
    1252             :     // Notify the scheduling strategy after updating the DAG.
    1253     2560954 :     SchedImpl->schedNode(SU, IsTopNode);
    1254             : 
    1255     2560954 :     updateQueues(SU, IsTopNode);
    1256             :   }
    1257         176 :   assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
    1258             : 
    1259          38 :   placeDebugValues();
    1260          38 : 
    1261             :   LLVM_DEBUG({
    1262             :     dbgs() << "*** Final schedule for "
    1263             :            << printMBBReference(*begin()->getParent()) << " ***\n";
    1264             :     dumpSchedule();
    1265     2560954 :     dbgs() << '\n';
    1266             :   });
    1267     2560954 : }
    1268     2560954 : 
     1269             : /// Build the DAG and set up the three register pressure trackers.
    1270             : void ScheduleDAGMILive::buildDAGWithRegPressure() {
    1271      427711 :   if (!ShouldTrackPressure) {
    1272             :     RPTracker.reset();
    1273             :     RegionCriticalPSets.clear();
    1274             :     buildSchedGraph(AA);
    1275             :     return;
    1276             :   }
    1277             : 
    1278             :   // Initialize the register pressure tracker used by buildSchedGraph.
    1279      427711 :   RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
    1280             :                  ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
    1281             : 
     1282      432692 :   // Account for liveness generated by the region boundary.
    1283      432692 :   if (LiveRegionEnd != RegionEnd)
    1284      308268 :     RPTracker.recede();
    1285             : 
    1286      308268 :   // Build the DAG, and compute current register pressure.
    1287      308268 :   buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);
    1288             : 
    1289             :   // Initialize top/bottom trackers after computing region pressure.
    1290             :   initRegPressure();
    1291      124424 : }
    1292      124424 : 
    1293             : void ScheduleDAGMILive::computeDFSResult() {
    1294             :   if (!DFSResult)
    1295      124424 :     DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
    1296      113476 :   DFSResult->clear();
    1297             :   ScheduledTrees.clear();
    1298             :   DFSResult->resize(SUnits.size());
    1299      124424 :   DFSResult->compute(SUnits);
    1300             :   ScheduledTrees.resize(DFSResult->getNumSubtrees());
    1301             : }
    1302      124424 : 
    1303             : /// Compute the max cyclic critical path through the DAG. The scheduling DAG
    1304             : /// only provides the critical path for single block loops. To handle loops that
    1305          10 : /// span blocks, we could use the vreg path latencies provided by
    1306          10 : /// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
    1307           8 : /// available for use in the scheduler.
    1308          10 : ///
    1309             : /// The cyclic path estimation identifies a def-use pair that crosses the back
    1310          10 : /// edge and considers the depth and height of the nodes. For example, consider
    1311          20 : /// the following instruction sequence where each instruction has unit latency
     1312          20 : /// and defines an eponymous virtual register:
    1313          10 : ///
    1314             : /// a->b(a,c)->c(b)->d(c)->exit
    1315             : ///
     1316             : /// The cyclic critical path is two cycles: b->c->b
    1317             : /// The acyclic critical path is four cycles: a->b->c->d->exit
    1318             : /// LiveOutHeight = height(c) = len(c->d->exit) = 2
    1319             : /// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
    1320             : /// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
    1321             : /// LiveInDepth = depth(b) = len(a->b) = 1
    1322             : ///
    1323             : /// LiveOutDepth - LiveInDepth = 3 - 1 = 2
    1324             : /// LiveInHeight - LiveOutHeight = 4 - 2 = 2
    1325             : /// CyclicCriticalPath = min(2, 2) = 2
    1326             : ///
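                      : /// In general, for each live-out def/use pair crossing the back edge this
                      : /// computes min(LiveOutDepth - LiveInDepth, LiveInHeight - LiveOutHeight),
                      : /// clamped at zero, and takes the maximum over all such pairs.
                      : ///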
    1327             : /// This could be relevant to PostRA scheduling, but is currently implemented
    1328             : /// assuming LiveIntervals.
    1329             : unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
     1330             :   // This only applies to single block loops.
    1331             :   if (!BB->isSuccessor(BB))
    1332             :     return 0;
    1333             : 
    1334             :   unsigned MaxCyclicLatency = 0;
    1335             :   // Visit each live out vreg def to find def/use pairs that cross iterations.
    1336             :   for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
    1337             :     unsigned Reg = P.RegUnit;
    1338             :     if (!TRI->isVirtualRegister(Reg))
     1339             :       continue;
    1340             :     const LiveInterval &LI = LIS->getInterval(Reg);
    1341      389637 :     const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
    1342             :     if (!DefVNI)
    1343      389637 :       continue;
    1344             : 
    1345             :     MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
    1346             :     const SUnit *DefSU = getSUnit(DefMI);
    1347             :     if (!DefSU)
    1348        6938 :       continue;
    1349        4133 : 
    1350        4133 :     unsigned LiveOutHeight = DefSU->getHeight();
    1351             :     unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
    1352        4127 :     // Visit all local users of the vreg def.
    1353        8254 :     for (const VReg2SUnit &V2SU
    1354        4127 :          : make_range(VRegUses.find(Reg), VRegUses.end())) {
    1355             :       SUnit *SU = V2SU.SU;
    1356             :       if (SU == &ExitSU)
    1357             :         continue;
    1358             : 
    1359        1694 :       // Only consider uses of the phi.
    1360        2290 :       LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
    1361             :       if (!LRQ.valueIn()->isPHIDef())
    1362             :         continue;
    1363        1694 : 
    1364             :       // Assume that a path spanning two iterations is a cycle, which could
    1365             :       // overestimate in strange cases. This allows cyclic latency to be
    1366        1694 :       // estimated as the minimum slack of the vreg's depth or height.
    1367        6417 :       unsigned CyclicLatency = 0;
    1368        6417 :       if (LiveOutDepth > SU->getDepth())
    1369         687 :         CyclicLatency = LiveOutDepth - SU->getDepth();
    1370             : 
    1371             :       unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
    1372        6417 :       if (LiveInHeight > LiveOutHeight) {
    1373       12834 :         if (LiveInHeight - LiveOutHeight < CyclicLatency)
    1374             :           CyclicLatency = LiveInHeight - LiveOutHeight;
    1375             :       } else
    1376             :         CyclicLatency = 0;
    1377             : 
    1378             :       LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
    1379             :                         << SU->NodeNum << ") = " << CyclicLatency << "c\n");
    1380        5730 :       if (CyclicLatency > MaxCyclicLatency)
    1381        5717 :         MaxCyclicLatency = CyclicLatency;
    1382             :     }
    1383        5730 :   }
    1384        5730 :   LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
    1385        5719 :   return MaxCyclicLatency;
    1386             : }
    1387             : 
     1388             : /// Release ExitSU predecessors and set up scheduler queues. Re-position
    1389             : /// the Top RP tracker in case the region beginning has changed.
    1390             : void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots,
    1391             :                                    ArrayRef<SUnit*> BotRoots) {
    1392        5730 :   ScheduleDAGMI::initQueues(TopRoots, BotRoots);
    1393             :   if (ShouldTrackPressure) {
    1394             :     assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
    1395             :     TopRPTracker.setPos(CurrentTop);
    1396             :   }
    1397             : }
    1398             : 
    1399             : /// Move an instruction and update register pressure.
    1400             : void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
    1401             :   // Move the instruction to its new location in the instruction stream.
    1402      432692 :   MachineInstr *MI = SU->getInstr();
    1403             : 
    1404      432692 :   if (IsTopNode) {
    1405      432692 :     assert(SU->isTopReady() && "node still has unscheduled dependencies");
    1406             :     if (&*CurrentTop == MI)
    1407             :       CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
    1408             :     else {
    1409      432692 :       moveInstruction(MI, CurrentTop);
    1410             :       TopRPTracker.setPos(MI);
    1411             :     }
    1412     2590387 : 
    1413             :     if (ShouldTrackPressure) {
    1414     2590387 :       // Update top scheduled pressure.
    1415             :       RegisterOperands RegOpers;
    1416     2590387 :       RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
    1417             :       if (ShouldTrackLaneMasks) {
    1418      100883 :         // Adjust liveness and add missing dead+read-undef flags.
    1419       85524 :         SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
    1420             :         RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
    1421       15359 :       } else {
    1422             :         // Adjust for missing dead-def flags.
    1423             :         RegOpers.detectDeadDefs(*MI, *LIS);
    1424             :       }
    1425      100883 : 
    1426             :       TopRPTracker.advance(RegOpers);
    1427       86905 :       assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
    1428       86905 :       LLVM_DEBUG(dbgs() << "Top Pressure:\n"; dumpRegSetPressure(
    1429       86905 :                      TopRPTracker.getRegSetPressureAtPos(), TRI););
    1430             : 
    1431       69909 :       updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
    1432       69909 :     }
    1433             :   } else {
    1434             :     assert(SU->isBottomReady() && "node still has unscheduled dependencies");
    1435       16996 :     MachineBasicBlock::iterator priorII =
    1436             :       priorNonDebug(CurrentBottom, CurrentTop);
    1437             :     if (&*priorII == MI)
    1438       86905 :       CurrentBottom = priorII;
    1439             :     else {
    1440             :       if (&*CurrentTop == MI) {
    1441             :         CurrentTop = nextIfDebug(++CurrentTop, priorII);
    1442             :         TopRPTracker.setPos(CurrentTop);
    1443       86905 :       }
    1444             :       moveInstruction(MI, CurrentBottom);
    1445             :       CurrentBottom = MI;
    1446             :       BotRPTracker.setPos(CurrentBottom);
    1447             :     }
    1448     2489504 :     if (ShouldTrackPressure) {
    1449     2489504 :       RegisterOperands RegOpers;
    1450     2304867 :       RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
    1451             :       if (ShouldTrackLaneMasks) {
    1452      184637 :         // Adjust liveness and add missing dead+read-undef flags.
    1453       23041 :         SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
    1454             :         RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
    1455             :       } else {
    1456      184637 :         // Adjust for missing dead-def flags.
    1457      184637 :         RegOpers.detectDeadDefs(*MI, *LIS);
    1458             :       }
    1459             : 
    1460     2489504 :       if (BotRPTracker.getPos() != CurrentBottom)
    1461     1449713 :         BotRPTracker.recedeSkipDebugValues();
    1462     1449713 :       SmallVector<RegisterMaskPair, 8> LiveUses;
    1463     1449713 :       BotRPTracker.recede(RegOpers, &LiveUses);
    1464             :       assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
    1465      271527 :       LLVM_DEBUG(dbgs() << "Bottom Pressure:\n"; dumpRegSetPressure(
    1466      271527 :                      BotRPTracker.getRegSetPressureAtPos(), TRI););
    1467             : 
    1468             :       updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
    1469     1178186 :       updatePressureDiffs(LiveUses);
    1470             :     }
    1471             :   }
    1472     1449713 : }
    1473     1294584 : 
    1474             : //===----------------------------------------------------------------------===//
    1475     1449713 : // BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
    1476             : //===----------------------------------------------------------------------===//
    1477             : 
    1478             : namespace {
    1479             : 
    1480     1449713 : /// Post-process the DAG to create cluster edges between neighboring
    1481     1449713 : /// loads or between neighboring stores.
    1482             : class BaseMemOpClusterMutation : public ScheduleDAGMutation {
    1483             :   struct MemOpInfo {
    1484     2590387 :     SUnit *SU;
    1485             :     unsigned BaseReg;
    1486             :     int64_t Offset;
    1487             : 
    1488             :     MemOpInfo(SUnit *su, unsigned reg, int64_t ofs)
    1489             :         : SU(su), BaseReg(reg), Offset(ofs) {}
    1490             : 
    1491             :     bool operator<(const MemOpInfo&RHS) const {
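                      :     // Order by base register, then offset, then DAG node number, so that
                      :     // sorting groups accesses off the same base with ascending offsets.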
     1492             :     bool operator<(const MemOpInfo &RHS) const {
    1493             :              std::tie(RHS.BaseReg, RHS.Offset, RHS.SU->NodeNum);
    1494             :     }
    1495             :   };
    1496             : 
    1497             :   const TargetInstrInfo *TII;
    1498             :   const TargetRegisterInfo *TRI;
    1499             :   bool IsLoad;
    1500             : 
    1501           0 : public:
    1502             :   BaseMemOpClusterMutation(const TargetInstrInfo *tii,
    1503             :                            const TargetRegisterInfo *tri, bool IsLoad)
    1504           0 :       : TII(tii), TRI(tri), IsLoad(IsLoad) {}
    1505           0 : 
    1506             :   void apply(ScheduleDAGInstrs *DAGInstrs) override;
    1507             : 
    1508             : protected:
    1509             :   void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG);
    1510             : };
    1511             : 
    1512             : class StoreClusterMutation : public BaseMemOpClusterMutation {
    1513             : public:
    1514             :   StoreClusterMutation(const TargetInstrInfo *tii,
    1515             :                        const TargetRegisterInfo *tri)
    1516       66526 :       : BaseMemOpClusterMutation(tii, tri, false) {}
    1517             : };
    1518             : 
    1519             : class LoadClusterMutation : public BaseMemOpClusterMutation {
    1520             : public:
    1521             :   LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
    1522             :       : BaseMemOpClusterMutation(tii, tri, true) {}
    1523             : };
    1524           0 : 
    1525             : } // end anonymous namespace
    1526             : 
    1527             : namespace llvm {
    1528       33263 : 
    1529             : std::unique_ptr<ScheduleDAGMutation>
    1530             : createLoadClusterDAGMutation(const TargetInstrInfo *TII,
    1531           0 :                              const TargetRegisterInfo *TRI) {
    1532             :   return EnableMemOpCluster ? llvm::make_unique<LoadClusterMutation>(TII, TRI)
    1533             :                             : nullptr;
    1534       33263 : }
    1535             : 
    1536             : std::unique_ptr<ScheduleDAGMutation>
    1537             : createStoreClusterDAGMutation(const TargetInstrInfo *TII,
    1538             :                               const TargetRegisterInfo *TRI) {
    1539             :   return EnableMemOpCluster ? llvm::make_unique<StoreClusterMutation>(TII, TRI)
    1540             :                             : nullptr;
    1541             : }
    1542       33263 : 
    1543             : } // end namespace llvm
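                      : 
                      : // Typical usage, from a target's createMachineScheduler() hook (assuming
                      : // DAG is a newly constructed ScheduleDAGMILive):
                      : //   DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
                      : //   DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));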
    1544       33263 : 
    1545       33263 : void BaseMemOpClusterMutation::clusterNeighboringMemOps(
    1546             :     ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) {
    1547             :   SmallVector<MemOpInfo, 32> MemOpRecords;
    1548             :   for (SUnit *SU : MemOps) {
    1549       33263 :     unsigned BaseReg;
    1550             :     int64_t Offset;
    1551       33263 :     if (TII->getMemOpBaseRegImmOfs(*SU->getInstr(), BaseReg, Offset, TRI))
    1552       33263 :       MemOpRecords.push_back(MemOpInfo(SU, BaseReg, Offset));
    1553             :   }
    1554             :   if (MemOpRecords.size() < 2)
    1555             :     return;
    1556             : 
    1557           0 :   llvm::sort(MemOpRecords);
    1558             :   unsigned ClusterLength = 1;
    1559           0 :   for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
    1560           0 :     SUnit *SUa = MemOpRecords[Idx].SU;
    1561             :     SUnit *SUb = MemOpRecords[Idx+1].SU;
    1562             :     if (TII->shouldClusterMemOps(*SUa->getInstr(), MemOpRecords[Idx].BaseReg,
    1563           0 :                                  *SUb->getInstr(), MemOpRecords[Idx+1].BaseReg,
    1564           0 :                                  ClusterLength) &&
    1565             :         DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
    1566           0 :       LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
    1567           0 :                         << SUb->NodeNum << ")\n");
    1568             :       // Copy successor edges from SUa to SUb. Interleaving computation
    1569           0 :       // dependent on SUa can prevent load combining due to register reuse.
    1570             :       // Predecessor edges do not need to be copied from SUb to SUa since nearby
    1571           0 :       // loads should have effectively the same inputs.
    1572           0 :       for (const SDep &Succ : SUa->Succs) {
    1573           0 :         if (Succ.getSUnit() == SUb)
    1574           0 :           continue;
    1575           0 :         LLVM_DEBUG(dbgs() << "  Copy Succ SU(" << Succ.getSUnit()->NodeNum
    1576           0 :                           << ")\n");
    1577           0 :         DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
    1578             :       }
    1579             :       ++ClusterLength;
    1580             :     } else
    1581             :       ClusterLength = 1;
    1582             :   }
    1583             : }
    1584           0 : 
     1585           0 : /// Callback from DAG postProcessing to create cluster edges for loads or stores.
    1586           0 : void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
    1587             :   ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
    1588             : 
    1589           0 :   // Map DAG NodeNum to store chain ID.
    1590             :   DenseMap<unsigned, unsigned> StoreChainIDs;
    1591           0 :   // Map each store chain to a set of dependent MemOps.
    1592             :   SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
    1593             :   for (SUnit &SU : DAG->SUnits) {
    1594             :     if ((IsLoad && !SU.getInstr()->mayLoad()) ||
    1595             :         (!IsLoad && !SU.getInstr()->mayStore()))
    1596             :       continue;
    1597             : 
    1598       72604 :     unsigned ChainPredID = DAG->SUnits.size();
    1599             :     for (const SDep &Pred : SU.Preds) {
    1600             :       if (Pred.isCtrl()) {
    1601             :         ChainPredID = Pred.getSUnit()->NodeNum;
    1602             :         break;
    1603             :       }
    1604       72604 :     }
    1605      910148 :     // Check if this chain-like pred has been seen
    1606      837544 :     // before. ChainPredID==MaxNodeID at the top of the schedule.
    1607      477159 :     unsigned NumChains = StoreChainDependents.size();
    1608      735426 :     std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
    1609             :       StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
    1610      204236 :     if (Result.second)
    1611      279140 :       StoreChainDependents.resize(NumChains + 1);
    1612      213983 :     StoreChainDependents[Result.first->second].push_back(&SU);
    1613       36961 :   }
    1614       36961 : 
    1615             :   // Iterate over the store chains.
    1616             :   for (auto &SCD : StoreChainDependents)
    1617             :     clusterNeighboringMemOps(SCD, DAG);
    1618             : }
    1619      102118 : 
    1620             : //===----------------------------------------------------------------------===//
    1621      102118 : // CopyConstrain - DAG post-processing to encourage copy elimination.
    1622      102118 : //===----------------------------------------------------------------------===//
    1623       60308 : 
    1624      204236 : namespace {
    1625             : 
    1626             : /// Post-process the DAG to create weak edges from all uses of a copy to
    1627             : /// the one use that defines the copy's source vreg, most likely an induction
    1628      132912 : /// variable increment.
    1629       60308 : class CopyConstrain : public ScheduleDAGMutation {
    1630       72604 :   // Transient state.
    1631             :   SlotIndex RegionBeginIdx;
    1632             : 
    1633             :   // RegionEndIdx is the slot index of the last non-debug instruction in the
    1634             :   // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
    1635             :   SlotIndex RegionEndIdx;
    1636             : 
    1637             : public:
    1638             :   CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
    1639             : 
    1640             :   void apply(ScheduleDAGInstrs *DAGInstrs) override;
    1641           0 : 
    1642             : protected:
    1643             :   void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
    1644             : };
    1645             : 
    1646             : } // end anonymous namespace
    1647             : 
    1648             : namespace llvm {
    1649             : 
    1650      145282 : std::unique_ptr<ScheduleDAGMutation>
    1651             : createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
    1652             :                                const TargetRegisterInfo *TRI) {
    1653             :   return llvm::make_unique<CopyConstrain>(TII, TRI);
    1654             : }
    1655             : 
    1656             : } // end namespace llvm
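                      : 
                      : // The generic live scheduler (createGenericSchedLive(), later in this
                      : // file) registers this mutation by default:
                      : //   DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));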
    1657             : 
    1658             : /// constrainLocalCopy handles two possibilities:
    1659             : /// 1) Local src:
    1660             : /// I0:     = dst
    1661             : /// I1: src = ...
    1662             : /// I2:     = dst
    1663      145282 : /// I3: dst = src (copy)
    1664             : /// (create pred->succ edges I0->I1, I2->I1)
    1665      145282 : ///
    1666             : /// 2) Local copy:
    1667             : /// I0: dst = src (copy)
    1668             : /// I1:     = dst
    1669             : /// I2: src = ...
    1670             : /// I3:     = dst
    1671             : /// (create pred->succ edges I1->I2, I3->I2)
    1672             : ///
    1673             : /// Although the MachineScheduler is currently constrained to single blocks,
    1674             : /// this algorithm should handle extended blocks. An EBB is a set of
    1675             : /// contiguously numbered blocks such that the previous block in the EBB is
    1676             : /// always the single predecessor.
    1677             : void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
    1678             :   LiveIntervals *LIS = DAG->getLIS();
    1679             :   MachineInstr *Copy = CopySU->getInstr();
    1680             : 
    1681             :   // Check for pure vreg copies.
    1682             :   const MachineOperand &SrcOp = Copy->getOperand(1);
    1683             :   unsigned SrcReg = SrcOp.getReg();
    1684             :   if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
    1685             :     return;
    1686             : 
    1687             :   const MachineOperand &DstOp = Copy->getOperand(0);
    1688             :   unsigned DstReg = DstOp.getReg();
    1689           0 :   if (!TargetRegisterInfo::isVirtualRegister(DstReg) || DstOp.isDead())
    1690           0 :     return;
    1691           0 : 
    1692             :   // Check if either the dest or source is local. If it's live across a back
    1693             :   // edge, it's not local. Note that if both vregs are live across the back
     1694           0 :   // edge, we cannot successfully constrain the copy without cyclic scheduling.
    1695           0 :   // If both the copy's source and dest are local live intervals, then we
    1696           0 :   // should treat the dest as the global for the purpose of adding
    1697           0 :   // constraints. This adds edges from source's other uses to the copy.
    1698             :   unsigned LocalReg = SrcReg;
    1699             :   unsigned GlobalReg = DstReg;
    1700           0 :   LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
    1701           0 :   if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
    1702           0 :     LocalReg = DstReg;
    1703             :     GlobalReg = SrcReg;
    1704             :     LocalLI = &LIS->getInterval(LocalReg);
    1705             :     if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
    1706             :       return;
    1707             :   }
    1708             :   LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
    1709             : 
    1710             :   // Find the global segment after the start of the local LI.
    1711             :   LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
    1712           0 :   // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
    1713           0 :   // local live range. We could create edges from other global uses to the local
    1714             :   // start, but the coalescer should have already eliminated these cases, so
    1715             :   // don't bother dealing with it.
    1716           0 :   if (GlobalSegment == GlobalLI->end())
    1717           0 :     return;
    1718           0 : 
    1719             :   // If GlobalSegment is killed at the LocalLI->start, the call to find()
    1720           0 :   // returned the next global segment. But if GlobalSegment overlaps with
    1721             :   // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
    1722             :   // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
    1723           0 :   if (GlobalSegment->contains(LocalLI->beginIndex()))
    1724             :     ++GlobalSegment;
    1725             : 
    1726             :   if (GlobalSegment == GlobalLI->end())
    1727             :     return;
    1728           0 : 
    1729           0 :   // Check if GlobalLI contains a hole in the vicinity of LocalLI.
    1730             :   if (GlobalSegment != GlobalLI->begin()) {
    1731             :     // Two address defs have no hole.
    1732             :     if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
    1733             :                                GlobalSegment->start)) {
    1734             :       return;
    1735           0 :     }
    1736           0 :     // If the prior global segment may be defined by the same two-address
    1737             :     // instruction that also defines LocalLI, then can't make a hole here.
    1738           0 :     if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
    1739           0 :                                LocalLI->beginIndex())) {
    1740             :       return;
    1741             :     }
    1742           0 :     // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
    1743             :     // it would be a disconnected component in the live range.
    1744           0 :     assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
    1745             :            "Disconnected LRG within the scheduling region.");
    1746           0 :   }
    1747             :   MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
    1748             :   if (!GlobalDef)
    1749             :     return;
    1750           0 : 
    1751             :   SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
    1752           0 :   if (!GlobalSU)
    1753             :     return;
    1754             : 
    1755             :   // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
    1756             :   // constraining the uses of the last local def to precede GlobalDef.
    1757             :   SmallVector<SUnit*,8> LocalUses;
    1758             :   const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
    1759             :   MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
    1760           0 :   SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
    1761           0 :   for (const SDep &Succ : LastLocalSU->Succs) {
    1762             :     if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
    1763             :       continue;
    1764           0 :     if (Succ.getSUnit() == GlobalSU)
    1765           0 :       continue;
    1766             :     if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
    1767             :       return;
    1768             :     LocalUses.push_back(Succ.getSUnit());
    1769             :   }
    1770           0 :   // Open the top of the GlobalLI hole by constraining any earlier global uses
    1771             :   // to precede the start of LocalLI.
    1772             :   SmallVector<SUnit*,8> GlobalUses;
    1773           0 :   MachineInstr *FirstLocalDef =
    1774           0 :     LIS->getInstructionFromIndex(LocalLI->beginIndex());
    1775           0 :   SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
    1776           0 :   for (const SDep &Pred : GlobalSU->Preds) {
    1777           0 :     if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
    1778           0 :       continue;
    1779           0 :     if (Pred.getSUnit() == FirstLocalSU)
    1780           0 :       continue;
    1781             :     if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
    1782             :       return;
    1783             :     GlobalUses.push_back(Pred.getSUnit());
    1784             :   }
    1785             :   LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
    1786             :   // Add the weak edges.
    1787             :   for (SmallVectorImpl<SUnit*>::const_iterator
    1788           0 :          I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
    1789           0 :     LLVM_DEBUG(dbgs() << "  Local use SU(" << (*I)->NodeNum << ") -> SU("
    1790           0 :                       << GlobalSU->NodeNum << ")\n");
    1791           0 :     DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
    1792           0 :   }
    1793           0 :   for (SmallVectorImpl<SUnit*>::const_iterator
    1794           0 :          I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
    1795           0 :     LLVM_DEBUG(dbgs() << "  Global use SU(" << (*I)->NodeNum << ") -> SU("
    1796             :                       << FirstLocalSU->NodeNum << ")\n");
    1797             :     DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
    1798             :   }
    1799           0 : }
    1800           0 : 
    1801             : /// Callback from DAG postProcessing to create weak edges to encourage
    1802             : /// copy elimination.
    1803           0 : void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
    1804             :   ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
    1805           0 :   assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
    1806           0 : 
    1807             :   MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
    1808             :   if (FirstPos == DAG->end())
    1809           0 :     return;
    1810             :   RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
    1811             :   RegionEndIdx = DAG->getLIS()->getInstructionIndex(
    1812             :       *priorNonDebug(DAG->end(), DAG->begin()));
    1813             : 
    1814             :   for (SUnit &SU : DAG->SUnits) {
    1815      408886 :     if (!SU.getInstr()->isCopy())
    1816             :       continue;
    1817             : 
    1818             :     constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
    1819      408886 :   }
    1820      408886 : }
    1821             : 
    1822      408407 : //===----------------------------------------------------------------------===//
    1823             : // MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
    1824      816814 : // and possibly other custom schedulers.
    1825             : //===----------------------------------------------------------------------===//
    1826     2602894 : 
    1827     4388974 : static const unsigned InvalidCycle = ~0U;
    1828             : 
    1829             : SchedBoundary::~SchedBoundary() { delete HazardRec; }
    1830      584643 : 
    1831             : /// Given a Count of resource usage and a Latency value, return true if a
    1832             : /// SchedBoundary becomes resource limited.
    1833             : static bool checkResourceLimit(unsigned LFactor, unsigned Count,
    1834             :                                unsigned Latency) {
    1835             :   return (int)(Count - (Latency * LFactor)) > (int)LFactor;
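                      :   // Equivalent to Count > (Latency + 1) * LFactor: the scaled resource
                      :   // count exceeds what Latency cycles alone would cover by more than one
                      :   // cycle's worth of the limiting resource.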
    1836             : }
    1837             : 
    1838             : void SchedBoundary::reset() {
    1839             :   // A new HazardRec is created for each DAG and owned by SchedBoundary.
     1840             :   // Destroying and reconstructing it is very expensive, though, so keep
     1841      685000 :   // invalid placeholder HazardRecs around.
    1842             :   if (HazardRec && HazardRec->isEnabled()) {
    1843             :     delete HazardRec;
    1844             :     HazardRec = nullptr;
    1845             :   }
    1846             :   Available.clear();
    1847     3897554 :   Pending.clear();
    1848             :   CheckPending = false;
    1849             :   CurrCycle = 0;
    1850     1210938 :   CurrMOps = 0;
    1851             :   MinReadyCycle = std::numeric_limits<unsigned>::max();
    1852             :   ExpectedLatency = 0;
    1853             :   DependentLatency = 0;
    1854     1210938 :   RetiredMOps = 0;
    1855        7970 :   MaxExecutedResCount = 0;
    1856        7970 :   ZoneCritResIdx = 0;
    1857             :   IsResourceLimited = false;
    1858             :   ReservedCycles.clear();
    1859             : #ifndef NDEBUG
    1860     1210938 :   // Track the maximum number of stall cycles that could arise either from the
    1861     1210938 :   // latency of a DAG edge or the number of cycles that a processor resource is
    1862     1210938 :   // reserved (SchedBoundary::ReservedCycles).
    1863     1210938 :   MaxObservedStall = 0;
    1864     1210938 : #endif
    1865     1210938 :   // Reserve a zero-count for invalid CritResIdx.
    1866     1210938 :   ExecutedResCounts.resize(1);
    1867     1210938 :   assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
    1868     1210938 : }
    1869     1210938 : 
    1870             : void SchedRemainder::
    1871             : init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
    1872             :   reset();
    1873             :   if (!SchedModel->hasInstrSchedModel())
    1874             :     return;
    1875             :   RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
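                      :   // Accumulate remaining micro-ops and resource cycles in the normalized
                      :   // "factor" units provided by TargetSchedModel, so issue width and
                      :   // heterogeneous resources are directly comparable.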
    1876             :   for (SUnit &SU : DAG->SUnits) {
    1877             :     const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
    1878     1210938 :     RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
    1879             :       * SchedModel->getMicroOpFactor();
    1880     1210938 :     for (TargetSchedModel::ProcResIter
    1881             :            PI = SchedModel->getWriteProcResBegin(SC),
    1882      443077 :            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
    1883             :       unsigned PIdx = PI->ProcResourceIdx;
    1884             :       unsigned Factor = SchedModel->getResourceFactor(PIdx);
    1885      443077 :       RemainingCounts[PIdx] += (Factor * PI->Cycles);
    1886             :     }
    1887      375370 :   }
    1888     1275815 : }
    1889     1088130 : 
    1890     1088130 : void SchedBoundary::
    1891     1088130 : init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
    1892     2118523 :   reset();
    1893     1088130 :   DAG = dag;
    1894     3206653 :   SchedModel = smodel;
    1895     2118523 :   Rem = rem;
    1896             :   if (SchedModel->hasInstrSchedModel()) {
    1897     2118523 :     ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
    1898             :     ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle);
    1899             :   }
    1900             : }
    1901             : 
    1902      868438 : /// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
    1903             : /// these "soft stalls" differently than the hard stall cycles based on CPU
    1904      868438 : /// resources and computed by checkHazard(). A fully in-order model
    1905      868438 : /// (MicroOpBufferSize==0) will not make use of this since instructions are not
    1906      868438 : /// available for scheduling until they are ready. However, a weaker in-order
    1907      868438 : /// model may use this for heuristics. For example, if a processor has in-order
    1908      868438 : /// behavior when reading certain resources, this may come into play.
    1909      746070 : unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
    1910      746070 :   if (!SU->isUnbuffered)
    1911             :     return 0;
    1912      868438 : 
    1913             :   unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
    1914             :   if (ReadyCycle > CurrCycle)
    1915             :     return ReadyCycle - CurrCycle;
    1916             :   return 0;
    1917             : }
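The soft-stall rule above reduces to a saturating subtraction. A minimal, self-contained sketch with invented cycle numbers (none of these values come from a real target model, and this helper is not part of MachineScheduler.cpp):

// Hypothetical illustration: an unbuffered SU that becomes ready at cycle 7
// while the zone sits at cycle 5 reports a 2-cycle soft stall; buffered SUs
// always report 0.
unsigned softStallSketch(bool IsUnbuffered, unsigned ReadyCycle,
                         unsigned CurrCycle) {
  if (!IsUnbuffered)
    return 0;
  return ReadyCycle > CurrCycle ? ReadyCycle - CurrCycle : 0;
}
// softStallSketch(true, 7, 5) == 2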
    1918             : 
    1919             : /// Compute the next cycle at which the given processor resource can be
    1920             : /// scheduled.
    1921    22732034 : unsigned SchedBoundary::
    1922    22732034 : getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
    1923             :   unsigned NextUnreserved = ReservedCycles[PIdx];
    1924             :   // If this resource has never been used, always return cycle zero.
    1925     2781651 :   if (NextUnreserved == InvalidCycle)
    1926     2781651 :     return 0;
    1927       79489 :   // For bottom-up scheduling add the cycles needed for the current operation.
    1928             :   if (!isTop())
    1929             :     NextUnreserved += Cycles;
    1930             :   return NextUnreserved;
    1931             : }
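A sketch of the reservation rule above, using a local sentinel in place of InvalidCycle (the names mirror the code, but this is an assumption-laden stand-in, not the LLVM API):

#include <limits>

// An unused resource is free at cycle 0; for bottom-up scheduling the
// reservation is extended by the cycles the current operation itself needs.
unsigned nextResourceCycleSketch(unsigned ReservedUntil, unsigned Cycles,
                                 bool IsTop) {
  const unsigned Invalid = std::numeric_limits<unsigned>::max();
  if (ReservedUntil == Invalid)
    return 0;
  if (!IsTop)
    ReservedUntil += Cycles;
  return ReservedUntil;
}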
    1932             : 
    1933     2121405 : /// Does this SU have a hazard within the current instruction group.
    1934             : ///
    1935     2121405 : /// The scheduler supports two modes of hazard recognition. The first is the
    1936             : /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
    1937     2121405 : /// supports highly complicated in-order reservation tables
    1938             : /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
    1939             : ///
    1940        2232 : /// The second is a streamlined mechanism that checks for hazards based on
    1941         944 : /// simple counters that the scheduler itself maintains. It explicitly checks
    1942             : /// for instruction dispatch limitations, including the number of micro-ops that
    1943             : /// can dispatch per cycle.
    1944             : ///
    1945             : /// TODO: Also check whether the SU must start a new group.
    1946             : bool SchedBoundary::checkHazard(SUnit *SU) {
    1947             :   if (HazardRec->isEnabled()
    1948             :       && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
    1949             :     return true;
    1950             :   }
    1951             : 
    1952             :   unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
    1953             :   if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
    1954             :     LLVM_DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="
    1955             :                       << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
    1956             :     return true;
    1957             :   }
    1958    11558048 : 
    1959    11558048 :   if (CurrMOps > 0 &&
    1960    11558048 :       ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
    1961             :        (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
    1962             :     LLVM_DEBUG(dbgs() << "  hazard: SU(" << SU->NodeNum << ") must "
    1963             :                       << (isTop() ? "begin" : "end") << " group\n");
    1964    11533520 :     return true;
    1965    11533520 :   }
    1966             : 
    1967             :   if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
    1968             :     const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
    1969             :     for (const MCWriteProcResEntry &PE :
    1970             :           make_range(SchedModel->getWriteProcResBegin(SC),
    1971    11435096 :                      SchedModel->getWriteProcResEnd(SC))) {
    1972     8497979 :       unsigned ResIdx = PE.ProcResourceIdx;
    1973     8444773 :       unsigned Cycles = PE.Cycles;
    1974             :       unsigned NRCycle = getNextResourceCycle(ResIdx, Cycles);
    1975             :       if (NRCycle > CurrCycle) {
    1976         287 : #ifndef NDEBUG
    1977             :         MaxObservedStall = std::max(Cycles, MaxObservedStall);
    1978             : #endif
    1979    11434809 :         LLVM_DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") "
    1980        1891 :                           << SchedModel->getResourceName(ResIdx) << "="
    1981        1927 :                           << NRCycle << "c\n");
    1982             :         return true;
    1983        3818 :       }
    1984        2521 :     }
    1985        2521 :   }
    1986        2521 :   return false;
    1987        2521 : }
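The micro-op dispatch limit checked above is plain counter arithmetic. A sketch with assumed numbers (IssueWidth, CurrMOps, and uops are invented for illustration):

// On an assumed 4-wide machine with 3 micro-ops already issued this cycle,
// a 2-uop instruction overflows the current group and counts as a hazard.
bool uopGroupHazardSketch() {
  unsigned IssueWidth = 4, CurrMOps = 3, uops = 2;         // made-up values
  return (CurrMOps > 0) && (CurrMOps + uops > IssueWidth); // true
}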
    1988             : 
    1989             : // Find the unscheduled node in ReadySUs with the highest latency.
    1990             : unsigned SchedBoundary::
    1991             : findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
    1992             :   SUnit *LateSU = nullptr;
    1993             :   unsigned RemLatency = 0;
    1994             :   for (SUnit *SU : ReadySUs) {
    1995             :     unsigned L = getUnscheduledLatency(SU);
    1996             :     if (L > RemLatency) {
    1997             :       RemLatency = L;
    1998             :       LateSU = SU;
    1999             :     }
    2000             :   }
    2001             :   if (LateSU) {
    2002     1013716 :     LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("
    2003             :                       << LateSU->NodeNum << ") " << RemLatency << "c\n");
    2004             :   }
    2005             :   return RemLatency;
    2006     5873676 : }
    2007     4859960 : 
    2008     4859960 : // Count resources in this zone and the remaining unscheduled
    2009             : // instructions. Return the max count, scaled. Set OtherCritIdx to the critical
    2010             : // resource index, or zero if the zone is issue limited.
    2011             : unsigned SchedBoundary::
    2012             : getOtherResourceCount(unsigned &OtherCritIdx) {
    2013             :   OtherCritIdx = 0;
    2014             :   if (!SchedModel->hasInstrSchedModel())
    2015             :     return 0;
    2016             : 
    2017     1013716 :   unsigned OtherCritCount = Rem->RemIssueCount
    2018             :     + (RetiredMOps * SchedModel->getMicroOpFactor());
    2019             :   LLVM_DEBUG(dbgs() << "  " << Available.getName() << " + Remain MOps: "
    2020             :                     << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
    2021             :   for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
    2022             :        PIdx != PEnd; ++PIdx) {
    2023      638914 :     unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
    2024             :     if (OtherCount > OtherCritCount) {
    2025      638914 :       OtherCritCount = OtherCount;
    2026      638914 :       OtherCritIdx = PIdx;
    2027             :     }
    2028             :   }
    2029      443466 :   if (OtherCritIdx) {
    2030      443466 :     LLVM_DEBUG(
    2031             :         dbgs() << "  " << Available.getName() << " + Remain CritRes: "
    2032             :                << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
    2033     2832242 :                << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
    2034     3275708 :   }
    2035     2832242 :   return OtherCritCount;
    2036     2832242 : }
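A sketch of that scan with assumed scaled counts (the vector and the numbers are hypothetical; index 0 stands for micro-op issue, so a result of 0 means the zone is issue limited):

#include <vector>

unsigned otherCritSketch() {
  unsigned OtherCritCount = 9;               // remaining + retired uops, scaled
  std::vector<unsigned> Counts = {0, 12, 7}; // per-resource scaled counts
  unsigned OtherCritIdx = 0;
  for (unsigned PIdx = 1; PIdx != Counts.size(); ++PIdx)
    if (Counts[PIdx] > OtherCritCount) {
      OtherCritCount = Counts[PIdx];
      OtherCritIdx = PIdx;                   // resource 1 wins with 12
    }
  return OtherCritIdx;                       // == 1
}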
    2037             : 
    2038       16902 : void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
    2039             :   assert(SU->getInstr() && "Scheduled SUnit must have instr");
    2040             : 
    2041             : #ifndef NDEBUG
    2042             :   // ReadyCycle has been bumped up to the CurrCycle when this node was
    2043             :   // scheduled, but CurrCycle may have been eagerly advanced immediately after
    2044             :   // scheduling, so may now be greater than ReadyCycle.
    2045             :   if (ReadyCycle > CurrCycle)
    2046             :     MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
    2047      443466 : #endif
    2048             : 
    2049             :   if (ReadyCycle < MinReadyCycle)
    2050     3733002 :     MinReadyCycle = ReadyCycle;
    2051             : 
    2052             :   // Check for interlocks first. For the purpose of other heuristics, an
    2053             :   // instruction that cannot issue appears as if it's not in the ReadyQueue.
    2054             :   bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
    2055             :   if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU) ||
    2056             :       Available.size() >= ReadyListLimit)
    2057             :     Pending.push(SU);
    2058             :   else
    2059             :     Available.push(SU);
    2060             : }
    2061     3733002 : 
    2062      876399 : /// Move the boundary of scheduled code by one cycle.
    2063             : void SchedBoundary::bumpCycle(unsigned NextCycle) {
    2064             :   if (SchedModel->getMicroOpBufferSize() == 0) {
    2065             :     assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
    2066     3733002 :            "MinReadyCycle uninitialized");
    2067     3733002 :     if (MinReadyCycle > NextCycle)
    2068             :       NextCycle = MinReadyCycle;
    2069      178750 :   }
    2070             :   // Update the current micro-ops, which will issue in the next cycle.
    2071     3554252 :   unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
    2072     3733002 :   CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
    2073             : 
    2074             :   // Decrement DependentLatency based on the next cycle.
    2075      887518 :   if ((NextCycle - CurrCycle) > DependentLatency)
    2076      887518 :     DependentLatency = 0;
    2077             :   else
    2078             :     DependentLatency -= (NextCycle - CurrCycle);
    2079      277038 : 
    2080             :   if (!HazardRec->isEnabled()) {
    2081             :     // Bypass HazardRec virtual calls.
    2082             :     CurrCycle = NextCycle;
    2083      887518 :   } else {
    2084      887518 :     // Bypass getHazardType calls in case of long latency.
    2085             :     for (; CurrCycle != NextCycle; ++CurrCycle) {
    2086             :       if (isTop())
    2087      887518 :         HazardRec->AdvanceCycle();
    2088      133628 :       else
    2089             :         HazardRec->RecedeCycle();
    2090      753890 :     }
    2091             :   }
    2092      887518 :   CheckPending = true;
    2093             :   IsResourceLimited =
    2094      814961 :       checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
    2095             :                          getScheduledLatency());
    2096             : 
    2097      170308 :   LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()
    2098       97751 :                     << '\n');
    2099       44226 : }
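The CurrMOps update at the top of bumpCycle is saturating arithmetic over the issue width. A worked sketch with invented numbers:

// Advancing from cycle 3 to cycle 5 on an assumed 4-wide machine drains up
// to 8 micro-ops from the in-flight group, so 6 pending micro-ops go to 0.
unsigned bumpCycleMOpsSketch() {
  unsigned IssueWidth = 4, CurrCycle = 3, NextCycle = 5, CurrMOps = 6;
  unsigned DecMOps = IssueWidth * (NextCycle - CurrCycle); // 8
  return (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;   // 0
}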
    2100             : 
    2101       53525 : void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
    2102             :   ExecutedResCounts[PIdx] += Count;
    2103             :   if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
    2104      887518 :     MaxExecutedResCount = ExecutedResCounts[PIdx];
    2105      887518 : }
    2106      887518 : 
    2107             : /// Add the given processor resource to this scheduled zone.
    2108             : ///
    2109             : /// \param Cycles indicates the number of consecutive (non-pipelined) cycles
    2110             : /// during which this resource is consumed.
    2111      887518 : ///
    2112             : /// \return the next cycle at which the instruction may execute without
    2113     2118523 : /// oversubscribing resources.
    2114     2118523 : unsigned SchedBoundary::
    2115     2118523 : countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
    2116      687938 :   unsigned Factor = SchedModel->getResourceFactor(PIdx);
    2117     2118523 :   unsigned Count = Factor * Cycles;
    2118             :   LLVM_DEBUG(dbgs() << "  " << SchedModel->getResourceName(PIdx) << " +"
    2119             :                     << Cycles << "x" << Factor << "u\n");
    2120             : 
    2121             :   // Update Executed resources counts.
    2122             :   incExecutedResources(PIdx, Count);
    2123             :   assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
    2124             :   Rem->RemainingCounts[PIdx] -= Count;
    2125             : 
    2126     2118523 :   // Check if this resource exceeds the current critical resource. If so, it
    2127             :   // becomes the critical resource.
    2128     2118523 :   if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
    2129     2118523 :     ZoneCritResIdx = PIdx;
    2130             :     LLVM_DEBUG(dbgs() << "  *** Critical resource "
    2131             :                       << SchedModel->getResourceName(PIdx) << ": "
    2132             :                       << getResourceCount(PIdx) / SchedModel->getLatencyFactor()
    2133             :                       << "c\n");
    2134     2118523 :   }
    2135             :   // For reserved resources, record the highest cycle using the resource.
    2136     2118523 :   unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles);
    2137             :   if (NextAvailable > CurrCycle) {
    2138             :     LLVM_DEBUG(dbgs() << "  Resource conflict: "
    2139             :                       << SchedModel->getProcResource(PIdx)->Name
    2140     4050755 :                       << " reserved until @" << NextAvailable << "\n");
    2141      290115 :   }
    2142             :   return NextAvailable;
    2143             : }
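The scaled bookkeeping can be seen in isolation. A sketch with a hypothetical factor and cycle count (the factor normalizes resources with different unit counts so their usage compares directly):

#include <vector>

// A resource used for 3 consecutive cycles with a scaling factor of 2 adds
// 6 scaled units; the critical resource only changes on a strict increase.
unsigned countResourceSketch() {
  std::vector<unsigned> ResCounts = {10, 6, 0}; // scaled counts per kind
  unsigned PIdx = 2, Factor = 2, Cycles = 3;    // assumed values
  ResCounts[PIdx] += Factor * Cycles;           // now 6
  unsigned ZoneCritResIdx = 1;
  if (ResCounts[PIdx] > ResCounts[ZoneCritResIdx])
    ZoneCritResIdx = PIdx;                      // 6 > 6 fails: stays 1
  return ZoneCritResIdx;
}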
    2144             : 
    2145             : /// Move the boundary of scheduled code by one SUnit.
    2146             : void SchedBoundary::bumpNode(SUnit *SU) {
    2147             :   // Update the reservation table.
    2148     2118523 :   if (HazardRec->isEnabled()) {
    2149             :     if (!isTop() && SU->isCall) {
    2150             :       // Calls are scheduled with their preceding instructions. For bottom-up
    2151             :       // scheduling, clear the pipeline state before emitting.
    2152             :       HazardRec->Reset();
    2153             :     }
    2154     2118523 :     HazardRec->EmitInstruction(SU);
    2155             :   }
    2156             :   // checkHazard should prevent scheduling multiple instructions per cycle that
    2157             :   // exceed the issue width.
    2158     2591852 :   const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
    2159             :   unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
    2160     2591852 :   assert(
    2161       90705 :       (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
    2162             :       "Cannot schedule this instruction's MicroOps in the current cycle.");
    2163             : 
    2164           0 :   unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
    2165             :   LLVM_DEBUG(dbgs() << "  Ready @" << ReadyCycle << "c\n");
    2166       90705 : 
    2167             :   unsigned NextCycle = CurrCycle;
    2168             :   switch (SchedModel->getMicroOpBufferSize()) {
    2169             :   case 0:
    2170     2591852 :     assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
    2171     2591852 :     break;
    2172             :   case 1:
    2173             :     if (ReadyCycle > NextCycle) {
    2174             :       NextCycle = ReadyCycle;
    2175             :       LLVM_DEBUG(dbgs() << "  *** Stall until: " << ReadyCycle << "\n");
    2176     2591852 :     }
    2177             :     break;
    2178             :   default:
    2179     2591852 :     // We don't currently model the OOO reorder buffer, so consider all
    2180     2591852 :     // scheduled MOps to be "retired". We do loosely model in-order resource
    2181             :     // latency. If this instruction uses an in-order resource, account for any
    2182             :     // likely stall cycles.
    2183             :     if (SU->isUnbuffered && ReadyCycle > NextCycle)
    2184      268267 :       NextCycle = ReadyCycle;
    2185      268267 :     break;
    2186             :   }
    2187             :   RetiredMOps += IncMOps;
    2188             : 
    2189             :   // Update resource counts and critical resource.
    2190     1995601 :   if (SchedModel->hasInstrSchedModel()) {
    2191             :     unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
    2192             :     assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
    2193             :     Rem->RemIssueCount -= DecRemIssue;
    2194             :     if (ZoneCritResIdx) {
    2195     1995601 :       // Scale scheduled micro-ops for comparing with the critical resource.
    2196             :       unsigned ScaledMOps =
    2197             :         RetiredMOps * SchedModel->getMicroOpFactor();
    2198             : 
    2199     2591852 :       // If scaled micro-ops are now more than the previous critical resource by
    2200             :       // a full cycle, then micro-ops issue becomes critical.
    2201             :       if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
    2202     2591852 :           >= (int)SchedModel->getLatencyFactor()) {
    2203     1088130 :         ZoneCritResIdx = 0;
    2204             :         LLVM_DEBUG(dbgs() << "  *** Critical resource NumMicroOps: "
    2205     1088130 :                           << ScaledMOps / SchedModel->getLatencyFactor()
    2206     1088130 :                           << "c\n");
    2207             :       }
    2208             :     }
    2209      522272 :     for (TargetSchedModel::ProcResIter
    2210             :            PI = SchedModel->getWriteProcResBegin(SC),
    2211             :            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
    2212             :       unsigned RCycle =
    2213     1044544 :         countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
    2214      522272 :       if (RCycle > NextCycle)
    2215        1649 :         NextCycle = RCycle;
    2216             :     }
    2217             :     if (SU->hasReservedResource) {
    2218             :       // For reserved resources, record the highest cycle using the resource.
    2219             :       // For top-down scheduling, this is the cycle in which we schedule this
    2220             :       // instruction plus the number of cycles the operation reserves the
    2221             :       // resource. For bottom-up, it is simply the instruction's cycle.
    2222     1088130 :       for (TargetSchedModel::ProcResIter
    2223     3206653 :              PI = SchedModel->getWriteProcResBegin(SC),
    2224             :              PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
    2225     2118523 :         unsigned PIdx = PI->ProcResourceIdx;
    2226     2118523 :         if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
    2227             :           if (isTop()) {
    2228             :             ReservedCycles[PIdx] =
    2229     1088130 :               std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles);
    2230             :           }
    2231             :           else
    2232             :             ReservedCycles[PIdx] = NextCycle;
    2233             :         }
    2234         809 :       }
    2235         661 :     }
    2236        1470 :   }
    2237         809 :   // Update ExpectedLatency and DependentLatency.
    2238        1618 :   unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
    2239         661 :   unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
    2240         361 :   if (SU->getDepth() > TopLatency) {
    2241         722 :     TopLatency = SU->getDepth();
    2242             :     LLVM_DEBUG(dbgs() << "  " << Available.getName() << " TopLatency SU("
    2243             :                       << SU->NodeNum << ") " << TopLatency << "c\n");
    2244         300 :   }
    2245             :   if (SU->getHeight() > BotLatency) {
    2246             :     BotLatency = SU->getHeight();
    2247             :     LLVM_DEBUG(dbgs() << "  " << Available.getName() << " BotLatency SU("
    2248             :                       << SU->NodeNum << ") " << BotLatency << "c\n");
    2249             :   }
    2250     2591852 :   // If we stall for any reason, bump the cycle.
    2251     2591852 :   if (NextCycle > CurrCycle)
    2252     2591852 :     bumpCycle(NextCycle);
    2253      504026 :   else
    2254             :     // After updating ZoneCritResIdx and ExpectedLatency, check if we're
    2255             :     // resource limited. If a stall occurred, bumpCycle does this.
    2256             :     IsResourceLimited =
    2257     2591852 :         checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
    2258      928748 :                            getScheduledLatency());
    2259             : 
    2260             :   // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
    2261             :   // resets CurrMOps. Loop to handle instructions with more MOps than can issue
    2262             :   // in one cycle. Since we commonly reach the max MOps here, opportunistically
    2263     2591852 :   // bump the cycle to avoid uselessly checking everything in the readyQ.
    2264       25280 :   CurrMOps += IncMOps;
    2265             : 
    2266             :   // Bump the cycle count for issue group constraints.
    2267             :   // This must be done after NextCycle has been adjusted for all other stalls.
    2268     2566572 :   // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
    2269             :   // CurrCycle to X.
    2270             :   if ((isTop() &&  SchedModel->mustEndGroup(SU->getInstr())) ||
    2271             :       (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
    2272             :     LLVM_DEBUG(dbgs() << "  Bump cycle to " << (isTop() ? "end" : "begin")
    2273             :                       << " group\n");
    2274             :     bumpCycle(++NextCycle);
    2275             :   }
    2276     2591852 : 
    2277             :   while (CurrMOps >= SchedModel->getIssueWidth()) {
    2278             :     LLVM_DEBUG(dbgs() << "  *** Max MOps " << CurrMOps << " at cycle "
    2279             :                       << CurrCycle << '\n');
    2280             :     bumpCycle(++NextCycle);
    2281             :   }
    2282     2591852 :   LLVM_DEBUG(dumpScheduledState());
    2283     2410567 : }
    2284             : 
    2285             : /// Release pending ready nodes into the available queue. This makes them
    2286         332 : /// visible to heuristics.
    2287             : void SchedBoundary::releasePending() {
    2288             :   // If the available queue is empty, it is safe to reset MinReadyCycle.
    2289     3295746 :   if (Available.empty())
    2290             :     MinReadyCycle = std::numeric_limits<unsigned>::max();
    2291             : 
    2292      703894 :   // Check to see if any of the pending instructions are ready to issue.  If
    2293             :   // so, add them to the available queue.
    2294             :   bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
    2295     2591852 :   for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
    2296             :     SUnit *SU = *(Pending.begin()+i);
    2297             :     unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
    2298             : 
    2299      767468 :     if (ReadyCycle < MinReadyCycle)
    2300             :       MinReadyCycle = ReadyCycle;
    2301      767468 : 
    2302      181432 :     if (!IsBuffered && ReadyCycle > CurrCycle)
    2303             :       continue;
    2304             : 
    2305             :     if (checkHazard(SU))
    2306      767468 :       continue;
    2307     1104626 : 
    2308      340524 :     if (Available.size() >= ReadyListLimit)
    2309      340524 :       break;
    2310             : 
    2311      340524 :     Available.push(SU);
    2312      190208 :     Pending.remove(Pending.begin()+i);
    2313             :     --i; --e;
    2314      340524 :   }
    2315             :   CheckPending = false;
    2316             : }
    2317      258649 : 
    2318             : /// Remove SU from the ready set for this boundary.
    2319             : void SchedBoundary::removeReady(SUnit *SU) {
    2320      250626 :   if (Available.isInQueue(SU))
    2321             :     Available.remove(Available.find(SU));
    2322             :   else {
    2323      247260 :     assert(Pending.isInQueue(SU) && "bad ready count");
    2324             :     Pending.remove(Pending.find(SU));
    2325      247260 :   }
    2326             : }
    2327      767468 : 
    2328      767468 : /// If this queue only has one ready candidate, return it. As a side effect,
    2329             : /// defer any nodes that now hit a hazard, and advance the cycle until at least
    2330             : /// one node is ready. If multiple instructions are ready, return nullptr.
    2331     3732992 : SUnit *SchedBoundary::pickOnlyChoice() {
    2332     7465984 :   if (CheckPending)
    2333     3720406 :     releasePending();
    2334             : 
    2335             :   if (CurrMOps > 0) {
    2336       12586 :     // Defer any ready instrs that now have a hazard.
    2337             :     for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
    2338     3732992 :       if (checkHazard(*I)) {
    2339             :         Pending.push(*I);
    2340             :         I = Available.remove(I);
    2341             :         continue;
    2342             :       }
    2343     2921829 :       ++I;
    2344     2921829 :     }
    2345      609456 :   }
    2346             :   for (unsigned i = 0; Available.empty(); ++i) {
    2347     2921829 : //  FIXME: Re-enable assert once PR20057 is resolved.
    2348             : //    assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
    2349     8912445 : //           "permanent hazard");
    2350     7686454 :     (void)i;
    2351       81099 :     bumpCycle(CurrCycle + 1);
    2352             :     releasePending();
    2353       81099 :   }
    2354             : 
    2355             :   LLVM_DEBUG(Pending.dump());
    2356             :   LLVM_DEBUG(Available.dump());
    2357             : 
    2358     3079841 :   if (Available.size() == 1)
    2359             :     return *Available.begin();
    2360             :   return nullptr;
    2361             : }
    2362             : 
    2363      158012 : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    2364      158012 : // This is useful information to dump after bumpNode.
    2365             : // Note that the Queue contents are more useful before pickNodeFromQueue.
    2366             : LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {
    2367             :   unsigned ResFactor;
    2368             :   unsigned ResCount;
    2369             :   if (ZoneCritResIdx) {
    2370     2921829 :     ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
    2371     1145662 :     ResCount = getResourceCount(ZoneCritResIdx);
    2372             :   } else {
    2373             :     ResFactor = SchedModel->getMicroOpFactor();
    2374             :     ResCount = RetiredMOps * ResFactor;
    2375             :   }
    2376             :   unsigned LFactor = SchedModel->getLatencyFactor();
    2377             :   dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
    2378             :          << "  Retired: " << RetiredMOps;
    2379             :   dbgs() << "\n  Executed: " << getExecutedCount() / LFactor << "c";
    2380             :   dbgs() << "\n  Critical: " << ResCount / LFactor << "c, "
    2381             :          << ResCount / ResFactor << " "
    2382             :          << SchedModel->getResourceName(ZoneCritResIdx)
    2383             :          << "\n  ExpectedLatency: " << ExpectedLatency << "c\n"
    2384             :          << (IsResourceLimited ? "  - Resource" : "  - Latency")
    2385             :          << " limited.\n";
    2386             : }
    2387             : #endif
    2388             : 
    2389             : //===----------------------------------------------------------------------===//
    2390             : // GenericScheduler - Generic implementation of MachineSchedStrategy.
    2391             : //===----------------------------------------------------------------------===//
    2392             : 
    2393             : void GenericSchedulerBase::SchedCandidate::
    2394             : initResourceDelta(const ScheduleDAGMI *DAG,
    2395             :                   const TargetSchedModel *SchedModel) {
    2396             :   if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
    2397             :     return;
    2398             : 
    2399             :   const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
    2400             :   for (TargetSchedModel::ProcResIter
    2401             :          PI = SchedModel->getWriteProcResBegin(SC),
    2402             :          PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
    2403             :     if (PI->ProcResourceIdx == Policy.ReduceResIdx)
    2404             :       ResDelta.CritResources += PI->Cycles;
    2405    15479857 :     if (PI->ProcResourceIdx == Policy.DemandResIdx)
    2406             :       ResDelta.DemandedResources += PI->Cycles;
    2407             :   }
    2408    15479857 : }
    2409             : 
    2410             : /// Compute remaining latency. We need this both to determine whether the
    2411       16993 : /// overall schedule has become latency-limited and whether the instructions
    2412       41698 : /// outside this zone are resource or latency limited.
    2413       16993 : ///
    2414       58691 : /// The "dependent" latency is updated incrementally during scheduling as the
    2415       41698 : /// max height/depth of scheduled nodes minus the cycles since it was
    2416        4093 : /// scheduled:
    2417             : ///   DLat = max(N.depth - (CurrCycle - N.ReadyCycle)) for N in Zone
    2418        6376 : ///
    2419             : /// The "independent" latency is the max ready queue depth:
    2420             : ///   ILat = max N.depth for N in Available|Pending
    2421             : ///
    2422             : /// RemainingLatency is the greater of independent and dependent latency.
    2423             : ///
    2424             : /// These computations are expensive, especially in DAGs with many edges, so
    2425             : /// only do them if necessary.
    2426             : static unsigned computeRemLatency(SchedBoundary &CurrZone) {
    2427             :   unsigned RemLatency = CurrZone.getDependentLatency();
    2428             :   RemLatency = std::max(RemLatency,
    2429             :                         CurrZone.findMaxLatency(CurrZone.Available.elements()));
    2430             :   RemLatency = std::max(RemLatency,
    2431             :                         CurrZone.findMaxLatency(CurrZone.Pending.elements()));
    2432             :   return RemLatency;
    2433             : }
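A self-contained sketch of the max(DLat, ILat) combination (FakeSU is a hypothetical stand-in for SUnit; this is not the LLVM API):

#include <algorithm>
#include <vector>

struct FakeSU { unsigned Depth; }; // hypothetical stand-in for SUnit

// DLat arrives as the zone's dependent latency; ILat is the deepest node
// still sitting in the Available or Pending queues.
unsigned remLatencySketch(unsigned DependentLatency,
                          const std::vector<FakeSU> &Available,
                          const std::vector<FakeSU> &Pending) {
  unsigned Rem = DependentLatency;
  for (const FakeSU &SU : Available)
    Rem = std::max(Rem, SU.Depth);
  for (const FakeSU &SU : Pending)
    Rem = std::max(Rem, SU.Depth);
  return Rem; // max(DLat, ILat)
}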
    2434             : 
    2435             : /// Returns true if the current cycle plus remaining latency is greater than
    2436             : /// the critical path in the scheduling region.
    2437             : bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
    2438      506858 :                                                SchedBoundary &CurrZone,
    2439      506858 :                                                bool ComputeRemLatency,
    2440      506858 :                                                unsigned &RemLatency) const {
    2441      695732 :   // The current cycle is already greater than the critical path, so we are
    2442             :   // already latency limited and don't need to compute the remaining latency.
    2443      506858 :   if (CurrZone.getCurrCycle() > Rem.CriticalPath)
    2444      506858 :     return true;
    2445             : 
    2446             :   // If we haven't scheduled anything yet, then we aren't latency limited.
    2447             :   if (CurrZone.getCurrCycle() == 0)
    2448             :     return false;
    2449      424003 : 
    2450             :   if (ComputeRemLatency)
    2451             :     RemLatency = computeRemLatency(CurrZone);
    2452             : 
    2453             :   return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;
    2454             : }
    2455      424003 : 
    2456             : /// Set the CandPolicy for a scheduling zone given the current resources and
    2457             : /// latencies inside and outside the zone.
    2458             : void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
    2459      378034 :                                      SchedBoundary &CurrZone,
    2460             :                                      SchedBoundary *OtherZone) {
    2461             :   // Apply preemptive heuristics based on the total latency and resources
    2462      250106 :   // inside and outside this zone. Potential stalls should be considered before
    2463       63394 :   // following this policy.
    2464             : 
    2465      250106 :   // Compute the critical resource outside the zone.
    2466             :   unsigned OtherCritIdx = 0;
    2467             :   unsigned OtherCount =
    2468             :     OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
    2469             : 
    2470      665875 :   bool OtherResLimited = false;
    2471             :   unsigned RemLatency = 0;
    2472             :   bool RemLatencyComputed = false;
    2473             :   if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {
    2474             :     RemLatency = computeRemLatency(CurrZone);
    2475             :     RemLatencyComputed = true;
    2476             :     OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
    2477             :                                          OtherCount, RemLatency);
    2478      665875 :   }
    2479             : 
    2480      665875 :   // Schedule aggressively for latency in PostRA mode. We don't check for
    2481             :   // acyclic latency during PostRA, and highly out-of-order processors will
    2482             :   // skip PostRA scheduling.
    2483      665875 :   if (!OtherResLimited &&
    2484             :       (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,
    2485      665875 :                                        RemLatency))) {
    2486      443464 :     Policy.ReduceLatency |= true;
    2487             :     LLVM_DEBUG(dbgs() << "  " << CurrZone.Available.getName()
    2488      443464 :                       << " RemainingLatency " << RemLatency << " + "
    2489             :                       << CurrZone.getCurrCycle() << "c > CritPath "
    2490             :                       << Rem.CriticalPath << "\n");
    2491             :   }
    2492             :   // If the same resource is limiting inside and outside the zone, do nothing.
    2493             :   if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
    2494             :     return;
    2495      665875 : 
    2496      424003 :   LLVM_DEBUG(if (CurrZone.isResourceLimited()) {
    2497             :     dbgs() << "  " << CurrZone.Available.getName() << " ResourceLimited: "
    2498      191550 :            << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
    2499             :   } if (OtherResLimited) dbgs()
    2500             :                  << "  RemainingLimit: "
    2501             :                  << SchedModel->getResourceName(OtherCritIdx) << "\n";
    2502             :              if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()
    2503             :              << "  Latency limited both directions.\n");
    2504             : 
    2505      665875 :   if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
    2506      653310 :     Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
    2507             : 
    2508             :   if (OtherResLimited)
    2509             :     Policy.DemandResIdx = OtherCritIdx;
    2510             : }
    2511             : 
    2512             : #ifndef NDEBUG
    2513             : const char *GenericSchedulerBase::getReasonStr(
    2514             :   GenericSchedulerBase::CandReason Reason) {
    2515             :   switch (Reason) {
    2516             :   case NoCand:         return "NOCAND    ";
    2517       12565 :   case Only1:          return "ONLY1     ";
    2518        1525 :   case PhysRegCopy:    return "PREG-COPY ";
    2519             :   case RegExcess:      return "REG-EXCESS";
    2520       12565 :   case RegCritical:    return "REG-CRIT  ";
    2521        1444 :   case Stall:          return "STALL     ";
    2522             :   case Cluster:        return "CLUSTER   ";
    2523             :   case Weak:           return "WEAK      ";
    2524             :   case RegMax:         return "REG-MAX   ";
    2525             :   case ResourceReduce: return "RES-REDUCE";
    2526             :   case ResourceDemand: return "RES-DEMAND";
    2527             :   case TopDepthReduce: return "TOP-DEPTH ";
    2528             :   case TopPathReduce:  return "TOP-PATH  ";
    2529             :   case BotHeightReduce:return "BOT-HEIGHT";
    2530             :   case BotPathReduce:  return "BOT-PATH  ";
    2531             :   case NextDefUse:     return "DEF-USE   ";
    2532             :   case NodeOrder:      return "ORDER     ";
    2533             :   };
    2534             :   llvm_unreachable("Unknown reason!");
    2535             : }
    2536             : 
    2537             : void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
    2538             :   PressureChange P;
    2539             :   unsigned ResIdx = 0;
    2540             :   unsigned Latency = 0;
    2541             :   switch (Cand.Reason) {
    2542             :   default:
    2543             :     break;
    2544             :   case RegExcess:
    2545             :     P = Cand.RPDelta.Excess;
    2546             :     break;
    2547             :   case RegCritical:
    2548             :     P = Cand.RPDelta.CriticalMax;
    2549             :     break;
    2550             :   case RegMax:
    2551             :     P = Cand.RPDelta.CurrentMax;
    2552             :     break;
    2553             :   case ResourceReduce:
    2554             :     ResIdx = Cand.Policy.ReduceResIdx;
    2555             :     break;
    2556             :   case ResourceDemand:
    2557             :     ResIdx = Cand.Policy.DemandResIdx;
    2558             :     break;
    2559             :   case TopDepthReduce:
    2560             :     Latency = Cand.SU->getDepth();
    2561             :     break;
    2562             :   case TopPathReduce:
    2563             :     Latency = Cand.SU->getHeight();
    2564             :     break;
    2565             :   case BotHeightReduce:
    2566             :     Latency = Cand.SU->getHeight();
    2567             :     break;
    2568             :   case BotPathReduce:
    2569             :     Latency = Cand.SU->getDepth();
    2570             :     break;
    2571             :   }
    2572             :   dbgs() << "  Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
    2573             :   if (P.isValid())
    2574             :     dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
    2575             :            << ":" << P.getUnitInc() << " ";
    2576             :   else
    2577             :     dbgs() << "      ";
    2578             :   if (ResIdx)
    2579             :     dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
    2580             :   else
    2581             :     dbgs() << "         ";
    2582             :   if (Latency)
    2583             :     dbgs() << " " << Latency << " cycles ";
    2584             :   else
    2585             :     dbgs() << "          ";
    2586             :   dbgs() << '\n';
    2587             : }
    2588             : #endif
    2589             : 
    2590             : namespace llvm {
    2591             : /// Return true if this heuristic determines order.
    2592             : bool tryLess(int TryVal, int CandVal,
    2593             :              GenericSchedulerBase::SchedCandidate &TryCand,
    2594             :              GenericSchedulerBase::SchedCandidate &Cand,
    2595             :              GenericSchedulerBase::CandReason Reason) {
    2596             :   if (TryVal < CandVal) {
    2597             :     TryCand.Reason = Reason;
    2598             :     return true;
    2599             :   }
    2600             :   if (TryVal > CandVal) {
    2601             :     if (Cand.Reason > Reason)
    2602             :       Cand.Reason = Reason;
    2603             :     return true;
    2604    66527702 :   }
    2605             :   return false;
    2606             : }
    2607             : 
    2608    66527702 : bool tryGreater(int TryVal, int CandVal,
    2609       51343 :                 GenericSchedulerBase::SchedCandidate &TryCand,
    2610       51343 :                 GenericSchedulerBase::SchedCandidate &Cand,
    2611             :                 GenericSchedulerBase::CandReason Reason) {
    2612    66476359 :   if (TryVal > CandVal) {
    2613      255865 :     TryCand.Reason = Reason;
    2614       58814 :     return true;
    2615      255865 :   }
    2616             :   if (TryVal < CandVal) {
    2617             :     if (Cand.Reason > Reason)
    2618             :       Cand.Reason = Reason;
    2619             :     return true;
    2620    69823791 :   }
    2621             :   return false;
    2622             : }
    2623             : 
    2624    69823791 : bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
    2625      406228 :                 GenericSchedulerBase::SchedCandidate &Cand,
    2626      406228 :                 SchedBoundary &Zone) {
    2627             :   if (Zone.isTop()) {
    2628    69417563 :     if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
    2629      963365 :       if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
    2630      315029 :                   TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
    2631      963365 :         return true;
    2632             :     }
    2633             :     if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
    2634             :                    TryCand, Cand, GenericSchedulerBase::TopPathReduce))
    2635             :       return true;
    2636      989361 :   } else {
    2637             :     if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
    2638             :       if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
    2639      989361 :                   TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
    2640      548746 :         return true;
    2641        6883 :     }
    2642             :     if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
    2643             :                    TryCand, Cand, GenericSchedulerBase::BotPathReduce))
    2644             :       return true;
    2645      293671 :   }
    2646             :   return false;
    2647       88276 : }
    2648             : } // end namespace llvm
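All three helpers share one convention: a true return means the comparison decided the order (recording the winning reason), while false means a tie that falls through to the next heuristic. A hypothetical sketch of that chaining, with CandEx standing in for SchedCandidate and made-up reason values:

// Lower Reason values are stronger, mirroring the CandReason enum order.
struct CandEx { int Reason = 100; };

bool tryLessSketch(int TryVal, int CandVal, CandEx &Try, CandEx &Cand,
                   int Reason) {
  if (TryVal < CandVal) { Try.Reason = Reason; return true; }
  if (TryVal > CandVal) {
    if (Cand.Reason > Reason) Cand.Reason = Reason;
    return true;  // decided in Cand's favor
  }
  return false;   // tie: fall through to the next criterion
}

bool pickOrderSketch(CandEx &Try, CandEx &Cand) {
  if (tryLessSketch(/*TryStall=*/0, /*CandStall=*/1, Try, Cand, /*Stall=*/5))
    return true;  // stalls decided the order
  // ...further criteria (cluster, pressure, latency) would follow here.
  return false;
}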
    2649     1429976 : 
    2650        8014 : static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
    2651             :   LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
    2652             :                     << GenericSchedulerBase::getReasonStr(Reason) << '\n');
    2653             : }
    2654      711950 : 
    2655             : static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
    2656      283496 :   tracePick(Cand.Reason, Cand.AtTop);
    2657             : }
    2658             : 
    2659             : void GenericScheduler::initialize(ScheduleDAGMI *dag) {
    2660             :   assert(dag->hasVRegLiveness() &&
    2661             :          "(PreRA)GenericScheduler needs vreg liveness");
    2662             :   DAG = static_cast<ScheduleDAGMILive*>(dag);
    2663             :   SchedModel = DAG->getSchedModel();
    2664             :   TRI = DAG->TRI;
    2665             : 
    2666             :   Rem.init(DAG, SchedModel);
    2667           0 :   Top.init(DAG, SchedModel, &Rem);
    2668             :   Bot.init(DAG, SchedModel, &Rem);
    2669           0 : 
    2670             :   // Initialize resource counts.
    2671      425361 : 
    2672             :   // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
    2673             :   // are disabled, then these HazardRecs will be disabled.
    2674      425361 :   const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
    2675      425361 :   if (!Top.HazardRec) {
    2676      425361 :     Top.HazardRec =
    2677             :         DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
    2678      425361 :             Itin, DAG);
    2679      425361 :   }
    2680      425361 :   if (!Bot.HazardRec) {
    2681             :     Bot.HazardRec =
    2682             :         DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
    2683             :             Itin, DAG);
    2684             :   }
    2685             :   TopCand.SU = nullptr;
    2686      425361 :   BotCand.SU = nullptr;
    2687      425361 : }
    2688      157109 : 
    2689      157109 : /// Initialize the per-region scheduling policy.
    2690      157109 : void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
    2691             :                                   MachineBasicBlock::iterator End,
    2692      425361 :                                   unsigned NumRegionInstrs) {
    2693      157109 :   const MachineFunction &MF = *Begin->getMF();
    2694      157109 :   const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
    2695      157109 : 
    2696             :   // Avoid setting up the register pressure tracker for small regions to save
    2697      425361 :   // compile time. As a rough heuristic, only track pressure when the number of
    2698      425361 :   // schedulable instructions exceeds half the integer register file.
    2699      425361 :   RegionPolicy.ShouldTrackPressure = true;
    2700             :   for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
    2701             :     MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
    2702      970270 :     if (TLI->isTypeLegal(LegalIntVT)) {
    2703             :       unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
    2704             :         TLI->getRegClassFor(LegalIntVT));
    2705             :       RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
    2706      970270 :     }
    2707             :   }
    2708             : 
    2709             :   // For generic targets, we default to bottom-up: it is simpler, and more of
    2710             :   // the compile-time optimizations have been implemented in that direction.
    2711      970270 :   RegionPolicy.OnlyBottomUp = true;
    2712     3881080 : 
    2713     2910810 :   // Allow the subtarget to override default policy.
    2714     2910810 :   MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
    2715     2730022 : 
    2716     2730022 :   // After subtarget overrides, apply command line options.
    2717     2730022 :   if (!EnableRegPressure)
    2718             :     RegionPolicy.ShouldTrackPressure = false;
    2719             : 
    2720             :   // Check -misched-topdown/bottomup can force or unforce scheduling direction.
    2721             :   // e.g. -misched-bottomup=false allows scheduling in both directions.
    2722             :   assert((!ForceTopDown || !ForceBottomUp) &&
    2723      970270 :          "-misched-topdown incompatible with -misched-bottomup");
    2724             :   if (ForceBottomUp.getNumOccurrences() > 0) {
    2725             :     RegionPolicy.OnlyBottomUp = ForceBottomUp;
    2726      970270 :     if (RegionPolicy.OnlyBottomUp)
    2727             :       RegionPolicy.OnlyTopDown = false;
    2728             :   }
    2729      970270 :   if (ForceTopDown.getNumOccurrences() > 0) {
    2730           0 :     RegionPolicy.OnlyTopDown = ForceTopDown;
    2731             :     if (RegionPolicy.OnlyTopDown)
    2732             :       RegionPolicy.OnlyBottomUp = false;
    2733             :   }
    2734             : }
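The pressure-tracking heuristic above boils down to a single comparison. A worked sketch with an assumed register file:

// With an assumed 32 allocatable integer registers, only regions with more
// than 16 schedulable instructions pay for the register pressure tracker.
bool shouldTrackPressureSketch() {
  unsigned NIntRegs = 32, NumRegionInstrs = 20; // made-up values
  return NumRegionInstrs > (NIntRegs / 2);      // true
}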
    2735             : 
    2736      970270 : void GenericScheduler::dumpPolicy() const {
    2737           0 :   // Cannot completely remove virtual function even in release mode.
    2738           0 : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    2739           0 :   dbgs() << "GenericScheduler RegionPolicy: "
    2740             :          << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
    2741      970270 :          << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
    2742           4 :          << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
    2743           4 :          << "\n";
    2744           4 : #endif
    2745             : }
    2746      970270 : 
    2747             : /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
    2748           0 : /// critical path by more cycles than it takes to drain the instruction buffer.
    2749             : /// We estimate an upper bound on in-flight instructions as:
    2750             : ///
    2751             : /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
    2752             : /// InFlightIterations = AcyclicPath / CyclesPerIteration
    2753             : /// InFlightResources = InFlightIterations * LoopResources
    2754             : ///
    2755             : /// TODO: Check execution resources in addition to IssueCount.
    2756             : void GenericScheduler::checkAcyclicLatency() {
    2757           0 :   if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
    2758             :     return;
    2759             : 
    2760             :   // Scaled number of cycles per loop iteration.
    2761             :   unsigned IterCount =
    2762             :     std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
    2763             :              Rem.RemIssueCount);
    2764             :   // Scaled acyclic critical path.
    2765             :   unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
    2766             :   // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
    2767             :   unsigned InFlightCount =
    2768      389637 :     (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
    2769      389637 :   unsigned BufferLimit =
    2770             :     SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
    2771             : 
    2772             :   Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
    2773             : 
    2774         748 :   LLVM_DEBUG(
    2775         374 :       dbgs() << "IssueCycles="
    2776             :              << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
    2777         374 :              << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
    2778             :              << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount
    2779         374 :              << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
    2780         374 :              << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
    2781             :       if (Rem.IsAcyclicLatencyLimited) dbgs() << "  ACYCLIC LATENCY LIMIT\n");
    2782         374 : }
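Plugging assumed numbers into the formulas above shows how the buffer limit decides the flag (every factor and cycle count here is invented):

#include <algorithm>

bool acyclicLimitSketch() {
  unsigned LatencyFactor = 1, MicroOpFactor = 1;  // assumed model factors
  unsigned CyclicCritPath = 4, CriticalPath = 12; // cycles
  unsigned RemIssueCount = 8;                     // scaled micro-ops
  unsigned MicroOpBufferSize = 16;
  unsigned IterCount = std::max(CyclicCritPath * LatencyFactor, RemIssueCount);
  unsigned AcyclicCount = CriticalPath * LatencyFactor;           // 12
  unsigned InFlight =
      (AcyclicCount * RemIssueCount + IterCount - 1) / IterCount; // 12
  unsigned BufferLimit = MicroOpBufferSize * MicroOpFactor;       // 16
  return InFlight > BufferLimit; // false: the buffer hides the latency
}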
    2783             : 
    2784         374 : void GenericScheduler::registerRoots() {
    2785             :   Rem.CriticalPath = DAG->ExitSU.getDepth();
    2786             : 
    2787             :   // Some roots may not feed into ExitSU. Check all of them in case.
    2788             :   for (const SUnit *SU : Bot.Available) {
    2789             :     if (SU->getDepth() > Rem.CriticalPath)
    2790             :       Rem.CriticalPath = SU->getDepth();
    2791             :   }
    2792             :   LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
    2793             :   if (DumpCriticalPathLength) {
    2794             :     errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
    2795             :   }
    2796      425361 : 
    2797      850722 :   if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
    2798             :     Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
    2799             :     checkAcyclicLatency();
    2800     1269998 :   }
    2801      844637 : }
    2802       65030 : 
    2803             : namespace llvm {
    2804             : bool tryPressure(const PressureChange &TryP,
    2805      425361 :                  const PressureChange &CandP,
    2806           0 :                  GenericSchedulerBase::SchedCandidate &TryCand,
    2807             :                  GenericSchedulerBase::SchedCandidate &Cand,
    2808             :                  GenericSchedulerBase::CandReason Reason,
    2809      425361 :                  const TargetRegisterInfo *TRI,
    2810      389637 :                  const MachineFunction &MF) {
    2811      389637 :   // If one candidate decreases and the other increases, go with it.
    2812             :   // Invalid candidates have UnitInc==0.
    2813      425361 :   if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
    2814             :                  Reason)) {
    2815             :     return true;
    2816    33601048 :   }
    2817             :   // Do not compare the magnitude of pressure changes between top and bottom
    2818             :   // boundary.
    2819             :   if (Cand.AtTop != TryCand.AtTop)
    2820             :     return false;
    2821             : 
    2822             :   // If both candidates affect the same set in the same boundary, go with the
    2823             :   // smallest increase.
    2824             :   unsigned TryPSet = TryP.getPSetOrMax();
    2825    33601048 :   unsigned CandPSet = CandP.getPSetOrMax();
    2826             :   if (TryPSet == CandPSet) {
    2827             :     return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
    2828             :                    Reason);
    2829             :   }
    2830             : 
    2831    33584844 :   int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
    2832             :                                  std::numeric_limits<int>::max();
    2833             : 
    2834             :   int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
    2835             :                                    std::numeric_limits<int>::max();
    2836    33226222 : 
    2837    33226222 :   // If the candidates are decreasing pressure, reverse priority.
    2838    33226222 :   if (TryP.getUnitInc() < 0)
    2839    98680212 :     std::swap(TryRank, CandRank);
    2840    32893404 :   return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
    2841             : }
    2842             : 
    2843      332818 : unsigned getWeakLeft(const SUnit *SU, bool isTop) {
    2844             :   return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
    2845             : }
    2846      332818 : 
    2847             : /// Minimize physical register live ranges. Regalloc wants them adjacent to
    2848             : /// their physreg def/use.
    2849             : ///
    2850      332818 : /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
    2851             : /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
    2852      332818 : /// with the operation that produces or consumes the physreg. We'll do this when
    2853             : /// regalloc has support for parallel copies.
    2854             : int biasPhysRegCopy(const SUnit *SU, bool isTop) {
    2855    22624375 :   const MachineInstr *MI = SU->getInstr();
    2856    22624375 :   if (!MI->isCopy())
    2857             :     return 0;
    2858             : 
    2859             :   unsigned ScheduledOper = isTop ? 1 : 0;
    2860             :   unsigned UnscheduledOper = isTop ? 0 : 1;
    2861             :   // If we have already scheduled the physreg producer/consumer, immediately
    2862             :   // schedule the copy.
    2863             :   if (TargetRegisterInfo::isPhysicalRegister(
    2864             :         MI->getOperand(ScheduledOper).getReg()))
    2865             :     return 1;
    2866    24783226 :   // If the physreg is at the boundary, defer it. Otherwise schedule it
    2867    24783226 :   // immediately to free the dependent. We can hoist the copy later.
    2868    24783226 :   bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
    2869             :   if (TargetRegisterInfo::isPhysicalRegister(
    2870             :         MI->getOperand(UnscheduledOper).getReg()))
    2871     3092773 :     return AtBoundary ? -1 : 1;
    2872     3092773 :   return 0;
    2873             : }
    2874             : } // end namespace llvm
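// To make the operand convention above concrete (hypothetical MIR): in a
// COPY, operand 0 is the def and operand 1 is the use, so scheduling
// bottom-up (isTop == false) the already-scheduled side is the def:
//
//   $eax = COPY %retval   ; physreg def in operand 0: returns 1 once the
//                         ; consumer of $eax is scheduled, pulling the copy
//                         ; up against it
//   %arg = COPY $edi      ; physreg use in operand 1: the copy has no preds,
//                         ; so AtBoundary holds and -1 defers it until it can
//                         ; sit adjacent to the physreg def at region entry
//
// A result of 0 means the copy gets no special bias either way.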
    2875     6185546 : 
    2876     3092773 : void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
    2877             :                                      bool AtTop,
    2878             :                                      const RegPressureTracker &RPTracker,
    2879             :                                      RegPressureTracker &TempTracker) {
    2880     2067869 :   Cand.SU = SU;
    2881     4135738 :   Cand.AtTop = AtTop;
    2882             :   if (DAG->isTrackingPressure()) {
    2883      939715 :     if (AtTop) {
    2884             :       TempTracker.getMaxDownwardPressureDelta(
    2885             :         Cand.SU->getInstr(),
    2886             :         Cand.RPDelta,
    2887             :         DAG->getRegionCriticalPSets(),
    2888    10379769 :         DAG->getRegPressure().MaxSetPressure);
    2889             :     } else {
    2890             :       if (VerifyScheduling) {
    2891             :         TempTracker.getMaxUpwardPressureDelta(
    2892    10379769 :           Cand.SU->getInstr(),
    2893    10379769 :           &DAG->getPressureDiff(Cand.SU),
    2894    10379769 :           Cand.RPDelta,
    2895     9355488 :           DAG->getRegionCriticalPSets(),
    2896       72571 :           DAG->getRegPressure().MaxSetPressure);
    2897       72571 :       } else {
    2898       72571 :         RPTracker.getUpwardPressureDelta(
    2899             :           Cand.SU->getInstr(),
    2900             :           DAG->getPressureDiff(Cand.SU),
    2901             :           Cand.RPDelta,
    2902     9282917 :           DAG->getRegionCriticalPSets(),
    2903          19 :           DAG->getRegPressure().MaxSetPressure);
    2904          19 :       }
    2905          19 :     }
    2906          19 :   }
    2907             :   LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs()
    2908             :              << "  Try  SU(" << Cand.SU->NodeNum << ") "
    2909             :              << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":"
    2910     9282898 :              << Cand.RPDelta.Excess.getUnitInc() << "\n");
    2911     9282898 : }
    2912             : 
    2913     9282898 : /// Apply a set of heuristics to a new candidate. Heuristics are currently
    2914             : /// hierarchical. This may be more efficient than a graduated cost model because
    2915             : /// we don't need to evaluate all aspects of the model for each node in the
    2916             : /// queue. But it's really done to make the heuristics easier to debug and
    2917             : /// statistically analyze.
    2918             : ///
    2919             : /// \param Cand provides the policy and current best candidate.
    2920             : /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
    2921             : /// \param Zone describes the scheduled zone that we are extending, or nullptr
    2922             : ///             if Cand is from a different zone than TryCand.
    2923    10379769 : void GenericScheduler::tryCandidate(SchedCandidate &Cand,
    2924             :                                     SchedCandidate &TryCand,
    2925             :                                     SchedBoundary *Zone) const {
    2926             :   // Initialize the candidate if needed.
    2927             :   if (!Cand.isValid()) {
    2928             :     TryCand.Reason = NodeOrder;
    2929             :     return;
    2930             :   }
    2931             : 
    2932             :   if (tryGreater(biasPhysRegCopy(TryCand.SU, TryCand.AtTop),
    2933             :                  biasPhysRegCopy(Cand.SU, Cand.AtTop),
    2934             :                  TryCand, Cand, PhysRegCopy))
    2935    13874679 :     return;
    2936             : 
    2937             :   // Avoid exceeding the target's limit.
    2938             :   if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
    2939    13874679 :                                                Cand.RPDelta.Excess,
    2940     1483066 :                                                TryCand, Cand, RegExcess, TRI,
    2941     1483066 :                                                DAG->MF))
    2942             :     return;
    2943             : 
    2944    12391613 :   // Avoid increasing the max critical pressure in the scheduled region.
    2945    12391613 :   if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
    2946             :                                                Cand.RPDelta.CriticalMax,
    2947             :                                                TryCand, Cand, RegCritical, TRI,
    2948             :                                                DAG->MF))
    2949             :     return;
    2950    23177916 : 
    2951    11394939 :   // We only compare a subset of features when comparing nodes between
    2952    11394939 :   // Top and Bottom boundary. Some properties are simply incomparable, in many
    2953    11394939 :   // other instances we should only override the other boundary if something
    2954             :   // is a clear good pick on one boundary. Skip heuristics that are more
    2955             :   // "tie-breaking" in nature.
    2956             :   bool SameBoundary = Zone != nullptr;
    2957    23086334 :   if (SameBoundary) {
    2958    11349148 :     // For loops that are acyclic path limited, aggressively schedule for
    2959    11349148 :     // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
    2960    11349148 :     // heuristics to take precedence.
    2961             :     if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
    2962             :         tryLatency(TryCand, Cand, *Zone))
    2963             :       return;
    2964             : 
    2965             :     // Prioritize instructions that read unbuffered resources by stall cycles.
    2966             :     if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
    2967             :                 Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
    2968             :       return;
    2969    11400077 :   }
    2970             : 
    2971             :   // Keep clustered nodes together to encourage downstream peephole
    2972             :   // optimizations which may reduce resource requirements.
    2973    11274438 :   //
    2974         893 :   // This is a best effort to set things up for a post-RA pass. Optimizations
    2975             :   // like generating loads of multiple registers should ideally be done within
    2976             :   // the scheduler pass by combining the loads during DAG postprocessing.
    2977             :   const SUnit *CandNextClusterSU =
    2978    11272922 :     Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
    2979    11272922 :   const SUnit *TryCandNextClusterSU =
    2980             :     TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
    2981             :   if (tryGreater(TryCand.SU == TryCandNextClusterSU,
    2982             :                  Cand.SU == CandNextClusterSU,
    2983             :                  TryCand, Cand, Cluster))
    2984             :     return;
    2985             : 
    2986             :   if (SameBoundary) {
    2987             :     // Weak edges are for clustering and other constraints.
    2988             :     if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
    2989             :                 getWeakLeft(Cand.SU, Cand.AtTop),
    2990    11362948 :                 TryCand, Cand, Weak))
    2991             :       return;
    2992    11362948 :   }
    2993    11362948 : 
    2994    11362948 :   // Avoid increasing the max pressure of the entire region.
    2995             :   if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
    2996             :                                                Cand.RPDelta.CurrentMax,
    2997             :                                                TryCand, Cand, RegMax, TRI,
    2998    11323005 :                                                DAG->MF))
    2999             :     return;
    3000    11196825 : 
    3001    11196825 :   if (SameBoundary) {
    3002             :     // Avoid critical resource consumption and balance the schedule.
    3003             :     TryCand.initResourceDelta(DAG, SchedModel);
    3004             :     if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
    3005             :                 TryCand, Cand, ResourceReduce))
    3006             :       return;
    3007    22100863 :     if (tryGreater(TryCand.ResDelta.DemandedResources,
    3008    10856961 :                    Cand.ResDelta.DemandedResources,
    3009    10856961 :                    TryCand, Cand, ResourceDemand))
    3010    10856961 :       return;
    3011             : 
    3012             :     // Avoid serializing long latency dependence chains.
    3013    11089832 :     // For acyclic path limited loops, latency was already checked above.
    3014             :     if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
    3015    10963652 :         !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
    3016    10963652 :       return;
    3017             : 
    3018             :     // Fall through to original instruction order.
    3019    10963576 :     if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
    3020    10963576 :         || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
    3021             :       TryCand.Reason = NodeOrder;
    3022             :     }
    3023             :   }
    3024             : }
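// A compressed sketch of the decision protocol above, with toy types in
// place of SchedCandidate (illustrative, not the LLVM API): each helper
// returns true as soon as one heuristic can tell the candidates apart,
// recording why the winner won; false means "tie, fall through to the next,
// weaker heuristic", with original source order as the final tie-breaker.
enum ToyReason { ToyNoCand, ToyRegExcess, ToyLatency, ToyNodeOrder };
struct ToyCand { int Excess, Height, Node; ToyReason Why; };

static bool tryLessToy(int TryVal, int CandVal, ToyCand &Try, ToyReason R) {
  if (TryVal < CandVal) { Try.Why = R; return true; }          // Try wins
  if (TryVal > CandVal) { Try.Why = ToyNoCand; return true; }  // Cand holds
  return false;                                                // tie
}

static void tryCandidateToy(ToyCand &Best, ToyCand &Try) {
  if (tryLessToy(Try.Excess, Best.Excess, Try, ToyRegExcess))
    return;
  if (tryLessToy(Try.Height, Best.Height, Try, ToyLatency))
    return;
  if (Try.Node < Best.Node)               // fall back to instruction order
    Try.Why = ToyNodeOrder;
}
// The caller accepts Try only when Try.Why != ToyNoCand, just as
// pickNodeFromQueue() below accepts TryCand once its Reason is set.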
    3025             : 
    3026    10961289 : /// Pick the best candidate from the queue.
    3027    11859611 : ///
    3028             : /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
    3029             : /// DAG building. To adjust for the current scheduling location we need to
    3030             : /// maintain the number of vreg uses remaining to be top-scheduled.
    3031      329875 : void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
    3032    10815186 :                                          const CandPolicy &ZonePolicy,
    3033     2505609 :                                          const RegPressureTracker &RPTracker,
    3034             :                                          SchedCandidate &Cand) {
    3035             :   // getMaxPressureDelta temporarily modifies the tracker.
    3036             :   RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
    3037             : 
    3038             :   ReadyQueue &Q = Zone.Available;
    3039             :   for (SUnit *SU : Q) {
    3040             : 
    3041             :     SchedCandidate TryCand(ZonePolicy);
    3042             :     initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
    3043     1168450 :     // Pass SchedBoundary only when comparing nodes from the same boundary.
    3044             :     SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
    3045             :     tryCandidate(Cand, TryCand, ZoneArg);
    3046             :     if (TryCand.Reason != NoCand) {
    3047             :       // Initialize resource delta if needed in case future heuristics query it.
    3048             :       if (TryCand.ResDelta == SchedResourceDelta())
    3049             :         TryCand.initResourceDelta(DAG, SchedModel);
    3050             :       Cand.setBest(TryCand);
    3051    11548219 :       LLVM_DEBUG(traceCandidate(Cand));
    3052             :     }
    3053             :   }
    3054    10379769 : }
    3055             : 
    3056    10379769 : /// Pick the best candidate node from either the top or bottom queue.
    3057    10379769 : SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
    3058    10379769 :   // Schedule as far as possible in the direction of no choice. This is most
    3059             :   // efficient, but also provides the best heuristics for CriticalPSets.
    3060     3231962 :   if (SUnit *SU = Bot.pickOnlyChoice()) {
    3061     3231166 :     IsTopNode = false;
    3062             :     tracePick(Only1, false);
    3063             :     return SU;
    3064             :   }
    3065             :   if (SUnit *SU = Top.pickOnlyChoice()) {
    3066     1168450 :     IsTopNode = true;
    3067             :     tracePick(Only1, true);
    3068             :     return SU;
    3069      143498 :   }
    3070             :   // Set the bottom-up policy based on the state of the current bottom zone and
    3071             :   // the instructions outside the zone, including the top zone.
    3072      143498 :   CandPolicy BotPolicy;
    3073       90790 :   setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
    3074             :   // Set the top-down policy based on the state of the current top zone and
    3075       90790 :   // the instructions outside the zone, including the bottom zone.
    3076             :   CandPolicy TopPolicy;
    3077       52708 :   setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
    3078        3618 : 
    3079             :   // See if BotCand is still valid (because we previously scheduled from Top).
    3080        3618 :   LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
    3081             :   if (!BotCand.isValid() || BotCand.SU->isScheduled ||
    3082             :       BotCand.Policy != BotPolicy) {
    3083             :     BotCand.reset(CandPolicy());
    3084       49090 :     pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
    3085       49090 :     assert(BotCand.Reason != NoCand && "failed to find the first candidate");
    3086             :   } else {
    3087             :     LLVM_DEBUG(traceCandidate(BotCand));
    3088       49090 : #ifndef NDEBUG
    3089       49090 :     if (VerifyScheduling) {
    3090             :       SchedCandidate TCand;
    3091             :       TCand.reset(CandPolicy());
    3092             :       pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
    3093       49090 :       assert(TCand.SU == BotCand.SU &&
    3094             :              "Last pick result should correspond to re-picking right now");
    3095       37342 :     }
    3096       74684 : #endif
    3097             :   }
    3098             : 
    3099             :   // Check if the top Q has a better candidate.
    3100             :   LLVM_DEBUG(dbgs() << "Picking from Top:\n");
    3101             :   if (!TopCand.isValid() || TopCand.SU->isScheduled ||
    3102             :       TopCand.Policy != TopPolicy) {
    3103             :     TopCand.reset(CandPolicy());
    3104             :     pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
    3105             :     assert(TopCand.Reason != NoCand && "failed to find the first candidate");
    3106             :   } else {
    3107             :     LLVM_DEBUG(traceCandidate(TopCand));
    3108             : #ifndef NDEBUG
    3109             :     if (VerifyScheduling) {
    3110             :       SchedCandidate TCand;
    3111             :       TCand.reset(CandPolicy());
    3112             :       pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
    3113       49090 :       assert(TCand.SU == TopCand.SU &&
    3114             :              "Last pick result should correspond to re-picking right now");
    3115       31336 :     }
    3116       62672 : #endif
    3117             :   }
    3118             : 
    3119             :   // Pick best from BotCand and TopCand.
    3120             :   assert(BotCand.isValid());
    3121             :   assert(TopCand.isValid());
    3122             :   SchedCandidate Cand = BotCand;
    3123             :   TopCand.Reason = NoCand;
    3124             :   tryCandidate(Cand, TopCand, nullptr);
    3125             :   if (TopCand.Reason != NoCand) {
    3126             :     Cand.setBest(TopCand);
    3127             :     LLVM_DEBUG(traceCandidate(Cand));
    3128             :   }
    3129             : 
    3130             :   IsTopNode = Cand.AtTop;
    3131             :   tracePick(Cand);
    3132             :   return Cand.SU;
    3133             : }
    3134       49090 : 
    3135       49090 : /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
    3136       49090 : SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
    3137       49090 :   if (DAG->top() == DAG->bottom()) {
    3138             :     assert(Top.Available.empty() && Top.Pending.empty() &&
    3139             :            Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    3140             :     return nullptr;
    3141             :   }
    3142       49090 :   SUnit *SU;
    3143             :   do {
    3144       49090 :     if (RegionPolicy.OnlyTopDown) {
    3145             :       SU = Top.pickOnlyChoice();
    3146             :       if (!SU) {
    3147             :         CandPolicy NoPolicy;
    3148     2568977 :         TopCand.reset(NoPolicy);
    3149     2568977 :         pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
    3150             :         assert(TopCand.Reason != NoCand && "failed to find a candidate");
    3151             :         tracePick(TopCand);
    3152             :         SU = TopCand.SU;
    3153             :       }
    3154             :       IsTopNode = true;
    3155             :     } else if (RegionPolicy.OnlyBottomUp) {
    3156     2165070 :       SU = Bot.pickOnlyChoice();
    3157          54 :       if (!SU) {
    3158          54 :         CandPolicy NoPolicy;
    3159          45 :         BotCand.reset(NoPolicy);
    3160          45 :         pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
    3161          90 :         assert(BotCand.Reason != NoCand && "failed to find a candidate");
    3162             :         tracePick(BotCand);
    3163             :         SU = BotCand.SU;
    3164          45 :       }
    3165             :       IsTopNode = false;
    3166          54 :     } else {
    3167     2165016 :       SU = pickNodeBidirectional(IsTopNode);
    3168     2021518 :     }
    3169     2021518 :   } while (SU->isScheduled);
    3170     1099727 : 
    3171     1099727 :   if (SU->isTopReady())
    3172     2199454 :     Top.removeReady(SU);
    3173             :   if (SU->isBottomReady())
    3174             :     Bot.removeReady(SU);
    3175     1099727 : 
    3176             :   LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
    3177     2021518 :                     << *SU->getInstr());
    3178             :   return SU;
    3179      143498 : }
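// RegionPolicy.OnlyTopDown/OnlyBottomUp normally come from the target's
// region policy, but the direction can be forced for debugging with the
// ForceTopDown/ForceBottomUp options (the same flags named in the assert in
// createInstructionShuffler() below), e.g.:
//
//   llc -enable-misched -misched-topdown ...    (pure top-down picking)
//   llc -enable-misched -misched-bottomup ...   (pure bottom-up picking)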
    3180             : 
    3181     2165070 : void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
    3182             :   MachineBasicBlock::iterator InsertPos = SU->getInstr();
    3183     2165070 :   if (!isTop)
    3184     1053517 :     ++InsertPos;
    3185     2165070 :   SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
    3186     2152743 : 
    3187             :   // Find already scheduled copies with a single physreg dependence and move
    3188             :   // them just above the scheduled instruction.
    3189             :   for (SDep &Dep : Deps) {
    3190             :     if (Dep.getKind() != SDep::Data || !TRI->isPhysicalRegister(Dep.getReg()))
    3191             :       continue;
    3192             :     SUnit *DepSU = Dep.getSUnit();
    3193      200566 :     if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
    3194      200566 :       continue;
    3195      200566 :     MachineInstr *Copy = DepSU->getInstr();
    3196             :     if (!Copy->isCopy())
    3197      200566 :       continue;
    3198             :     LLVM_DEBUG(dbgs() << "  Rescheduling physreg copy ";
    3199             :                DAG->dumpNode(*Dep.getSUnit()));
    3200             :     DAG->moveInstruction(Copy, InsertPos);
    3201      711328 :   }
    3202      510762 : }
    3203             : 
    3204             : /// Update the scheduler's state after scheduling a node. This is the same node
    3205      144882 : /// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
    3206             : /// update its state based on the current cycle before MachineSchedStrategy
    3207       10936 : /// does.
    3208       10936 : ///
    3209             : /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
    3210             : /// them here. See comments in biasPhysRegCopy.
    3211             : void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
    3212        5046 :   if (IsTopNode) {
    3213             :     SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
    3214      200566 :     Top.bumpNode(SU);
    3215             :     if (SU->hasPhysRegUses)
    3216             :       reschedulePhysRegCopies(SU, true);
    3217             :   } else {
    3218             :     SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
    3219             :     Bot.bumpNode(SU);
    3220             :     if (SU->hasPhysRegDefs)
    3221             :       reschedulePhysRegCopies(SU, false);
    3222             :   }
    3223     2506506 : }
    3224     2506506 : 
    3225       95939 : /// Create the standard converging machine scheduler. This will be used as the
    3226       95939 : /// default scheduler if the target does not set a default.
    3227       95939 : ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
    3228       63162 :   ScheduleDAGMILive *DAG =
    3229             :       new ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C));
    3230     2410567 :   // Register DAG post-processors.
    3231     2410567 :   //
    3232     2410567 :   // FIXME: extend the mutation API to allow earlier mutations to instantiate
    3233      137404 :   // data and pass it to later mutations. Have a single mutation that gathers
    3234             :   // the interesting nodes in one pass.
    3235     2506506 :   DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
    3236             :   return DAG;
    3237             : }
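// Targets typically start from this default and layer extra mutations on top
// in their TargetPassConfig::createMachineScheduler() override. A sketch of
// that pattern (MyTargetPassConfig is hypothetical; the load/store cluster
// mutations are declared in MachineScheduler.h):
//
//   ScheduleDAGInstrs *
//   MyTargetPassConfig::createMachineScheduler(MachineSchedContext *C) const {
//     ScheduleDAGMILive *DAG = createGenericSchedLive(C);
//     DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
//     DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
//     return DAG;
//   }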
    3238             : 
    3239      142003 : static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
    3240             :   return createGenericSchedLive(C);
    3241      142003 : }
    3242             : 
    3243             : static MachineSchedRegistry
    3244             : GenericSchedRegistry("converge", "Standard converging scheduler.",
    3245             :                      createConvergingSched);
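// Each MachineSchedRegistry entry becomes a value of the -misched= option,
// so this default strategy can also be requested explicitly:
//
//   llc -enable-misched -misched=converge ...
//
// An out-of-tree strategy hooks in the same way (MyStrategy and
// createMySched are hypothetical):
//
//   static ScheduleDAGInstrs *createMySched(MachineSchedContext *C) {
//     return new ScheduleDAGMILive(C, llvm::make_unique<MyStrategy>(C));
//   }
//   static MachineSchedRegistry
//   MySchedRegistry("my-sched", "Example custom strategy.", createMySched);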
    3246             : 
    3247      142003 : //===----------------------------------------------------------------------===//
    3248      142003 : // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
    3249             : //===----------------------------------------------------------------------===//
    3250             : 
    3251           0 : void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
    3252           0 :   DAG = Dag;
    3253             :   SchedModel = DAG->getSchedModel();
    3254             :   TRI = DAG->TRI;
    3255             : 
    3256             :   Rem.init(DAG, SchedModel);
    3257             :   Top.init(DAG, SchedModel, &Rem);
    3258             :   BotRoots.clear();
    3259             : 
    3260             :   // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
    3261             :   // or are disabled, then these HazardRecs will be disabled.
    3262             :   const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
    3263       17716 :   if (!Top.HazardRec) {
    3264       17716 :     Top.HazardRec =
    3265       17716 :         DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
    3266       17716 :             Itin, DAG);
    3267             :   }
    3268       17716 : }
    3269       17716 : 
    3270             : void PostGenericScheduler::registerRoots() {
    3271             :   Rem.CriticalPath = DAG->ExitSU.getDepth();
    3272             : 
    3273             :   // Some roots may not feed into ExitSU. Check all of them in case.
    3274       17716 :   for (const SUnit *SU : BotRoots) {
    3275       17716 :     if (SU->getDepth() > Rem.CriticalPath)
    3276       16195 :       Rem.CriticalPath = SU->getDepth();
    3277       16195 :   }
    3278             :   LLVM_DEBUG(dbgs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << '\n');
    3279             :   if (DumpCriticalPathLength) {
    3280       17716 :     errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
    3281             :   }
    3282       17716 : }
    3283       35432 : 
    3284             : /// Apply a set of heuristics to a new candidate for PostRA scheduling.
    3285             : ///
    3286       44090 : /// \param Cand provides the policy and current best candidate.
    3287       26374 : /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
    3288        4120 : void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
    3289             :                                         SchedCandidate &TryCand) {
    3290             :   // Initialize the candidate if needed.
    3291       17716 :   if (!Cand.isValid()) {
    3292           0 :     TryCand.Reason = NodeOrder;
    3293             :     return;
    3294       17716 :   }
    3295             : 
    3296             :   // Prioritize instructions that read unbuffered resources by stall cycles.
    3297             :   if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
    3298             :               Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
    3299             :     return;
    3300      120056 : 
    3301             :   // Keep clustered nodes together.
    3302             :   if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
    3303      120056 :                  Cand.SU == DAG->getNextClusterSucc(),
    3304       26961 :                  TryCand, Cand, Cluster))
    3305       26961 :     return;
    3306             : 
    3307             :   // Avoid critical resource consumption and balance the schedule.
    3308             :   if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
    3309       93095 :               TryCand, Cand, ResourceReduce))
    3310       93095 :     return;
    3311             :   if (tryGreater(TryCand.ResDelta.DemandedResources,
    3312             :                  Cand.ResDelta.DemandedResources,
    3313             :                  TryCand, Cand, ResourceDemand))
    3314       93070 :     return;
    3315       93070 : 
    3316             :   // Avoid serializing long latency dependence chains.
    3317             :   if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
    3318             :     return;
    3319             :   }
    3320       92907 : 
    3321             :   // Fall through to original instruction order.
    3322             :   if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
    3323       92492 :     TryCand.Reason = NodeOrder;
    3324       92492 : }
    3325             : 
    3326             : void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
    3327             :   ReadyQueue &Q = Top.Available;
    3328             :   for (SUnit *SU : Q) {
    3329       92492 :     SchedCandidate TryCand(Cand.Policy);
    3330             :     TryCand.SU = SU;
    3331             :     TryCand.AtTop = true;
    3332             :     TryCand.initResourceDelta(DAG, SchedModel);
    3333             :     tryCandidate(Cand, TryCand);
    3334       60747 :     if (TryCand.Reason != NoCand) {
    3335       24251 :       Cand.setBest(TryCand);
    3336             :       LLVM_DEBUG(traceCandidate(Cand));
    3337             :     }
    3338       26961 :   }
    3339             : }
    3340      147017 : 
    3341             : /// Pick the next node to schedule.
    3342      120056 : SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
    3343      120056 :   if (DAG->top() == DAG->bottom()) {
    3344      120056 :     assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
    3345      120056 :     return nullptr;
    3346      120056 :   }
    3347             :   SUnit *SU;
    3348             :   do {
    3349             :     SU = Top.pickOnlyChoice();
    3350             :     if (SU) {
    3351       26961 :       tracePick(Only1, true);
    3352             :     } else {
    3353             :       CandPolicy NoPolicy;
    3354      103062 :       SchedCandidate TopCand(NoPolicy);
    3355      103062 :       // Set the top-down policy based on the state of the current top zone and
    3356             :       // the instructions outside the zone, including the bottom zone.
    3357             :       setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
    3358             :       pickNodeFromQueue(TopCand);
    3359             :       assert(TopCand.Reason != NoCand && "failed to find a candidate");
    3360             :       tracePick(TopCand);
    3361       85346 :       SU = TopCand.SU;
    3362       85346 :     }
    3363             :   } while (SU->isScheduled);
    3364             : 
    3365             :   IsTopNode = true;
    3366             :   Top.removeReady(SU);
    3367             : 
    3368             :   LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
    3369       26961 :                     << *SU->getInstr());
    3370       26961 :   return SU;
    3371             : }
    3372             : 
    3373       26961 : /// Called after ScheduleDAGMI has scheduled an instruction and updated
    3374             : /// scheduled/remaining flags in the DAG nodes.
    3375       85346 : void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
    3376             :   SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
    3377       85346 :   Top.bumpNode(SU);
    3378       85346 : }
    3379             : 
    3380             : ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
    3381             :   return new ScheduleDAGMI(C, llvm::make_unique<PostGenericScheduler>(C),
    3382       85346 :                            /*RemoveKillFlags=*/true);
    3383             : }
    3384             : 
    3385             : //===----------------------------------------------------------------------===//
    3386             : // ILP Scheduler. Currently for experimental analysis of heuristics.
    3387       85346 : //===----------------------------------------------------------------------===//
    3388       85346 : 
    3389       85346 : namespace {
    3390       85346 : 
    3391             : /// Order nodes by the ILP metric.
    3392       19456 : struct ILPOrder {
    3393             :   const SchedDFSResult *DFSResult = nullptr;
    3394       19456 :   const BitVector *ScheduledTrees = nullptr;
    3395             :   bool MaximizeILP;
    3396             : 
    3397             :   ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
    3398             : 
    3399             :   /// Apply a less-than relation on node priority.
    3400             :   ///
    3401             :   /// (Return true if A comes after B in the Q.)
    3402             :   bool operator()(const SUnit *A, const SUnit *B) const {
    3403             :     unsigned SchedTreeA = DFSResult->getSubtreeID(A);
    3404             :     unsigned SchedTreeB = DFSResult->getSubtreeID(B);
    3405             :     if (SchedTreeA != SchedTreeB) {
    3406             :       // Unscheduled trees have lower priority.
    3407             :       if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
    3408             :         return ScheduledTrees->test(SchedTreeB);
    3409           0 : 
    3410             :       // Trees with shallower connections have lower priority.
    3411             :       if (DFSResult->getSubtreeLevel(SchedTreeA)
    3412             :           != DFSResult->getSubtreeLevel(SchedTreeB)) {
    3413             :         return DFSResult->getSubtreeLevel(SchedTreeA)
    3414         300 :           < DFSResult->getSubtreeLevel(SchedTreeB);
    3415         300 :       }
    3416             :     }
    3417         300 :     if (MaximizeILP)
    3418             :       return DFSResult->getILP(A) < DFSResult->getILP(B);
    3419         318 :     else
    3420             :       return DFSResult->getILP(A) > DFSResult->getILP(B);
    3421             :   }
    3422             : };
    3423          58 : 
    3424             : /// Schedule based on the ILP metric.
    3425             : class ILPScheduler : public MachineSchedStrategy {
    3426           0 :   ScheduleDAGMILive *DAG = nullptr;
    3427             :   ILPOrder Cmp;
    3428             : 
    3429         199 :   std::vector<SUnit*> ReadyQ;
    3430         116 : 
    3431             : public:
    3432          83 :   ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
    3433             : 
    3434             :   void initialize(ScheduleDAGMI *dag) override {
    3435             :     assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
    3436             :     DAG = static_cast<ScheduleDAGMILive*>(dag);
    3437             :     DAG->computeDFSResult();
    3438             :     Cmp.DFSResult = DAG->getDFSResult();
    3439             :     Cmp.ScheduledTrees = &DAG->getScheduledTrees();
    3440             :     ReadyQ.clear();
    3441             :   }
    3442             : 
    3443             :   void registerRoots() override {
    3444           0 :     // Restore the heap in ReadyQ with the updated DFS results.
    3445             :     std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
    3446          10 :   }
    3447             : 
    3448          10 :   /// Implement MachineSchedStrategy interface.
    3449          10 :   /// -----------------------------------------
    3450          10 : 
    3451          10 :   /// Callback to select the highest priority node from the ready Q.
    3452             :   SUnit *pickNode(bool &IsTopNode) override {
    3453          10 :     if (ReadyQ.empty()) return nullptr;
    3454             :     std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
    3455          10 :     SUnit *SU = ReadyQ.back();
    3456             :     ReadyQ.pop_back();
    3457          10 :     IsTopNode = false;
    3458          10 :     LLVM_DEBUG(dbgs() << "Pick node "
    3459             :                       << "SU(" << SU->NodeNum << ") "
    3460             :                       << " ILP: " << DAG->getDFSResult()->getILP(SU)
    3461             :                       << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)
    3462             :                       << " @"
    3463             :                       << DAG->getDFSResult()->getSubtreeLevel(
    3464         186 :                              DAG->getDFSResult()->getSubtreeID(SU))
    3465         186 :                       << '\n'
    3466         176 :                       << "Scheduling " << *SU->getInstr());
    3467         176 :     return SU;
    3468             :   }
    3469         176 : 
    3470             :   /// Scheduler callback to notify that a new subtree is scheduled.
    3471             :   void scheduleTree(unsigned SubtreeID) override {
    3472             :     std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
    3473             :   }
    3474             : 
    3475             :   /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
    3476             :   /// DFSResults, and resort the priority Q.
    3477             :   void schedNode(SUnit *SU, bool IsTopNode) override {
    3478             :     assert(!IsTopNode && "SchedDFSResult needs bottom-up");
    3479         176 :   }
    3480             : 
    3481             :   void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
    3482             : 
    3483          38 :   void releaseBottomNode(SUnit *SU) override {
    3484          38 :     ReadyQ.push_back(SU);
    3485          38 :     std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
    3486             :   }
    3487             : };
    3488             : 
    3489         176 : } // end anonymous namespace
    3490             : 
    3491         176 : static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
    3492             :   return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(true));
    3493          64 : }
    3494             : static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
    3495         176 :   return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(false));
    3496         176 : }
    3497         176 : 
    3498         176 : static MachineSchedRegistry ILPMaxRegistry(
    3499             :   "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
    3500             : static MachineSchedRegistry ILPMinRegistry(
    3501             :   "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
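// These registrations expose the experimental orderings through the same
// -misched= option as the other strategies:
//
//   llc -enable-misched -misched=ilpmax ...   (schedule for maximum ILP)
//   llc -enable-misched -misched=ilpmin ...   (inverted, for comparison runs)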
    3502             : 
    3503           6 : //===----------------------------------------------------------------------===//
    3504          12 : // Machine Instruction Shuffler for Correctness Testing
    3505             : //===----------------------------------------------------------------------===//
    3506           2 : 
    3507           4 : #ifndef NDEBUG
    3508             : namespace {
    3509             : 
    3510             : /// Apply a less-than relation on the node order, which corresponds to the
    3511             : /// instruction order prior to scheduling. IsReverse implements greater-than.
    3512             : template<bool IsReverse>
    3513             : struct SUnitOrder {
    3514             :   bool operator()(SUnit *A, SUnit *B) const {
    3515             :     if (IsReverse)
    3516             :       return A->NodeNum > B->NodeNum;
    3517             :     else
    3518             :       return A->NodeNum < B->NodeNum;
    3519             :   }
    3520             : };
    3521             : 
    3522             : /// Reorder instructions as much as possible.
    3523             : class InstructionShuffler : public MachineSchedStrategy {
    3524             :   bool IsAlternating;
    3525             :   bool IsTopDown;
    3526             : 
    3527             :   // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
    3528             :   // gives nodes with a higher number higher priority, causing the latest
    3529             :   // instructions to be scheduled first.
    3530             :   PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
    3531             :     TopQ;
    3532             : 
    3533             :   // When scheduling bottom-up, use greater-than as the queue priority.
    3534             :   PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
    3535             :     BottomQ;
    3536             : 
    3537             : public:
    3538             :   InstructionShuffler(bool alternate, bool topdown)
    3539             :     : IsAlternating(alternate), IsTopDown(topdown) {}
    3540             : 
    3541             :   void initialize(ScheduleDAGMI*) override {
    3542             :     TopQ.clear();
    3543             :     BottomQ.clear();
    3544             :   }
    3545             : 
    3546             :   /// Implement MachineSchedStrategy interface.
    3547             :   /// -----------------------------------------
    3548             : 
    3549             :   SUnit *pickNode(bool &IsTopNode) override {
    3550             :     SUnit *SU;
    3551             :     if (IsTopDown) {
    3552             :       do {
    3553             :         if (TopQ.empty()) return nullptr;
    3554             :         SU = TopQ.top();
    3555             :         TopQ.pop();
    3556             :       } while (SU->isScheduled);
    3557             :       IsTopNode = true;
    3558             :     } else {
    3559             :       do {
    3560             :         if (BottomQ.empty()) return nullptr;
    3561             :         SU = BottomQ.top();
    3562             :         BottomQ.pop();
    3563             :       } while (SU->isScheduled);
    3564             :       IsTopNode = false;
    3565             :     }
    3566             :     if (IsAlternating)
    3567             :       IsTopDown = !IsTopDown;
    3568             :     return SU;
    3569             :   }
    3570             : 
    3571             :   void schedNode(SUnit *SU, bool IsTopNode) override {}
    3572             : 
    3573             :   void releaseTopNode(SUnit *SU) override {
    3574             :     TopQ.push(SU);
    3575             :   }
    3576             :   void releaseBottomNode(SUnit *SU) override {
    3577             :     BottomQ.push(SU);
    3578             :   }
    3579             : };
    3580             : 
    3581             : } // end anonymous namespace
    3582             : 
    3583             : static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
    3584             :   bool Alternate = !ForceTopDown && !ForceBottomUp;
    3585             :   bool TopDown = !ForceBottomUp;
    3586             :   assert((TopDown || !ForceTopDown) &&
    3587             :          "-misched-topdown incompatible with -misched-bottomup");
    3588             :   return new ScheduleDAGMILive(
    3589             :       C, llvm::make_unique<InstructionShuffler>(Alternate, TopDown));
    3590             : }
    3591             : 
    3592             : static MachineSchedRegistry ShufflerRegistry(
    3593             :   "shuffle", "Shuffle machine instructions alternating directions",
    3594             :   createInstructionShuffler);
    3595             : #endif // !NDEBUG
    3596             : 
    3597             : //===----------------------------------------------------------------------===//
    3598             : // GraphWriter support for ScheduleDAGMILive.
    3599             : //===----------------------------------------------------------------------===//
    3600             : 
    3601             : #ifndef NDEBUG
    3602             : namespace llvm {
    3603             : 
    3604             : template<> struct GraphTraits<
    3605             :   ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
    3606             : 
    3607             : template<>
    3608             : struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
    3609             :   DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
    3610             : 
    3611             :   static std::string getGraphName(const ScheduleDAG *G) {
    3612             :     return G->MF.getName();
    3613             :   }
    3614             : 
    3615             :   static bool renderGraphFromBottomUp() {
    3616             :     return true;
    3617             :   }
    3618             : 
    3619             :   static bool isNodeHidden(const SUnit *Node) {
    3620             :     if (ViewMISchedCutoff == 0)
    3621             :       return false;
    3622             :     return (Node->Preds.size() > ViewMISchedCutoff
    3623             :          || Node->Succs.size() > ViewMISchedCutoff);
    3624             :   }
    3625             : 
    3626             :   /// If you want to override the dot attributes printed for a particular
    3627             :   /// edge, override this method.
    3628             :   static std::string getEdgeAttributes(const SUnit *Node,
    3629             :                                        SUnitIterator EI,
    3630             :                                        const ScheduleDAG *Graph) {
    3631             :     if (EI.isArtificialDep())
    3632             :       return "color=cyan,style=dashed";
    3633             :     if (EI.isCtrlDep())
    3634             :       return "color=blue,style=dashed";
    3635             :     return "";
    3636             :   }
    3637             : 
    3638             :   static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
    3639             :     std::string Str;
    3640             :     raw_string_ostream SS(Str);
    3641             :     const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
    3642             :     const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
    3643             :       static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
    3644             :     SS << "SU:" << SU->NodeNum;
    3645             :     if (DFS)
    3646             :       SS << " I:" << DFS->getNumInstrs(SU);
    3647             :     return SS.str();
    3648             :   }
    3649             : 
    3650             :   static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
    3651             :     return G->getGraphNodeLabel(SU);
    3652             :   }
    3653             : 
    3654             :   static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
    3655             :     std::string Str("shape=Mrecord");
    3656             :     const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
    3657             :     const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
    3658             :       static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
    3659             :     if (DFS) {
    3660             :       Str += ",style=filled,fillcolor=\"#";
    3661             :       Str += DOT::getColorString(DFS->getSubtreeID(N));
    3662             :       Str += '"';
    3663             :     }
    3664             :     return Str;
    3665             :   }
    3666             : };
    3667             : 
    3668             : } // end namespace llvm
    3669             : #endif // NDEBUG
    3670             : 
    3671             : /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
    3672             : /// rendered using 'dot'.
    3673             : void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
    3674             : #ifndef NDEBUG
    3675             :   ViewGraph(this, Name, false, Title);
    3676             : #else
    3677             :   errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
    3678             :          << "systems with Graphviz or gv!\n";
    3679             : #endif  // NDEBUG
    3680             : }
    3681             : 
    3682             : /// Out-of-line implementation with no arguments is handy for gdb.
    3683             : void ScheduleDAGMI::viewGraph() {
    3684             :   viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
    3685           0 : }
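// The zero-argument overload above is intended for interactive use: with a
// debug build stopped inside the scheduler, something like
//
//   (gdb) call this->viewGraph()
//
// pops up the current region via Graphviz (a usage sketch; the exact
// expression depends on the frame the debugger is stopped in).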

Generated by: LCOV version 1.13